diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000000..fdee6325d0 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,21 @@ +# This file contains file patterns that trigger automatic +# code review requests from users that are owners of these files +# Order matters, the last match has the highest precedence + +# library folders +lib/colvars/* @giacomofiorin +lib/compress/* @akohlmey +lib/kokkos/* @stanmoore1 +lib/molfile/* @akohlmey +lib/qmmm/* @akohlmey +lib/vtk/* @rbberger + +# packages +src/KOKKOS @stanmoore1 +src/USER-CGSDK @akohlmey +src/USER-COLVARS @giacomofiorin +src/USER-OMP @akohlmey +src/USER-QMMM @akohlmey + +# tools +tools/msi2lmp/* @akohlmey diff --git a/.gitignore b/.gitignore index be3df8b163..3b2a1add67 100644 --- a/.gitignore +++ b/.gitignore @@ -33,10 +33,11 @@ log.cite .Trashes ehthumbs.db Thumbs.db -test -test_meam -srcBACKUP -model -doc/old -doc/html +#cmake +/build* +/CMakeCache.txt +/CMakeFiles/ +/Makefile +/cmake_install.cmake +/lmp diff --git a/LICENSE b/LICENSE index a0c2723a0c..f9489c8cf8 100644 --- a/LICENSE +++ b/LICENSE @@ -3,7 +3,7 @@ GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. -59 Temple Place - Suite 330, Boston, MA 02111-1307, USA +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. diff --git a/bench/FERMI/README b/bench/FERMI/README index db3f527bdc..b66e560775 100644 --- a/bench/FERMI/README +++ b/bench/FERMI/README @@ -1,55 +1,21 @@ These are input scripts used to run versions of several of the -benchmarks in the top-level bench directory using the GPU and -USER-CUDA accelerator packages. 
The results of running these scripts -on two different machines (a desktop with 2 Tesla GPUs and the ORNL -Titan supercomputer) are shown on the "GPU (Fermi)" section of the -Benchmark page of the LAMMPS WWW site: lammps.sandia.gov/bench. +benchmarks in the top-level bench directory using the GPU accelerator +package. The results of running these scripts on two different machines +(a desktop with 2 Tesla GPUs and the ORNL Titan supercomputer) are shown +on the "GPU (Fermi)" section of the Benchmark page of the LAMMPS WWW +site: lammps.sandia.gov/bench. Examples are shown below of how to run these scripts. This assumes -you have built 3 executables with both the GPU and USER-CUDA packages +you have built 3 executables with the GPU package installed, e.g. lmp_linux_single lmp_linux_mixed lmp_linux_double -The precision (single, mixed, double) refers to the GPU and USER-CUDA -package precision. See the README files in the lib/gpu and lib/cuda -directories for instructions on how to build the packages with -different precisions. The GPU and USER-CUDA sub-sections of the -doc/Section_accelerate.html file also describes this process. 
- -Make.py -d ~/lammps -j 16 -p #all orig -m linux -o cpu -a exe -Make.py -d ~/lammps -j 16 -p #all opt orig -m linux -o opt -a exe -Make.py -d ~/lammps -j 16 -p #all omp orig -m linux -o omp -a exe -Make.py -d ~/lammps -j 16 -p #all gpu orig -m linux \ - -gpu mode=double arch=20 -o gpu_double -a libs exe -Make.py -d ~/lammps -j 16 -p #all gpu orig -m linux \ - -gpu mode=mixed arch=20 -o gpu_mixed -a libs exe -Make.py -d ~/lammps -j 16 -p #all gpu orig -m linux \ - -gpu mode=single arch=20 -o gpu_single -a libs exe -Make.py -d ~/lammps -j 16 -p #all cuda orig -m linux \ - -cuda mode=double arch=20 -o cuda_double -a libs exe -Make.py -d ~/lammps -j 16 -p #all cuda orig -m linux \ - -cuda mode=mixed arch=20 -o cuda_mixed -a libs exe -Make.py -d ~/lammps -j 16 -p #all cuda orig -m linux \ - -cuda mode=single arch=20 -o cuda_single -a libs exe -Make.py -d ~/lammps -j 16 -p #all intel orig -m linux -o intel_cpu -a exe -Make.py -d ~/lammps -j 16 -p #all kokkos orig -m linux -o kokkos_omp -a exe -Make.py -d ~/lammps -j 16 -p #all kokkos orig -kokkos cuda arch=20 \ - -m cuda -o kokkos_cuda -a exe - -Make.py -d ~/lammps -j 16 -p #all opt omp gpu cuda intel kokkos orig \ - -gpu mode=double arch=20 -cuda mode=double arch=20 -m linux \ - -o all -a libs exe - -Make.py -d ~/lammps -j 16 -p #all opt omp gpu cuda intel kokkos orig \ - -kokkos cuda arch=20 -gpu mode=double arch=20 \ - -cuda mode=double arch=20 -m cuda -o all_cuda -a libs exe - ------------------------------------------------------------------------ -To run on just CPUs (without using the GPU or USER-CUDA styles), +To run on just CPUs (without using the GPU styles), do something like the following: mpirun -np 1 lmp_linux_double -v x 8 -v y 8 -v z 8 -v t 100 < in.lj @@ -81,23 +47,5 @@ node via a "-ppn" setting. 
------------------------------------------------------------------------ -To run with the USER-CUDA package, do something like the following: - -mpirun -np 1 lmp_linux_single -c on -sf cuda -v x 16 -v y 16 -v z 16 -v t 100 < in.lj -mpirun -np 2 lmp_linux_double -c on -sf cuda -pk cuda 2 -v x 32 -v y 64 -v z 64 -v t 100 < in.eam - -The "xyz" settings determine the problem size. The "t" setting -determines the number of timesteps. The "np" setting determines how -many MPI tasks (per node) the problem will run on. The numeric -argument to the "-pk" setting is the number of GPUs (per node); 1 GPU -is the default. Note that the number of MPI tasks must equal the -number of GPUs (both per node) with the USER-CUDA package. - -These mpirun commands run on a single node. To run on multiple nodes, -scale up the "-np" setting, and control the number of MPI tasks per -node via a "-ppn" setting. - ------------------------------------------------------------------------- - If the script has "titan" in its name, it was run on the Titan supercomputer at ORNL. diff --git a/bench/README b/bench/README index 85d71cbb5d..0806fcded6 100644 --- a/bench/README +++ b/bench/README @@ -71,49 +71,33 @@ integration ---------------------------------------------------------------------- -Here is a src/Make.py command which will perform a parallel build of a -LAMMPS executable "lmp_mpi" with all the packages needed by all the -examples. This assumes you have an MPI installed on your machine so -that "mpicxx" can be used as the wrapper compiler. It also assumes -you have an Intel compiler to use as the base compiler. You can leave -off the "-cc mpi wrap=icc" switch if that is not the case. You can -also leave off the "-fft fftw3" switch if you do not have the FFTW -(v3) installed as an FFT package, in which case the default KISS FFT -library will be used. 
- -cd src -Make.py -j 16 -p none molecule manybody kspace granular rigid orig \ - -cc mpi wrap=icc -fft fftw3 -a file mpi - ----------------------------------------------------------------------- - Here is how to run each problem, assuming the LAMMPS executable is named lmp_mpi, and you are using the mpirun command to launch parallel runs: Serial (one processor runs): -lmp_mpi < in.lj -lmp_mpi < in.chain -lmp_mpi < in.eam -lmp_mpi < in.chute -lmp_mpi < in.rhodo +lmp_mpi -in in.lj +lmp_mpi -in in.chain +lmp_mpi -in in.eam +lmp_mpi -in in.chute +lmp_mpi -in in.rhodo Parallel fixed-size runs (on 8 procs in this case): -mpirun -np 8 lmp_mpi < in.lj -mpirun -np 8 lmp_mpi < in.chain -mpirun -np 8 lmp_mpi < in.eam -mpirun -np 8 lmp_mpi < in.chute -mpirun -np 8 lmp_mpi < in.rhodo +mpirun -np 8 lmp_mpi -in in.lj +mpirun -np 8 lmp_mpi -in in.chain +mpirun -np 8 lmp_mpi -in in.eam +mpirun -np 8 lmp_mpi -in in.chute +mpirun -np 8 lmp_mpi -in in.rhodo Parallel scaled-size runs (on 16 procs in this case): -mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 < in.lj -mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 < in.chain.scaled -mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 < in.eam -mpirun -np 16 lmp_mpi -var x 4 -var y 4 < in.chute.scaled -mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 < in.rhodo.scaled +mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 -in in.lj +mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 -in in.chain.scaled +mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 -in in.eam +mpirun -np 16 lmp_mpi -var x 4 -var y 4 -in in.chute.scaled +mpirun -np 16 lmp_mpi -var x 2 -var y 2 -var z 4 -in in.rhodo.scaled For each of the scaled-size runs you must set 3 variables as -var command line switches. 
The variables x,y,z are used in the input diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt new file mode 100644 index 0000000000..76c28fcb72 --- /dev/null +++ b/cmake/CMakeLists.txt @@ -0,0 +1,547 @@ +######################################## +# CMake build system +# This file is part of LAMMPS +# Created by Christoph Junghans and Richard Berger +cmake_minimum_required(VERSION 3.1) + +project(lammps) +set(SOVERSION 0) +set(LAMMPS_SOURCE_DIR ${CMAKE_SOURCE_DIR}/../src) +set(LAMMPS_LIB_SOURCE_DIR ${CMAKE_SOURCE_DIR}/../lib) +set(LAMMPS_LIB_BINARY_DIR ${CMAKE_BINARY_DIR}/lib) + +#To not conflict with old Makefile build system, we build everything here +file(GLOB LIB_SOURCES ${LAMMPS_SOURCE_DIR}/*.cpp) +file(GLOB LMP_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp) +list(REMOVE_ITEM LIB_SOURCES ${LMP_SOURCES}) + +# Cmake modules/macros are in a subdirectory to keep this file cleaner +set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/Modules) + +if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CXX_FLAGS) + #release comes with -O3 by default + set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE) +endif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CXX_FLAGS) + +foreach(STYLE_FILE style_angle.h style_atom.h style_body.h style_bond.h style_command.h style_compute.h style_dihedral.h style_dump.h + style_fix.h style_improper.h style_integrate.h style_kspace.h style_minimize.h style_nbin.h style_npair.h style_nstencil.h + style_ntopo.h style_pair.h style_reader.h style_region.h) + if(EXISTS ${LAMMPS_SOURCE_DIR}/${STYLE_FILE}) + message(FATAL_ERROR "There is a ${STYLE_FILE} in ${LAMMPS_SOURCE_DIR}, please clean up the source directory first") + endif() +endforeach() + +enable_language(CXX) + +###################################################################### +# compiler tests +# these need to be done early (before further tests). 
+##################################################################### +include(CheckCCompilerFlag) + +######################################################################## +# User input options # +######################################################################## +option(BUILD_SHARED_LIBS "Build shared libs" OFF) +option(INSTALL_LIB "Install lammps library and header" ON) +include(GNUInstallDirs) + +set(LAMMPS_LINK_LIBS) +option(ENABLE_MPI "Build MPI version" OFF) +if(ENABLE_MPI) + find_package(MPI REQUIRED) + include_directories(${MPI_C_INCLUDE_PATH}) + list(APPEND LAMMPS_LINK_LIBS ${MPI_CXX_LIBRARIES}) + option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF) + if(LAMMPS_LONGLONG_TO_LONG) + add_definitions(-DLAMMPS_LONGLONG_TO_LONG) + endif() +else() + file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.c) + list(APPEND LIB_SOURCES ${MPI_SOURCES}) + include_directories(${LAMMPS_SOURCE_DIR}/STUBS) +endif() + +set(LAMMPS_SIZE_LIMIT "LAMMPS_SMALLBIG" CACHE STRING "Lammps size limit") +set_property(CACHE LAMMPS_SIZE_LIMIT PROPERTY STRINGS LAMMPS_SMALLBIG LAMMPS_BIGBIG LAMMPS_SMALLSMALL) +add_definitions(-D${LAMMPS_SIZE_LIMIT}) + +set(LAMMPS_MEMALIGN "64" CACHE STRING "enables the use of the posix_memalign() call instead of malloc() when large chunks or memory are allocated by LAMMPS") +add_definitions(-DLAMMPS_MEMALIGN=${LAMMPS_MEMALIGN}) + +option(LAMMPS_EXCEPTIONS "enable the use of C++ exceptions for error messages (useful for library interface)" OFF) +if(LAMMPS_EXCEPTIONS) + add_definitions(-DLAMMPS_EXCEPTIONS) +endif() + +option(CMAKE_VERBOSE_MAKEFILE "Verbose makefile" OFF) + +option(ENABLE_TESTING "Enable testing" OFF) +if(ENABLE_TESTING) + enable_testing() +endif(ENABLE_TESTING) + +option(ENABLE_ALL "Build all default packages" OFF) +set(DEFAULT_PACKAGES ASPHERE BODY CLASS2 COLLOID COMPRESS CORESHELL DIPOLE GRANULAR + KSPACE MANYBODY MC MEAM MISC MOLECULE PERI QEQ + REAX REPLICA RIGID 
SHOCK SNAP SRD) +set(OTHER_PACKAGES KIM PYTHON MSCG MPIIO VORONOI POEMS + USER-ATC USER-AWPMD USER-CGDNA + USER-CGSDK USER-COLVARS USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF + USER-FEP USER-H5MD USER-LB USER-MANIFOLD USER-MEAMC USER-MGPT USER-MISC + USER-MOLFILE USER-NETCDF USER-PHONON USER-QTB USER-REAXC USER-SMD + USER-SMTBQ USER-SPH USER-TALLY USER-VTK USER-QUIP USER-QMMM) +set(ACCEL_PACKAGES USER-OMP KOKKOS OPT USER-INTEL GPU) +foreach(PKG ${DEFAULT_PACKAGES}) + option(ENABLE_${PKG} "Build ${PKG} Package" ${ENABLE_ALL}) +endforeach() +foreach(PKG ${ACCEL_PACKAGES} ${OTHER_PACKAGES}) + option(ENABLE_${PKG} "Build ${PKG} Package" OFF) +endforeach() + +macro(pkg_depends PKG1 PKG2) + if(ENABLE_${PKG1} AND NOT ENABLE_${PKG2}) + message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}") + endif() +endmacro() + +pkg_depends(MPIIO MPI) +pkg_depends(QEQ MANYBODY) +pkg_depends(USER-ATC MANYBODY) +pkg_depends(USER-H5MD MPI) +pkg_depends(USER-LB MPI) +pkg_depends(USER-MISC MANYBODY) +pkg_depends(USER-PHONON KSPACE) + +if(ENABLE_BODY AND ENABLE_POEMS) + message(FATAL_ERROR "BODY and POEMS cannot be enabled at the same time") +endif() + +###################################################### +# packages with special compiler needs or external libs +###################################################### +if(ENABLE_REAX OR ENABLE_MEAM OR ENABLE_USER-QUIP OR ENABLE_USER-QMMM) + enable_language(Fortran) +endif() + +if(ENABLE_KOKKOS OR ENABLE_MSCG) + # starting with CMake 3.1 this is all you have to do to enforce C++11 + set(CMAKE_CXX_STANDARD 11) # C++11... + set(CMAKE_CXX_STANDARD_REQUIRED ON) #...is required... 
+ set(CMAKE_CXX_EXTENSIONS OFF) #...without compiler extensions like gnu++11 +endif() + +if(ENABLE_USER-OMP OR ENABLE_KOKKOS OR ENABLE_USER-INTEL) + find_package(OpenMP REQUIRED) + set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") + set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") +endif() + +if(ENABLE_KSPACE) + set(FFT "KISSFFT" CACHE STRING "FFT library for KSPACE package") + set_property(CACHE FFT PROPERTY STRINGS KISSFFT FFTW3 MKL FFTW2) + if(NOT FFT STREQUAL "KISSFFT") + find_package(${FFT} REQUIRED) + add_definitions(-DFFT_${FFT}) + include_directories(${${FFT}_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${${FFT}_LIBRARIES}) + endif() + set(PACK_OPTIMIZATION "PACK_ARRAY" CACHE STRING "Optimization for FFT") + set_property(CACHE PACK_OPTIMIZATION PROPERTY STRINGS PACK_ARRAY PACK_POINTER PACK_MEMCPY) + if(NOT PACK_OPTIMIZATION STREQUAL "PACK_ARRAY") + add_definitions(-D${PACK_OPTIMIZATION}) + endif() +endif() + +if(ENABLE_MISC) + option(LAMMPS_XDR "include XDR compatibility files for doing particle dumps in XTC format" OFF) + if(LAMMPS_XDR) + add_definitions(-DLAMMPS_XDR) + endif() +endif() + +if(ENABLE_MSCG OR ENABLE_USER-ATC OR ENABLE_USER-AWPMD OR ENABLE_USER-QUIP) + find_package(LAPACK) + if(LAPACK_FOUND) + list(APPEND LAMMPS_LINK_LIBS ${LAPACK_LIBRARIES}) + else() + enable_language(Fortran) + file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/*.f) + list(APPEND LIB_SOURCES ${LAPACK_SOURCES}) + endif() +endif() + +if(ENABLE_PYTHON) + find_package(PythonInterp REQUIRED) + find_package(PythonLibs REQUIRED) + add_definitions(-DLMP_PYTHON) + include_directories(${PYTHON_INCLUDE_DIR}) + list(APPEND LAMMPS_LINK_LIBS ${PYTHON_LIBRARY}) + if(NOT PYTHON_INSTDIR) + execute_process(COMMAND ${PYTHON_EXECUTABLE} + -c "import distutils.sysconfig as cg; print(cg.get_python_lib(1,0,prefix='${CMAKE_INSTALL_PREFIX}'))" + OUTPUT_VARIABLE PYTHON_INSTDIR OUTPUT_STRIP_TRAILING_WHITESPACE) + endif() + install(FILES 
${CMAKE_SOURCE_DIR}/../python/lammps.py DESTINATION ${PYTHON_INSTDIR}) + if(NOT BUILD_SHARED_LIBS) + message(FATAL_ERROR "Python package need lammps to be build shared, use -DBUILD_SHARED_LIBS=ON") + endif() +endif() + +find_package(JPEG) +if(JPEG_FOUND) + add_definitions(-DLAMMPS_JPEG) + include_directories(${JPEG_INCLUDE_DIR}) + list(APPEND LAMMPS_LINK_LIBS ${JPEG_LIBRARIES}) +endif() + +find_package(PNG) +find_package(ZLIB) +if(PNG_FOUND AND ZLIB_FOUND) + include_directories(${PNG_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${PNG_LIBRARIES} ${ZLIB_LIBRARIES}) + add_definitions(-DLAMMPS_PNG) +endif() + +find_program(GZIP_EXECUTABLE gzip) +find_package_handle_standard_args(GZIP REQUIRED_VARS GZIP_EXECUTABLE) +if(GZIP_FOUND) + add_definitions(-DLAMMPS_GZIP) +endif() + +find_program(FFMPEG_EXECUTABLE ffmpeg) +find_package_handle_standard_args(FFMPEG REQUIRED_VARS FFMPEG_EXECUTABLE) +if(FFMPEG_FOUND) + add_definitions(-DLAMMPS_FFMPEG) +endif() + +if(ENABLE_VORONOI) + find_package(VORO REQUIRED) #some distros + include_directories(${VORO_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES}) +endif() + +if(ENABLE_USER-MOLFILE) + list(APPEND LAMMPS_LINK_LIBS ${CMAKE_DL_LIBS}) +endif() + +if(ENABLE_USER-NETCDF) + find_package(NetCDF REQUIRED) + include_directories(${NETCDF_INCLUDE_DIR}) #header path located by FindNetCDF.cmake + list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARY}) + add_definitions(-DLMP_HAS_NETCDF -DNC_64BIT_DATA=0x0020) +endif() + +if(ENABLE_USER-SMD) + find_package(Eigen3 REQUIRED) + include_directories(${EIGEN3_INCLUDE_DIR}) +endif() + +if(ENABLE_USER-QUIP) + find_package(QUIP REQUIRED) + list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) +endif() + +if(ENABLE_USER-QMMM) + find_package(QE REQUIRED) + include_directories(${QE_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${QE_LIBRARIES} ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES}) +endif() + +if(ENABLE_USER-AWPMD) + 
include_directories(${LAMMPS_LIB_SOURCE_DIR}/awpmd/systems/interact + ${LAMMPS_LIB_SOURCE_DIR}/awpmd/ivutils/include) +endif() + +if(ENABLE_USER-H5MD) + find_package(HDF5 REQUIRED) + list(APPEND LAMMPS_LINK_LIBS ${HDF5_LIBRARIES}) + include_directories(${HDF5_INCLUDE_DIRS} ${LAMMPS_LIB_SOURCE_DIR}/h5md/include) +endif() + +if(ENABLE_USER-VTK) + find_package(VTK REQUIRED NO_MODULE) + include(${VTK_USE_FILE}) + add_definitions(-DLAMMPS_VTK) + list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES}) +endif() + +if(ENABLE_KIM) + find_package(KIM REQUIRED) + list(APPEND LAMMPS_LINK_LIBS ${KIM_LIBRARIES}) + include_directories(${KIM_INCLUDE_DIRS}) +endif() + +if(ENABLE_MSCG) + find_package(GSL REQUIRED) + set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/mscg) + set(MSCG_TARBALL ${LAMMPS_LIB_MSCG_BIN_DIR}/MS-CG-master.zip) + set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_MSCG_BIN_DIR}/MSCG-release-master/src) + if(NOT EXISTS ${LAMMPS_LIB_MSCG_BIN_DIR}) + if(NOT EXISTS ${MSCG_TARBALL}) + message(STATUS "Downloading ${MSCG_TARBALL}") + file(DOWNLOAD + https://github.com/uchicago-voth/MSCG-release/archive/master.zip + ${MSCG_TARBALL} SHOW_PROGRESS) #EXPECTED_MD5 cannot be used due to master + endif() + message(STATUS "Unpacking ${MSCG_TARBALL}") + execute_process(COMMAND ${CMAKE_COMMAND} -E tar xvf ${MSCG_TARBALL} + WORKING_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/mscg) + endif() + file(GLOB MSCG_SOURCES ${LAMMPS_LIB_MSCG_BIN_DIR}/*.cpp) + list(APPEND LIB_SOURCES ${MSCG_SOURCES}) + foreach(MSCG_SOURCE ${MSCG_SOURCES}) + set_property(SOURCE ${MSCG_SOURCE} APPEND PROPERTY COMPILE_DEFINITIONS + DIMENSION=3 _exclude_gromacs=1) + endforeach() + include_directories(${LAMMPS_LIB_MSCG_BIN_DIR} ${GSL_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${GSL_LIBRARIES}) +endif() + +######################################################################## +# Basic system tests (standard libraries, headers, functions, types) # +######################################################################## 
+include(CheckIncludeFile) +foreach(HEADER math.h) + check_include_file(${HEADER} FOUND_${HEADER}) + if(NOT FOUND_${HEADER}) + message(FATAL_ERROR "Could not find needed header - ${HEADER}") + endif(NOT FOUND_${HEADER}) +endforeach(HEADER) + +set(MATH_LIBRARIES "m" CACHE STRING "math library") +mark_as_advanced( MATH_LIBRARIES ) +include(CheckLibraryExists) +foreach(FUNC sin cos) + check_library_exists(${MATH_LIBRARIES} ${FUNC} "" FOUND_${FUNC}_${MATH_LIBRARIES}) + if(NOT FOUND_${FUNC}_${MATH_LIBRARIES}) + message(FATAL_ERROR "Could not find needed math function - ${FUNC}") + endif(NOT FOUND_${FUNC}_${MATH_LIBRARIES}) +endforeach(FUNC) +list(APPEND LAMMPS_LINK_LIBS ${MATH_LIBRARIES}) + +###################################### +# Generate Basic Style files +###################################### +include(StyleHeaderUtils) +RegisterStyles(${LAMMPS_SOURCE_DIR}) + +############################################## +# add sources of enabled packages +############################################ +foreach(PKG ${DEFAULT_PACKAGES} ${OTHER_PACKAGES}) + if(ENABLE_${PKG}) + set(${PKG}_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/${PKG}) + + # detects styles in package and adds them to global list + RegisterStyles(${${PKG}_SOURCES_DIR}) + + file(GLOB ${PKG}_SOURCES ${${PKG}_SOURCES_DIR}/*.cpp) + list(APPEND LIB_SOURCES ${${PKG}_SOURCES}) + include_directories(${${PKG}_SOURCES_DIR}) + endif() +endforeach() + +############################################## +# add lib sources of (simple) enabled packages +############################################ +foreach(SIMPLE_LIB REAX MEAM POEMS USER-ATC USER-AWPMD USER-COLVARS USER-H5MD + USER-MOLFILE USER-QMMM) + if(ENABLE_${SIMPLE_LIB}) + string(REGEX REPLACE "^USER-" "" SIMPLE_LIB "${SIMPLE_LIB}") + string(TOLOWER "${SIMPLE_LIB}" INC_DIR) + file(GLOB_RECURSE ${SIMPLE_LIB}_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR}/*.F + ${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR}/*.c ${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR}/*.cpp) + list(APPEND LIB_SOURCES ${${SIMPLE_LIB}_SOURCES}) 
+ include_directories(${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR}) + endif() +endforeach() + +###################################################################### +# packages which selectively include variants based on enabled styles +# e.g. accelerator packages +###################################################################### +if(ENABLE_USER-OMP) + set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP) + set(USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/thr_data.cpp + ${USER-OMP_SOURCES_DIR}/thr_omp.cpp + ${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp + ${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp) + set_property(GLOBAL PROPERTY "OMP_SOURCES" "${USER-OMP_SOURCES}") + + # detects styles which have USER-OMP version + RegisterStylesExt(${USER-OMP_SOURCES_DIR} omp OMP_SOURCES) + + get_property(USER-OMP_SOURCES GLOBAL PROPERTY OMP_SOURCES) + + list(APPEND LIB_SOURCES ${USER-OMP_SOURCES}) + include_directories(${USER-OMP_SOURCES_DIR}) +endif() + +if(ENABLE_KOKKOS) + set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) + set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos) + add_definitions(-DLMP_KOKKOS) + add_subdirectory(${LAMMPS_LIB_KOKKOS_SRC_DIR} ${LAMMPS_LIB_KOKKOS_BIN_DIR}) + + set(Kokkos_INCLUDE_DIRS ${LAMMPS_LIB_KOKKOS_SRC_DIR}/core/src + ${LAMMPS_LIB_KOKKOS_SRC_DIR}/containers/src + ${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src + ${LAMMPS_LIB_KOKKOS_BIN_DIR}) + include_directories(${Kokkos_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS kokkos) + + set(KOKKOS_PKG_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/KOKKOS) + set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/fix_nh_kokkos.cpp + 
${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp) + set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}") + + # detects styles which have KOKKOS version + RegisterStylesExt(${KOKKOS_PKG_SOURCES_DIR} kokkos KOKKOS_PKG_SOURCES) + + get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES) + + list(APPEND LIB_SOURCES ${KOKKOS_PKG_SOURCES}) + include_directories(${KOKKOS_PKG_SOURCES_DIR}) +endif() + +if(ENABLE_OPT) + set(OPT_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/OPT) + set(OPT_SOURCES) + set_property(GLOBAL PROPERTY "OPT_SOURCES" "${OPT_SOURCES}") + + # detects styles which have OPT version + RegisterStylesExt(${OPT_SOURCES_DIR} opt OPT_SOURCES) + + get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES) + + list(APPEND LIB_SOURCES ${OPT_SOURCES}) + include_directories(${OPT_SOURCES_DIR}) +endif() + +if(ENABLE_USER-INTEL) + set(USER-INTEL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-INTEL) + set(USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/intel_preprocess.h + ${USER-INTEL_SOURCES_DIR}/intel_buffers.h + ${USER-INTEL_SOURCES_DIR}/intel_buffers.cpp + ${USER-INTEL_SOURCES_DIR}/math_extra_intel.h + ${USER-INTEL_SOURCES_DIR}/nbin_intel.h + ${USER-INTEL_SOURCES_DIR}/nbin_intel.cpp + ${USER-INTEL_SOURCES_DIR}/npair_intel.h + ${USER-INTEL_SOURCES_DIR}/npair_intel.cpp + ${USER-INTEL_SOURCES_DIR}/intel_simd.h + ${USER-INTEL_SOURCES_DIR}/intel_intrinsics.h) + + set_property(GLOBAL PROPERTY "USER-INTEL_SOURCES" "${USER-INTEL_SOURCES}") + + # detects styles which have USER-INTEL version (suffix follows the package, as for omp/kokkos/opt) + RegisterStylesExt(${USER-INTEL_SOURCES_DIR} intel USER-INTEL_SOURCES) + + get_property(USER-INTEL_SOURCES GLOBAL PROPERTY USER-INTEL_SOURCES) + + list(APPEND LIB_SOURCES ${USER-INTEL_SOURCES}) + include_directories(${USER-INTEL_SOURCES_DIR}) +endif() + +if(ENABLE_GPU) + find_package(CUDA REQUIRED) + find_program(BIN2C bin2c) + if(NOT BIN2C) + message(FATAL_ERROR "Couldn't find bin2c, use -DBIN2C helping cmake to find it.") + endif() + 
include_directories(${CUDA_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) + set(GPU_PREC "SINGLE_DOUBLE" CACHE STRING "Lammps gpu precision size") + set_property(CACHE GPU_PREC PROPERTY STRINGS SINGLE_DOUBLE SINGLE_SINGLE DOUBLE_DOUBLE) + add_definitions(-D_${GPU_PREC}) + add_definitions(-DNV_KERNEL -DUCL_CUDADR) + option(CUDPP_OPT "Enable CUDPP_OPT" ON) + + set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU) + set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h) + + set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}") + + # detects styles which have GPU version (suffix follows the package, as for omp/kokkos/opt) + RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES) + + get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES) + + file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cpp) + file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cu ${CMAKE_SOURCE_DIR}/gpu/*.cu) + file(GLOB_RECURSE GPU_NOT_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu) + list(REMOVE_ITEM GPU_LIB_CU ${GPU_NOT_LIB_CU}) + include_directories(${GPU_SOURCES_DIR} ${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu) + if(CUDPP_OPT) + include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini) + add_definitions(-DCUDPP_OPT) + file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/*.cpp) + file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/*.cu) + endif() + cuda_compile(GPU_OBJS ${GPU_LIB_CU} ${GPU_LIB_CUDPP_CU} OPTIONS $<$<BOOL:${BUILD_SHARED_LIBS}>:-Xcompiler=-fPIC>) + file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu) + foreach(CU_OBJ ${GPU_OBJS}) + get_filename_component(CU_NAME ${CU_OBJ} NAME_WE) + string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}") + add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h + COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h + DEPENDS ${CU_OBJ} + COMMENT "Generating ${CU_NAME}_cubin.h") + list(APPEND LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h) + if(${CU_NAME} STREQUAL 
"pppm_d") #pppm_d doesn't get linked into the lib + set(CU_FORBIDDEN_OBJ "${CU_OBJ}") + endif() + endforeach() + list(REMOVE_ITEM GPU_OBJS "${CU_FORBIDDEN_OBJ}") + list(APPEND LIB_SOURCES ${GPU_SOURCES} ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS}) + set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h") +endif() + +###################################################### +# Generate style headers based on global list of +# styles registered during package selection +###################################################### +set(LAMMPS_STYLE_HEADERS_DIR ${CMAKE_CURRENT_BINARY_DIR}/styles) + +GenerateStyleHeaders(${LAMMPS_STYLE_HEADERS_DIR}) + +include_directories(${LAMMPS_SOURCE_DIR}) +include_directories(${LAMMPS_STYLE_HEADERS_DIR}) + +########################################### +# Actually add executable and lib to build +############################################ +add_library(lammps ${LIB_SOURCES}) +target_link_libraries(lammps ${LAMMPS_LINK_LIBS}) +set_target_properties(lammps PROPERTIES SOVERSION ${SOVERSION}) +if(INSTALL_LIB) + install(TARGETS lammps LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + install(FILES ${LAMMPS_SOURCE_DIR}/lammps.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}) +elseif(BUILD_SHARED_LIBS) + message(FATAL_ERROR "Shared library has to be installed, use -DINSTALL_LIB=ON to install lammps with a library") +endif() + +add_executable(lmp ${LMP_SOURCES}) +target_link_libraries(lmp lammps) +install(TARGETS lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) +if(ENABLE_TESTING) + add_test(ShowHelp ${CMAKE_CURRENT_BINARY_DIR}/lmp -help) +endif() + +################################## +# Print package summary +################################## +foreach(PKG ${DEFAULT_PACKAGES} ${OTHER_PACKAGES} ${ACCEL_PACKAGES}) + if(ENABLE_${PKG}) + message(STATUS "Building package: ${PKG}") + endif() +endforeach() diff --git a/cmake/Modules/FindFFTW2.cmake 
b/cmake/Modules/FindFFTW2.cmake new file mode 100644 index 0000000000..c77e6cf8e9 --- /dev/null +++ b/cmake/Modules/FindFFTW2.cmake @@ -0,0 +1,22 @@ +# - Find fftw2 +# Find the native FFTW2 headers and libraries. +# +# FFTW2_INCLUDE_DIRS - where to find fftw2.h, etc. +# FFTW2_LIBRARIES - List of libraries when using fftw2. +# FFTW2_FOUND - True if fftw2 found. +# + +find_path(FFTW2_INCLUDE_DIR fftw.h) + +find_library(FFTW2_LIBRARY NAMES fftw) + +set(FFTW2_LIBRARIES ${FFTW2_LIBRARY}) +set(FFTW2_INCLUDE_DIRS ${FFTW2_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set FFTW2_FOUND to TRUE +# if all listed variables are TRUE + +find_package_handle_standard_args(FFTW2 DEFAULT_MSG FFTW2_LIBRARY FFTW2_INCLUDE_DIR) + +mark_as_advanced(FFTW2_INCLUDE_DIR FFTW2_LIBRARY ) diff --git a/cmake/Modules/FindFFTW3.cmake b/cmake/Modules/FindFFTW3.cmake new file mode 100644 index 0000000000..552bcc4257 --- /dev/null +++ b/cmake/Modules/FindFFTW3.cmake @@ -0,0 +1,25 @@ +# - Find fftw3 +# Find the native FFTW3 headers and libraries. +# +# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc. +# FFTW3_LIBRARIES - List of libraries when using fftw3. +# FFTW3_FOUND - True if fftw3 found. 
+# + +find_package(PkgConfig) + +pkg_check_modules(PC_FFTW3 fftw3) +find_path(FFTW3_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3_INCLUDE_DIRS}) + +find_library(FFTW3_LIBRARY NAMES fftw3 HINTS ${PC_FFTW3_LIBRARY_DIRS}) + +set(FFTW3_LIBRARIES ${FFTW3_LIBRARY}) +set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE +# if all listed variables are TRUE + +find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARY FFTW3_INCLUDE_DIR) + +mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY ) diff --git a/cmake/Modules/FindKIM.cmake b/cmake/Modules/FindKIM.cmake new file mode 100644 index 0000000000..a01f817cf6 --- /dev/null +++ b/cmake/Modules/FindKIM.cmake @@ -0,0 +1,22 @@ +# - Find kim +# Find the native KIM headers and libraries. +# +# KIM_INCLUDE_DIRS - where to find kim.h, etc. +# KIM_LIBRARIES - List of libraries when using kim. +# KIM_FOUND - True if kim found. +# + +find_path(KIM_INCLUDE_DIR KIM_API.h PATH_SUFFIXES kim-api-v1) + +find_library(KIM_LIBRARY NAMES kim-api-v1) + +set(KIM_LIBRARIES ${KIM_LIBRARY}) +set(KIM_INCLUDE_DIRS ${KIM_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set KIM_FOUND to TRUE +# if all listed variables are TRUE + +find_package_handle_standard_args(KIM DEFAULT_MSG KIM_LIBRARY KIM_INCLUDE_DIR) + +mark_as_advanced(KIM_INCLUDE_DIR KIM_LIBRARY ) diff --git a/cmake/Modules/FindMKL.cmake b/cmake/Modules/FindMKL.cmake new file mode 100644 index 0000000000..4246062103 --- /dev/null +++ b/cmake/Modules/FindMKL.cmake @@ -0,0 +1,22 @@ +# - Find mkl +# Find the native MKL headers and libraries. +# +# MKL_INCLUDE_DIRS - where to find mkl.h, etc. +# MKL_LIBRARIES - List of libraries when using mkl. +# MKL_FOUND - True if mkl found. 
+# + +find_path(MKL_INCLUDE_DIR mkl_dfti.h HINTS $ENV{MKLROOT}/include) + +find_library(MKL_LIBRARY NAMES mkl_rt HINTS $ENV{MKLROOT}/lib $ENV{MKLROOT}/lib/intel64) + +set(MKL_LIBRARIES ${MKL_LIBRARY}) +set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set MKL_FOUND to TRUE +# if all listed variables are TRUE + +find_package_handle_standard_args(MKL DEFAULT_MSG MKL_LIBRARY MKL_INCLUDE_DIR) + +mark_as_advanced(MKL_INCLUDE_DIR MKL_LIBRARY ) diff --git a/cmake/Modules/FindNetCDF.cmake b/cmake/Modules/FindNetCDF.cmake new file mode 100644 index 0000000000..a28c959acf --- /dev/null +++ b/cmake/Modules/FindNetCDF.cmake @@ -0,0 +1,118 @@ +# - Find NetCDF +# Find the native NetCDF includes and library +# +# NETCDF_INCLUDE_DIR - user modifiable choice of where netcdf headers are +# NETCDF_LIBRARY - user modifiable choice of where netcdf libraries are +# +# Your package can require certain interfaces to be FOUND by setting these +# +# NETCDF_CXX - require the C++ interface and link the C++ library +# NETCDF_F77 - require the F77 interface and link the fortran library +# NETCDF_F90 - require the F90 interface and link the fortran library +# +# Or equivalently by calling FindNetCDF with a COMPONENTS argument containing one or +# more of "CXX;F77;F90". +# +# When interfaces are requested the user has access to interface specific hints: +# +# NETCDF_${LANG}_INCLUDE_DIR - where to search for interface header files +# NETCDF_${LANG}_LIBRARY - where to search for interface libraries +# +# This module returns these variables for the rest of the project to use. +# +# NETCDF_FOUND - True if NetCDF found including required interfaces (see below) +# NETCDF_LIBRARIES - All netcdf related libraries. +# NETCDF_INCLUDE_DIRS - All directories to include. +# NETCDF_HAS_INTERFACES - Whether requested interfaces were found or not. 
+# NETCDF_${LANG}_INCLUDE_DIRS/NETCDF_${LANG}_LIBRARIES - C/C++/F77/F90 only interface +# +# Normal usage would be: +# set (NETCDF_F90 "YES") +# find_package (NetCDF REQUIRED) +# target_link_libraries (uses_everything ${NETCDF_LIBRARIES}) +# target_link_libraries (only_uses_f90 ${NETCDF_F90_LIBRARIES}) + +#search starting from user editable cache var +if (NETCDF_INCLUDE_DIR AND NETCDF_LIBRARY) + # Already in cache, be silent + set (NETCDF_FIND_QUIETLY TRUE) +endif () + +set(USE_DEFAULT_PATHS "NO_DEFAULT_PATH") +if(NETCDF_USE_DEFAULT_PATHS) + set(USE_DEFAULT_PATHS "") +endif() + +find_path (NETCDF_INCLUDE_DIR netcdf.h + HINTS "${NETCDF_DIR}/include") +mark_as_advanced (NETCDF_INCLUDE_DIR) +set (NETCDF_C_INCLUDE_DIRS ${NETCDF_INCLUDE_DIR}) + +find_library (NETCDF_LIBRARY NAMES netcdf + HINTS "${NETCDF_DIR}/lib") +mark_as_advanced (NETCDF_LIBRARY) + +set (NETCDF_C_LIBRARIES ${NETCDF_LIBRARY}) + +#start finding requested language components +set (NetCDF_libs "") +set (NetCDF_includes "${NETCDF_INCLUDE_DIR}") + +get_filename_component (NetCDF_lib_dirs "${NETCDF_LIBRARY}" PATH) +set (NETCDF_HAS_INTERFACES "YES") # will be set to NO if we're missing any interfaces + +macro (NetCDF_check_interface lang header libs) + if (NETCDF_${lang}) + #search starting from user modifiable cache var + find_path (NETCDF_${lang}_INCLUDE_DIR NAMES ${header} + HINTS "${NETCDF_INCLUDE_DIR}" + HINTS "${NETCDF_${lang}_ROOT}/include" + ${USE_DEFAULT_PATHS}) + + find_library (NETCDF_${lang}_LIBRARY NAMES ${libs} + HINTS "${NetCDF_lib_dirs}" + HINTS "${NETCDF_${lang}_ROOT}/lib" + ${USE_DEFAULT_PATHS}) + + mark_as_advanced (NETCDF_${lang}_INCLUDE_DIR NETCDF_${lang}_LIBRARY) + + #export to internal varS that rest of project can use directly + set (NETCDF_${lang}_LIBRARIES ${NETCDF_${lang}_LIBRARY}) + set (NETCDF_${lang}_INCLUDE_DIRS ${NETCDF_${lang}_INCLUDE_DIR}) + + if (NETCDF_${lang}_INCLUDE_DIR AND NETCDF_${lang}_LIBRARY) + list (APPEND NetCDF_libs ${NETCDF_${lang}_LIBRARY}) + list (APPEND 
NetCDF_includes ${NETCDF_${lang}_INCLUDE_DIR}) + else () + set (NETCDF_HAS_INTERFACES "NO") + message (STATUS "Failed to find NetCDF interface for ${lang}") + endif () + endif () +endmacro () + +list (FIND NetCDF_FIND_COMPONENTS "CXX" _nextcomp) +if (_nextcomp GREATER -1) + set (NETCDF_CXX 1) +endif () +list (FIND NetCDF_FIND_COMPONENTS "F77" _nextcomp) +if (_nextcomp GREATER -1) + set (NETCDF_F77 1) +endif () +list (FIND NetCDF_FIND_COMPONENTS "F90" _nextcomp) +if (_nextcomp GREATER -1) + set (NETCDF_F90 1) +endif () +NetCDF_check_interface (CXX netcdfcpp.h netcdf_c++) +NetCDF_check_interface (F77 netcdf.inc netcdff) +NetCDF_check_interface (F90 netcdf.mod netcdff) + +#export accumulated results to internal varS that rest of project can depend on +list (APPEND NetCDF_libs "${NETCDF_C_LIBRARIES}") +set (NETCDF_LIBRARIES ${NetCDF_libs}) +set (NETCDF_INCLUDE_DIRS ${NetCDF_includes}) + +# handle the QUIETLY and REQUIRED arguments and set NETCDF_FOUND to TRUE if +# all listed variables are TRUE +include (FindPackageHandleStandardArgs) +find_package_handle_standard_args (NetCDF + DEFAULT_MSG NETCDF_LIBRARIES NETCDF_INCLUDE_DIRS NETCDF_HAS_INTERFACES) diff --git a/cmake/Modules/FindQE.cmake b/cmake/Modules/FindQE.cmake new file mode 100644 index 0000000000..4484bd4db2 --- /dev/null +++ b/cmake/Modules/FindQE.cmake @@ -0,0 +1,29 @@ +# - Find quantum-espresso +# Find the native QE headers and libraries. +# +# QE_INCLUDE_DIRS - where to find libqecouple.h, etc. +# QE_LIBRARIES - List of libraries when using quantum-espresso. +# QE_FOUND - True if quantum-espresso found. 
+# + +find_path(QE_INCLUDE_DIR libqecouple.h PATH_SUFFIXES COUPLE/include) + +find_library(QECOUPLE_LIBRARY NAMES qecouple) +find_library(PW_LIBRARY NAMES pw) +find_library(QEMOD_LIBRARY NAMES qemod) +find_library(QEFFT_LIBRARY NAMES qefft) +find_library(QELA_LIBRARY NAMES qela) +find_library(CLIB_LIBRARY NAMES clib) +find_library(IOTK_LIBRARY NAMES iotk) + + +set(QE_LIBRARIES ${QECOUPLE_LIBRARY} ${PW_LIBRARY} ${QEMOD_LIBRARY} ${QEFFT_LIBRARY} ${QELA_LIBRARY} ${CLIB_LIBRARY} ${IOTK_LIBRARY}) +set(QE_INCLUDE_DIRS ${QE_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set QE_FOUND to TRUE +# if all listed variables are TRUE + +find_package_handle_standard_args(QE DEFAULT_MSG QECOUPLE_LIBRARY PW_LIBRARY QEMOD_LIBRARY QEFFT_LIBRARY QELA_LIBRARY CLIB_LIBRARY IOTK_LIBRARY QE_INCLUDE_DIR) + +mark_as_advanced(QE_INCLUDE_DIR QECOUPLE_LIBRARY PW_LIBRARY QEMOD_LIBRARY QEFFT_LIBRARY QELA_LIBRARY CLIB_LIBRARY IOTK_LIBRARY) diff --git a/cmake/Modules/FindQUIP.cmake b/cmake/Modules/FindQUIP.cmake new file mode 100644 index 0000000000..4ee1baf4f8 --- /dev/null +++ b/cmake/Modules/FindQUIP.cmake @@ -0,0 +1,18 @@ +# - Find quip +# Find the native QUIP libraries. +# +# QUIP_LIBRARIES - List of libraries when using quip. +# QUIP_FOUND - True if quip found. +# + +find_library(QUIP_LIBRARY NAMES quip) + +set(QUIP_LIBRARIES ${QUIP_LIBRARY}) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set QUIP_FOUND to TRUE +# if all listed variables are TRUE + +find_package_handle_standard_args(QUIP DEFAULT_MSG QUIP_LIBRARY) + +mark_as_advanced(QUIP_LIBRARY) diff --git a/cmake/Modules/FindVORO.cmake b/cmake/Modules/FindVORO.cmake new file mode 100644 index 0000000000..b0cccbcd1d --- /dev/null +++ b/cmake/Modules/FindVORO.cmake @@ -0,0 +1,22 @@ +# - Find voro++ +# Find the native VORO headers and libraries. +# +# VORO_INCLUDE_DIRS - where to find voro++.hh, etc. 
+# VORO_LIBRARIES - List of libraries when using voro++. +# VORO_FOUND - True if voro++ found. +# + +find_path(VORO_INCLUDE_DIR voro++.hh PATH_SUFFIXES voro++) + +find_library(VORO_LIBRARY NAMES voro++) + +set(VORO_LIBRARIES ${VORO_LIBRARY}) +set(VORO_INCLUDE_DIRS ${VORO_INCLUDE_DIR}) + +include(FindPackageHandleStandardArgs) +# handle the QUIETLY and REQUIRED arguments and set VORO_FOUND to TRUE +# if all listed variables are TRUE + +find_package_handle_standard_args(VORO DEFAULT_MSG VORO_LIBRARY VORO_INCLUDE_DIR) + +mark_as_advanced(VORO_INCLUDE_DIR VORO_LIBRARY ) diff --git a/cmake/Modules/StyleHeaderUtils.cmake b/cmake/Modules/StyleHeaderUtils.cmake new file mode 100644 index 0000000000..9939a7505a --- /dev/null +++ b/cmake/Modules/StyleHeaderUtils.cmake @@ -0,0 +1,132 @@ +function(FindStyleHeaders path style_class file_pattern headers) + file(GLOB files "${path}/${file_pattern}*.h") + get_property(hlist GLOBAL PROPERTY ${headers}) + + foreach(file_name ${files}) + file(STRINGS ${file_name} is_style LIMIT_COUNT 1 REGEX ${style_class}) + if(is_style) + list(APPEND hlist ${file_name}) + endif() + endforeach() + set_property(GLOBAL PROPERTY ${headers} "${hlist}") +endfunction(FindStyleHeaders) + +function(FindStyleHeadersExt path style_class extension headers sources) + get_property(hlist GLOBAL PROPERTY ${headers}) + get_property(slist GLOBAL PROPERTY ${sources}) + set(ext_list) + get_filename_component(abs_path "${path}" ABSOLUTE) + + foreach(file_name ${hlist}) + get_filename_component(basename ${file_name} NAME_WE) + set(ext_file_name "${abs_path}/${basename}_${extension}.h") + if(EXISTS "${ext_file_name}") + file(STRINGS ${ext_file_name} is_style LIMIT_COUNT 1 REGEX ${style_class}) + if(is_style) + list(APPEND ext_list ${ext_file_name}) + + set(source_file_name "${abs_path}/${basename}_${extension}.cpp") + if(EXISTS "${source_file_name}") + list(APPEND slist ${source_file_name}) + endif() + endif() + endif() + endforeach() + + list(APPEND hlist ${ext_list}) + 
set_property(GLOBAL PROPERTY ${headers} "${hlist}") + set_property(GLOBAL PROPERTY ${sources} "${slist}") +endfunction(FindStyleHeadersExt) + +function(CreateStyleHeader path filename) + math(EXPR N "${ARGC}-2") + + set(temp "") + if(N GREATER 0) + math(EXPR ARG_END "${ARGC}-1") + + foreach(IDX RANGE 2 ${ARG_END}) + list(GET ARGV ${IDX} FNAME) + get_filename_component(FNAME ${FNAME} NAME) + set(temp "${temp}#include \"${FNAME}\"\n") + endforeach() + endif() + message(STATUS "Generating ${filename}...") + file(WRITE "${path}/${filename}.tmp" "${temp}" ) + execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${path}/${filename}.tmp" "${path}/${filename}") +endfunction(CreateStyleHeader) + +function(GenerateStyleHeader path property style) + get_property(files GLOBAL PROPERTY ${property}) + #message("${property} = ${files}") + CreateStyleHeader("${path}" "style_${style}.h" ${files}) +endfunction(GenerateStyleHeader) + +function(RegisterStyles search_path) + FindStyleHeaders(${search_path} ANGLE_CLASS angle_ ANGLE ) # angle ) # force + FindStyleHeaders(${search_path} ATOM_CLASS atom_vec_ ATOM_VEC ) # atom ) # atom atom_vec_hybrid + FindStyleHeaders(${search_path} BODY_CLASS body_ BODY ) # body ) # atom_vec_body + FindStyleHeaders(${search_path} BOND_CLASS bond_ BOND ) # bond ) # force + FindStyleHeaders(${search_path} COMMAND_CLASS "" COMMAND ) # command ) # input + FindStyleHeaders(${search_path} COMPUTE_CLASS compute_ COMPUTE ) # compute ) # modify + FindStyleHeaders(${search_path} DIHEDRAL_CLASS dihedral_ DIHEDRAL ) # dihedral ) # force + FindStyleHeaders(${search_path} DUMP_CLASS dump_ DUMP ) # dump ) # output write_dump + FindStyleHeaders(${search_path} FIX_CLASS fix_ FIX ) # fix ) # modify + FindStyleHeaders(${search_path} IMPROPER_CLASS improper_ IMPROPER ) # improper ) # force + FindStyleHeaders(${search_path} INTEGRATE_CLASS "" INTEGRATE ) # integrate ) # update + FindStyleHeaders(${search_path} KSPACE_CLASS "" KSPACE ) # kspace ) # force + 
FindStyleHeaders(${search_path} MINIMIZE_CLASS min_ MINIMIZE ) # minimize ) # update + FindStyleHeaders(${search_path} NBIN_CLASS nbin_ NBIN ) # nbin ) # neighbor + FindStyleHeaders(${search_path} NPAIR_CLASS npair_ NPAIR ) # npair ) # neighbor + FindStyleHeaders(${search_path} NSTENCIL_CLASS nstencil_ NSTENCIL ) # nstencil ) # neighbor + FindStyleHeaders(${search_path} NTOPO_CLASS ntopo_ NTOPO ) # ntopo ) # neighbor + FindStyleHeaders(${search_path} PAIR_CLASS pair_ PAIR ) # pair ) # force + FindStyleHeaders(${search_path} READER_CLASS reader_ READER ) # reader ) # read_dump + FindStyleHeaders(${search_path} REGION_CLASS region_ REGION ) # region ) # domain +endfunction(RegisterStyles) + +function(RegisterStylesExt search_path extension sources) + FindStyleHeadersExt(${search_path} ANGLE_CLASS ${extension} ANGLE ${sources}) + FindStyleHeadersExt(${search_path} ATOM_CLASS ${extension} ATOM_VEC ${sources}) + FindStyleHeadersExt(${search_path} BODY_CLASS ${extension} BODY ${sources}) + FindStyleHeadersExt(${search_path} BOND_CLASS ${extension} BOND ${sources}) + FindStyleHeadersExt(${search_path} COMMAND_CLASS ${extension} COMMAND ${sources}) + FindStyleHeadersExt(${search_path} COMPUTE_CLASS ${extension} COMPUTE ${sources}) + FindStyleHeadersExt(${search_path} DIHEDRAL_CLASS ${extension} DIHEDRAL ${sources}) + FindStyleHeadersExt(${search_path} DUMP_CLASS ${extension} DUMP ${sources}) + FindStyleHeadersExt(${search_path} FIX_CLASS ${extension} FIX ${sources}) + FindStyleHeadersExt(${search_path} IMPROPER_CLASS ${extension} IMPROPER ${sources}) + FindStyleHeadersExt(${search_path} INTEGRATE_CLASS ${extension} INTEGRATE ${sources}) + FindStyleHeadersExt(${search_path} KSPACE_CLASS ${extension} KSPACE ${sources}) + FindStyleHeadersExt(${search_path} MINIMIZE_CLASS ${extension} MINIMIZE ${sources}) + FindStyleHeadersExt(${search_path} NBIN_CLASS ${extension} NBIN ${sources}) + FindStyleHeadersExt(${search_path} NPAIR_CLASS ${extension} NPAIR ${sources}) + 
FindStyleHeadersExt(${search_path} NSTENCIL_CLASS ${extension} NSTENCIL ${sources}) + FindStyleHeadersExt(${search_path} NTOPO_CLASS ${extension} NTOPO ${sources}) + FindStyleHeadersExt(${search_path} PAIR_CLASS ${extension} PAIR ${sources}) + FindStyleHeadersExt(${search_path} READER_CLASS ${extension} READER ${sources}) + FindStyleHeadersExt(${search_path} REGION_CLASS ${extension} REGION ${sources}) +endfunction(RegisterStylesExt) + +function(GenerateStyleHeaders output_path) + GenerateStyleHeader(${output_path} ANGLE angle ) # force + GenerateStyleHeader(${output_path} ATOM_VEC atom ) # atom atom_vec_hybrid + GenerateStyleHeader(${output_path} BODY body ) # atom_vec_body + GenerateStyleHeader(${output_path} BOND bond ) # force + GenerateStyleHeader(${output_path} COMMAND command ) # input + GenerateStyleHeader(${output_path} COMPUTE compute ) # modify + GenerateStyleHeader(${output_path} DIHEDRAL dihedral ) # force + GenerateStyleHeader(${output_path} DUMP dump ) # output write_dump + GenerateStyleHeader(${output_path} FIX fix ) # modify + GenerateStyleHeader(${output_path} IMPROPER improper ) # force + GenerateStyleHeader(${output_path} INTEGRATE integrate ) # update + GenerateStyleHeader(${output_path} KSPACE kspace ) # force + GenerateStyleHeader(${output_path} MINIMIZE minimize ) # update + GenerateStyleHeader(${output_path} NBIN nbin ) # neighbor + GenerateStyleHeader(${output_path} NPAIR npair ) # neighbor + GenerateStyleHeader(${output_path} NSTENCIL nstencil ) # neighbor + GenerateStyleHeader(${output_path} NTOPO ntopo ) # neighbor + GenerateStyleHeader(${output_path} PAIR pair ) # force + GenerateStyleHeader(${output_path} READER reader ) # read_dump + GenerateStyleHeader(${output_path} REGION region ) # domain +endfunction(GenerateStyleHeaders) diff --git a/cmake/README b/cmake/README new file mode 100644 index 0000000000..cc67cceb52 --- /dev/null +++ b/cmake/README @@ -0,0 +1,19 @@ +cmake-buildsystem +----------------- + +To use the cmake build 
system instead of the make-driven one, do: +``` +cmake /path/to/lammps/source/cmake +``` +(please note the cmake directory at the very end) + +To enable a package, e.g. GPU, do +``` +cmake /path/to/lammps/source/cmake -DENABLE_GPU=ON +``` + +cmake has many options; to get an overview, use the curses-based cmake interface, ccmake: +``` +ccmake /path/to/lammps/source/cmake +``` +(Don't forget to press "g" for generate once you are done with configuring) diff --git a/cmake/gpu/lal_pppm_d.cu b/cmake/gpu/lal_pppm_d.cu new file mode 100644 index 0000000000..a49a535013 --- /dev/null +++ b/cmake/gpu/lal_pppm_d.cu @@ -0,0 +1,4 @@ +#define grdtyp double +#define grdtyp4 double4 + +#include "lal_pppm.cu" diff --git a/cmake/gpu/lal_pppm_f.cu b/cmake/gpu/lal_pppm_f.cu new file mode 100644 index 0000000000..e7f5116fa0 --- /dev/null +++ b/cmake/gpu/lal_pppm_f.cu @@ -0,0 +1,4 @@ +#define grdtyp float +#define grdtyp4 float4 + +#include "lal_pppm.cu" diff --git a/doc/src/Eqs/fix_wall_ees.jpg b/doc/src/Eqs/fix_wall_ees.jpg new file mode 100644 index 0000000000..0f99dae8f7 Binary files /dev/null and b/doc/src/Eqs/fix_wall_ees.jpg differ diff --git a/doc/src/Eqs/fix_wall_ees.tex b/doc/src/Eqs/fix_wall_ees.tex new file mode 100644 index 0000000000..c8b62067a8 --- /dev/null +++ b/doc/src/Eqs/fix_wall_ees.tex @@ -0,0 +1,10 @@ +\documentclass[12pt]{article} + +\begin{document} + +$$ +E = \epsilon \left[ \frac{2 \sigma_{LJ}^{12} \left(7 r^5+14 r^3 \sigma_{n}^2+3 r \sigma_{n}^4\right) }{945 \left(r^2-\sigma_{n}^2\right)^7} -\frac{ \sigma_{LJ}^6 \left(2 r \sigma_{n}^3+\sigma_{n}^2 \left(r^2-\sigma_{n}^2\right)\log{ \left[\frac{r-\sigma_{n}}{r+\sigma_{n}}\right]}\right) }{12 \sigma_{n}^5 \left(r^2-\sigma_{n}^2\right)} \right]\qquad \sigma_n < r < r_c +$$ + + +\end{document} diff --git a/doc/src/JPG/bow_tutorial_01.png b/doc/src/JPG/bow_tutorial_01.png new file mode 100755 index 0000000000..2060927250 Binary files /dev/null and b/doc/src/JPG/bow_tutorial_01.png differ diff --git 
a/doc/src/JPG/bow_tutorial_01_small.png b/doc/src/JPG/bow_tutorial_01_small.png new file mode 100755 index 0000000000..a31ad22501 Binary files /dev/null and b/doc/src/JPG/bow_tutorial_01_small.png differ diff --git a/doc/src/JPG/bow_tutorial_02.png b/doc/src/JPG/bow_tutorial_02.png new file mode 100755 index 0000000000..166a0d1a3c Binary files /dev/null and b/doc/src/JPG/bow_tutorial_02.png differ diff --git a/doc/src/JPG/bow_tutorial_02_small.png b/doc/src/JPG/bow_tutorial_02_small.png new file mode 100755 index 0000000000..a8fa24957a Binary files /dev/null and b/doc/src/JPG/bow_tutorial_02_small.png differ diff --git a/doc/src/JPG/bow_tutorial_03.png b/doc/src/JPG/bow_tutorial_03.png new file mode 100755 index 0000000000..4058a4577c Binary files /dev/null and b/doc/src/JPG/bow_tutorial_03.png differ diff --git a/doc/src/JPG/bow_tutorial_03_small.png b/doc/src/JPG/bow_tutorial_03_small.png new file mode 100755 index 0000000000..e440c89483 Binary files /dev/null and b/doc/src/JPG/bow_tutorial_03_small.png differ diff --git a/doc/src/JPG/bow_tutorial_04.png b/doc/src/JPG/bow_tutorial_04.png new file mode 100755 index 0000000000..d0c454ef06 Binary files /dev/null and b/doc/src/JPG/bow_tutorial_04.png differ diff --git a/doc/src/JPG/bow_tutorial_04_small.png b/doc/src/JPG/bow_tutorial_04_small.png new file mode 100755 index 0000000000..fae576a7bb Binary files /dev/null and b/doc/src/JPG/bow_tutorial_04_small.png differ diff --git a/doc/src/JPG/bow_tutorial_05.png b/doc/src/JPG/bow_tutorial_05.png new file mode 100755 index 0000000000..8d1dcc581d Binary files /dev/null and b/doc/src/JPG/bow_tutorial_05.png differ diff --git a/doc/src/JPG/bow_tutorial_06.png b/doc/src/JPG/bow_tutorial_06.png new file mode 100755 index 0000000000..00c3947b1d Binary files /dev/null and b/doc/src/JPG/bow_tutorial_06.png differ diff --git a/doc/src/JPG/bow_tutorial_07.png b/doc/src/JPG/bow_tutorial_07.png new file mode 100755 index 0000000000..aad4e5eaae Binary files /dev/null and 
b/doc/src/JPG/bow_tutorial_07.png differ diff --git a/doc/src/JPG/bow_tutorial_08.png b/doc/src/JPG/bow_tutorial_08.png new file mode 100755 index 0000000000..beb1bcff28 Binary files /dev/null and b/doc/src/JPG/bow_tutorial_08.png differ diff --git a/doc/src/JPG/bow_tutorial_09.png b/doc/src/JPG/bow_tutorial_09.png new file mode 100755 index 0000000000..187f4bbdb2 Binary files /dev/null and b/doc/src/JPG/bow_tutorial_09.png differ diff --git a/doc/src/JPG/bow_tutorial_10.png b/doc/src/JPG/bow_tutorial_10.png new file mode 100755 index 0000000000..4603a46637 Binary files /dev/null and b/doc/src/JPG/bow_tutorial_10.png differ diff --git a/doc/src/JPG/fix_wall_ees_image.jpg b/doc/src/JPG/fix_wall_ees_image.jpg new file mode 100644 index 0000000000..00f958b760 Binary files /dev/null and b/doc/src/JPG/fix_wall_ees_image.jpg differ diff --git a/doc/src/JPG/user_intel.png b/doc/src/JPG/user_intel.png index 302b50124a..7ec83b3207 100755 Binary files a/doc/src/JPG/user_intel.png and b/doc/src/JPG/user_intel.png differ diff --git a/doc/src/Manual.txt b/doc/src/Manual.txt index 36391731d0..bb2e1b8114 100644 --- a/doc/src/Manual.txt +++ b/doc/src/Manual.txt @@ -1,7 +1,7 @@ LAMMPS Users Manual - + @@ -21,7 +21,7 @@

LAMMPS Documentation :c,h3 -6 Jul 2017 version :c,h4 +17 Aug 2017 version :c,h4 Version info: :h4 @@ -79,7 +79,7 @@ bug reports and feature requests are mainly coordinated through the "LAMMPS project on GitHub."_https://github.com/lammps/lammps The lammps.org domain, currently hosting "public continuous integration testing"_https://ci.lammps.org/job/lammps/ and "precompiled Linux -RPM and Windows installer packages"_http://rpm.lammps.org is located +RPM and Windows installer packages"_http://packages.lammps.org is located at Temple University and managed by Richard Berger, richard.berger at temple.edu. @@ -261,7 +261,6 @@ END_RST --> :link(start_6,Section_start.html#start_6) :link(start_7,Section_start.html#start_7) :link(start_8,Section_start.html#start_8) -:link(start_9,Section_start.html#start_9) :link(cmd_1,Section_commands.html#cmd_1) :link(cmd_2,Section_commands.html#cmd_2) diff --git a/doc/src/PDF/colvars-refman-lammps.pdf b/doc/src/PDF/colvars-refman-lammps.pdf index 37201275fe..ad15752107 100644 Binary files a/doc/src/PDF/colvars-refman-lammps.pdf and b/doc/src/PDF/colvars-refman-lammps.pdf differ diff --git a/doc/src/Section_accelerate.txt b/doc/src/Section_accelerate.txt index 64b80c1a55..bb0c93b8aa 100644 --- a/doc/src/Section_accelerate.txt +++ b/doc/src/Section_accelerate.txt @@ -56,7 +56,7 @@ timings; you can simply extrapolate from short runs. For the set of runs, look at the timing data printed to the screen and log file at the end of each LAMMPS run. "This -section"_Section_start.html#start_8 of the manual has an overview. +section"_Section_start.html#start_7 of the manual has an overview. 
Running on one (or a few processors) should give a good estimate of the serial performance and what portions of the timestep are taking @@ -226,16 +226,16 @@ re-build LAMMPS | make machine | prepare and test a regular LAMMPS simulation | lmp_machine -in in.script; mpirun -np 32 lmp_machine -in in.script | -enable specific accelerator support via '-k on' "command-line switch"_Section_start.html#start_7, | +enable specific accelerator support via '-k on' "command-line switch"_Section_start.html#start_6, | only needed for KOKKOS package | -set any needed options for the package via "-pk" "command-line switch"_Section_start.html#start_7 or "package"_package.html command, | +set any needed options for the package via "-pk" "command-line switch"_Section_start.html#start_6 or "package"_package.html command, | only if defaults need to be changed | -use accelerated styles in your input via "-sf" "command-line switch"_Section_start.html#start_7 or "suffix"_suffix.html command | lmp_machine -in in.script -sf gpu +use accelerated styles in your input via "-sf" "command-line switch"_Section_start.html#start_6 or "suffix"_suffix.html command | lmp_machine -in in.script -sf gpu :tb(c=2,s=|) -Note that the first 4 steps can be done as a single command, using the -src/Make.py tool. This tool is discussed in "Section -2.4"_Section_start.html#start_4 of the manual, and its use is +Note that the first 4 steps can be done as a single command with +suitable make command invocations. This is discussed in "Section +4"_Section_packages.html of the manual, and its use is illustrated in the individual accelerator sections. Typically these steps only need to be done once, to create an executable that uses one or more accelerator packages. diff --git a/doc/src/Section_commands.txt b/doc/src/Section_commands.txt index e17645e6d0..571c6c4920 100644 --- a/doc/src/Section_commands.txt +++ b/doc/src/Section_commands.txt @@ -734,7 +734,9 @@ package"_Section_start.html#start_3. 
"smd/wall/surface"_fix_smd_wall_surface.html, "temp/rescale/eff"_fix_temp_rescale_eff.html, "ti/spring"_fix_ti_spring.html, -"ttm/mod"_fix_ttm.html :tb(c=6,ea=c) +"ttm/mod"_fix_ttm.html, +"wall/ees"_fix_wall_ees.html, +"wall/region/ees"_fix_wall_ees.html :tb(c=6,ea=c) :line @@ -890,8 +892,8 @@ KOKKOS, o = USER-OMP, t = OPT. "hybrid"_pair_hybrid.html, "hybrid/overlay"_pair_hybrid.html, "adp (o)"_pair_adp.html, -"airebo (o)"_pair_airebo.html, -"airebo/morse (o)"_pair_airebo.html, +"airebo (oi)"_pair_airebo.html, +"airebo/morse (oi)"_pair_airebo.html, "beck (go)"_pair_beck.html, "body"_pair_body.html, "bop"_pair_bop.html, @@ -925,8 +927,8 @@ KOKKOS, o = USER-OMP, t = OPT. "dpd/tstat (go)"_pair_dpd.html, "dsmc"_pair_dsmc.html, "eam (gkiot)"_pair_eam.html, -"eam/alloy (gkot)"_pair_eam.html, -"eam/fs (gkot)"_pair_eam.html, +"eam/alloy (gkiot)"_pair_eam.html, +"eam/fs (gkiot)"_pair_eam.html, "eim (o)"_pair_eim.html, "gauss (go)"_pair_gauss.html, "gayberne (gio)"_pair_gayberne.html, @@ -940,9 +942,9 @@ KOKKOS, o = USER-OMP, t = OPT. "kim"_pair_kim.html, "lcbop"_pair_lcbop.html, "line/lj"_pair_line_lj.html, -"lj/charmm/coul/charmm (ko)"_pair_charmm.html, +"lj/charmm/coul/charmm (kio)"_pair_charmm.html, "lj/charmm/coul/charmm/implicit (ko)"_pair_charmm.html, -"lj/charmm/coul/long (giko)"_pair_charmm.html, +"lj/charmm/coul/long (gkio)"_pair_charmm.html, "lj/charmm/coul/msm"_pair_charmm.html, "lj/charmmfsw/coul/charmmfsh"_pair_charmm.html, "lj/charmmfsw/coul/long"_pair_charmm.html, @@ -988,7 +990,7 @@ KOKKOS, o = USER-OMP, t = OPT. "polymorphic"_pair_polymorphic.html, "python"_pair_python.html, "reax"_pair_reax.html, -"rebo (o)"_pair_airebo.html, +"rebo (oi)"_pair_airebo.html, "resquared (go)"_pair_resquared.html, "snap"_pair_snap.html, "soft (go)"_pair_soft.html, diff --git a/doc/src/Section_errors.txt b/doc/src/Section_errors.txt index 40e61a248e..f5829f92fb 100644 --- a/doc/src/Section_errors.txt +++ b/doc/src/Section_errors.txt @@ -71,7 +71,7 @@ style", with ... 
being fix, compute, pair, etc, it means that you mistyped the style name or that the command is part of an optional package which was not compiled into your executable. The list of available styles in your executable can be listed by using "the -h -command-line argument"_Section_start.html#start_7. The installation +command-line argument"_Section_start.html#start_6. The installation and compilation of optional packages is explained in the "installation instructions"_Section_start.html#start_3. @@ -7886,8 +7886,8 @@ keyword to allow for additional bonds to be formed :dd {New bond exceeded special list size in fix bond/create} :dt -See the "special_bonds extra" command -(or the "read_data extra/special/per/atom" command) +See the "read_data extra/special/per/atom" command +(or the "create_box extra/special/per/atom" command) for info on how to leave space in the special bonds list to allow for additional bonds to be formed. :dd @@ -9666,8 +9666,8 @@ you are running. :dd {Special list size exceeded in fix bond/create} :dt -See the special_bonds extra command -(or the read_data extra/special/per/atom command) +See the "read_data extra/special/per/atom" command +(or the "create_box extra/special/per/atom" command) for info on how to leave space in the special bonds list to allow for additional bonds to be formed. :dd diff --git a/doc/src/Section_example.txt b/doc/src/Section_example.txt index 26dc3b9698..f8b39be173 100644 --- a/doc/src/Section_example.txt +++ b/doc/src/Section_example.txt @@ -49,6 +49,7 @@ Lists of both kinds of directories are given below. 
Lowercase directories :h4 accelerate: run with various acceleration options (OpenMP, GPU, Phi) +airebo: polyethylene with AIREBO potential balance: dynamic load balancing, 2d system body: body particles, 2d system cmap: CMAP 5-body contributions to CHARMM force field diff --git a/doc/src/Section_howto.txt b/doc/src/Section_howto.txt index f2f2561af8..6d699fe24b 100644 --- a/doc/src/Section_howto.txt +++ b/doc/src/Section_howto.txt @@ -54,7 +54,7 @@ restart files can be saved to disk using the "restart"_restart.html command. At a later time, these binary files can be read via a "read_restart"_read_restart.html command in a new script. Or they can be converted to text data files using the "-r command-line -switch"_Section_start.html#start_7 and read by a +switch"_Section_start.html#start_6 and read by a "read_data"_read_data.html command in a new script. Here we give examples of 2 scripts that read either a binary restart @@ -337,7 +337,7 @@ All of the above examples work whether you are running on 1 or multiple processors, but assumed you are running LAMMPS on a single partition of processors. LAMMPS can be run on multiple partitions via the "-partition" command-line switch as described in "this -section"_Section_start.html#start_7 of the manual. +section"_Section_start.html#start_6 of the manual. In the last 2 examples, if LAMMPS were run on 3 partitions, the same scripts could be used if the "index" and "loop" variables were @@ -387,7 +387,7 @@ for more info on packages. In all these cases, you must run with one or more processors per replica. The processors assigned to each replica are determined at run-time by using the "-partition command-line -switch"_Section_start.html#start_7 to launch LAMMPS on multiple +switch"_Section_start.html#start_6 to launch LAMMPS on multiple partitions, which in this context are the same as replicas. E.g. 
these commands: @@ -395,7 +395,7 @@ mpirun -np 16 lmp_linux -partition 8x2 -in in.temper mpirun -np 8 lmp_linux -partition 8x1 -in in.neb :pre would each run 8 replicas, on either 16 or 8 processors. Note the use -of the "-in command-line switch"_Section_start.html#start_7 to specify +of the "-in command-line switch"_Section_start.html#start_6 to specify the input script which is required when running in multi-replica mode. Also note that with MPI installed on a machine (e.g. your desktop), @@ -1872,7 +1872,7 @@ void lammps_free(void *) :pre The lammps_open() function is used to initialize LAMMPS, passing in a list of strings as if they were "command-line -arguments"_Section_start.html#start_7 when LAMMPS is run in +arguments"_Section_start.html#start_6 when LAMMPS is run in stand-alone mode from the command line, and a MPI communicator for LAMMPS to run under. It returns a ptr to the LAMMPS object that is created, and which is used in subsequent library calls. The diff --git a/doc/src/Section_packages.txt b/doc/src/Section_packages.txt index 76f88b8ab8..16864bcdc4 100644 --- a/doc/src/Section_packages.txt +++ b/doc/src/Section_packages.txt @@ -25,6 +25,17 @@ There are two kinds of packages in LAMMPS, standard and user packages: "Table of standard packages"_#table_standard "Table of user packages"_#table_user :ul +Either of these kinds of packages may work as is, may require some +additional code compiled located in the lib folder, or may require +an external library to be downloaded, compiled, installed, and LAMMPS +configured to know about its location and additional compiler flags. +You can often do the build of the internal or external libraries +in one step by typing "make lib-name args='...'" from the src dir, +with appropriate arguments included in args='...'. If you just type +"make lib-name" you should see a help message about supported flags +and some examples. 
For more details about this, please study the +tables below and the sections about the individual packages. + Standard packages are supported by the LAMMPS developers and are written in a syntax and style consistent with the rest of LAMMPS. This means the developers will answer questions about them, debug and @@ -34,7 +45,9 @@ LAMMPS. User packages have been contributed by users, and begin with the "user" prefix. If they are a single command (single file), they are typically in the user-misc package. User packages don't necessarily -meet the requirements of the standard packages. If you have problems +meet the requirements of the standard packages. This means the +developers will try to keep things working and usually can answer +technical questions about compiling the package. If you have problems using a feature provided in a user package, you may need to contact the contributor directly to get help. Information on how to submit additions you make to LAMMPS as single files or as a standard or user @@ -78,10 +91,10 @@ Package, Description, Doc page, Example, Library "COMPRESS"_#COMPRESS, I/O compression, "dump */gz"_dump.html, -, sys "CORESHELL"_#CORESHELL, adiabatic core/shell model, "Section 6.6.25"_Section_howto.html#howto_25, coreshell, - "DIPOLE"_#DIPOLE, point dipole particles, "pair_style dipole/cut"_pair_dipole.html, dipole, - -"GPU"_#GPU, GPU-enabled styles, "Section 5.3.1"_accelerate_gpu.html, WWW bench, int +"GPU"_#GPU, GPU-enabled styles, "Section 5.3.1"_accelerate_gpu.html, "Benchmarks"_http://lammps.sandia.gov/bench.html, int "GRANULAR"_#GRANULAR, granular systems, "Section 6.6.6"_Section_howto.html#howto_6, pour, - -"KIM"_#KIM, openKIM wrapper, "pair_style kim"_pair_kim.html, kim, ext -"KOKKOS"_#KOKKOS, Kokkos-enabled styles, "Section 5.3.3"_accelerate_kokkos.html, WWW bench, - +"KIM"_#KIM, OpenKIM wrapper, "pair_style kim"_pair_kim.html, kim, ext +"KOKKOS"_#KOKKOS, Kokkos-enabled styles, "Section 5.3.3"_accelerate_kokkos.html, 
"Benchmarks"_http://lammps.sandia.gov/bench.html, - "KSPACE"_#KSPACE, long-range Coulombic solvers, "kspace_style"_kspace_style.html, peptide, - "MANYBODY"_#MANYBODY, many-body potentials, "pair_style tersoff"_pair_tersoff.html, shear, - "MC"_#MC, Monte Carlo options, "fix gcmc"_fix_gcmc.html, -, - @@ -90,7 +103,7 @@ Package, Description, Doc page, Example, Library "MOLECULE"_#MOLECULE, molecular system force fields, "Section 6.6.3"_Section_howto.html#howto_3, peptide, - "MPIIO"_#MPIIO, MPI parallel I/O dump and restart, "dump"_dump.html, -, - "MSCG"_#MSCG, multi-scale coarse-graining wrapper, "fix mscg"_fix_mscg.html, mscg, ext -"OPT"_#OPT, optimized pair styles, "Section 5.3.5"_accelerate_opt.html, WWW bench, - +"OPT"_#OPT, optimized pair styles, "Section 5.3.5"_accelerate_opt.html, "Benchmarks"_http://lammps.sandia.gov/bench.html, - "PERI"_#PERI, Peridynamics models, "pair_style peri"_pair_peri.html, peri, - "POEMS"_#POEMS, coupled rigid body motion, "fix poems"_fix_poems.html, rigid, int "PYTHON"_#PYTHON, embed Python code in an input script, "python"_python.html, python, sys @@ -101,8 +114,7 @@ Package, Description, Doc page, Example, Library "SHOCK"_#SHOCK, shock loading methods, "fix msst"_fix_msst.html, -, - "SNAP"_#SNAP, quantum-fitted potential, "pair snap"_pair_snap.html, snap, - "SRD"_#SRD, stochastic rotation dynamics, "fix srd"_fix_srd.html, srd, - -"VORONOI"_#VORONOI, Voronoi tesselation, "compute voronoi/atom"_compute_voronoi_atom.html, -, ext -:tb(ea=c,ca1=l) +"VORONOI"_#VORONOI, Voronoi tesselation, "compute voronoi/atom"_compute_voronoi_atom.html, -, ext :tb(ea=c,ca1=l) [USER packages] :link(table_user),p @@ -118,7 +130,7 @@ Package, Description, Doc page, Example, Library "USER-EFF"_#USER-EFF, electron force field,"pair_style eff/cut"_pair_eff.html, USER/eff, - "USER-FEP"_#USER-FEP, free energy perturbation,"compute fep"_compute_fep.html, USER/fep, - "USER-H5MD"_#USER-H5MD, dump output via HDF5,"dump h5md"_dump_h5md.html, -, ext 
-"USER-INTEL"_#USER-INTEL, optimized Intel CPU and KNL styles,"Section 5.3.2"_accelerate_intel.html, WWW bench, - +"USER-INTEL"_#USER-INTEL, optimized Intel CPU and KNL styles,"Section 5.3.2"_accelerate_intel.html, "Benchmarks"_http://lammps.sandia.gov/bench.html, - "USER-LB"_#USER-LB, Lattice Boltzmann fluid,"fix lb/fluid"_fix_lb_fluid.html, USER/lb, - "USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surfaces,"fix manifoldforce"_fix_manifoldforce.html, USER/manifold, - "USER-MEAMC"_#USER-MEAMC, modified EAM potential (C++), "pair_style meam/c"_pair_meam.html, meam, - @@ -126,7 +138,7 @@ Package, Description, Doc page, Example, Library "USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER/misc, - "USER-MOLFILE"_#USER-MOLFILE, "VMD"_vmd_home molfile plug-ins,"dump molfile"_dump_molfile.html, -, ext "USER-NETCDF"_#USER-NETCDF, dump output via NetCDF,"dump netcdf"_dump_netcdf.html, -, ext -"USER-OMP"_#USER-OMP, OpenMP-enabled styles,"Section 5.3.4"_accelerate_omp.html, WWW bench, - +"USER-OMP"_#USER-OMP, OpenMP-enabled styles,"Section 5.3.4"_accelerate_omp.html, "Benchmarks"_http://lammps.sandia.gov/bench.html, - "USER-PHONON"_#USER-PHONON, phonon dynamical matrix,"fix phonon"_fix_phonon.html, USER/phonon, - "USER-QMMM"_#USER-QMMM, QM/MM coupling,"fix qmmm"_fix_qmmm.html, USER/qmmm, ext "USER-QTB"_#USER-QTB, quantum nuclear effects,"fix qtb"_fix_qtb.html "fix qbmsst"_fix_qbmsst.html, qtb, - @@ -136,8 +148,7 @@ Package, Description, Doc page, Example, Library "USER-SMTBQ"_#USER-SMTBQ, second moment tight binding QEq potential,"pair_style smtbq"_pair_smtbq.html, USER/smtbq, - "USER-SPH"_#USER-SPH, smoothed particle hydrodynamics,"SPH User Guide"_PDF/SPH_LAMMPS_userguide.pdf, USER/sph, - "USER-TALLY"_#USER-TALLY, pairwise tally computes,"compute XXX/tally"_compute_tally.html, USER/tally, - -"USER-VTK"_#USER-VTK, dump output via VTK, "compute vtk"_dump_vtk.html, -, ext -:tb(ea=c,ca1=l) +"USER-VTK"_#USER-VTK, dump output via VTK, "compute 
vtk"_dump_vtk.html, -, ext :tb(ea=c,ca1=l) :line :line @@ -364,12 +375,15 @@ GPU package :link(GPU),h4 [Contents:] Dozens of pair styles and a version of the PPPM long-range Coulombic -solver optimized for NVIDIA GPUs. All such styles have a "gpu" as a -suffix in their style name. "Section 5.3.1"_accelerate_gpu.html gives -details of what hardware and Cuda software is required on your system, +solver optimized for GPUs. All such styles have a "gpu" as a +suffix in their style name. The GPU code can be compiled with either +CUDA or OpenCL; however, the OpenCL variants are no longer actively +maintained and only the CUDA versions are regularly tested. +"Section 5.3.1"_accelerate_gpu.html gives details of what +hardware and GPU software is required on your system, and details on how to build and use this package. Its styles can be invoked at run time via the "-sf gpu" or "-suffix gpu" "command-line -switches"_Section_start.html#start_7. See also the "KOKKOS"_#KOKKOS +switches"_Section_start.html#start_6. See also the "KOKKOS"_#KOKKOS package, which has GPU-enabled styles. [Authors:] Mike Brown (Intel) while at Sandia and ORNL and Trung Nguyen @@ -378,32 +392,41 @@ package, which has GPU-enabled styles. [Install or un-install:] Before building LAMMPS with this package, you must first build the GPU -library in lib/gpu from a set of provided C and Cuda files. You can +library in lib/gpu from a set of provided C and CUDA files. You can do this manually if you prefer; follow the instructions in -lib/gpu/README. You can also do it in one step from the lammps/src +lib/gpu/README. Please note that the GPU library uses MPI calls, so +you have to make certain to use the same MPI library (or the STUBS +library) settings as the main LAMMPS code. The same applies to the +-DLAMMPS_BIGBIG, -DLAMMPS_SMALLBIG, or -DLAMMPS_SMALLSMALL define.
+ +You can also do it in one step from the lammps/src dir, using a command like these, which simply invoke the lib/gpu/Install.py script with the specified args: -make lib-gpu # print help message -make lib-gpu args="-m" # build GPU library with default Makefile.linux -make lib-gpu args="-i xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision -make lib-gpu args="-i xk7 -p single -o xk7.single -m" # ditto, also build GPU library +make lib-gpu # print help message +make lib-gpu args="-b" # build GPU library with default Makefile.linux +make lib-gpu args="-m xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision +make lib-gpu args="-m mpi -p mixed -b" # build GPU library with mixed precision using settings in Makefile.mpi :pre -Note that this procedure starts with one of the existing -Makefile.machine files in lib/gpu. It allows you to alter 4 important -settings in that Makefile, via the -h, -a, -p, -e switches, -and save the new Makefile, if desired: +Note that this procedure through the '-m machine' flag starts with one of +the existing Makefile.machine files in lib/gpu. For your convenience, +machine makefiles for "mpi" and "serial" are provided, which have the +same settings as the corresponding machine makefiles in the main LAMMPS +source folder. In addition you can alter 4 important settings in that +Makefile, via the -h, -a, -p, -e switches, and also save a copy of the +new Makefile, if desired: -CUDA_HOME = where NVIDIA Cuda software is installed on your system +CUDA_HOME = where NVIDIA CUDA software is installed on your system CUDA_ARCH = what GPU hardware you have (see help message for details) CUDA_PRECISION = precision (double, mixed, single) EXTRAMAKE = which Makefile.lammps.* file to copy to Makefile.lammps :ul -If the library build is successful, 2 files should be created: -lib/gpu/libgpu.a and lib/gpu/Makefile.lammps. 
The latter has settings -that enable LAMMPS to link with Cuda libraries. If the settings in -Makefile.lammps for your machine are not correct, the LAMMPS build -will fail. +If the library build is successful, at least 3 files should be created: +lib/gpu/libgpu.a, lib/gpu/nvc_get_devices, and lib/gpu/Makefile.lammps. +The latter has settings that enable LAMMPS to link with CUDA libraries. +If the settings in Makefile.lammps for your machine are not correct, +the LAMMPS build will fail, and lib/gpu/Makefile.lammps may need to +be edited. You can then install/un-install the package and build LAMMPS in the usual manner: @@ -427,8 +450,8 @@ src/GPU/README lib/gpu/README "Section 5.3"_Section_accelerate.html#acc_3 "Section 5.3.1"_accelerate_gpu.html -"Section 2.7 -sf gpu"_Section_start.html#start_7 -"Section 2.7 -pk gpu"_Section_start.html#start_7 +"Section 2.6 -sf gpu"_Section_start.html#start_6 +"Section 2.6 -pk gpu"_Section_start.html#start_6 "package gpu"_package.html Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 for pair styles followed by (g) "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul @@ -492,14 +515,40 @@ Minnesota). [Install or un-install:] -Using this package requires the KIM library and its models -(interatomic potentials) to be downloaded and installed on your -system. The library can be downloaded and built in lib/kim or -elsewhere on your system. Details of the download, build, and install -process for KIM are given in the lib/kim/README file. +Before building LAMMPS with this package, you must first download and +build the KIM library and include the KIM models that you want to +use. You can do this manually if you prefer; follow the instructions +in lib/kim/README. You can also do it in one step from the lammps/src +dir, using a command like these, which simply invoke the +lib/kim/Install.py script with the specified args. 
-Once that process is complete, you can then install/un-install the -package and build LAMMPS in the usual manner: +make lib-kim # print help message +make lib-kim args="-b " # (re-)install KIM API lib with only example models +make lib-kim args="-b -a Glue_Ercolessi_Adams_Al__MO_324507536345_001" # ditto plus one model +make lib-kim args="-b -a everything" # install KIM API lib with all models +make lib-kim args="-n -a EAM_Dynamo_Ackland_W__MO_141627196590_002" # add one model or model driver +make lib-kim args="-p /usr/local/kim-api" # use an existing KIM API installation at the provided location +make lib-kim args="-p /usr/local/kim-api -a EAM_Dynamo_Ackland_W__MO_141627196590_002" # ditto but add one model or driver :pre + +Note that in LAMMPS lingo, a KIM model driver is a pair style +(e.g. EAM or Tersoff). A KIM model is a pair style for a particular +element or alloy and set of parameters, e.g. EAM for Cu with a +specific EAM potential file. Also note that installing the KIM API +library with all its models may take around 30 min to build. Of +course, you only need to do that once. + +See the list of KIM model drivers here: +https://openkim.org/kim-items/model-drivers/alphabetical + +See the list of all KIM models here: +https://openkim.org/kim-items/models/by-model-drivers + +See the list of example KIM models included by default here: +https://openkim.org/kim-api in the "What is in the KIM API source +package?" section + +You can then install/un-install the package and build LAMMPS in the +usual manner: make yes-kim make machine :pre @@ -523,13 +572,13 @@ KOKKOS package :link(KOKKOS),h4 Dozens of atom, pair, bond, angle, dihedral, improper, fix, compute styles adapted to compile using the Kokkos library which can convert -them to OpenMP or Cuda code so that they run efficiently on multicore +them to OpenMP or CUDA code so that they run efficiently on multicore CPUs, KNLs, or GPUs. All the styles have a "kk" as a suffix in their style name.
"Section 5.3.3"_accelerate_kokkos.html gives details of what hardware and software is required on your system, and how to build and use this package. Its styles can be invoked at run time via the "-sf kk" or "-suffix kk" "command-line -switches"_Section_start.html#start_7. Also see the "GPU"_#GPU, +switches"_Section_start.html#start_6. Also see the "GPU"_#GPU, "OPT"_#OPT, "USER-INTEL"_#USER-INTEL, and "USER-OMP"_#USER-OMP packages, which have styles optimized for CPUs, KNLs, and GPUs. @@ -553,28 +602,28 @@ files for examples. For multicore CPUs using OpenMP: KOKKOS_DEVICES = OpenMP -KOKKOS_ARCH = HSW # HSW = Haswell, SNB = SandyBridge, BDW = Broadwell, etc +KOKKOS_ARCH = HSW # HSW = Haswell, SNB = SandyBridge, BDW = Broadwell, etc :pre For Intel KNLs using OpenMP: KOKKOS_DEVICES = OpenMP -KOKKOS_ARCH = KNL +KOKKOS_ARCH = KNL :pre -For NVIDIA GPUs using Cuda: +For NVIDIA GPUs using CUDA: KOKKOS_DEVICES = Cuda KOKKOS_ARCH = Pascal60,Power8 # P100 hosted by an IBM Power8, etc -KOKKOS_ARCH = Kepler37,Power8 # K80 hosted by an IBM Power8, etc +KOKKOS_ARCH = Kepler37,Power8 # K80 hosted by an IBM Power8, etc :pre For GPUs, you also need these 2 lines in your Makefile.machine before the CC line is defined, in this case for use with OpenMPI mpicxx. The 2 lines define a nvcc wrapper compiler, which will use nvcc for -compiling Cuda files or use a C++ compiler for non-Kokkos, non-Cuda +compiling CUDA files or use a C++ compiler for non-Kokkos, non-CUDA files. KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) export OMPI_CXX = $(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper -CC = mpicxx +CC = mpicxx :pre Once you have an appropriate Makefile.machine, you can install/un-install the package and build LAMMPS in the usual manner. 
@@ -597,9 +646,9 @@ src/KOKKOS/README lib/kokkos/README "Section 5.3"_Section_accelerate.html#acc_3 "Section 5.3.3"_accelerate_kokkos.html -"Section 2.7 -k on ..."_Section_start.html#start_7 -"Section 2.7 -sf kk"_Section_start.html#start_7 -"Section 2.7 -pk kokkos"_Section_start.html#start_7 +"Section 2.6 -k on ..."_Section_start.html#start_6 +"Section 2.6 -sf kk"_Section_start.html#start_6 +"Section 2.6 -pk kokkos"_Section_start.html#start_6 "package kokkos"_package.html Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (k) "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul @@ -710,6 +759,12 @@ MEAM package :link(MEAM),h4 A pair style for the modified embedded atom (MEAM) potential. +Please note that the MEAM package has been superseded by the +"USER-MEAMC"_#USER-MEAMC package, which is a direct translation +of the MEAM package to C++. USER-MEAMC contains additional +optimizations making it run faster than MEAM on most machines, +while providing identical features and user interface. + [Author:] Greg Wagner (Northwestern U) while at Sandia. [Install or un-install:] @@ -720,9 +775,10 @@ follow the instructions in lib/meam/README. You can also do it in one step from the lammps/src dir, using a command like these, which simply invoke the lib/meam/Install.py script with the specified args: -make lib-meam # print help message -make lib-meam args="-m gfortran" # build with GNU Fortran compiler -make lib-meam args="-m ifort" # build with Intel ifort compiler :pre +make lib-meam # print help message +make lib-meam args="-m mpi" # build with default Fortran compiler compatible with your MPI library +make lib-meam args="-m serial" # build with compiler compatible with "make serial" (GNU Fortran) +make lib-meam args="-m ifort" # build with Intel Fortran compiler using Makefile.ifort :pre The build should produce two files: lib/meam/libmeam.a and lib/meam/Makefile.lammps.
The latter is copied from an existing @@ -765,6 +821,9 @@ A variety of compute, fix, pair, dump styles with specialized capabilities that don't align with other packages. Do a directory listing, "ls src/MISC", to see the list of commands. +NOTE: the MISC package contains styles that require using the +-restrict flag, when compiling with Intel compilers. + [Install or un-install:] make yes-misc @@ -878,9 +937,9 @@ University of Chicago. Before building LAMMPS with this package, you must first download and build the MS-CG library. Building the MS-CG library and using it from -LAMMPS requires a C++11 compatible compiler, and that LAPACK and GSL -(GNU Scientific Library) libraries be installed on your machine. See -the lib/mscg/README and MSCG/Install files for more details. +LAMMPS requires a C++11 compatible compiler and that the GSL +(GNU Scientific Library) headers and libraries are installed on your +machine. See the lib/mscg/README and MSCG/Install files for more details. Assuming these libraries are in place, you can do the download and build of MS-CG manually if you prefer; follow the instructions in @@ -888,15 +947,16 @@ lib/mscg/README. You can also do it in one step from the lammps/src dir, using a command like these, which simply invoke the lib/mscg/Install.py script with the specified args: -make lib-mscg # print help message -make lib-mscg args="-g -b -l" # download and build in default lib/mscg/MSCG-release-master -make lib-mscg args="-h . 
MSCG -g -b -l" # download and build in lib/mscg/MSCG -make lib-mscg args="-h ~ MSCG -g -b -l" # download and build in ~/mscg :pre +make lib-mscg # print help message +make lib-mscg args="-b -m serial" # download and build in lib/mscg/MSCG-release-master + # with the settings compatible with "make serial" +make lib-mscg args="-b -m mpi" # download and build in lib/mscg/MSCG-release-master + # with the settings compatible with "make mpi" +make lib-mscg args="-p /usr/local/mscg-release" # use the existing MS-CG installation in /usr/local/mscg-release :pre -Note that the final -l switch is to create 2 symbolic (soft) links, -"includelink" and "liblink", in lib/mscg to point to the MS-CG src -dir. When LAMMPS builds it will use these links. You should not need -to edit the lib/mscg/Makefile.lammps file. +Note that 2 symbolic (soft) links, "includelink" and "liblink", will be created in lib/mscg +to point to the MS-CG src/installation dir. When LAMMPS is built in src it will use these links. +You should not need to edit the lib/mscg/Makefile.lammps file. You can then install/un-install the package and build LAMMPS in the usual manner: @@ -926,7 +986,7 @@ CHARMM, and Morse potentials. The styles have an "opt" suffix in their style name. "Section 5.3.5"_accelerate_opt.html gives details of how to build and use this package. Its styles can be invoked at run time via the "-sf opt" or "-suffix opt" "command-line -switches"_Section_start.html#start_7. See also the "KOKKOS"_#KOKKOS, +switches"_Section_start.html#start_6. See also the "KOKKOS"_#KOKKOS, "USER-INTEL"_#USER-INTEL, and "USER-OMP"_#USER-OMP packages, which have styles optimized for CPU performance. @@ -942,18 +1002,18 @@ make no-opt make machine :pre NOTE: The compile flag "-restrict" must be used to build LAMMPS with -the OPT package. It should be added to the CCFLAGS line of your -Makefile.machine. See Makefile.opt in src/MAKE/OPTIONS for an -example. +the OPT package when using Intel compilers. 
It should be added to +the CCFLAGS line of your Makefile.machine. See Makefile.opt in +src/MAKE/OPTIONS for an example. -CCFLAGS: add -restrict :ul +CCFLAGS: add -restrict for Intel compilers :ul [Supporting info:] src/OPT: filenames -> commands "Section 5.3"_Section_accelerate.html#acc_3 "Section 5.3.5"_accelerate_opt.html -"Section 2.7 -sf opt"_Section_start.html#start_7 +"Section 2.6 -sf opt"_Section_start.html#start_6 Pair Styles section of "Section 3.5"_Section_commands.html#cmd_5 for pair styles followed by (t) "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul @@ -1015,9 +1075,10 @@ follow the instructions in lib/poems/README. You can also do it in one step from the lammps/src dir, using a command like these, which simply invoke the lib/poems/Install.py script with the specified args: -make lib-poems # print help message -make lib-poems args="-m g++" # build with GNU g++ compiler -make lib-poems args="-m icc" # build with Intel icc compiler :pre +make lib-poems # print help message +make lib-poems args="-m serial" # build with GNU g++ compiler (settings as with "make serial") +make lib-poems args="-m mpi" # build with default MPI C++ compiler (settings as with "make mpi") +make lib-poems args="-m icc" # build with Intel icc compiler :pre The build should produce two files: lib/poems/libpoems.a and lib/poems/Makefile.lammps. The latter is copied from an existing @@ -1127,9 +1188,10 @@ follow the instructions in lib/reax/README. 
You can also do it in one step from the lammps/src dir, using a command like these, which simply invoke the lib/reax/Install.py script with the specified args: -make lib-reax # print help message -make lib-reax args="-m gfortran" # build with GNU Fortran compiler -make lib-reax args="-m ifort" # build with Intel ifort compiler :pre +make lib-reax # print help message +make lib-reax args="-m serial" # build with GNU Fortran compiler (settings as with "make serial") +make lib-reax args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi") +make lib-reax args="-m ifort" # build with Intel ifort compiler :pre The build should produce two files: lib/reax/libreax.a and lib/reax/Makefile.lammps. The latter is copied from an existing @@ -1346,15 +1408,15 @@ one step from the lammps/src dir, using a command like these, which simply invoke the lib/voronoi/Install.py script with the specified args: -make lib-voronoi # print help message -make lib-voronoi args="-g -b -l" # download and build in default lib/voronoi/voro++-0.4.6 -make lib-voronoi args="-h . voro++ -g -b -l" # download and build in lib/voronoi/voro++ -make lib-voronoi args="-h ~ voro++ -g -b -l" # download and build in ~/voro++ :pre +make lib-voronoi # print help message +make lib-voronoi args="-b" # download and build the default version in lib/voronoi/voro++- +make lib-voronoi args="-p $HOME/voro++" # use existing Voro++ installation in $HOME/voro++ +make lib-voronoi args="-b -v voro++0.4.6" # download and build the 0.4.6 version in lib/voronoi/voro++-0.4.6 :pre -Note that the final -l switch is to create 2 symbolic (soft) links, -"includelink" and "liblink", in lib/voronoi to point to the Voro++ src -dir. When LAMMPS builds it will use these links. You should not need -to edit the lib/voronoi/Makefile.lammps file. +Note that 2 symbolic (soft) links, "includelink" and "liblink", are +created in lib/voronoi to point to the Voro++ src dir. 
When LAMMPS +builds in src it will use these links. You should not need to edit +the lib/voronoi/Makefile.lammps file. You can then install/un-install the package and build LAMMPS in the usual manner: @@ -1396,7 +1458,8 @@ from the lammps/src dir, using a command like these, which simply invoke the lib/atc/Install.py script with the specified args: make lib-atc # print help message -make lib-atc args="-m g++" # build with GNU g++ compiler +make lib-atc args="-m serial" # build with GNU g++ compiler and MPI STUBS (settings as with "make serial") +make lib-atc args="-m mpi" # build with default MPI compiler (settings as with "make mpi") make lib-atc args="-m icc" # build with Intel icc compiler :pre The build should produce two files: lib/atc/libatc.a and @@ -1413,8 +1476,10 @@ can either exist on your system, or you can use the files provided in lib/linalg. In the latter case you also need to build the library in lib/linalg with a command like these: -make lib-linalg # print help message -make lib-atc args="-m gfortran" # build with GNU Fortran compiler +make lib-linalg # print help message +make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial") +make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi") +make lib-linalg args="-m gfortran" # build with GNU Fortran compiler :pre You can then install/un-install the package and build LAMMPS in the usual manner: @@ -1454,9 +1519,10 @@ follow the instructions in lib/awpmd/README. 
You can also do it in one step from the lammps/src dir, using a command like these, which simply invoke the lib/awpmd/Install.py script with the specified args: -make lib-awpmd # print help message -make lib-awpmd args="-m g++" # build with GNU g++ compiler -make lib-awpmd args="-m icc" # build with Intel icc compiler :pre +make lib-awpmd # print help message +make lib-awpmd args="-m serial" # build with GNU g++ compiler and MPI STUBS (settings as with "make serial") +make lib-awpmd args="-m mpi" # build with default MPI compiler (settings as with "make mpi") +make lib-awpmd args="-m icc" # build with Intel icc compiler :pre The build should produce two files: lib/awpmd/libawpmd.a and lib/awpmd/Makefile.lammps. The latter is copied from an existing @@ -1472,8 +1538,10 @@ these can either exist on your system, or you can use the files provided in lib/linalg. In the latter case you also need to build the library in lib/linalg with a command like these: -make lib-linalg # print help message -make lib-atc args="-m gfortran" # build with GNU Fortran compiler +make lib-linalg # print help message +make lib-linalg args="-m serial" # build with GNU Fortran compiler (settings as with "make serial") +make lib-linalg args="-m mpi" # build with default MPI Fortran compiler (settings as with "make mpi") +make lib-linalg args="-m gfortran" # build with GNU Fortran compiler :pre You can then install/un-install the package and build LAMMPS in the usual manner: @@ -1566,9 +1634,11 @@ Restraints. A "fix colvars"_fix_colvars.html command is implemented which wraps a COLVARS library, which implements these methods. simulations. -[Authors:] Axel Kohlmeyer (Temple U). The COLVARS library was written -by Giacomo Fiorin (ICMS, Temple University, Philadelphia, PA, USA) and -Jerome Henin (LISM, CNRS, Marseille, France). 
+[Authors:] The COLVARS library is written and maintained by +Giacomo Fiorin (ICMS, Temple University, Philadelphia, PA, USA) +and Jerome Henin (LISM, CNRS, Marseille, France), originally for +the NAMD MD code, but with portability in mind. Axel Kohlmeyer +(Temple U) provided the interface to LAMMPS. [Install or un-install:] @@ -1580,7 +1650,9 @@ which simply invoke the lib/colvars/Install.py script with the specified args: make lib-colvars # print help message -make lib-colvars args="-m g++" # build with GNU g++ compiler :pre +make lib-colvars args="-m serial" # build with GNU g++ compiler (settings as with "make serial") +make lib-colvars args="-m mpi" # build with default MPI compiler (settings as with "make mpi") +make lib-colvars args="-m g++-debug" # build with GNU g++ compiler and colvars debugging enabled :pre The build should produce two files: lib/colvars/libcolvars.a and lib/colvars/Makefile.lammps. The latter is copied from an existing @@ -1863,12 +1935,17 @@ All of them have an "intel" in their style name. "Section 5.3.2"_accelerate_intel.html gives details of what hardware and compilers are required on your system, and how to build and use this package. Its styles can be invoked at run time via the "-sf intel" or -"-suffix intel" "command-line switches"_Section_start.html#start_7. +"-suffix intel" "command-line switches"_Section_start.html#start_6. Also see the "KOKKOS"_#KOKKOS, "OPT"_#OPT, and "USER-OMP"_#USER-OMP packages, which have styles optimized for CPUs and KNLs. You need to have an Intel compiler, version 14 or higher to take full -advantage of this package. +advantage of this package. While compilation with GNU compilers is +supported, performance will be suboptimal. + +NOTE: the USER-INTEL package contains styles that require using the +-restrict flag, when compiling with Intel compilers. + [Author:] Mike Brown (Intel). 
@@ -1885,17 +1962,17 @@ For CPUs: OPTFLAGS = -xHost -O2 -fp-model fast=2 -no-prec-div -qoverride-limits CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) +-fno-alias -ansi-alias -restrict $(OPTFLAGS) LINKFLAGS = -g -qopenmp $(OPTFLAGS) -LIB = -ltbbmalloc -ltbbmalloc_proxy +LIB = -ltbbmalloc -ltbbmalloc_proxy :pre For KNLs: OPTFLAGS = -xMIC-AVX512 -O2 -fp-model fast=2 -no-prec-div -qoverride-limits CCFLAGS = -g -qopenmp -DLAMMPS_MEMALIGN=64 -no-offload \ - -fno-alias -ansi-alias -restrict $(OPTFLAGS) +-fno-alias -ansi-alias -restrict $(OPTFLAGS) LINKFLAGS = -g -qopenmp $(OPTFLAGS) -LIB = -ltbbmalloc +LIB = -ltbbmalloc :pre Once you have an appropriate Makefile.machine, you can install/un-install the package and build LAMMPS in the usual manner. @@ -1919,8 +1996,8 @@ src/USER-INTEL: filenames -> commands src/USER-INTEL/README "Section 5.3"_Section_accelerate.html#acc_3 "Section 5.3.2"_accelerate_gpu.html -"Section 2.7 -sf intel"_Section_start.html#start_7 -"Section 2.7 -pk intel"_Section_start.html#start_7 +"Section 2.6 -sf intel"_Section_start.html#start_6 +"Section 2.6 -pk intel"_Section_start.html#start_6 "package intel"_package.html Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (i) src/USER-INTEL/TEST @@ -2193,18 +2270,22 @@ via OpenMP directives. All of them have an "omp" in their style name. "Section 5.3.4"_accelerate_omp.html gives details of what hardware and compilers are required on your system, and how to build and use this package. Its styles can be invoked at run time via the "-sf omp" or -"-suffix omp" "command-line switches"_Section_start.html#start_7. +"-suffix omp" "command-line switches"_Section_start.html#start_6. Also see the "KOKKOS"_#KOKKOS, "OPT"_#OPT, and "USER-INTEL"_#USER-INTEL packages, which have styles optimized for CPUs. [Author:] Axel Kohlmeyer (Temple U). 
-NOTE: The compile flags "-restrict" and "-fopenmp" must be used to -build LAMMPS with the USER-OMP package, as well as the link flag -"-fopenmp". They should be added to the CCFLAGS and LINKFLAGS lines -of your Makefile.machine. See src/MAKE/OPTIONS/Makefile.omp for an -example. +NOTE: To enable multi-threading support the compile flag "-fopenmp" +and the link flag "-fopenmp" (for GNU compilers, you have to look up +the equivalent flags for other compilers) must be used to build LAMMPS. +When using Intel compilers, also the "-restrict" flag is required. +The USER-OMP package can be compiled without enabling OpenMP; then +all code will be compiled as serial and the only improvement over the +regular styles is some data access optimization. These flags should +be added to the CCFLAGS and LINKFLAGS lines of your Makefile.machine. +See src/MAKE/OPTIONS/Makefile.omp for an example. Once you have an appropriate Makefile.machine, you can install/un-install the package and build LAMMPS in the usual manner: @@ -2217,7 +2298,7 @@ make machine :pre make no-user-omp make machine :pre -CCFLAGS: add -fopenmp and -restrict +CCFLAGS: add -fopenmp (and -restrict when using Intel compilers) LINKFLAGS: add -fopenmp :ul [Supporting info:] @@ -2226,8 +2307,8 @@ src/USER-OMP: filenames -> commands src/USER-OMP/README "Section 5.3"_Section_accelerate.html#acc_3 "Section 5.3.4"_accelerate_omp.html -"Section 2.7 -sf omp"_Section_start.html#start_7 -"Section 2.7 -pk omp"_Section_start.html#start_7 +"Section 2.6 -sf omp"_Section_start.html#start_6 +"Section 2.6 -pk omp"_Section_start.html#start_6 "package omp"_package.html Styles sections of "Section 3.5"_Section_commands.html#cmd_5 for styles followed by (o) "Benchmarks page"_http://lammps.sandia.gov/bench.html of web site :ul @@ -2286,12 +2367,14 @@ without changes to LAMMPS itself. Before building LAMMPS with this package, you must first build the QMMM library in lib/qmmm.
You can do this manually if you prefer; -follow the first two steps explained in lib/colvars/README. You can +follow the first two steps explained in lib/qmmm/README. You can also do it in one step from the lammps/src dir, using a command like -these, which simply invoke the lib/colvars/Install.py script with the +these, which simply invoke the lib/qmmm/Install.py script with the specified args: make lib-qmmm # print help message +make lib-qmmm args="-m serial" # build with GNU Fortran compiler (settings as in "make serial") +make lib-qmmm args="-m mpi" # build with default MPI compiler (settings as in "make mpi") make lib-qmmm args="-m gfortran" # build with GNU Fortran compiler :pre The build should produce two files: lib/qmmm/libqmmm.a and @@ -2468,15 +2551,13 @@ follow the instructions in lib/smd/README. You can also do it in one step from the lammps/src dir, using a command like these, which simply invoke the lib/smd/Install.py script with the specified args: -make lib-smd # print help message -make lib-smd args="-g -l" # download in default lib/smd/eigen-eigen-* -make lib-smd args="-h . eigen -g -l" # download in lib/smd/eigen -make lib-smd args="-h ~ eigen -g -l" # download and build in ~/eigen :pre +make lib-smd # print help message +make lib-smd args="-b" # download and build in default lib/smd/eigen-eigen-... +make lib-smd args="-p /usr/include/eigen3" # use existing Eigen installation in /usr/include/eigen3 :pre -Note that the final -l switch is to create a symbolic (soft) link -named "includelink" in lib/smd to point to the Eigen dir. When LAMMPS -builds it will use this link. You should not need to edit the -lib/smd/Makefile.lammps file. +Note that a symbolic (soft) link named "includelink" is created in +lib/smd to point to the Eigen dir. When LAMMPS builds it will use +this link. You should not need to edit the lib/smd/Makefile.lammps file. 
You can then install/un-install the package and build LAMMPS in the usual manner: diff --git a/doc/src/Section_python.txt b/doc/src/Section_python.txt index 1e67fca321..f4b6bdad97 100644 --- a/doc/src/Section_python.txt +++ b/doc/src/Section_python.txt @@ -198,7 +198,7 @@ file and the shared library. 11.3 Building LAMMPS as a shared library :link(py_3),h4 Instructions on how to build LAMMPS as a shared library are given in -"Section 2.5"_Section_start.html#start_5. A shared library is one +"Section 2.4"_Section_start.html#start_4. A shared library is one that is dynamically loadable, which is what Python requires to wrap LAMMPS. On Linux this is a library file that ends in ".so", not ".a". @@ -217,7 +217,7 @@ NOTE: If you are building LAMMPS with an MPI or FFT library or other auxiliary libraries (used by various packages), then all of these extra libraries must also be shared libraries. If the LAMMPS shared-library build fails with an error complaining about this, see -"Section 2.5"_Section_start.html#start_5 for more details. +"Section 2.4"_Section_start.html#start_4 for more details. :line @@ -439,7 +439,7 @@ first importing from the lammps.py file: >>> CDLL("liblammps.so") :pre If an error occurs, carefully go thru the steps in "Section -2.5"_Section_start.html#start_5 and above about building a shared +2.4"_Section_start.html#start_4 and above about building a shared library and about insuring Python can find the necessary two files it needs. diff --git a/doc/src/Section_start.txt b/doc/src/Section_start.txt index dcd320655f..a25ec11cfe 100644 --- a/doc/src/Section_start.txt +++ b/doc/src/Section_start.txt @@ -14,11 +14,11 @@ experienced users. 
2.1 "What's in the LAMMPS distribution"_#start_1 2.2 "Making LAMMPS"_#start_2 2.3 "Making LAMMPS with optional packages"_#start_3 -2.5 "Building LAMMPS as a library"_#start_4 -2.6 "Running LAMMPS"_#start_5 -2.7 "Command-line options"_#start_6 -2.8 "Screen output"_#start_7 -2.9 "Tips for users of previous versions"_#start_8 :all(b) +2.4 "Building LAMMPS as a library"_#start_4 +2.5 "Running LAMMPS"_#start_5 +2.6 "Command-line options"_#start_6 +2.7 "Screen output"_#start_7 +2.8 "Tips for users of previous versions"_#start_8 :all(b) :line @@ -434,20 +434,39 @@ files. Note that on some large parallel machines which use "modules" for their compile/link environements, you may simply need to include the correct module in your build environment. Or the parallel machine may have a vendor-provided FFT library which the compiler has no -trouble finding. +trouble finding. See the src/MAKE/OPTIONS/Makefile.fftw file for an +example of how to specify these variables to use the FFTW3 library. -FFTW is a fast, portable library that should also work on any -platform. You can download it from +FFTW is a fast, portable library that should also work on any platform +and typically be faster than KISS FFT. You can download it from "www.fftw.org"_http://www.fftw.org. Both the legacy version 2.1.X and the newer 3.X versions are supported as -DFFT_FFTW2 or -DFFT_FFTW3. -Building FFTW for your box should be as simple as ./configure; make. -Note that on some platforms FFTW2 has been pre-installed, and uses -renamed files indicating the precision it was compiled with, -e.g. sfftw.h, or dfftw.h instead of fftw.h. In this case, you can -specify an additional define variable for FFT_INC called -DFFTW_SIZE, -which will select the correct include file. In this case, for FFT_LIB -you must also manually specify the correct library, namely -lsfftw or --ldfftw. +Building FFTW for your box should be as simple as ./configure; make; +make install.
The install command typically requires root privileges +(e.g. invoke it via sudo), unless you specify a local directory with +the "--prefix" option of configure. Type "./configure --help" to see +various options. + +If you wish to have FFTW support for single-precision FFTs (see below +about -DFFT_SINGLE) in addition to the default double-precision FFTs, +you will need to build FFTW a second time for single-precision. For +FFTW3, do this via: + +make clean +./configure --enable-single; make; make install :pre + +which should produce the additional library libfftw3f.a. + +For FFTW2, do this: + +make clean +./configure --enable-float --enable-type-prefix; make; make install :pre + +which should produce the additional library libsfftw.a and additional +include file sfftw.h. Note that on some platforms FFTW2 has been +pre-installed for both single- and double-precision, and may already +have these files as well as libdfftw.a and dfftw.h for double +precision. The FFT_INC variable also allows for a -DFFT_SINGLE setting that will use single-precision FFTs with PPPM, which can speed-up long-range @@ -459,6 +478,16 @@ accuracy for reduced memory use and parallel communication costs for transposing 3d FFT data. Note that single precision FFTs have only been tested with the FFTW3, FFTW2, MKL, and KISS FFT options. +When using -DFFT_SINGLE with FFTW3 or FFTW2, you need to build FFTW +with support for single-precision, as explained above. For FFTW3 you +also need to include -lfftw3f with the FFT_LIB setting, in addition to +-lfftw3. For FFTW2, you also need to specify -DFFTW_SIZE with the +FFT_INC setting and -lsfftw with the FFT_LIB setting (in place of +-lfftw). Similarly, if FFTW2 has been preinstalled with an explicit +double-precision library (libdfftw.a and not the default libfftw.a), +then you can specify -DFFTW_SIZE (and not -DFFT_SINGLE), and specify +-ldfftw to use double-precision FFTs.
Step 7 :h6 The 3 JPG variables allow you to specify a JPEG and/or PNG library @@ -558,8 +587,7 @@ Typing "make clean-all" or "make clean-machine" will delete *.o object files created when LAMMPS is built, for either all builds or for a particular machine. -Changing the LAMMPS size limits via -DLAMMPS_SMALLBIG or --DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL :h6 +Changing the LAMMPS size limits via -DLAMMPS_SMALLBIG or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL :h6 As explained above, any of these 3 settings can be specified on the LMP_INC line in your low-level src/MAKE/Makefile.foo. @@ -630,22 +658,29 @@ utilities. For Cygwin and the MinGW cross-compilers, suitable makefiles are provided in src/MAKE/MACHINES. When using other compilers, like Visual C++ or Intel compilers for Windows, you may have to implement -your own build system. Since none of the current LAMMPS core developers -has significant experience building executables on Windows, we are -happy to distribute contributed instructions and modifications, but -we cannot provide support for those. +your own build system. Because the Windows OS and its system +libraries differ from Unix-like environments like Linux or MacOS, +a few adjustments may be needed when compiling for Windows: + +Do [not] set the -DLAMMPS_MEMALIGN define (see LMP_INC makefile variable) +Add -lwsock32 -lpsapi to the linker flags (see LIB makefile variable) +Try adding -static-libgcc or -static or both to the linker flags when your LAMMPS executable complains about missing .dll files :ul + +Since none of the current LAMMPS core developers has significant +experience building executables on Windows, we are happy to distribute +contributed instructions and modifications to improve the situation, +but we cannot provide support for those.
With the so-called "Anniversary Update" to Windows 10, there is a Ubuntu Linux subsystem available for Windows, that can be installed and then used to compile/install LAMMPS as if you are running on a Ubuntu Linux system instead of Windows. -As an alternative, you can download "daily builds" (and some older -versions) of the installer packages from -"rpm.lammps.org/windows.html"_http://rpm.lammps.org/windows.html. -These executables are built with most optional packages and the -download includes documentation, potential files, some tools and -many examples, but no source code. +As an alternative, you can download pre-compiled installer packages from +"packages.lammps.org/windows.html"_http://packages.lammps.org/windows.html. +These executables are built with most optional packages included and the +download includes documentation, potential files, some tools and many +examples, but no source code. :line @@ -685,7 +720,7 @@ type lmp_machine -h :pre to run your executable with the optional "-h command-line -switch"_#start_7 for "help", which will list the styles and commands +switch"_#start_6 for "help", which will list the styles and commands known to your executable, and immediately exit. :line @@ -880,7 +915,7 @@ src/MAKE/OPTIONS, which include the settings. Note that the USER-INTEL and KOKKOS packages can use settings that build LAMMPS for different hardware. The USER-INTEL package can be compiled for Intel CPUs and KNLs; the KOKKOS package builds for CPUs (OpenMP), GPUs -(Cuda), and Intel KNLs. +(CUDA), and Intel KNLs. Makefile.intel_cpu Makefile.intel_phi @@ -1058,7 +1093,7 @@ LAMMPS to be built with one or more of its optional packages. 
:line On a Windows box, you can skip making LAMMPS and simply download an -installer package from "here"_http://rpm.lammps.org/windows.html +installer package from "here"_http://packages.lammps.org/windows.html For running the non-MPI executable, follow these steps: @@ -1070,18 +1105,27 @@ the [in.lj] input from the bench folder. (e.g. by typing: cd "Documents"). :l At the command prompt, type "lmp_serial -in in.lj", replacing [in.lj] with the name of your LAMMPS input script. :l + +The serial executable includes support for multi-threading +parallelization from the styles in the USER-OMP package. + +To run with, e.g. 4 threads, type "lmp_serial -in in.lj -pk omp 4 -sf omp" :ule -For the MPI version, which allows you to run LAMMPS under Windows on -multiple processors, follow these steps: +For the MPI version, which allows you to run LAMMPS under Windows with +the more general message passing parallel library (LAMMPS has been +designed from the ground up to use MPI efficiently), follow these steps: -Download and install -"MPICH2"_http://www.mcs.anl.gov/research/projects/mpich2/downloads/index.php?s=downloads -for Windows. :ulb,l +Download and install a compatible MPI library binary package: +for 32-bit Windows +"mpich2-1.4.1p1-win-ia32.msi"_http://download.lammps.org/thirdparty/mpich2-1.4.1p1-win-ia32.msi +and for 64-bit Windows +"mpich2-1.4.1p1-win-x86-64.msi"_http://download.lammps.org/thirdparty/mpich2-1.4.1p1-win-x86-64.msi +:ulb,l The LAMMPS Windows installer packages will automatically adjust your path for the default location of this MPI package. After the installation -of the MPICH software, it needs to be integrated into the system. +of the MPICH2 software, it needs to be integrated into the system. For this you need to start a Command Prompt in {Administrator Mode} (right click on the icon and select it).
Change into the MPICH2 installation directory, then into the subdirectory [bin] and execute @@ -1100,7 +1144,7 @@ or mpiexec -np 4 lmp_mpi -in in.lj :pre -replacing in.lj with the name of your LAMMPS input script. For the latter +replacing [in.lj] with the name of your LAMMPS input script. For the latter case, you may be prompted to enter your password. :l In this mode, output may not immediately show up on the screen, so if @@ -1112,6 +1156,11 @@ something like: lmp_mpi -in in.lj :pre +And the parallel executable also includes OpenMP multi-threading, which +can be combined with MPI using something like: + +mpiexec -localonly 2 lmp_mpi -in in.lj -pk omp 2 -sf omp :pre + :ule :line diff --git a/doc/src/accelerate_gpu.txt b/doc/src/accelerate_gpu.txt index 2ac7d62f6c..816a31c788 100644 --- a/doc/src/accelerate_gpu.txt +++ b/doc/src/accelerate_gpu.txt @@ -54,7 +54,7 @@ specify the # of GPUs per node use GPU styles in your input script :ul The latter two steps can be done using the "-pk gpu" and "-sf gpu" -"command-line switches"_Section_start.html#start_7 respectively. Or +"command-line switches"_Section_start.html#start_6 respectively. Or the effect of the "-pk" or "-sf" switches can be duplicated by adding the "package gpu"_package.html or "suffix gpu"_suffix.html commands respectively to your input script. @@ -62,7 +62,7 @@ respectively to your input script. [Required hardware/software:] To use this package, you currently need to have an NVIDIA GPU and -install the NVIDIA Cuda software on your system: +install the NVIDIA CUDA software on your system: Check if you have an NVIDIA GPU: cat /proc/driver/nvidia/gpus/0/information Go to http://www.nvidia.com/object/cuda_get.html @@ -74,13 +74,8 @@ Run lammps/lib/gpu/nvc_get_devices (after building the GPU library, see below) t This requires two steps (a,b): build the GPU library, then build LAMMPS with the GPU package. 
-You can do both these steps in one line, using the src/Make.py script, -described in "Section 2.4"_Section_start.html#start_4 of the manual. -Type "Make.py -h" for help. If run from the src directory, this -command will create src/lmp_gpu using src/MAKE/Makefile.mpi as the -starting Makefile.machine: - -Make.py -p gpu -gpu mode=single arch=31 -o gpu -a lib-gpu file mpi :pre +You can do both these steps in one line as described in +"Section 4"_Section_packages.html of the manual. Or you can follow these two (a,b) steps: @@ -90,7 +85,7 @@ The GPU library is in lammps/lib/gpu. Select a Makefile.machine (in lib/gpu) appropriate for your system. You should pay special attention to 3 settings in this makefile. -CUDA_HOME = needs to be where NVIDIA Cuda software is installed on your system +CUDA_HOME = needs to be where NVIDIA CUDA software is installed on your system CUDA_ARCH = needs to be appropriate to your GPUs CUDA_PREC = precision (double, mixed, single) you desire :ul @@ -151,9 +146,9 @@ automatically if you create more MPI tasks/node than there are GPUs/mode. E.g. with 8 MPI tasks/node and 2 GPUs, each GPU will be shared by 4 MPI tasks. -Use the "-sf gpu" "command-line switch"_Section_start.html#start_7, +Use the "-sf gpu" "command-line switch"_Section_start.html#start_6, which will automatically append "gpu" to styles that support it. Use -the "-pk gpu Ng" "command-line switch"_Section_start.html#start_7 to +the "-pk gpu Ng" "command-line switch"_Section_start.html#start_6 to set Ng = # of GPUs/node to use. lmp_machine -sf gpu -pk gpu 1 -in in.script # 1 MPI task uses 1 GPU @@ -188,7 +183,7 @@ pair_style lj/cut/gpu 2.5 :pre You must also use the "package gpu"_package.html command to enable the GPU package, unless the "-sf gpu" or "-pk gpu" "command-line -switches"_Section_start.html#start_7 were used. It specifies the +switches"_Section_start.html#start_6 were used. It specifies the number of GPUs/node to use, as well as other options. 
[Speed-ups to expect:] diff --git a/doc/src/accelerate_intel.txt b/doc/src/accelerate_intel.txt index 155e29e367..a7c3382caa 100644 --- a/doc/src/accelerate_intel.txt +++ b/doc/src/accelerate_intel.txt @@ -29,8 +29,10 @@ Bond Styles: fene, harmonic :l Dihedral Styles: charmm, harmonic, opls :l Fixes: nve, npt, nvt, nvt/sllod :l Improper Styles: cvff, harmonic :l -Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne, -charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l +Pair Styles: airebo, airebo/morse, buck/coul/cut, buck/coul/long, +buck, eam, eam/alloy, eam/fs, gayberne, lj/charmm/coul/charmm, +lj/charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, rebo, +sw, tersoff :l K-Space Styles: pppm, pppm/disp :l :ule @@ -225,11 +227,9 @@ source /opt/intel/parallel_studio_xe_2016.3.067/psxevars.sh # or psxevars.csh for C-shell make intel_cpu_intelmpi :pre -Alternatively, the build can be accomplished with the src/Make.py -script, described in "Section 2.4"_Section_start.html#start_4 of the -manual. Type "Make.py -h" for help. For an example: - -Make.py -v -p intel omp -intel cpu -a file intel_cpu_intelmpi :pre +Alternatively this can be done as a single command with +suitable make command invocations. This is discussed in "Section +4"_Section_packages.html of the manual. Note that if you build with support for a Phi coprocessor, the same binary can be used on nodes with or without coprocessors installed. @@ -244,8 +244,7 @@ highly recommended for CCFLAGS and LINKFLAGS. LIB should include is required for CCFLAGS and "-qoffload" is required for LINKFLAGS. Other recommended CCFLAG options for best performance are "-O2 -fno-alias -ansi-alias -qoverride-limits fp-model fast=2 --no-prec-div". The Make.py command will add all of these -automatically. +-no-prec-div". NOTE: The vectorization and math capabilities can differ depending on the CPU. 
For Intel compilers, the "-x" flag specifies the type of @@ -301,7 +300,7 @@ Hyper-Threading technology disabled. To enable USER-INTEL optimizations for all available styles used in the input script, the "-sf intel" -"command-line switch"_Section_start.html#start_7 can be used without +"command-line switch"_Section_start.html#start_6 can be used without any requirement for editing the input script. This switch will automatically append "intel" to styles that support it. It also invokes a default command: "package intel 1"_package.html. This @@ -314,7 +313,7 @@ support, that 1 coprocessor per node will be used with automatic balancing of work between the CPU and the coprocessor. You can specify different options for the USER-INTEL package by using -the "-pk intel Nphi" "command-line switch"_Section_start.html#start_7 +the "-pk intel Nphi" "command-line switch"_Section_start.html#start_6 with keyword/value pairs as specified in the documentation. Here, Nphi = # of Xeon Phi coprocessors/node (ignored without offload support). Common options to the USER-INTEL package include {omp} to @@ -387,7 +386,7 @@ can performed automatically by using "-sf hybrid intel opt" or and "omp" suffixes can be appended manually in the input script. For the latter, the "package omp"_package.html command must be in the input script or the "-pk omp Nt" "command-line -switch"_Section_start.html#start_7 must be used where Nt is the +switch"_Section_start.html#start_6 must be used where Nt is the number of OpenMP threads. The number of OpenMP threads should not be set differently for the different packages. Note that the "suffix hybrid intel omp"_suffix.html command can also be used within the @@ -486,7 +485,7 @@ sorting"_atom_modify.html is changed to 1 so that the per-atom data is effectively sorted at every rebuild of the neighbor lists. 
All the available coprocessor threads on each Phi will be divided among MPI tasks, unless the {tptask} option of the "-pk intel" "command-line -switch"_Section_start.html#start_7 is used to limit the coprocessor +switch"_Section_start.html#start_6 is used to limit the coprocessor threads per MPI task. [Restrictions:] diff --git a/doc/src/accelerate_kokkos.txt b/doc/src/accelerate_kokkos.txt index 602c3191f6..8d87751f94 100644 --- a/doc/src/accelerate_kokkos.txt +++ b/doc/src/accelerate_kokkos.txt @@ -60,8 +60,7 @@ More details follow. use a C++11 compatible compiler make yes-kokkos make mpi KOKKOS_DEVICES=OpenMP # build with the KOKKOS package -make kokkos_omp # or Makefile.kokkos_omp already has variable set -Make.py -v -p kokkos -kokkos omp -o mpi -a file mpi # or one-line build via Make.py :pre +make kokkos_omp # or Makefile.kokkos_omp already has variable set :pre mpirun -np 16 lmp_mpi -k on -sf kk -in in.lj # 1 node, 16 MPI tasks/node, no threads mpirun -np 2 -ppn 1 lmp_mpi -k on t 16 -sf kk -in in.lj # 2 nodes, 1 MPI task/node, 16 threads/task @@ -82,8 +81,7 @@ use a C++11 compatible compiler KOKKOS_DEVICES = Cuda, OpenMP KOKKOS_ARCH = Kepler35 make yes-kokkos -make machine -Make.py -p kokkos -kokkos cuda arch=31 -o kokkos_cuda -a file kokkos_cuda :pre +make machine :pre mpirun -np 1 lmp_cuda -k on t 6 -sf kk -in in.lj # one MPI task, 6 threads on CPU mpirun -np 4 -ppn 1 lmp_cuda -k on t 6 -sf kk -in in.lj # ditto on 4 nodes :pre @@ -98,8 +96,7 @@ use a C++11 compatible compiler KOKKOS_DEVICES = OpenMP KOKKOS_ARCH = KNC make yes-kokkos -make machine -Make.py -p kokkos -kokkos phi -o kokkos_phi -a file mpi :pre +make machine :pre host=MIC, Intel Phi with 61 cores (240 threads/phi via 4x hardware threading): mpirun -np 1 lmp_g++ -k on t 240 -sf kk -in in.lj # 1 MPI task on 1 Phi, 1*240 = 240 @@ -116,7 +113,7 @@ To build with Kokkos support for CPUs, your compiler must support the OpenMP interface. 
You should have one or more multi-core CPUs so that multiple threads can be launched by each MPI task running on a CPU. -To build with Kokkos support for NVIDIA GPUs, NVIDIA Cuda software +To build with Kokkos support for NVIDIA GPUs, NVIDIA CUDA software version 7.5 or later must be installed on your system. See the discussion for the "GPU"_accelerate_gpu.html package for details of how to check and do this. @@ -135,16 +132,16 @@ mode like the USER-INTEL package supports. You must choose at build time whether to build for CPUs (OpenMP), GPUs, or Phi. -You can do any of these in one line, using the src/Make.py script, -described in "Section 2.4"_Section_start.html#start_4 of the manual. -Type "Make.py -h" for help. If run from the src directory, these +You can do any of these in one line, using the suitable make command +line flags as described in "Section 4"_Section_packages.html of the +manual. If run from the src directory, these commands will create src/lmp_kokkos_omp, lmp_kokkos_cuda, and lmp_kokkos_phi. Note that the OMP and PHI options use src/MAKE/Makefile.mpi as the starting Makefile.machine. The CUDA option uses src/MAKE/OPTIONS/Makefile.kokkos_cuda. The latter two steps can be done using the "-k on", "-pk kokkos" and -"-sf kk" "command-line switches"_Section_start.html#start_7 +"-sf kk" "command-line switches"_Section_start.html#start_6 respectively. Or the effect of the "-pk" or "-sf" switches can be duplicated by adding the "package kokkos"_package.html or "suffix kk"_suffix.html commands respectively to your input script. @@ -280,10 +277,10 @@ specify how many Phi coprocessors there are per node; each coprocessors is simply treated as running some number of MPI tasks. You must use the "-k on" "command-line -switch"_Section_start.html#start_7 to enable the KOKKOS package. It +switch"_Section_start.html#start_6 to enable the KOKKOS package. It takes additional arguments for hardware settings appropriate to your system. 
Those arguments are "documented -here"_Section_start.html#start_7. The two most commonly used +here"_Section_start.html#start_6. The two most commonly used options are: -k on t Nt g Ng :pre @@ -304,12 +301,12 @@ The "-k on" switch also issues a "package kokkos" command (with no additional arguments) which sets various KOKKOS options to default values, as discussed on the "package"_package.html command doc page. -Use the "-sf kk" "command-line switch"_Section_start.html#start_7, +Use the "-sf kk" "command-line switch"_Section_start.html#start_6, which will automatically append "kk" to styles that support it. Use -the "-pk kokkos" "command-line switch"_Section_start.html#start_7 if +the "-pk kokkos" "command-line switch"_Section_start.html#start_6 if you wish to change any of the default "package kokkos"_package.html optionns set by the "-k on" "command-line -switch"_Section_start.html#start_7. +switch"_Section_start.html#start_6. @@ -323,7 +320,7 @@ However, when running in MPI-only mode with 1 thread per MPI task, it will typically be faster to use "half" neighbor lists and set the Newton flag to "on", just as is the case for non-accelerated pair styles. You can do this with the "-pk" "command-line -switch"_Section_start.html#start_7. +switch"_Section_start.html#start_6. [Or run with the KOKKOS package by editing an input script:] @@ -332,7 +329,7 @@ appropriate thread and GPU values for host=OMP or host=MIC or device=CUDA are the same. You must still use the "-k on" "command-line -switch"_Section_start.html#start_7 to enable the KOKKOS package, and +switch"_Section_start.html#start_6 to enable the KOKKOS package, and specify its additional arguments for hardware options appropriate to your system, as documented above. @@ -343,7 +340,7 @@ pair_style lj/cut/kk 2.5 :pre You only need to use the "package kokkos"_package.html command if you wish to change any of its option defaults, as set by the "-k on" -"command-line switch"_Section_start.html#start_7. 
+"command-line switch"_Section_start.html#start_6. [Speed-ups to expect:] @@ -389,7 +386,7 @@ If N is the number of physical cores/node, then the number of MPI tasks/node * number of threads/task should not exceed N, and should typically equal N. Note that the default threads/task is 1, as set by the "t" keyword of the "-k" "command-line -switch"_Section_start.html#start_7. If you do not change this, no +switch"_Section_start.html#start_6. If you do not change this, no additional parallelism (beyond MPI) will be invoked on the host CPU(s). @@ -429,7 +426,7 @@ details). The -np setting of the mpirun command should set the number of MPI tasks/node to be equal to the # of physical GPUs on the node. -Use the "-k" "command-line switch"_Section_commands.html#start_7 to +Use the "-k" "command-line switch"_Section_start.html#start_6 to specify the number of GPUs per node, and the number of threads per MPI task. As above for multi-core CPUs (and no GPU), if N is the number of physical cores/node, then the number of MPI tasks/node * number of diff --git a/doc/src/accelerate_omp.txt b/doc/src/accelerate_omp.txt index c8dd343861..fa7bef1a52 100644 --- a/doc/src/accelerate_omp.txt +++ b/doc/src/accelerate_omp.txt @@ -23,8 +23,7 @@ one or more 16-core nodes. More details follow. use -fopenmp with CCFLAGS and LINKFLAGS in Makefile.machine make yes-user-omp make mpi # build with USER-OMP package, if settings added to Makefile.mpi -make omp # or Makefile.omp already has settings -Make.py -v -p omp -o mpi -a file mpi # or one-line build via Make.py :pre +make omp # or Makefile.omp already has settings :pre lmp_mpi -sf omp -pk omp 16 < in.script # 1 MPI task, 16 threads mpirun -np 4 lmp_mpi -sf omp -pk omp 4 -in in.script # 4 MPI tasks, 4 threads/task @@ -40,14 +39,11 @@ each MPI task running on a CPU. The lines above illustrate how to include/build with the USER-OMP package in two steps, using the "make" command.
Or how to do it with -one command via the src/Make.py script, described in "Section -2.4"_Section_start.html#start_4 of the manual. Type "Make.py -h" for -help. +one command as described in "Section 4"_Section_packages.html of the manual. Note that the CCFLAGS and LINKFLAGS settings in Makefile.machine must include "-fopenmp". Likewise, if you use an Intel compiler, the -CCFLAGS setting must include "-restrict". The Make.py command will -add these automatically. +CCFLAGS setting must include "-restrict". [Run with the USER-OMP package from the command line:] @@ -62,14 +58,14 @@ threads/task should not exceed the physical number of cores (on a node), otherwise performance will suffer. As in the lines above, use the "-sf omp" "command-line -switch"_Section_start.html#start_7, which will automatically append +switch"_Section_start.html#start_6, which will automatically append "omp" to styles that support it. The "-sf omp" switch also issues a default "package omp 0"_package.html command, which will set the number of threads per MPI task via the OMP_NUM_THREADS environment variable. You can also use the "-pk omp Nt" "command-line -switch"_Section_start.html#start_7, to explicitly set Nt = # of OpenMP +switch"_Section_start.html#start_6, to explicitly set Nt = # of OpenMP threads per MPI task to use, as well as additional options. Its syntax is the same as the "package omp"_package.html command whose doc page gives details, including the default values used if it is not diff --git a/doc/src/accelerate_opt.txt b/doc/src/accelerate_opt.txt index 704321ca07..845264b522 100644 --- a/doc/src/accelerate_opt.txt +++ b/doc/src/accelerate_opt.txt @@ -21,8 +21,7 @@ Here is a quick overview of how to use the OPT package. More details follow. 
make yes-opt -make mpi # build with the OPT package -Make.py -v -p opt -o mpi -a file mpi # or one-line build via Make.py :pre +make mpi # build with the OPT package :pre lmp_mpi -sf opt -in in.script # run in serial mpirun -np 4 lmp_mpi -sf opt -in in.script # run in parallel :pre @@ -35,18 +34,15 @@ None. The lines above illustrate how to build LAMMPS with the OPT package in two steps, using the "make" command. Or how to do it with one command -via the src/Make.py script, described in "Section -2.4"_Section_start.html#start_4 of the manual. Type "Make.py -h" for -help. +as described in "Section 4"_Section_packages.html of the manual. Note that if you use an Intel compiler to build with the OPT package, the CCFLAGS setting in your Makefile.machine must include "-restrict". -The Make.py command will add this automatically. [Run with the OPT package from the command line:] As in the lines above, use the "-sf opt" "command-line -switch"_Section_start.html#start_7, which will automatically append +switch"_Section_start.html#start_6, which will automatically append "opt" to styles that support it. [Or run with the OPT package by editing an input script:] diff --git a/doc/src/angle_charmm.txt b/doc/src/angle_charmm.txt index a02e604258..7ff7ef8fd4 100644 --- a/doc/src/angle_charmm.txt +++ b/doc/src/angle_charmm.txt @@ -63,7 +63,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_class2.txt b/doc/src/angle_class2.txt index 74f2544cd4..71a508d691 100644 --- a/doc/src/angle_class2.txt +++ b/doc/src/angle_class2.txt @@ -94,7 +94,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_cosine.txt b/doc/src/angle_cosine.txt index 4fb2ccaf7c..c0ce3c9301 100644 --- a/doc/src/angle_cosine.txt +++ b/doc/src/angle_cosine.txt @@ -50,7 +50,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_cosine_delta.txt b/doc/src/angle_cosine_delta.txt index 6ab214508c..830fd6db58 100644 --- a/doc/src/angle_cosine_delta.txt +++ b/doc/src/angle_cosine_delta.txt @@ -55,7 +55,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_cosine_periodic.txt b/doc/src/angle_cosine_periodic.txt index c6cd57e419..b5c53b1b0f 100644 --- a/doc/src/angle_cosine_periodic.txt +++ b/doc/src/angle_cosine_periodic.txt @@ -63,7 +63,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_cosine_shift.txt b/doc/src/angle_cosine_shift.txt index dc1a29a86b..6ed9fe2150 100644 --- a/doc/src/angle_cosine_shift.txt +++ b/doc/src/angle_cosine_shift.txt @@ -53,7 +53,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_cosine_shift_exp.txt b/doc/src/angle_cosine_shift_exp.txt index 48af5ba76a..44a68c1087 100644 --- a/doc/src/angle_cosine_shift_exp.txt +++ b/doc/src/angle_cosine_shift_exp.txt @@ -65,7 +65,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_cosine_squared.txt b/doc/src/angle_cosine_squared.txt index 23e1b150a8..065cdad542 100644 --- a/doc/src/angle_cosine_squared.txt +++ b/doc/src/angle_cosine_squared.txt @@ -55,7 +55,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_fourier.txt b/doc/src/angle_fourier.txt index f58ae8e4f4..da39e7cf32 100644 --- a/doc/src/angle_fourier.txt +++ b/doc/src/angle_fourier.txt @@ -51,7 +51,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_fourier_simple.txt b/doc/src/angle_fourier_simple.txt index 9da8ffed28..5adda6cb32 100644 --- a/doc/src/angle_fourier_simple.txt +++ b/doc/src/angle_fourier_simple.txt @@ -50,7 +50,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_harmonic.txt b/doc/src/angle_harmonic.txt index 12ee805218..4c74763964 100644 --- a/doc/src/angle_harmonic.txt +++ b/doc/src/angle_harmonic.txt @@ -57,7 +57,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_quartic.txt b/doc/src/angle_quartic.txt index fea2eb9e03..f7640bdfbc 100644 --- a/doc/src/angle_quartic.txt +++ b/doc/src/angle_quartic.txt @@ -57,7 +57,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/angle_table.txt b/doc/src/angle_table.txt index 61dd7b041e..bd6e167bd8 100644 --- a/doc/src/angle_table.txt +++ b/doc/src/angle_table.txt @@ -136,7 +136,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/balance.txt b/doc/src/balance.txt index 79728d6569..da6f59900d 100644 --- a/doc/src/balance.txt +++ b/doc/src/balance.txt @@ -394,7 +394,7 @@ weights. It assigns the same weight to each particle owned by a processor based on the total computational time spent by that processor. See details below on what time window is used. It uses the same timing information as is used for the "MPI task timing -breakdown"_Section_start.html#start_8, namely, for sections {Pair}, +breakdown"_Section_start.html#start_7, namely, for sections {Pair}, {Bond}, {Kspace}, and {Neigh}. The time spent in those portions of the timestep are measured for each MPI rank, summed, then divided by the number of particles owned by that processor. I.e. the weight is diff --git a/doc/src/bond_class2.txt b/doc/src/bond_class2.txt index aa05412387..9687a63168 100644 --- a/doc/src/bond_class2.txt +++ b/doc/src/bond_class2.txt @@ -56,7 +56,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_fene.txt b/doc/src/bond_fene.txt index 80d2a805c5..9050c3bf5c 100644 --- a/doc/src/bond_fene.txt +++ b/doc/src/bond_fene.txt @@ -59,7 +59,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_fene_expand.txt b/doc/src/bond_fene_expand.txt index 3908c16a7e..ff687444a9 100644 --- a/doc/src/bond_fene_expand.txt +++ b/doc/src/bond_fene_expand.txt @@ -62,7 +62,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_harmonic.txt b/doc/src/bond_harmonic.txt index 1cbd897dac..c18a7e0fd4 100644 --- a/doc/src/bond_harmonic.txt +++ b/doc/src/bond_harmonic.txt @@ -54,7 +54,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_harmonic_shift.txt b/doc/src/bond_harmonic_shift.txt index 8cb2d2ce7d..bf3b3c115a 100644 --- a/doc/src/bond_harmonic_shift.txt +++ b/doc/src/bond_harmonic_shift.txt @@ -55,7 +55,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_harmonic_shift_cut.txt b/doc/src/bond_harmonic_shift_cut.txt index 836d6afda4..1918ce00b6 100644 --- a/doc/src/bond_harmonic_shift_cut.txt +++ b/doc/src/bond_harmonic_shift_cut.txt @@ -55,7 +55,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_morse.txt b/doc/src/bond_morse.txt index 12e51f9bef..4f6a32e341 100644 --- a/doc/src/bond_morse.txt +++ b/doc/src/bond_morse.txt @@ -53,7 +53,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_nonlinear.txt b/doc/src/bond_nonlinear.txt index ac9f3369c2..434af62506 100644 --- a/doc/src/bond_nonlinear.txt +++ b/doc/src/bond_nonlinear.txt @@ -53,7 +53,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_quartic.txt b/doc/src/bond_quartic.txt index e61f4f0343..4dc7ad4a36 100644 --- a/doc/src/bond_quartic.txt +++ b/doc/src/bond_quartic.txt @@ -88,7 +88,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/bond_table.txt b/doc/src/bond_table.txt index cb096fba11..906d3e5d76 100644 --- a/doc/src/bond_table.txt +++ b/doc/src/bond_table.txt @@ -133,7 +133,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/compute_pressure.txt b/doc/src/compute_pressure.txt index 292e779f72..f0691ad207 100644 --- a/doc/src/compute_pressure.txt +++ b/doc/src/compute_pressure.txt @@ -117,7 +117,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/compute_rdf.txt b/doc/src/compute_rdf.txt index acbc0e4f0c..e462e85fc0 100644 --- a/doc/src/compute_rdf.txt +++ b/doc/src/compute_rdf.txt @@ -180,9 +180,18 @@ will register an arbitrarily large spike at whatever distance they happen to be at, and zero everywhere else. Coord(r) will show a step change from zero to one at the location of the spike in g(r). +NOTE: compute rdf can handle dynamic groups and systems where atoms +are added or removed, but this requires certain normalization +parameters to be recomputed in every step, which involves collective +communication operations. This will reduce performance and limit +parallel efficiency and scaling. For systems where only the type +of atoms changes (e.g. when using "fix atom/swap"_fix_atom_swap.html), +you need to explicitly request the dynamic normalization updates +via "compute_modify dynamic yes"_compute_modify.html. + [Related commands:] -"fix ave/time"_fix_ave_time.html +"fix ave/time"_fix_ave_time.html, "compute_modify"_compute_modify.html [Default:] diff --git a/doc/src/compute_temp.txt b/doc/src/compute_temp.txt index 0bd2d4b121..b88be79e20 100644 --- a/doc/src/compute_temp.txt +++ b/doc/src/compute_temp.txt @@ -79,7 +79,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/compute_temp_partial.txt b/doc/src/compute_temp_partial.txt index 163a00af52..fe2420b4e4 100644 --- a/doc/src/compute_temp_partial.txt +++ b/doc/src/compute_temp_partial.txt @@ -86,7 +86,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/compute_voronoi_atom.txt b/doc/src/compute_voronoi_atom.txt index d084fcee66..a280b2b151 100644 --- a/doc/src/compute_voronoi_atom.txt +++ b/doc/src/compute_voronoi_atom.txt @@ -217,7 +217,7 @@ This compute is part of the VORONOI package. It is only enabled if LAMMPS was built with that package. See the "Making LAMMPS"_Section_start.html#start_3 section for more info. -It also requiers you have a copy of the Voro++ library built and +It also requires you have a copy of the Voro++ library built and installed on your system. See instructions on obtaining and installing the Voro++ software in the src/VORONOI/README file. diff --git a/doc/src/dihedral_charmm.txt b/doc/src/dihedral_charmm.txt index 73dc67cdef..06abe054e4 100644 --- a/doc/src/dihedral_charmm.txt +++ b/doc/src/dihedral_charmm.txt @@ -128,7 +128,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_class2.txt b/doc/src/dihedral_class2.txt index 91ab6f3738..cb9fc72c22 100644 --- a/doc/src/dihedral_class2.txt +++ b/doc/src/dihedral_class2.txt @@ -153,7 +153,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_cosine_shift_exp.txt b/doc/src/dihedral_cosine_shift_exp.txt index 89614a3fdb..715682affc 100644 --- a/doc/src/dihedral_cosine_shift_exp.txt +++ b/doc/src/dihedral_cosine_shift_exp.txt @@ -64,7 +64,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_fourier.txt b/doc/src/dihedral_fourier.txt index 5682309b83..da892b59da 100644 --- a/doc/src/dihedral_fourier.txt +++ b/doc/src/dihedral_fourier.txt @@ -55,7 +55,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_harmonic.txt b/doc/src/dihedral_harmonic.txt index c763dcce22..d9a48ff384 100644 --- a/doc/src/dihedral_harmonic.txt +++ b/doc/src/dihedral_harmonic.txt @@ -65,7 +65,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_helix.txt b/doc/src/dihedral_helix.txt index fced983db0..1e907557b2 100644 --- a/doc/src/dihedral_helix.txt +++ b/doc/src/dihedral_helix.txt @@ -58,7 +58,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_multi_harmonic.txt b/doc/src/dihedral_multi_harmonic.txt index 5774a67685..7d3c2ea083 100644 --- a/doc/src/dihedral_multi_harmonic.txt +++ b/doc/src/dihedral_multi_harmonic.txt @@ -52,7 +52,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_nharmonic.txt b/doc/src/dihedral_nharmonic.txt index 0df28a05d4..8392d83899 100644 --- a/doc/src/dihedral_nharmonic.txt +++ b/doc/src/dihedral_nharmonic.txt @@ -52,7 +52,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_opls.txt b/doc/src/dihedral_opls.txt index afcc5d3514..d1a6ba3ff2 100644 --- a/doc/src/dihedral_opls.txt +++ b/doc/src/dihedral_opls.txt @@ -60,7 +60,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/dihedral_quadratic.txt b/doc/src/dihedral_quadratic.txt index 526b469f63..ca2f5aed40 100644 --- a/doc/src/dihedral_quadratic.txt +++ b/doc/src/dihedral_quadratic.txt @@ -53,7 +53,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/echo.txt b/doc/src/echo.txt index 8ef8ad05f8..3141c7a719 100644 --- a/doc/src/echo.txt +++ b/doc/src/echo.txt @@ -26,7 +26,7 @@ command to the screen and/or log file as it is read and processed. If an input script has errors, it can be useful to look at echoed output to see the last command processed. -The "command-line switch"_Section_start.html#start_5 -echo can be used +The "command-line switch"_Section_start.html#start_6 -echo can be used in place of this command. [Restrictions:] none diff --git a/doc/src/fix_addforce.txt b/doc/src/fix_addforce.txt index da9f98a6da..1cc0a15332 100644 --- a/doc/src/fix_addforce.txt +++ b/doc/src/fix_addforce.txt @@ -117,7 +117,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_aveforce.txt b/doc/src/fix_aveforce.txt index d980e9a211..5d7dec3e6a 100644 --- a/doc/src/fix_aveforce.txt +++ b/doc/src/fix_aveforce.txt @@ -77,7 +77,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_bond_create.txt b/doc/src/fix_bond_create.txt index a44c3103dd..c0045ac0f0 100644 --- a/doc/src/fix_bond_create.txt +++ b/doc/src/fix_bond_create.txt @@ -150,10 +150,9 @@ atoms. Note that adding a single bond always adds a new 1st neighbor but may also induce *many* new 2nd and 3rd neighbors, depending on the molecular topology of your system. The "extra special per atom" parameter must typically be set to allow for the new maximum total -size (1st + 2nd + 3rd neighbors) of this per-atom list. There are 3 +size (1st + 2nd + 3rd neighbors) of this per-atom list. There are 2 ways to do this. See the "read_data"_read_data.html or -"create_box"_create_box.html or "special_bonds extra" commands for -details. +"create_box"_create_box.html commands for details. NOTE: Even if you do not use the {atype}, {dtype}, or {itype} keywords, the list of topological neighbors is updated for atoms diff --git a/doc/src/fix_deform.txt b/doc/src/fix_deform.txt index d3254eece6..63d872eded 100644 --- a/doc/src/fix_deform.txt +++ b/doc/src/fix_deform.txt @@ -557,7 +557,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_enforce2d.txt b/doc/src/fix_enforce2d.txt index 1dce620033..5d04e96677 100644 --- a/doc/src/fix_enforce2d.txt +++ b/doc/src/fix_enforce2d.txt @@ -41,7 +41,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_freeze.txt b/doc/src/fix_freeze.txt index 6a4f6c2fcf..a63ee4cb32 100644 --- a/doc/src/fix_freeze.txt +++ b/doc/src/fix_freeze.txt @@ -45,7 +45,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_gcmc.txt b/doc/src/fix_gcmc.txt index 41ec38cffb..405738276f 100644 --- a/doc/src/fix_gcmc.txt +++ b/doc/src/fix_gcmc.txt @@ -383,6 +383,9 @@ called. Reneighboring is required. Can be run in parallel, but aspects of the GCMC part will not scale well in parallel. Only usable for 3D simulations. +When using fix gcmc in combination with fix shake or fix rigid, +only gcmc exchange moves are supported. 
+ Note that very lengthy simulations involving insertions/deletions of billions of gas molecules may run out of atom or molecule IDs and trigger an error, so it is better to run multiple shorter-duration diff --git a/doc/src/fix_gravity.txt b/doc/src/fix_gravity.txt index 2cf1665c30..dae8ac5ed0 100644 --- a/doc/src/fix_gravity.txt +++ b/doc/src/fix_gravity.txt @@ -102,7 +102,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_langevin.txt b/doc/src/fix_langevin.txt index 534d83f6a9..93c73f5a5d 100644 --- a/doc/src/fix_langevin.txt +++ b/doc/src/fix_langevin.txt @@ -276,7 +276,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_momentum.txt b/doc/src/fix_momentum.txt index 4f94e2a857..bcf4465fb8 100644 --- a/doc/src/fix_momentum.txt +++ b/doc/src/fix_momentum.txt @@ -73,7 +73,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_msst.txt b/doc/src/fix_msst.txt index 43f35d6880..310692669a 100644 --- a/doc/src/fix_msst.txt +++ b/doc/src/fix_msst.txt @@ -25,7 +25,7 @@ keyword = {q} or {mu} or {p0} or {v0} or {e0} or {tscale} or {beta} or {dftb} :l {e0} value = initial total energy (energy units) {tscale} value = reduction in initial temperature (unitless fraction between 0.0 and 1.0) {dftb} value = {yes} or {no} for whether using MSST in conjunction with DFTB+ - {beta} value = scale factor on energy contribution of DFTB+ :pre + {beta} value = scale factor for improved energy conservation :pre :ule [Examples:] @@ -72,6 +72,14 @@ be calculated on the first step, after the energy specified by {tscale} is removed. The value of {e0} is not used in the dynamical equations, but is used in calculating the deviation from the Hugoniot. +The keyword {beta} is a scaling term that can be added to the MSST +ionic equations of motion to account for drift in the conserved +quantity during long timescale simulations, similar to a Berendsen +thermostat. See "(Reed)"_#Reed and "(Goldman)"_#Goldman2 for more +details. The value of {beta} must be between 0.0 and 1.0 inclusive. +A value of 0.0 means no contribution, a value of 1.0 means a full +contribution. + Values of shockvel less than a critical value determined by the material response will not have compressive solutions. This will be reflected in lack of significant change of the volume in the MSST. @@ -95,23 +103,15 @@ or "_MSST_pe". The group for the new computes is "all".
:line -The {dftb} and {beta} keywords are to allow this fix to be used when -LAMMPS is being driven by DFTB+, a density-functional tight-binding -code. - -If the keyword {dftb} is used with a value of {yes}, then the MSST -equations are altered to account for an energy contribution compute by -DFTB+. In this case, you must define a "fix -external"_fix_external.html command in your input script, which is -used to callback to DFTB+ during the LAMMPS timestepping. DFTB+ will -communicate its info to LAMMPS via that fix. - -The keyword {beta} is a scale factor on the DFTB+ energy contribution. -The value of {beta} must be between 0.0 and 1.0 inclusive. A value of -0.0 means no contribution, a value of 1.0 means a full contribution. - -(July 2017) More information about these keywords and the use of -LAMMPS with DFTB+ will be added to the LAMMMPS documention soon. +The {dftb} keyword is to allow this fix to be used when LAMMPS is +being driven by DFTB+, a density-functional tight-binding code. If the +keyword {dftb} is used with a value of {yes}, then the MSST equations +are altered to account for the electron entropy contribution to the +Hugoniot relations and total energy. See "(Reed2)"_#Reed2 and +"(Goldman)"_#Goldman2 for details on this contribution. In this case, +you must define a "fix external"_fix_external.html command in your +input script, which is used to callback to DFTB+ during the LAMMPS +timestepping. DFTB+ will communicate its info to LAMMPS via that fix. :line @@ -182,4 +182,12 @@ timestep. :line :link(Reed) -[(Reed)] Reed, Fried, and Joannopoulos, Phys. Rev. Lett., 90, 235503 (2003). +[(Reed)] Reed, Fried, and Joannopoulos, Phys. Rev. Lett., 90, 235503 +(2003). + +:link(Reed2) +[(Reed2)] Reed, J. Phys. Chem. C, 116, 2205 (2012). + +:link(Goldman2) +[(Goldman)] Goldman, Srinivasan, Hamel, Fried, Gaus, and Elstner, +J. Phys. Chem. C, 117, 7885 (2013).
diff --git a/doc/src/fix_nh.txt b/doc/src/fix_nh.txt index c1cc3e560a..8fa30ac222 100644 --- a/doc/src/fix_nh.txt +++ b/doc/src/fix_nh.txt @@ -492,7 +492,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nph_asphere.txt b/doc/src/fix_nph_asphere.txt index 3d151a724b..8c35b6a1a7 100644 --- a/doc/src/fix_nph_asphere.txt +++ b/doc/src/fix_nph_asphere.txt @@ -93,7 +93,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nph_body.txt b/doc/src/fix_nph_body.txt index 3a273be595..1e590f1cb3 100644 --- a/doc/src/fix_nph_body.txt +++ b/doc/src/fix_nph_body.txt @@ -92,7 +92,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nph_sphere.txt b/doc/src/fix_nph_sphere.txt index 9258f40c76..62b45edfd7 100644 --- a/doc/src/fix_nph_sphere.txt +++ b/doc/src/fix_nph_sphere.txt @@ -102,7 +102,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nphug.txt b/doc/src/fix_nphug.txt index ef3ffc4955..292e46f94a 100644 --- a/doc/src/fix_nphug.txt +++ b/doc/src/fix_nphug.txt @@ -152,7 +152,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_npt_asphere.txt b/doc/src/fix_npt_asphere.txt index 8fe98f1818..5f3979e36e 100644 --- a/doc/src/fix_npt_asphere.txt +++ b/doc/src/fix_npt_asphere.txt @@ -117,7 +117,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_npt_body.txt b/doc/src/fix_npt_body.txt index 772920df61..d89bf19db2 100644 --- a/doc/src/fix_npt_body.txt +++ b/doc/src/fix_npt_body.txt @@ -116,7 +116,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_npt_sphere.txt b/doc/src/fix_npt_sphere.txt index 24a8fede57..c4cf2cb08d 100644 --- a/doc/src/fix_npt_sphere.txt +++ b/doc/src/fix_npt_sphere.txt @@ -127,7 +127,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nve.txt b/doc/src/fix_nve.txt index 7ad8301877..c04c17858e 100644 --- a/doc/src/fix_nve.txt +++ b/doc/src/fix_nve.txt @@ -46,7 +46,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nve_asphere.txt b/doc/src/fix_nve_asphere.txt index 03846a2558..1f31fb9679 100644 --- a/doc/src/fix_nve_asphere.txt +++ b/doc/src/fix_nve_asphere.txt @@ -57,7 +57,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nve_sphere.txt b/doc/src/fix_nve_sphere.txt index f91a41f515..21dc6cba8a 100644 --- a/doc/src/fix_nve_sphere.txt +++ b/doc/src/fix_nve_sphere.txt @@ -77,7 +77,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nvt_asphere.txt b/doc/src/fix_nvt_asphere.txt index 77de1dea40..21b900f16a 100644 --- a/doc/src/fix_nvt_asphere.txt +++ b/doc/src/fix_nvt_asphere.txt @@ -98,7 +98,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nvt_body.txt b/doc/src/fix_nvt_body.txt index 1f04b85c8b..6a5e09ba7f 100644 --- a/doc/src/fix_nvt_body.txt +++ b/doc/src/fix_nvt_body.txt @@ -97,7 +97,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nvt_sllod.txt b/doc/src/fix_nvt_sllod.txt index 82631f22e3..392dbc281c 100644 --- a/doc/src/fix_nvt_sllod.txt +++ b/doc/src/fix_nvt_sllod.txt @@ -121,7 +121,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_nvt_sphere.txt b/doc/src/fix_nvt_sphere.txt index fa1c97bcce..ecf0922b79 100644 --- a/doc/src/fix_nvt_sphere.txt +++ b/doc/src/fix_nvt_sphere.txt @@ -108,7 +108,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_qbmsst.txt b/doc/src/fix_qbmsst.txt index 468206a57b..2c116fb0f8 100644 --- a/doc/src/fix_qbmsst.txt +++ b/doc/src/fix_qbmsst.txt @@ -78,7 +78,7 @@ especially when the temperature of the initial state is below the classical limit or there is a great change in the zero point energies between the initial and final states. Theoretical post processing quantum corrections of shock compressed water and methane have been -reported as much as 30% of the temperatures "(Goldman)"_#Goldman. A +reported as much as 30% of the temperatures "(Goldman)"_#Goldman1. A self-consistent method that couples the shock to a quantum thermal bath described by a colored noise Langevin thermostat has been developed by Qi et al "(Qi)"_#Qi and applied to shocked methane. The @@ -212,7 +212,7 @@ T_init=300.0. e0, p0, and v0 are calculated on the first step. :line -:link(Goldman) +:link(Goldman1) [(Goldman)] Goldman, Reed and Fried, J. Chem. Phys. 131, 204103 (2009) :link(Qi) diff --git a/doc/src/fix_qeq_comb.txt b/doc/src/fix_qeq_comb.txt index 30c5003e72..7f82404127 100644 --- a/doc/src/fix_qeq_comb.txt +++ b/doc/src/fix_qeq_comb.txt @@ -74,7 +74,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_qeq_reax.txt b/doc/src/fix_qeq_reax.txt index a1a19b7368..18450c7cd5 100644 --- a/doc/src/fix_qeq_reax.txt +++ b/doc/src/fix_qeq_reax.txt @@ -92,7 +92,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_reax_bonds.txt b/doc/src/fix_reax_bonds.txt index aadb0a9cbc..54aa7faef8 100644 --- a/doc/src/fix_reax_bonds.txt +++ b/doc/src/fix_reax_bonds.txt @@ -82,7 +82,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section_accelerate"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_reaxc_species.txt b/doc/src/fix_reaxc_species.txt index 9a588356e0..7c920791f7 100644 --- a/doc/src/fix_reaxc_species.txt +++ b/doc/src/fix_reaxc_species.txt @@ -151,7 +151,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section_accelerate"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_rigid.txt b/doc/src/fix_rigid.txt index dbadd3fa63..62969112f7 100644 --- a/doc/src/fix_rigid.txt +++ b/doc/src/fix_rigid.txt @@ -212,8 +212,9 @@ pour"_fix_pour.html. For bodystyle {single} the entire fix group of atoms is treated as one rigid body. 
This option is only allowed for the {rigid} styles. -For bodystyle {molecule}, each set of atoms in the fix group with a -different molecule ID is treated as a rigid body. This option is +For bodystyle {molecule}, atoms are grouped into rigid bodies by their +respective molecule IDs: each set of atoms in the fix group with the +same molecule ID is treated as a different rigid body. This option is allowed for both the {rigid} and {rigid/small} styles. Note that atoms with a molecule ID = 0 will be treated as a single rigid body. For a system with atomic solvent (typically this is atoms with @@ -675,7 +676,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_setforce.txt b/doc/src/fix_setforce.txt index 90766fc5bc..f5be0f93a5 100644 --- a/doc/src/fix_setforce.txt +++ b/doc/src/fix_setforce.txt @@ -82,7 +82,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_shake.txt b/doc/src/fix_shake.txt index 8b26aaa874..c187b17c6c 100644 --- a/doc/src/fix_shake.txt +++ b/doc/src/fix_shake.txt @@ -159,7 +159,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fix_wall_ees.txt b/doc/src/fix_wall_ees.txt new file mode 100644 index 0000000000..a8688e8e41 --- /dev/null +++ b/doc/src/fix_wall_ees.txt @@ -0,0 +1,117 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +fix wall/ees command :h3 +fix wall/region/ees command :h3 + +[Syntax:] + +fix ID group-ID style args :pre + +ID, group-ID are documented in "fix"_fix.html command :ulb,l +style = {wall/ees} or {wall/region/ees} :l + args for style {wall/ees}: one or more {face parameters} groups may be appended + face = {xlo} or {xhi} or {ylo} or {yhi} or {zlo} or {zhi} + parameters = coord epsilon sigma cutoff + coord = position of wall = EDGE or constant or variable + EDGE = current lo or hi edge of simulation box + constant = number like 0.0 or -30.0 (distance units) + variable = "equal-style variable"_variable.html like v_x or v_wiggle + epsilon = strength factor for wall-particle interaction (energy or energy/distance^2 units) + epsilon can be a variable (see below) + sigma = size factor for wall-particle interaction (distance units) + sigma can be a variable (see below) + cutoff = distance from wall at which wall-particle interaction is cut off (distance units) :pre + + args for style {wall/region/ees}: {region-ID} {epsilon} {sigma} {cutoff} + region-ID = region whose boundary will act as wall + epsilon = strength factor for wall-particle interaction (energy or energy/distance^2 units) + sigma = size factor for wall-particle 
interaction (distance units) + cutoff = distance from wall at which wall-particle interaction is cut off (distance units) :pre + :ule + +[Examples:] + +fix wallhi all wall/ees xlo -1.0 1.0 1.0 2.5 units box +fix wallhi all wall/ees xhi EDGE 1.0 1.0 2.5 +fix wallhi all wall/ees v_wiggle 23.2 1.0 1.0 2.5 +fix zwalls all wall/ees zlo 0.0 1.0 1.0 0.858 zhi 40.0 1.0 1.0 0.858 :pre + +fix ees_cube all wall/region/ees myCube 1.0 1.0 2.5 :pre + + +[Description:] + +Fix {wall/ees} bounds the simulation domain on one or more of its +faces with a flat wall that interacts with the ellipsoidal atoms in the +group by generating a force on the atom in a direction perpendicular to +the wall and a torque parallel with the wall.  The energy of +wall-particle interactions E is given by: + +:c,image(Eqs/fix_wall_ees.jpg) + +This potential was introduced by Babadi and Ejtehadi in +"(Babadi)"_#BabadiEjtehadi. Here, {r} is the distance from the particle +to the wall at position {coord}, and Rc is the {cutoff} distance at +which the particle and wall no longer interact. Also, sigma_n is the +distance between the center of the ellipsoid and the nearest point of +its surface to the wall (see the image below). + +:c,image(JPG/fix_wall_ees_image.jpg) + +Details of using this command and its specifications are the same as +for the "fix wall"_fix_wall.html command. You can also find an example +in the examples/USER/misc/ees/ directory. + +The prefactor {epsilon} can be thought of as an +effective Hamaker constant with energy units for the strength of the +ellipsoid-wall interaction.  More specifically, the {epsilon} pre-factor += 8 * pi^2 * rho_wall * rho_ellipsoid * epsilon +* sigma_a * sigma_b * sigma_c, where epsilon is the LJ parameter for +the constituent LJ particles and sigma_a, sigma_b, and sigma_c are the +radii of the ellipsoidal particles. Rho_wall and rho_ellipsoid are the +number densities of the constituent particles, in the wall and ellipsoid +respectively, in units of 1/volume.
+ +NOTE: You must ensure that r is always bigger than sigma_n for +all particles in the group, or LAMMPS will generate an error.  This +means you cannot start your simulation with particles touching the wall +position {coord} (r = sigma_n) or with particles penetrating the wall (0 <= r < sigma_n) or with particles on the wrong side of the +wall (r < 0). + + +Fix {wall/region/ees} treats the surface of the geometric region defined +by the {region-ID} as a bounding wall which interacts with nearby +ellipsoidal particles according to the EES potential introduced above. + +Other details of this command are the same as for the "fix +wall/region"_fix_wall_region.html command. One may also find an example +of using this fix in the examples/USER/misc/ees/ directory. + +[Restrictions:] + +This fix is part of the USER-MISC package. It is only enabled if +LAMMPS was built with that package. See the "Making +LAMMPS"_Section_start.html#start_3 section for more info. + +This fix requires that atoms be ellipsoids as defined by the +"atom_style ellipsoid"_atom_style.html command. + +[Related commands:] + +"fix wall"_fix_wall.html, +"pair resquared"_pair_resquared.html + +[Default:] + +none + +:line + +:link(BabadiEjtehadi) +[(Babadi)] Babadi and Ejtehadi, EPL, 77 (2007) 23002. diff --git a/doc/src/fix_wall_reflect.txt b/doc/src/fix_wall_reflect.txt index 5b425316e0..954ec65bf6 100644 --- a/doc/src/fix_wall_reflect.txt +++ b/doc/src/fix_wall_reflect.txt @@ -142,7 +142,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script.
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/fixes.txt b/doc/src/fixes.txt index ce73ed99e3..3dc5e77e14 100644 --- a/doc/src/fixes.txt +++ b/doc/src/fixes.txt @@ -156,6 +156,7 @@ Fixes :h1 fix_viscosity fix_viscous fix_wall + fix_wall_ees fix_wall_gran fix_wall_gran_region fix_wall_piston diff --git a/doc/src/improper_class2.txt b/doc/src/improper_class2.txt index 0b41afe2db..14ec6258de 100644 --- a/doc/src/improper_class2.txt +++ b/doc/src/improper_class2.txt @@ -99,7 +99,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/improper_cossq.txt b/doc/src/improper_cossq.txt index e238063a8f..138a6a1650 100644 --- a/doc/src/improper_cossq.txt +++ b/doc/src/improper_cossq.txt @@ -65,7 +65,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/improper_cvff.txt b/doc/src/improper_cvff.txt index 72f346ba04..5f69eccc60 100644 --- a/doc/src/improper_cvff.txt +++ b/doc/src/improper_cvff.txt @@ -66,7 +66,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/improper_fourier.txt b/doc/src/improper_fourier.txt index 3a5354b1fe..f9062da207 100644 --- a/doc/src/improper_fourier.txt +++ b/doc/src/improper_fourier.txt @@ -60,7 +60,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/improper_harmonic.txt b/doc/src/improper_harmonic.txt index b47b0ca41f..bb17e5a641 100644 --- a/doc/src/improper_harmonic.txt +++ b/doc/src/improper_harmonic.txt @@ -70,7 +70,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/improper_ring.txt b/doc/src/improper_ring.txt index cba59399e7..c02d392474 100644 --- a/doc/src/improper_ring.txt +++ b/doc/src/improper_ring.txt @@ -69,7 +69,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/improper_umbrella.txt b/doc/src/improper_umbrella.txt index fafa2e7e4c..d6df9ee6cc 100644 --- a/doc/src/improper_umbrella.txt +++ b/doc/src/improper_umbrella.txt @@ -63,7 +63,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/jump.txt b/doc/src/jump.txt index 1b1a209511..4e3799f7b1 100644 --- a/doc/src/jump.txt +++ b/doc/src/jump.txt @@ -40,12 +40,12 @@ lmp_g++ < in.script :pre since the SELF option invokes the C-library rewind() call, which may not be supported for stdin on some systems or by some MPI implementations. This can be worked around by using the "-in -command-line argument"_Section_start.html#start_7, e.g. +command-line argument"_Section_start.html#start_6, e.g. lmp_g++ -in in.script :pre or by using the "-var command-line -argument"_Section_start.html#start_7 to pass the script name as a +argument"_Section_start.html#start_6 to pass the script name as a variable to the input script. In the latter case, a "variable"_variable.html called "fname" could be used in place of SELF, e.g. 
diff --git a/doc/src/lammps.book b/doc/src/lammps.book index d901ef3f77..76b6743657 100644 --- a/doc/src/lammps.book +++ b/doc/src/lammps.book @@ -282,6 +282,7 @@ fix_vector.html fix_viscosity.html fix_viscous.html fix_wall.html +fix_wall_ees.html fix_wall_gran.html fix_wall_gran_region.html fix_wall_piston.html diff --git a/doc/src/log.txt b/doc/src/log.txt index 460482ea1e..92bb12e6db 100644 --- a/doc/src/log.txt +++ b/doc/src/log.txt @@ -34,7 +34,7 @@ the same log file. The file "log.lammps" is the default log file for a LAMMPS run. The name of the initial log file can also be set by the command-line -switch -log. See "Section 2.7"_Section_start.html#start_7 for +switch -log. See "Section 2.6"_Section_start.html#start_6 for details. [Restrictions:] none diff --git a/doc/src/neb.txt b/doc/src/neb.txt index d2e8be3f03..144fe8bdef 100644 --- a/doc/src/neb.txt +++ b/doc/src/neb.txt @@ -51,7 +51,7 @@ follows the discussion in these 4 papers: "(HenkelmanA)"_#HenkelmanA, Each replica runs on a partition of one or more processors. Processor partitions are defined at run-time using the -partition command-line -switch; see "Section 2.7"_Section_start.html#start_7 of the manual. +switch; see "Section 2.6"_Section_start.html#start_6 of the manual. Note that if you have MPI installed, you can run a multi-replica simulation with more replicas (partitions) than you have physical processors, e.g you can run a 10-replica simulation on just one or two diff --git a/doc/src/neigh_modify.txt b/doc/src/neigh_modify.txt index 5c149d892d..c4544cb29b 100644 --- a/doc/src/neigh_modify.txt +++ b/doc/src/neigh_modify.txt @@ -109,7 +109,8 @@ atoms in the specified group. This can be useful for models where a large portion of the simulation is particles that do not interact with other particles or with each other via pairwise interactions. The group specified with this option must also be specified via the -"atom_modify first"_atom_modify.html command. 
+"atom_modify first"_atom_modify.html command. Note that specifying +"all" as the group-ID effectively turns off the {include} option. The {exclude} option turns off pairwise interactions between certain pairs of atoms, by not including them in the neighbor list. These are @@ -213,5 +214,5 @@ space. [Default:] The option defaults are delay = 10, every = 1, check = yes, once = no, -cluster = no, include = all, exclude = none, page = 100000, one = -2000, and binsize = 0.0. +cluster = no, include = all (same as no include option defined), +exclude = none, page = 100000, one = 2000, and binsize = 0.0. diff --git a/doc/src/neighbor.txt b/doc/src/neighbor.txt index 7b8f499ba8..062f79a5bb 100644 --- a/doc/src/neighbor.txt +++ b/doc/src/neighbor.txt @@ -66,7 +66,7 @@ stored in the list. When a run is finished, counts of the number of neighbors stored in the pairwise list and the number of times neighbor lists were built are printed to the screen and log file. See "this -section"_Section_start.html#start_8 for details. +section"_Section_start.html#start_7 for details. [Restrictions:] none diff --git a/doc/src/next.txt b/doc/src/next.txt index fe9dc97542..08f73b896c 100644 --- a/doc/src/next.txt +++ b/doc/src/next.txt @@ -71,7 +71,7 @@ next value (for each variable) is assigned to whichever processor partition executes the command first. All processors in the partition are assigned the same value(s). Running LAMMPS on multiple partitions of processors via the "-partition" command-line switch is described in -"this section"_Section_start.html#start_7 of the manual. {Universe}- +"this section"_Section_start.html#start_6 of the manual. {Universe}- and {uloop}-style variables are incremented using the files "tmp.lammps.variable" and "tmp.lammps.variable.lock" which you will see in your directory during and after such a LAMMPS run. 
diff --git a/doc/src/package.txt b/doc/src/package.txt index 18a26bd55c..1b9092644f 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -115,7 +115,7 @@ their initialization, before a simulation is defined. This command can also be specified from the command-line when launching LAMMPS, using the "-pk" "command-line -switch"_Section_start.html#start_7. The syntax is exactly the same as +switch"_Section_start.html#start_6. The syntax is exactly the same as when used in an input script. Note that all of the accelerator packages require the package command @@ -126,18 +126,18 @@ a default version of the command is typically invoked by other accelerator settings. The KOKKOS package requires a "-k on" "command-line -switch"_Section_start.html#start_7 respectively, which invokes a +switch"_Section_start.html#start_6 respectively, which invokes a "package kokkos" command with default settings. For the GPU, USER-INTEL, and USER-OMP packages, if a "-sf gpu" or "-sf -intel" or "-sf omp" "command-line switch"_Section_start.html#start_7 +intel" or "-sf omp" "command-line switch"_Section_start.html#start_6 is used to auto-append accelerator suffixes to various styles in the input script, then those switches also invoke a "package gpu", "package intel", or "package omp" command with default settings. NOTE: A package command for a particular style can be invoked multiple times when a simulation is setup, e.g. by the "-c on", "-k on", "-sf", -and "-pk" "command-line switches"_Section_start.html#start_7, and by +and "-pk" "command-line switches"_Section_start.html#start_6, and by using this command in an input script. Each time it is used all of the style options are set, either to default values or to specified settings. I.e. settings from previous invocations do not persist @@ -305,7 +305,7 @@ value via their package commands, but there is only a single global invoked, you should insure the two values are consistent. 
If they are not, the last one invoked will take precedence, for both packages. Also note that if the "-sf hybrid intel omp" "command-line -switch"_"_Section_start.html#start_7 is used, it invokes a "package +switch"_Section_start.html#start_6 is used, it invokes a "package intel" command, followed by a "package omp" command, both with a setting of {Nthreads} = 0. @@ -550,7 +550,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. [Related commands:] "suffix"_suffix.html, "-pk" "command-line -setting"_Section_start.html#start_7 +setting"_Section_start.html#start_6 [Default:] @@ -558,9 +558,9 @@ For the GPU package, the default is Ngpu = 1 and the option defaults are neigh = yes, newton = off, binsize = 0.0, split = 1.0, gpuID = 0 to Ngpu-1, tpa = 1, and device = not used. These settings are made automatically if the "-sf gpu" "command-line -switch"_Section_start.html#start_7 is used. If it is not used, you +switch"_Section_start.html#start_6 is used. If it is not used, you must invoke the package gpu command in your input script or via the -"-pk gpu" "command-line switch"_Section_start.html#start_7. +"-pk gpu" "command-line switch"_Section_start.html#start_6. For the USER-INTEL package, the default is Nphi = 1 and the option defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4, @@ -569,21 +569,21 @@ style being used. This value is output to the screen in the offload report at the end of each run. Note that all of these settings, except "omp" and "mode", are ignored if LAMMPS was not built with Xeon Phi coprocessor support. These settings are made automatically -if the "-sf intel" "command-line switch"_Section_start.html#start_7 +if the "-sf intel" "command-line switch"_Section_start.html#start_6 is used. If it is not used, you must invoke the package intel command in your input script or or via the "-pk intel" "command-line -switch"_Section_start.html#start_7. +switch"_Section_start.html#start_6. 
For the KOKKOS package, the option defaults neigh = full, neigh/qeq = full, newton = off, binsize = 0.0, and comm = device. These settings are made automatically by the required "-k on" "command-line -switch"_Section_start.html#start_7. You can change them bu using the +switch"_Section_start.html#start_6. You can change them by using the package kokkos command in your input script or via the "-pk kokkos" -"command-line switch"_Section_start.html#start_7. +"command-line switch"_Section_start.html#start_6. For the OMP package, the default is Nthreads = 0 and the option defaults are neigh = yes. These settings are made automatically if -the "-sf omp" "command-line switch"_Section_start.html#start_7 is +the "-sf omp" "command-line switch"_Section_start.html#start_6 is used. If it is not used, you must invoke the package omp command in your input script or via the "-pk omp" "command-line -switch"_Section_start.html#start_7. +switch"_Section_start.html#start_6. diff --git a/doc/src/pair_adp.txt b/doc/src/pair_adp.txt index 457a797d95..9d2a48dcbc 100644 --- a/doc/src/pair_adp.txt +++ b/doc/src/pair_adp.txt @@ -137,7 +137,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_agni.txt b/doc/src/pair_agni.txt index 06dcccb9d9..402e537dad 100644 --- a/doc/src/pair_agni.txt +++ b/doc/src/pair_agni.txt @@ -70,7 +70,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated style explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_airebo.txt b/doc/src/pair_airebo.txt index 2e3083c34b..1aa017f278 100644 --- a/doc/src/pair_airebo.txt +++ b/doc/src/pair_airebo.txt @@ -7,20 +7,24 @@ :line pair_style airebo command :h3 +pair_style airebo/intel command :h3 pair_style airebo/omp command :h3 pair_style airebo/morse command :h3 +pair_style airebo/morse/intel command :h3 pair_style airebo/morse/omp command :h3 pair_style rebo command :h3 +pair_style rebo/intel command :h3 pair_style rebo/omp command :h3 [Syntax:] -pair_style style cutoff LJ_flag TORSION_flag :pre +pair_style style cutoff LJ_flag TORSION_flag cutoff_min :pre style = {airebo} or {airebo/morse} or {rebo} cutoff = LJ or Morse cutoff (sigma scale factor) (AIREBO and AIREBO-M only) LJ_flag = 0/1 to turn off/on the LJ or Morse term (AIREBO and AIREBO-M only, optional) -TORSION_flag = 0/1 to turn off/on the torsion term (AIREBO and AIREBO-M only, optional) :ul +TORSION_flag = 0/1 to turn off/on the torsion term (AIREBO and AIREBO-M only, optional) +cutoff_min = Start of the transition region of cutoff (sigma scale factor) (AIREBO and AIREBO-M only, optional) :ul [Examples:] @@ -60,7 +64,7 @@ The AIREBO potential consists of three terms: :c,image(Eqs/pair_airebo.jpg) By default, all three terms are included. For the {airebo} style, if -the two optional flag arguments to the pair_style command are +the first two optional flag arguments to the pair_style command are included, the LJ and torsional terms can be turned off. Note that both or neither of the flags must be included. 
If both of the LJ an torsional terms are turned off, it becomes the 2nd-generation REBO @@ -97,6 +101,12 @@ standard AIREBO potential, sigma_CC = 3.4 Angstroms, so with a scale factor of 3.0 (the argument in pair_style), the resulting E_LJ cutoff would be 10.2 Angstroms. +By default, the longer-ranged interaction is smoothly switched off +between 2.16 and 3.0 sigma. By specifying {cutoff_min} in addition +to {cutoff}, the switching can be configured to take place between +{cutoff_min} and {cutoff}. {cutoff_min} can only be specified if all +optional arguments are given. + The E_TORSION term is an explicit 4-body potential that describes various dihedral angle preferences in hydrocarbon configurations. @@ -178,7 +188,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_beck.txt b/doc/src/pair_beck.txt index 4e792754b8..e160f09b3d 100644 --- a/doc/src/pair_beck.txt +++ b/doc/src/pair_beck.txt @@ -63,7 +63,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_born.txt b/doc/src/pair_born.txt index d38d9e3191..a3cc744a22 100644 --- a/doc/src/pair_born.txt +++ b/doc/src/pair_born.txt @@ -152,7 +152,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_brownian.txt b/doc/src/pair_brownian.txt index 33eed77629..79b71e91c7 100644 --- a/doc/src/pair_brownian.txt +++ b/doc/src/pair_brownian.txt @@ -85,7 +85,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "this section"_Section_accelerate.html of the manual for more diff --git a/doc/src/pair_buck.txt b/doc/src/pair_buck.txt index e705e735fb..d18b39d5d9 100644 --- a/doc/src/pair_buck.txt +++ b/doc/src/pair_buck.txt @@ -152,7 +152,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_buck_long.txt b/doc/src/pair_buck_long.txt index ba18738e4d..05e760e1b2 100644 --- a/doc/src/pair_buck_long.txt +++ b/doc/src/pair_buck_long.txt @@ -114,7 +114,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_charmm.txt b/doc/src/pair_charmm.txt index 1e78607c08..75a8e4bff9 100644 --- a/doc/src/pair_charmm.txt +++ b/doc/src/pair_charmm.txt @@ -7,6 +7,7 @@ :line pair_style lj/charmm/coul/charmm command :h3 +pair_style lj/charmm/coul/charmm/intel command :h3 pair_style lj/charmm/coul/charmm/omp command :h3 pair_style lj/charmm/coul/charmm/implicit command :h3 pair_style lj/charmm/coul/charmm/implicit/omp command :h3 @@ -195,7 +196,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_class2.txt b/doc/src/pair_class2.txt index 23b90aae2d..36fae5068b 100644 --- a/doc/src/pair_class2.txt +++ b/doc/src/pair_class2.txt @@ -114,7 +114,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_colloid.txt b/doc/src/pair_colloid.txt index a0df1d464e..83b15b358b 100644 --- a/doc/src/pair_colloid.txt +++ b/doc/src/pair_colloid.txt @@ -139,7 +139,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_comb.txt b/doc/src/pair_comb.txt index 3a2f380bfa..f5461b1cbc 100644 --- a/doc/src/pair_comb.txt +++ b/doc/src/pair_comb.txt @@ -124,7 +124,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_coul.txt b/doc/src/pair_coul.txt index 4a601e90c0..29e5beed3c 100644 --- a/doc/src/pair_coul.txt +++ b/doc/src/pair_coul.txt @@ -274,7 +274,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_dipole.txt b/doc/src/pair_dipole.txt index 985581cac8..2516e5eae4 100644 --- a/doc/src/pair_dipole.txt +++ b/doc/src/pair_dipole.txt @@ -198,7 +198,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_dpd.txt b/doc/src/pair_dpd.txt index 62a5faffed..9dd204ad2d 100644 --- a/doc/src/pair_dpd.txt +++ b/doc/src/pair_dpd.txt @@ -121,7 +121,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_eam.txt b/doc/src/pair_eam.txt index 4d3c2b2dea..a0026432ec 100644 --- a/doc/src/pair_eam.txt +++ b/doc/src/pair_eam.txt @@ -14,6 +14,7 @@ pair_style eam/omp command :h3 pair_style eam/opt command :h3 pair_style eam/alloy command :h3 pair_style eam/alloy/gpu command :h3 +pair_style eam/alloy/intel command :h3 pair_style eam/alloy/kk command :h3 pair_style eam/alloy/omp command :h3 pair_style eam/alloy/opt command :h3 @@ -21,6 +22,7 @@ pair_style eam/cd command :h3 pair_style eam/cd/omp command :h3 pair_style eam/fs command :h3 pair_style eam/fs/gpu command :h3 +pair_style eam/fs/intel command :h3 pair_style eam/fs/kk command :h3 pair_style eam/fs/omp command :h3 pair_style eam/fs/opt command :h3 @@ -381,7 +383,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for more diff --git a/doc/src/pair_edip.txt b/doc/src/pair_edip.txt index 86453859d3..e5b1420b59 100644 --- a/doc/src/pair_edip.txt +++ b/doc/src/pair_edip.txt @@ -121,7 +121,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_eim.txt b/doc/src/pair_eim.txt index 3f068d4040..75ad2d4683 100644 --- a/doc/src/pair_eim.txt +++ b/doc/src/pair_eim.txt @@ -148,7 +148,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_gayberne.txt b/doc/src/pair_gayberne.txt index 8639f220a4..c923578586 100644 --- a/doc/src/pair_gayberne.txt +++ b/doc/src/pair_gayberne.txt @@ -145,7 +145,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_gran.txt b/doc/src/pair_gran.txt index 62a58b3504..d7e87af013 100644 --- a/doc/src/pair_gran.txt +++ b/doc/src/pair_gran.txt @@ -191,7 +191,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_gromacs.txt b/doc/src/pair_gromacs.txt index 3aca8c3cd3..ec84a2d57a 100644 --- a/doc/src/pair_gromacs.txt +++ b/doc/src/pair_gromacs.txt @@ -103,7 +103,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_hbond_dreiding.txt b/doc/src/pair_hbond_dreiding.txt index 9641e294fa..d3cf90ec14 100644 --- a/doc/src/pair_hbond_dreiding.txt +++ b/doc/src/pair_hbond_dreiding.txt @@ -178,7 +178,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_hybrid.txt b/doc/src/pair_hybrid.txt index 5166fe1f84..fc1824cf62 100644 --- a/doc/src/pair_hybrid.txt +++ b/doc/src/pair_hybrid.txt @@ -330,7 +330,7 @@ LAMMPS was built with those packages. See the You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_kim.txt b/doc/src/pair_kim.txt index 5a623e5ece..c5d910e27c 100644 --- a/doc/src/pair_kim.txt +++ b/doc/src/pair_kim.txt @@ -27,13 +27,34 @@ pair_coeff * * Ar Ar :pre [Description:] This pair style is a wrapper on the "Knowledge Base for Interatomic -Models (KIM)"_https://openkim.org repository of interatomic potentials, -so that they can be used by LAMMPS scripts. +Models (OpenKIM)"_https://openkim.org repository of interatomic +potentials, so that they can be used by LAMMPS scripts. -In KIM lingo, a potential is a "model" and a model contains both the -analytic formulas that define the potential as well as the parameters -needed to run it for one or more materials, including coefficients and -cutoffs. +Note that in LAMMPS lingo, a KIM model driver is a pair style +(e.g. EAM or Tersoff). A KIM model is a pair style for a particular +element or alloy and set of parameters, e.g. EAM for Cu with a +specific EAM potential file. + +See the current list of "KIM model +drivers"_https://openkim.org/kim-items/model-drivers/alphabetical. + +See the current list of all "KIM +models"_https://openkim.org/kim-items/models/by-model-drivers. + +See the list of "example KIM models"_https://openkim.org/kim-api which +are included in the KIM library by default, in the "What is in the KIM +API source package?" section. + +To use this pair style, you must first download and install the KIM +API library from the "OpenKIM website"_https://openkim.org. The "KIM +section of Section packages"_Section_packages.html#KIM has +instructions on how to do this with a simple make command, when +building LAMMPS. + +See the examples/kim dir for an input script that uses a KIM model +(potential) for Lennard-Jones. + +:line The argument {virialmode} determines how the global virial is calculated. 
If {KIMvirial} is specified, the KIM model performs the diff --git a/doc/src/pair_lj.txt b/doc/src/pair_lj.txt index 5c8e31ac42..058d54fb59 100644 --- a/doc/src/pair_lj.txt +++ b/doc/src/pair_lj.txt @@ -253,7 +253,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lj96.txt b/doc/src/pair_lj96.txt index 6e7c3cbaec..83f6ec063d 100644 --- a/doc/src/pair_lj96.txt +++ b/doc/src/pair_lj96.txt @@ -61,7 +61,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lj_cubic.txt b/doc/src/pair_lj_cubic.txt index d33e3ec09b..4ca8c3c141 100644 --- a/doc/src/pair_lj_cubic.txt +++ b/doc/src/pair_lj_cubic.txt @@ -75,7 +75,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lj_expand.txt b/doc/src/pair_lj_expand.txt index c5f0c88a75..e0838426f6 100644 --- a/doc/src/pair_lj_expand.txt +++ b/doc/src/pair_lj_expand.txt @@ -65,7 +65,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lj_long.txt b/doc/src/pair_lj_long.txt index da9f37b9c3..6be4562d18 100644 --- a/doc/src/pair_lj_long.txt +++ b/doc/src/pair_lj_long.txt @@ -168,7 +168,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lj_smooth.txt b/doc/src/pair_lj_smooth.txt index 133773abd0..b1678cad58 100644 --- a/doc/src/pair_lj_smooth.txt +++ b/doc/src/pair_lj_smooth.txt @@ -74,7 +74,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lj_smooth_linear.txt b/doc/src/pair_lj_smooth_linear.txt index a48c441f54..5f7c226cee 100644 --- a/doc/src/pair_lj_smooth_linear.txt +++ b/doc/src/pair_lj_smooth_linear.txt @@ -61,7 +61,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lj_soft.txt b/doc/src/pair_lj_soft.txt index e372092cf0..2ef133da55 100644 --- a/doc/src/pair_lj_soft.txt +++ b/doc/src/pair_lj_soft.txt @@ -219,7 +219,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_lubricate.txt b/doc/src/pair_lubricate.txt index 501a043801..b39c7545c7 100644 --- a/doc/src/pair_lubricate.txt +++ b/doc/src/pair_lubricate.txt @@ -154,7 +154,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "this section"_Section_accelerate.html of the manual for more diff --git a/doc/src/pair_meam_spline.txt b/doc/src/pair_meam_spline.txt index 2295a6640b..6653b397a0 100644 --- a/doc/src/pair_meam_spline.txt +++ b/doc/src/pair_meam_spline.txt @@ -118,7 +118,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_morse.txt b/doc/src/pair_morse.txt index 5fbb6d5c0a..3eb5ac5afe 100644 --- a/doc/src/pair_morse.txt +++ b/doc/src/pair_morse.txt @@ -113,7 +113,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_nb3b_harmonic.txt b/doc/src/pair_nb3b_harmonic.txt index 3f7066c826..2395707fb4 100644 --- a/doc/src/pair_nb3b_harmonic.txt +++ b/doc/src/pair_nb3b_harmonic.txt @@ -104,7 +104,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_nm.txt b/doc/src/pair_nm.txt index 9096bdc523..81cea1a38d 100644 --- a/doc/src/pair_nm.txt +++ b/doc/src/pair_nm.txt @@ -145,7 +145,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_peri.txt b/doc/src/pair_peri.txt index 6ffd8122aa..deca093e3b 100644 --- a/doc/src/pair_peri.txt +++ b/doc/src/pair_peri.txt @@ -127,7 +127,7 @@ G (force/area units) horizon (distance units) s00 (unitless) alpha (unitless) -m_yield_stress (force/area units) +m_yield_stress (force/area units) :ul K is the bulk modulus and G is the shear modulus. The horizon is a cutoff distance and s00 and alpha are used as a bond breaking @@ -151,7 +151,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_quip.txt b/doc/src/pair_quip.txt index 12dcd244e2..9436b0c4ed 100644 --- a/doc/src/pair_quip.txt +++ b/doc/src/pair_quip.txt @@ -80,6 +80,22 @@ LAMMPS"_Section_start.html#start_3 section for more info. QUIP potentials are parametrized in electron-volts and Angstroms and therefore should be used with LAMMPS metal "units"_units.html. 
+QUIP potentials are generally not designed to work with the scaling +factors set by the "special_bonds"_special_bonds.html command. The +recommended setting in molecular systems is to include all +interactions, i.e. to use {special_bonds lj/coul 1.0 1.0 1.0}. Scaling +factors > 0.0 will be ignored and treated as 1.0. The only exception +to this rule is if you know that your QUIP potential needs to exclude +bonded, 1-3, or 1-4 interactions and does not already do this exclusion +within QUIP. Then a factor 0.0 needs to be used which will remove such +pairs from the neighbor list. This needs to be very carefully tested, +because it may remove pairs from the neighbor list that are still +required. + +Pair style {quip} cannot be used with pair style {hybrid}, only +with {hybrid/overlay} and only the {quip} substyle is applied to +all atom types. + [Related commands:] "pair_coeff"_pair_coeff.html diff --git a/doc/src/pair_reaxc.txt b/doc/src/pair_reaxc.txt index cfa88673d7..b9dc6e0ed8 100644 --- a/doc/src/pair_reaxc.txt +++ b/doc/src/pair_reaxc.txt @@ -311,7 +311,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_resquared.txt b/doc/src/pair_resquared.txt index 2e0034ed3b..9ad95eb5fc 100644 --- a/doc/src/pair_resquared.txt +++ b/doc/src/pair_resquared.txt @@ -157,7 +157,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_sdk.txt b/doc/src/pair_sdk.txt index 1c348eaaf7..360136a4ea 100644 --- a/doc/src/pair_sdk.txt +++ b/doc/src/pair_sdk.txt @@ -97,7 +97,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_soft.txt b/doc/src/pair_soft.txt index ec1c06729a..08fa88c477 100644 --- a/doc/src/pair_soft.txt +++ b/doc/src/pair_soft.txt @@ -94,7 +94,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_sw.txt b/doc/src/pair_sw.txt index 6025b9b11b..6ed8f00236 100644 --- a/doc/src/pair_sw.txt +++ b/doc/src/pair_sw.txt @@ -156,7 +156,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. When using the USER-INTEL package with this style, there is an diff --git a/doc/src/pair_table.txt b/doc/src/pair_table.txt index 01c577cd98..b99491b477 100644 --- a/doc/src/pair_table.txt +++ b/doc/src/pair_table.txt @@ -229,7 +229,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_tersoff.txt b/doc/src/pair_tersoff.txt index 23a20ad0fd..918e889924 100644 --- a/doc/src/pair_tersoff.txt +++ b/doc/src/pair_tersoff.txt @@ -191,7 +191,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_tersoff_mod.txt b/doc/src/pair_tersoff_mod.txt index ff703063b3..e0c2b5a5cb 100644 --- a/doc/src/pair_tersoff_mod.txt +++ b/doc/src/pair_tersoff_mod.txt @@ -143,7 +143,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_tersoff_zbl.txt b/doc/src/pair_tersoff_zbl.txt index 18e54749aa..21d57e4e88 100644 --- a/doc/src/pair_tersoff_zbl.txt +++ b/doc/src/pair_tersoff_zbl.txt @@ -201,7 +201,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_thole.txt b/doc/src/pair_thole.txt index 61ca0b5c35..41a4059cee 100644 --- a/doc/src/pair_thole.txt +++ b/doc/src/pair_thole.txt @@ -142,7 +142,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_vashishta.txt b/doc/src/pair_vashishta.txt index 9c275a61d3..d9c66d45c0 100644 --- a/doc/src/pair_vashishta.txt +++ b/doc/src/pair_vashishta.txt @@ -183,7 +183,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_yukawa.txt b/doc/src/pair_yukawa.txt index 26acdb2ccb..61d6bde6a9 100644 --- a/doc/src/pair_yukawa.txt +++ b/doc/src/pair_yukawa.txt @@ -60,7 +60,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_yukawa_colloid.txt b/doc/src/pair_yukawa_colloid.txt index ecdc1496ab..2037a9451f 100644 --- a/doc/src/pair_yukawa_colloid.txt +++ b/doc/src/pair_yukawa_colloid.txt @@ -92,7 +92,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/pair_zbl.txt b/doc/src/pair_zbl.txt index 154fdc1c13..5ab672171b 100644 --- a/doc/src/pair_zbl.txt +++ b/doc/src/pair_zbl.txt @@ -82,7 +82,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. 
You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/partition.txt b/doc/src/partition.txt index 9c1d560c83..610eee99b3 100644 --- a/doc/src/partition.txt +++ b/doc/src/partition.txt @@ -27,7 +27,7 @@ partition yes 6* fix all nvt temp 1.0 1.0 0.1 :pre This command invokes the specified command on a subset of the partitions of processors you have defined via the -partition -command-line switch. See "Section 2.6"_Section_start.html#start_7 +command-line switch. See "Section 2.6"_Section_start.html#start_6 for an explanation of the switch. Normally, every input script command in your script is invoked by @@ -49,7 +49,7 @@ argument. Partitions are numbered from 1 to Np, where Np is the number of partitions specified by the "-partition command-line -switch"_Section_start.html#start_7. +switch"_Section_start.html#start_6. {N} can be specified in one of two ways. An explicit numeric value can be used, as in the 1st example above. Or a wild-card asterisk can diff --git a/doc/src/prd.txt b/doc/src/prd.txt index 247d422b1c..3c0305e316 100644 --- a/doc/src/prd.txt +++ b/doc/src/prd.txt @@ -63,7 +63,7 @@ event to occur. Each replica runs on a partition of one or more processors. Processor partitions are defined at run-time using the -partition command-line -switch; see "Section 2.7"_Section_start.html#start_7 of the manual. +switch; see "Section 2.6"_Section_start.html#start_6 of the manual. 
Note that if you have MPI installed, you can run a multi-replica simulation with more replicas (partitions) than you have physical processors, e.g you can run a 10-replica simulation on one or two diff --git a/doc/src/processors.txt b/doc/src/processors.txt index 781049af9c..e54b2cede3 100644 --- a/doc/src/processors.txt +++ b/doc/src/processors.txt @@ -82,7 +82,7 @@ sub-domain. Also note that if multiple partitions are being used then P is the number of processors in this partition; see "this -section"_Section_start.html#start_7 for an explanation of the +section"_Section_start.html#start_6 for an explanation of the -partition command-line switch. Also note that you can prefix the processors command with the "partition"_partition.html command to easily specify different Px,Py,Pz values for different partitions. @@ -249,7 +249,7 @@ partition {Precv} which is enforced when each is setting up their own mapping of their processors to the simulation box. Each of {Psend} and {Precv} must be integers from 1 to Np, where Np is the number of partitions you have defined via the "-partition command-line -switch"_Section_start.html#start_7. +switch"_Section_start.html#start_6. A "dependency" means that the sending partition will create its regular 3d grid as Px by Py by Pz and after it has done this, it will @@ -286,7 +286,7 @@ processors and their mapping to the 3d grid to the specified file processors in the manner you desired, which can be tricky to figure out, especially when running on multiple partitions or on, a multicore machine or when the processor ranks were reordered by use of the -"-reorder command-line switch"_Section_start.html#start_7 or due to +"-reorder command-line switch"_Section_start.html#start_6 or due to use of MPI-specific launch options such as a config file. 
If you have multiple partitions you should insure that each one writes @@ -300,9 +300,9 @@ The IDs are the processor's rank in this simulation (the world), the universe (of multiple simulations), and the original MPI communicator used to instantiate LAMMPS, respectively. The world and universe IDs will only be different if you are running on more than one partition; -see the "-partition command-line switch"_Section_start.html#start_7. +see the "-partition command-line switch"_Section_start.html#start_6. The universe and original IDs will only be different if you used the -"-reorder command-line switch"_Section_start.html#start_7 to reorder +"-reorder command-line switch"_Section_start.html#start_6 to reorder the processors differently than their rank in the original communicator LAMMPS was instantiated with. @@ -332,7 +332,7 @@ The {part} keyword (for the receiving partition) only works with the [Related commands:] -"partition"_partition.html, "-reorder command-line switch"_Section_start.html#start_7 +"partition"_partition.html, "-reorder command-line switch"_Section_start.html#start_6 [Default:] diff --git a/doc/src/read_data.txt b/doc/src/read_data.txt index 6785eb1066..a8aca53693 100644 --- a/doc/src/read_data.txt +++ b/doc/src/read_data.txt @@ -62,7 +62,7 @@ simulation. The file can be ASCII text or a gzipped text file atom coordinates; see the "read_restart"_read_restart.html and "create_atoms"_create_atoms.html commands for alternative methods. Also see the explanation of the "-restart command-line -switch"_Section_start.html#start_7 which can convert a restart file to +switch"_Section_start.html#start_6 which can convert a restart file to a data file. This command can be used multiple times to add new atoms and their diff --git a/doc/src/read_restart.txt b/doc/src/read_restart.txt index d0f4b16175..d1091542b8 100644 --- a/doc/src/read_restart.txt +++ b/doc/src/read_restart.txt @@ -81,7 +81,7 @@ wrong. 
Because restart files are binary, they may not be portable to other machines. In this case, you can use the "-restart command-line -switch"_Section_start.html#start_7 to convert a restart file to a data +switch"_Section_start.html#start_6 to convert a restart file to a data file. Similar to how restart files are written (see the diff --git a/doc/src/region.txt b/doc/src/region.txt index 885e5e45f8..5039e4a516 100644 --- a/doc/src/region.txt +++ b/doc/src/region.txt @@ -375,7 +375,7 @@ LAMMPS"_Section_start.html#start_3 section for more info. You can specify the accelerated styles explicitly in your input script by including their suffix, or you can use the "-suffix command-line -switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can +switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. See "Section 5"_Section_accelerate.html of the manual for diff --git a/doc/src/restart.txt b/doc/src/restart.txt index 5e0c2a9ea5..7c39ae1404 100644 --- a/doc/src/restart.txt +++ b/doc/src/restart.txt @@ -125,7 +125,7 @@ Restart files can be read by a "read_restart"_read_restart.html command to restart a simulation from a particular state. Because the file is binary (to enable exact restarts), it may not be readable on another machine. In this case, you can use the "-r command-line -switch"_Section_start.html#start_7 to convert a restart file to a data +switch"_Section_start.html#start_6 to convert a restart file to a data file. NOTE: Although the purpose of restart files is to enable restarting a diff --git a/doc/src/run_style.txt b/doc/src/run_style.txt index a67899420b..ba836a07dd 100644 --- a/doc/src/run_style.txt +++ b/doc/src/run_style.txt @@ -69,7 +69,7 @@ The {verlet} style is a standard velocity-Verlet integrator. The {verlet/split} style is also a velocity-Verlet integrator, but it splits the force calculation within each timestep over 2 partitions of -processors. 
See "Section 2.7"_Section_start.html#start_7 for an +processors. See "Section 2.6"_Section_start.html#start_6 for an explanation of the -partition command-line switch. Specifically, this style performs all computation except the @@ -115,7 +115,7 @@ When you run in 2-partition mode with the {verlet/split} style, the thermodynamic data for the entire simulation will be output to the log and screen file of the 1st partition, which are log.lammps.0 and screen.0 by default; see the "-plog and -pscreen command-line -switches"_Section_start.html#start_7 to change this. The log and +switches"_Section_start.html#start_6 to change this. The log and screen file for the 2nd partition will not contain thermodynamic output beyond the 1st timestep of the run. @@ -259,7 +259,7 @@ Accelerated styles take the same arguments and should produce the same results, except for round-off and precision issues. You can specify {respa/omp} explicitly in your input script, or -you can use the "-suffix command-line switch"_Section_start.html#start_7 +you can use the "-suffix command-line switch"_Section_start.html#start_6 when you invoke LAMMPS, or you can use the "suffix"_suffix.html command in your input script. 
diff --git a/doc/src/special_bonds.txt b/doc/src/special_bonds.txt index 6a661015bd..1021c4856b 100644 --- a/doc/src/special_bonds.txt +++ b/doc/src/special_bonds.txt @@ -25,9 +25,7 @@ keyword = {amber} or {charmm} or {dreiding} or {fene} or {lj/coul} or {lj} or {c {coul} values = w1,w2,w3 w1,w2,w3 = weights (0.0 to 1.0) on pairwise Coulombic interactions {angle} value = {yes} or {no} - {dihedral} value = {yes} or {no} - {extra} value = N - N = number of extra 1-2,1-3,1-4 interactions to save space for :pre + {dihedral} value = {yes} or {no} :pre :ule Examples: @@ -36,8 +34,7 @@ special_bonds amber special_bonds charmm special_bonds fene dihedral no special_bonds lj/coul 0.0 0.0 0.5 angle yes dihedral yes -special_bonds lj 0.0 0.0 0.5 coul 0.0 0.0 0.0 dihedral yes -special_bonds lj/coul 0 1 1 extra 2 :pre +special_bonds lj 0.0 0.0 0.5 coul 0.0 0.0 0.0 dihedral yes :pre [Description:] @@ -178,14 +175,6 @@ interaction between atoms 2 and 5 will be unaffected (full weighting of 1.0). If the {dihedral} keyword is specified as {no} which is the default, then the 2,5 interaction will also be weighted by 0.5. -The {extra} keyword can be used when additional bonds will be created -during a simulation run, e.g. by the "fix -bond/create"_fix_bond_create.html command. It can also be used if -molecules will be added to the system, e.g. via the "fix -deposit"_fix_deposit.html, or "fix pour"_fix_pour.html commands, which -will have atoms with more special neighbors than any atom in the -current system has. - :line NOTE: LAMMPS stores and maintains a data structure with a list of the @@ -194,8 +183,9 @@ the system). If new bonds are created (or molecules added containing atoms with more special neighbors), the size of this list needs to grow. Note that adding a single bond always adds a new 1st neighbor but may also induce *many* new 2nd and 3rd neighbors, depending on the -molecular topology of your system. 
Using the {extra} keyword leaves -empty space in the list for this N additional 1st, 2nd, or 3rd +molecular topology of your system. Using the {extra/special/per/atom} +keyword to either "read_data"_read_data.html or "create_box"_create_box.html +reserves empty space in the list for this N additional 1st, 2nd, or 3rd neighbors to be added. If you do not do this, you may get an error when bonds (or molecules) are added. @@ -203,8 +193,7 @@ when bonds (or molecules) are added. NOTE: If you reuse this command in an input script, you should set all the options you need each time. This command cannot be used a 2nd -time incrementally, e.g. to add some extra storage locations via the -{extra} keyword. E.g. these two commands: +time incrementally. E.g. these two commands: special_bonds lj 0.0 1.0 1.0 special_bonds coul 0.0 0.0 1.0 @@ -221,25 +210,6 @@ Coul: coul 0.0 0.0 1.0 because the LJ settings are reset to their default values each time the command is issued. -Likewise - -special_bonds amber -special_bonds extra 2 :pre - -is not the same as this single command: - -special_bonds amber extra 2 :pre - -since in the former case, the 2nd command will reset all the LJ and -Coulombic weights to 0.0 (the default). - -One exception to this rule is the {extra} option itself. It is not -reset to its default value of 0 each time the special_bonds command is -invoked. This is because it can also be set by the -"read_data"_read_data.html and "create_box"_create_box.html commands, -so this command will not override those settings unless you explicitly -use {extra} as an option. - [Restrictions:] none [Related commands:] diff --git a/doc/src/suffix.txt b/doc/src/suffix.txt index 127719cdb5..74f69b6dfe 100644 --- a/doc/src/suffix.txt +++ b/doc/src/suffix.txt @@ -28,7 +28,7 @@ suffix kk :pre This command allows you to use variants of various styles if they exist. In that respect it operates the same as the "-suffix -command-line switch"_Section_start.html#start_7. 
It also has options +command-line switch"_Section_start.html#start_6. It also has options to turn off or back on any suffix setting made via the command line. The specified style can be {gpu}, {intel}, {kk}, {omp}, {opt} or @@ -50,7 +50,7 @@ Intel(R) Xeon Phi(TM) coprocessors. :l KOKKOS = a collection of atom, pair, and fix styles optimized to run using the Kokkos library on various kinds of hardware, including GPUs -via Cuda and many-core chips via OpenMP or threading. :l +via CUDA and many-core chips via OpenMP or threading. :l USER-OMP = a collection of pair, bond, angle, dihedral, improper, kspace, compute, and fix styles with support for OpenMP @@ -105,6 +105,6 @@ input script. [Related commands:] -"Command-line switch -suffix"_Section_start.html#start_7 +"Command-line switch -suffix"_Section_start.html#start_6 [Default:] none diff --git a/doc/src/temper.txt b/doc/src/temper.txt index be7edfba43..b1c47c8076 100644 --- a/doc/src/temper.txt +++ b/doc/src/temper.txt @@ -32,7 +32,7 @@ replicas (ensembles) of a system. Two or more replicas must be used. Each replica runs on a partition of one or more processors. Processor partitions are defined at run-time using the -partition command-line -switch; see "Section 2.7"_Section_start.html#start_7 of the +switch; see "Section 2.6"_Section_start.html#start_6 of the manual. Note that if you have MPI installed, you can run a multi-replica simulation with more replicas (partitions) than you have physical processors, e.g you can run a 10-replica simulation on one or @@ -70,7 +70,7 @@ As a tempering run proceeds, multiple log files and screen output files are created, one per replica. By default these files are named log.lammps.M and screen.M where M is the replica number from 0 to N-1, with N = # of replicas. See the "section on command-line -switches"_Section_start.html#start_7 for info on how to change these +switches"_Section_start.html#start_6 for info on how to change these names. 
The main screen and log file (log.lammps) will list information about diff --git a/doc/src/thermo_style.txt b/doc/src/thermo_style.txt index 36ec7bf12e..6102169ee3 100644 --- a/doc/src/thermo_style.txt +++ b/doc/src/thermo_style.txt @@ -255,7 +255,7 @@ The {part} keyword is useful for multi-replica or multi-partition simulations to indicate which partition this output and this file corresponds to, or for use in a "variable"_variable.html to append to a filename for output specific to this partition. See "Section -2.7"_Section_start.html#start_7 of the manual for details on running +2.6"_Section_start.html#start_6 of the manual for details on running in multi-partition mode. The {timeremain} keyword returns the remaining seconds when a diff --git a/doc/src/timer.txt b/doc/src/timer.txt index 39a6c542b7..768c3e1353 100644 --- a/doc/src/timer.txt +++ b/doc/src/timer.txt @@ -40,7 +40,7 @@ time is spent in different sections of the code and thus can provide information for determining performance and load imbalance problems. This can be done at different levels of detail and accuracy. For more information about the timing output, see this "discussion of screen -output in Section 2.8"_Section_start.html#start_8. +output in Section 2.7"_Section_start.html#start_7. The {off} setting will turn all time measurements off. The {loop} setting will only measure the total time for a run and not collect any diff --git a/doc/src/tutorial_bash_on_windows.txt b/doc/src/tutorial_bash_on_windows.txt new file mode 100644 index 0000000000..66712bdffa --- /dev/null +++ b/doc/src/tutorial_bash_on_windows.txt @@ -0,0 +1,203 @@ +"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c + +:link(lws,http://lammps.sandia.gov) +:link(ld,Manual.html) +:link(lc,Section_commands.html#comm) + +:line + +Using LAMMPS with Bash on Windows :h3 +[written by Richard Berger] + +:line +Starting with Windows 10 you can install Linux tools directly in Windows. 
This +allows you to compile LAMMPS following the same procedure as on a real Ubuntu +Linux installation. Software can be easily installed using the package manager +via apt-get and all files are accessible in both the Windows Explorer and your +Linux shell (bash). This avoids switching to a different operating system or +installing a virtual machine. Everything runs on Windows. + +Installing Bash on Windows :h4 + +Prerequisites :h5 + +Windows 10 (64bit only) +Latest updates installed :ul + +Enable developer mode :h5 +You enable this feature by first opening Windows Settings and enabling +Developer mode. Go to the Windows settings and search for "developer". This +will allow you to install software which comes from outside of the Windows +Store. You might be prompted to reboot your computer. Please do so. + +:image(JPG/bow_tutorial_01_small.png,JPG/bow_tutorial_01.png) +:image(JPG/bow_tutorial_02_small.png,JPG/bow_tutorial_02.png) +:image(JPG/bow_tutorial_03_small.png,JPG/bow_tutorial_03.png) + +Install Windows Subsystem for Linux :h5 + +Next you must ensure that the Windows Subsystem for Linux is installed. Again, +search for "enable windows features" in the Settings dialog. This opens a +dialog with a list of features you can install. Add a checkmark to Windows +Subsystem for Linux (Beta) and press OK. + +:image(JPG/bow_tutorial_04_small.png,JPG/bow_tutorial_04.png) +:image(JPG/bow_tutorial_05.png,JPG/bow_tutorial_05.png) + +Install Bash for Windows :h5 + +After installation completes, type "bash" in the Windows Start menu search. +Select the first found option. This will launch a command-line window which +will prompt you about installing Ubuntu on Windows. Confirm with "y" and press +enter. This will then download Ubuntu for Windows. + +:image(JPG/bow_tutorial_06.png) +:image(JPG/bow_tutorial_07.png) + +During installation, you will be asked for a new password. This will be used +for installing new software and running commands with sudo.
+ +:image(JPG/bow_tutorial_08.png) + +Type exit to close the command-line window. + +Go to the Start menu and type "bash" again. This time you will see a "Bash on +Ubuntu on Windows" Icon. Start this program. + +:image(JPG/bow_tutorial_09.png) + +Congratulations, you have installed [Bash on Ubuntu on Windows]. + +:image(JPG/bow_tutorial_10.png) + +:line + +Compiling LAMMPS in Bash on Windows :h4 + +The installation of LAMMPS in this environment is identical to working inside +of a real Ubuntu Linux installation. At the time of writing, it uses Ubuntu 16.04. + +Installing prerequisite packages :h5 + +First upgrade all existing packages using + +sudo apt update +sudo apt upgrade -y :pre + +Next install the following packages, which include compilers and libraries +needed for various LAMMPS features: + +sudo apt install -y build-essential ccache gfortran openmpi-bin libopenmpi-dev libfftw3-dev libjpeg-dev libpng12-dev python-dev python-virtualenv libblas-dev liblapack-dev libhdf5-serial-dev hdf5-tools :pre + +Files in Ubuntu on Windows :h5 + +When you launch "Bash on Ubuntu on Windows" you will start out in your Linux +user home directory /home/[username]. You can access your Windows user directory +using the /mnt/c/Users/[username] folder. + + +Download LAMMPS :h5 + +Obtain a copy of the LAMMPS code and go into it using "cd" + +Option 1: Downloading LAMMPS tarball using wget :h6 + +wget http://lammps.sandia.gov/tars/lammps-stable.tar.gz +tar xvzf lammps-stable.tar.gz +cd lammps-31Mar17 :pre + +Option 2: Obtaining LAMMPS code from GitHub :h6 + +git clone https://github.com/lammps/lammps.git +cd lammps :pre + +Compiling LAMMPS :h5 + +At this point you can compile LAMMPS like on Ubuntu Linux. 
+ +Compiling serial version :h6 + +cd src/ +make -j 4 serial :pre + +This will create an executable called lmp_serial in the src/ directory + +Compiling MPI version :h6 + +cd src/ +make -j 4 mpi :pre + +This will create an executable called lmp_mpi in the src/ directory + +:line + +Finally, please note the absolute path of your src folder. You can get this using + +pwd :pre + +or + +echo $PWD :pre + +To run any examples you need the location of the executable. For now, let us +save this location in a temporary variable + +LAMMPS_DIR=$PWD :pre + +:line + +Running an example script :h5 + +Once compiled you can execute some of the LAMMPS examples. Switch into the +examples/melt folder + +cd ../examples/melt :pre + +The full path of the serial executable is $LAMMPS_DIR/lmp_serial, while the mpi +version is $LAMMPS_DIR/lmp_mpi. You can run the melt example with either +version as follows: + +$LAMMPS_DIR/lmp_serial -in in.melt :pre + +or + +mpirun -np 4 $LAMMPS_DIR/lmp_mpi -in in.melt :pre + +Note the use of our variable $LAMMPS_DIR, which expands into the full path of +the LAMMPS src folder we saved earlier. + +Adding your executable directory to your PATH :h6 + +You can avoid having to type the full path of your LAMMPS binary by adding its +parent folder to the PATH environment variable as follows: + +export PATH=$LAMMPS_DIR:$PATH :pre + +Input scripts can then be run like this: + +lmp_serial -in in.melt :pre + +or + +mpirun -np 4 lmp_mpi -in in.melt :pre + +However, this PATH variable will not persist if you close your bash window. +To persist this setting edit the $HOME/.bashrc file using your favorite editor +and add this line + +export PATH=/full/path/to/your/lammps/src:$PATH :pre + +[Example:] + +For an executable lmp_serial with a full path + +/home/richard/lammps/src/lmp_serial :pre + +the PATH variable should be + +export PATH=/home/richard/lammps/src:$PATH :pre + +NOTE: This should give you a jump start when trying to run LAMMPS on Windows. 
+To become effective in this environment I encourage you to look into Linux +tutorials explaining Bash and Basic Unix commands (e.g., "Linux +Journey"_https://linuxjourney.com) diff --git a/doc/src/tutorial_drude.txt b/doc/src/tutorial_drude.txt index b9a167b804..f6e7eed40b 100644 --- a/doc/src/tutorial_drude.txt +++ b/doc/src/tutorial_drude.txt @@ -176,12 +176,13 @@ By recognizing the fix {drude}, LAMMPS will find and store matching DC-DP pairs and will treat DP as equivalent to their DC in the {special bonds} relations. It may be necessary to extend the space for storing such special relations. In this case extra space should -be reserved by using the {extra} keyword of the {special_bonds} +be reserved by using the {extra/special/per/atom} keyword of either +the "read_data"_read_data.html or "create_box"_create_box.html command. With our phenol, there is 1 more special neighbor for which space is required. Otherwise LAMMPS crashes and gives the required value. -special_bonds lj/coul 0.0 0.0 0.5 extra 1 :pre +read_data data-p.lmp extra/special/per/atom 1 :pre Let us assume we want to run a simple NVT simulation at 300 K. Note that Drude oscillators need to be thermalized at a low temperature in diff --git a/doc/src/tutorial_pylammps.txt b/doc/src/tutorial_pylammps.txt index 78cdd241fb..52eb6415db 100644 --- a/doc/src/tutorial_pylammps.txt +++ b/doc/src/tutorial_pylammps.txt @@ -48,21 +48,17 @@ System-wide Installation :h3 Step 1: Building LAMMPS as a shared library :h4 To use LAMMPS inside of Python it has to be compiled as shared library. This -library is then loaded by the Python interface. In this example, we use the -Make.py utility to create a Makefile with C++ exceptions, PNG, JPEG and FFMPEG -output support enabled. Finally, we also enable the MOLECULE package and compile -using the generated {auto} Makefile. +library is then loaded by the Python interface. 
In this example we enable the +MOLECULE package and compile LAMMPS with C++ exceptions, PNG, JPEG and FFMPEG +output support enabled. cd $LAMMPS_DIR/src :pre -# generate custom Makefile -python Make.py -jpg -png -s ffmpeg exceptions -m mpi -a file :pre - # add packages if necessary make yes-MOLECULE :pre # compile shared library using Makefile -make mode=shlib auto :pre +make mpi mode=shlib LMP_INC="-DLAMMPS_PNG -DLAMMPS_JPEG -DLAMMPS_FFMPEG -DLAMMPS_EXCEPTIONS" JPG_LIB="-lpng -ljpeg" :pre Step 2: Installing the LAMMPS Python package :h4 diff --git a/doc/src/tutorials.txt b/doc/src/tutorials.txt index 569ad892b7..338439ac8e 100644 --- a/doc/src/tutorials.txt +++ b/doc/src/tutorials.txt @@ -8,6 +8,7 @@ Tutorials :h1 tutorial_drude tutorial_github tutorial_pylammps + tutorial_bash_on_windows body manifolds diff --git a/doc/src/variable.txt b/doc/src/variable.txt index e32e82ef4d..e3b7c5de0d 100644 --- a/doc/src/variable.txt +++ b/doc/src/variable.txt @@ -178,7 +178,7 @@ This means variables can NOT be re-defined in an input script (with two exceptions, read further). This is to allow an input script to be processed multiple times without resetting the variables; see the "jump"_jump.html or "include"_include.html commands. It also means -that using the "command-line switch"_Section_start.html#start_7 -var +that using the "command-line switch"_Section_start.html#start_6 -var will override a corresponding index variable setting in the input script. @@ -248,7 +248,7 @@ variable. {Index} style variables with a single string value can also be set by using the command-line switch -var; see "this -section"_Section_start.html#start_7 for details. +section"_Section_start.html#start_6 for details. The {loop} style is identical to the {index} style except that the strings are the integers from 1 to N inclusive, if only one argument N @@ -264,7 +264,7 @@ N1 <= N2 and N2 >= 0 is required. For the {world} style, one or more strings are specified. 
There must be one string for each processor partition or "world". See "this -section"_Section_start.html#start_7 of the manual for information on +section"_Section_start.html#start_6 of the manual for information on running LAMMPS with multiple partitions via the "-partition" command-line switch. This variable command assigns one string to each world. All processors in the world are assigned the same string. The @@ -277,7 +277,7 @@ different partitions. For the {universe} style, one or more strings are specified. There must be at least as many strings as there are processor partitions or -"worlds". See "this page"_Section_start.html#start_7 for information +"worlds". See "this page"_Section_start.html#start_6 for information on running LAMMPS with multiple partitions via the "-partition" command-line switch. This variable command initially assigns one string to each world. When a "next"_next.html command is encountered diff --git a/doc/src/write_data.txt b/doc/src/write_data.txt index 033199e98b..39e5a7f811 100644 --- a/doc/src/write_data.txt +++ b/doc/src/write_data.txt @@ -59,7 +59,7 @@ If you want to do more exact restarts, using binary files, see the "restart"_restart.html, "write_restart"_write_restart.html, and "read_restart"_read_restart.html commands. You can also convert binary restart files to text data files, after a simulation has run, -using the "-r command-line switch"_Section_start.html#start_7. +using the "-r command-line switch"_Section_start.html#start_6. NOTE: Only limited information about a simulation is stored in a data file. For example, no information about atom "groups"_group.html and diff --git a/doc/src/write_restart.txt b/doc/src/write_restart.txt index 8160eec3df..ff3b652dba 100644 --- a/doc/src/write_restart.txt +++ b/doc/src/write_restart.txt @@ -66,7 +66,7 @@ Restart files can be read by a "read_restart"_read_restart.html command to restart a simulation from a particular state. 
Because the file is binary (to enable exact restarts), it may not be readable on another machine. In this case, you can use the "-r command-line -switch"_Section_start.html#start_7 to convert a restart file to a data +switch"_Section_start.html#start_6 to convert a restart file to a data file. NOTE: Although the purpose of restart files is to enable restarting a diff --git a/examples/COUPLE/README b/examples/COUPLE/README index c8c9e0e31b..83e7463531 100644 --- a/examples/COUPLE/README +++ b/examples/COUPLE/README @@ -41,8 +41,8 @@ fortran a simple wrapper on the LAMMPS library API that can be called from Fortran fortran2 a more sophisticated wrapper on the LAMMPS library API that can be called from Fortran -fortran3 wrapper written by Nir Goldman (LLNL), as an +fortran_dftb wrapper written by Nir Goldman (LLNL), as an extension to fortran2, used for calling LAMMPS - from Fortran DFTB+ code + from Fortran DFTB+ tight-binding code Each sub-directory has its own README with more details. diff --git a/examples/COUPLE/fortran3/LAMMPS-wrapper.cpp b/examples/COUPLE/fortran_dftb/LAMMPS-wrapper.cpp similarity index 100% rename from examples/COUPLE/fortran3/LAMMPS-wrapper.cpp rename to examples/COUPLE/fortran_dftb/LAMMPS-wrapper.cpp diff --git a/examples/COUPLE/fortran3/LAMMPS-wrapper.h b/examples/COUPLE/fortran_dftb/LAMMPS-wrapper.h similarity index 100% rename from examples/COUPLE/fortran3/LAMMPS-wrapper.h rename to examples/COUPLE/fortran_dftb/LAMMPS-wrapper.h diff --git a/examples/COUPLE/fortran3/LAMMPS-wrapper2.cpp b/examples/COUPLE/fortran_dftb/LAMMPS-wrapper2.cpp similarity index 71% rename from examples/COUPLE/fortran3/LAMMPS-wrapper2.cpp rename to examples/COUPLE/fortran_dftb/LAMMPS-wrapper2.cpp index f245c44d79..d16b49cc50 100644 --- a/examples/COUPLE/fortran3/LAMMPS-wrapper2.cpp +++ b/examples/COUPLE/fortran_dftb/LAMMPS-wrapper2.cpp @@ -47,11 +47,35 @@ void lammps_set_callback (void *ptr) { return; } +void lammps_set_external_vector_length (void *ptr, int n) { + 
class LAMMPS *lmp = (class LAMMPS *) ptr; + int ifix = lmp->modify->find_fix_by_style("external"); + FixExternal *fix = (FixExternal *) lmp->modify->fix[ifix]; + fix->set_vector_length(n); + return; +} + +void lammps_set_external_vector (void *ptr, int n, double val) { + class LAMMPS *lmp = (class LAMMPS *) ptr; + int ifix = lmp->modify->find_fix_by_style("external"); + FixExternal *fix = (FixExternal *) lmp->modify->fix[ifix]; + fix->set_vector (n, val); + return; +} + void lammps_set_user_energy (void *ptr, double energy) { class LAMMPS *lmp = (class LAMMPS *) ptr; int ifix = lmp->modify->find_fix_by_style("external"); FixExternal *fix = (FixExternal *) lmp->modify->fix[ifix]; - fix->set_energy(energy); + fix->set_energy_global(energy); + return; +} + +void lammps_set_user_virial (void *ptr, double *virial) { + class LAMMPS *lmp = (class LAMMPS *) ptr; + int ifix = lmp->modify->find_fix_by_style("external"); + FixExternal *fix = (FixExternal *) lmp->modify->fix[ifix]; + fix->set_virial_global(virial); return; } diff --git a/examples/COUPLE/fortran3/LAMMPS-wrapper2.h b/examples/COUPLE/fortran_dftb/LAMMPS-wrapper2.h similarity index 89% rename from examples/COUPLE/fortran3/LAMMPS-wrapper2.h rename to examples/COUPLE/fortran_dftb/LAMMPS-wrapper2.h index 794006e3af..ed79015e78 100644 --- a/examples/COUPLE/fortran3/LAMMPS-wrapper2.h +++ b/examples/COUPLE/fortran_dftb/LAMMPS-wrapper2.h @@ -26,6 +26,9 @@ extern "C" { /* Prototypes for auxiliary functions */ void lammps_set_callback (void *); void lammps_set_user_energy (void*, double); +void lammps_set_user_virial (void*, double*); +void lammps_set_external_vector_length (void*, int); +void lammps_set_external_vector (void*, int, double); #ifdef __cplusplus } diff --git a/examples/COUPLE/fortran3/LAMMPS.F90 b/examples/COUPLE/fortran_dftb/LAMMPS.F90 similarity index 97% rename from examples/COUPLE/fortran3/LAMMPS.F90 rename to examples/COUPLE/fortran_dftb/LAMMPS.F90 index eb5b7f825b..9b18bbfa5f 100644 --- 
a/examples/COUPLE/fortran3/LAMMPS.F90 +++ b/examples/COUPLE/fortran_dftb/LAMMPS.F90 @@ -52,12 +52,16 @@ module LAMMPS C_NULL_CHAR, C_loc, C_F_pointer, lammps_instance => C_ptr implicit none private + public :: lammps_set_user_virial + public :: lammps_set_external_vector_length + public :: lammps_set_external_vector + public :: lammps_set_user_energy public :: lammps_open, lammps_open_no_mpi, lammps_close, lammps_file, & lammps_command, lammps_free, lammps_extract_global, & lammps_extract_atom, lammps_extract_compute, lammps_extract_fix, & lammps_extract_variable, lammps_get_natoms, lammps_gather_atoms, & - lammps_scatter_atoms, lammps_set_callback, lammps_set_user_energy - public :: lammps_instance, C_ptr, C_double, C_int + lammps_set_callback + public :: lammps_scatter_atoms, lammps_instance, C_ptr, C_double, C_int !! Functions supplemental to the prototypes in library.h. {{{1 !! The function definitions (in C++) are contained in LAMMPS-wrapper.cpp. @@ -218,6 +222,28 @@ module LAMMPS real(C_double), value :: energy end subroutine lammps_set_user_energy + subroutine lammps_set_user_virial (ptr, virial) & + bind (C, name='lammps_set_user_virial') + import :: C_ptr, C_double + type (C_ptr), value :: ptr + real(C_double) :: virial(6) + end subroutine lammps_set_user_virial + + subroutine lammps_set_external_vector_length (ptr, n) & + bind (C, name='lammps_set_external_vector_length') + import :: C_ptr, C_double, C_int + type(C_ptr), value :: ptr + integer (C_int), value :: n + end subroutine lammps_set_external_vector_length + + subroutine lammps_set_external_vector (ptr, n, val) & + bind (C, name='lammps_set_external_vector') + import :: C_ptr, C_int, C_double + type (C_ptr), value :: ptr + integer (C_int), value :: n + real(C_double), value :: val + end subroutine lammps_set_external_vector + subroutine lammps_actual_gather_atoms (ptr, name, type, count, data) & bind (C, name='lammps_gather_atoms') import :: C_ptr, C_int, C_char diff --git 
a/examples/COUPLE/fortran3/README b/examples/COUPLE/fortran_dftb/README similarity index 78% rename from examples/COUPLE/fortran3/README rename to examples/COUPLE/fortran_dftb/README index 9effa35ec4..39a2f18169 100644 --- a/examples/COUPLE/fortran3/README +++ b/examples/COUPLE/fortran_dftb/README @@ -3,8 +3,9 @@ forces from a fortran code for a LAMMPS simulation. The reader should refer to the README file in COUPLE/fortran2 before proceeding. Here, the LAMMPS.F90 file has been modified slightly and additional files named LAMMPS-wrapper2.h and LAMMPS-wrapper2.cpp have been included in -order to supply wrapper functions to set the LAMMPS callback function -and total energy. +order to supply wrapper functions to set the LAMMPS callback function, +total energy, virial, and electronic entropy contribution (needed for +MSST simulations with a quantum code). In this example, the callback function is set to run the semi-empirical quantum code DFTB+ in serial and then read in the total @@ -20,11 +21,14 @@ etc. A few more important notes: --The stress tensor from DFTB+ is passed in to LAMMPS via pointer. -Calling the subroutine lammps_set_callback() is required in order to set a pointer to the callback function in LAMMPS. -The subroutine lammps_set_user_energy() passes in the potential energy - from DFTB+ to LAMMPS. + from DFTB+ to LAMMPS. Similarly, lammps_set_user_virial passes the stress tensor. + +-The electronic entropy contribution is set via lammps_set_external_vector(). There needs + to be a call to lammps_set_external_vector_length() before this value can be + passed to LAMMPS. 
This example was created by Nir Goldman, whom you can contact with questions: diff --git a/examples/COUPLE/fortran3/data.diamond b/examples/COUPLE/fortran_dftb/data.diamond similarity index 100% rename from examples/COUPLE/fortran3/data.diamond rename to examples/COUPLE/fortran_dftb/data.diamond diff --git a/examples/COUPLE/fortran_dftb/dftb_in.hsd b/examples/COUPLE/fortran_dftb/dftb_in.hsd new file mode 100644 index 0000000000..104a4c04ce --- /dev/null +++ b/examples/COUPLE/fortran_dftb/dftb_in.hsd @@ -0,0 +1,40 @@ +#sample DFTB+ script to run this test code +Geometry = GenFormat { +<<< "lammps.gen" +} + +Driver = { +} + +Hamiltonian = DFTB { + LAMMPS = Yes # keyword to print energy, forces, and stress tensor to file(results.out) + SCC = No + MaxAngularMomentum = { + C = "p" + } + Charge = 0.0 + Eigensolver = Standard {} + Filling = Fermi { + Temperature [Kelvin] = 298.0 + } + SlaterKosterFiles = Type2FileNames { + Prefix = "~/slako/mio-1-1/" # the user must define the location of the skf files + Separator = "-" + Suffix = ".skf" + LowerCaseTypeName = No + } + KPointsAndWeights = { + 0.0000000000000 0.0000000000000 0.0000000000000 1.00000000000000 + } +} + +Options = { + CalculateForces = Yes + WriteDetailedOut = No + WriteBandOut = No + RandomSeed = 12345 +} + +ParserOptions = { + ParserVersion = 3 +} diff --git a/examples/COUPLE/fortran_dftb/dftb_pin.hsd b/examples/COUPLE/fortran_dftb/dftb_pin.hsd new file mode 100644 index 0000000000..6d9dea4a15 --- /dev/null +++ b/examples/COUPLE/fortran_dftb/dftb_pin.hsd @@ -0,0 +1,129 @@ +Geometry = GenFormat { +64 S +C +1 1 7.099007 7.117657 7.119139 +2 1 0.858709 0.867233 0.882294 +3 1 1.772527 1.811776 7.120239 +4 1 2.702145 2.681271 0.901362 +5 1 0.017539 1.794455 1.788454 +6 1 0.885593 2.694118 2.707994 +7 1 1.795055 7.120787 1.777896 +8 1 2.642849 0.868278 2.670699 +9 1 0.016060 0.017156 3.568644 +10 1 0.891891 0.896406 4.439286 +11 1 1.766086 1.764402 3.550134 +12 1 2.677349 2.648926 4.427174 +13 1 0.010133 1.771283 
5.342173 +14 1 0.858153 2.653565 6.241596 +15 1 1.804087 0.020636 5.353268 +16 1 2.689680 0.907188 6.224575 +17 1 0.017845 3.577563 7.113016 +18 1 0.910027 4.459286 0.910286 +19 1 1.766394 5.376046 0.015526 +20 1 2.683727 6.220728 0.898553 +21 1 0.003357 5.363423 1.774139 +22 1 0.856735 6.238324 2.660213 +23 1 1.761079 3.549776 1.797054 +24 1 2.667227 4.463441 2.646074 +25 1 7.132499 3.551558 3.599764 +26 1 0.920387 4.482191 4.479257 +27 1 1.772194 5.337132 3.555569 +28 1 2.675010 6.251629 4.483124 +29 1 0.005702 5.371095 5.351147 +30 1 0.880807 6.249819 6.264231 +31 1 1.793177 3.592396 5.369939 +32 1 2.653179 4.463595 6.274044 +33 1 3.557243 7.118913 0.026006 +34 1 4.458971 0.889331 0.904950 +35 1 5.367903 1.759757 7.104941 +36 1 6.271565 2.658454 0.890168 +37 1 3.591915 1.768681 1.793880 +38 1 4.435612 2.662184 2.676722 +39 1 5.371040 0.000196 1.783464 +40 1 6.226453 0.886640 2.653384 +41 1 3.583339 0.005449 3.600177 +42 1 4.453692 0.909417 4.459713 +43 1 5.314554 1.805409 3.584215 +44 1 6.210181 2.642660 4.486206 +45 1 3.545704 1.802745 5.365369 +46 1 4.476660 2.701226 6.220451 +47 1 5.332820 0.029557 5.347965 +48 1 6.215725 0.915081 6.230289 +49 1 3.536446 3.551469 7.106600 +50 1 4.451181 4.426439 0.900180 +51 1 5.368735 5.377996 7.109524 +52 1 6.230666 6.220985 0.862175 +53 1 3.596626 5.372822 1.797613 +54 1 4.485613 6.221252 2.699652 +55 1 5.364421 3.549838 1.796281 +56 1 6.261739 4.459046 2.648152 +57 1 3.588752 3.581054 3.581755 +58 1 4.462342 4.467270 4.478800 +59 1 5.355202 5.318323 3.556531 +60 1 6.268570 6.259831 4.465795 +61 1 3.588636 5.354278 5.362327 +62 1 4.475747 6.263866 6.227803 +63 1 5.331158 3.554349 5.318368 +64 1 6.254581 4.436344 6.209681 +0.0 0.0 0.0 +7.13400000000000 0 0 +0 7.13400000000000 0 +0 0 7.13400000000000 +} +Driver = {} +Hamiltonian = DFTB { + LAMMPS = Yes + SCC = No + MaxAngularMomentum = { + C = "p" + } + Charge = 0.0 + Eigensolver = Standard {} + Filling = Fermi { + Temperature [Kelvin] = 298.0 + IndependentKFilling = No + } 
+ SlaterKosterFiles = Type2FileNames { + Prefix = "~/slako/mio-1-1/" + Separator = "-" + Suffix = ".skf" + LowerCaseTypeName = No + } + KPointsAndWeights = { +0.0000000000000 0.0000000000000 0.0000000000000 1.00000000000000 + } + PolynomialRepulsive = {} + OldRepulsiveSum = No + OrbitalResolvedSCC = No + OldSKInterpolation = No + NoErep = No + Dispersion = {} + ThirdOrder = No + ThirdOrderFull = No +} +Options = { + CalculateForces = Yes + WriteDetailedOut = No + WriteBandOut = No + RandomSeed = 12345 + MullikenAnalysis = No + WriteEigenvectors = No + WriteAutotestTag = No + WriteDetailedXML = No + WriteResultsTag = No + AtomResolvedEnergies = No + WriteHS = No + WriteRealHS = No + MinimiseMemoryUsage = No + ShowFoldedCoords = No +} +ParserOptions = { + ParserVersion = 3 + WriteHSDInput = Yes + WriteXMLInput = No + StopAfterParsing = No + IgnoreUnprocessedNodes = No +} +Analysis = { + ProjectStates = {} +} diff --git a/examples/COUPLE/fortran3/in.simple b/examples/COUPLE/fortran_dftb/in.simple similarity index 100% rename from examples/COUPLE/fortran3/in.simple rename to examples/COUPLE/fortran_dftb/in.simple diff --git a/examples/COUPLE/fortran_dftb/log.simple b/examples/COUPLE/fortran_dftb/log.simple new file mode 100644 index 0000000000..3496e94ebe --- /dev/null +++ b/examples/COUPLE/fortran_dftb/log.simple @@ -0,0 +1,71 @@ +LAMMPS (6 Jul 2017) +units real +atom_style charge +atom_modify map array +atom_modify sort 0 0.0 +read_data data.diamond + triclinic box = (0 0 0) to (7.134 7.134 7.134) with tilt (0 0 0) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 64 atoms + reading velocities ... + 64 velocities +neighbor 1.0 bin +neigh_modify delay 0 every 5 check no +fix 1 all nve +fix 2 all external pf/callback 1 1 + +fix_modify 2 energy yes +thermo_style custom step temp etotal ke pe lx ly lz pxx pyy pzz press + +thermo 1 +timestep 0.5 + +run 10 +Neighbor list info ... 
+ update every 5 steps, delay 0 steps, check no + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 0 + ghost atom cutoff = 0 + binsize = 7.134, bins = 1 1 1 + 0 neighbor lists, perpetual/occasional/extra = 0 0 0 +Per MPI rank memory allocation (min/avg/max) = 2.3 | 2.3 | 2.3 Mbytes +Step Temp TotEng KinEng PotEng Lx Ly Lz Pxx Pyy Pzz Press + 0 298.24835 -69593.587 56.008365 -69649.595 7.134 7.134 7.134 -19980.19 -21024.038 -21097.458 -20700.562 + 1 295.24358 -69593.585 55.444098 -69649.029 7.134 7.134 7.134 -19778.833 -20799.657 -20854.156 -20477.549 + 2 286.37211 -69593.58 53.778115 -69647.358 7.134 7.134 7.134 -19227.52 -20177.28 -20176.12 -19860.306 + 3 272.062 -69593.572 51.090804 -69644.663 7.134 7.134 7.134 -18360.869 -19189.684 -19100.021 -18883.525 + 4 253.01834 -69593.561 47.514575 -69641.075 7.134 7.134 7.134 -17198.143 -17855.03 -17652.036 -17568.403 + 5 230.19242 -69593.547 43.228073 -69636.775 7.134 7.134 7.134 -15750.247 -16183.764 -15854.145 -15929.386 + 6 204.71787 -69593.533 38.44418 -69631.977 7.134 7.134 7.134 -14083.498 -14247.434 -13789.835 -14040.256 + 7 177.82397 -69593.518 33.393748 -69626.911 7.134 7.134 7.134 -12340.963 -12202.878 -11623.171 -12055.671 + 8 150.76736 -69593.503 28.312758 -69621.816 7.134 7.134 7.134 -10637.824 -10180.827 -9495.0496 -10104.567 + 9 124.7737 -69593.49 23.431383 -69616.921 7.134 7.134 7.134 -9113.3842 -8339.0492 -7572.8076 -8341.747 + 10 100.98183 -69593.478 18.963481 -69612.442 7.134 7.134 7.134 -7833.9349 -6756.9749 -5945.8968 -6845.6022 +Loop time of 2.20497 on 1 procs for 10 steps with 64 atoms + +Performance: 0.196 ns/day, 122.499 hours/ns, 4.535 timesteps/s +0.2% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0 | 0 | 0 | 0.0 | 0.00 +Neigh | 1.4305e-06 | 1.4305e-06 | 1.4305e-06 | 0.0 | 0.00 +Comm | 4.22e-05 | 4.22e-05 | 
4.22e-05 | 0.0 | 0.00 +Output | 0.00067687 | 0.00067687 | 0.00067687 | 0.0 | 0.03 +Modify | 2.2042 | 2.2042 | 2.2042 | 0.0 | 99.96 +Other | | 6.533e-05 | | | 0.00 + +Nlocal: 64 ave 64 max 64 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 0 +Ave neighs/atom = 0 +Neighbor list builds = 2 +Dangerous builds not checked +Total wall time: 0:00:02 diff --git a/examples/COUPLE/fortran3/makefile b/examples/COUPLE/fortran_dftb/makefile similarity index 95% rename from examples/COUPLE/fortran3/makefile rename to examples/COUPLE/fortran_dftb/makefile index 86dea30850..225bd0025a 100644 --- a/examples/COUPLE/fortran3/makefile +++ b/examples/COUPLE/fortran_dftb/makefile @@ -21,7 +21,7 @@ liblammps_fortran.so : LAMMPS.o LAMMPS-wrapper.o LAMMPS-wrapper2.o $(FC) $(FFLAGS) -shared -o $@ $^ simpleF.x: simple.o LAMMPS.o LAMMPS-wrapper.o LAMMPS-wrapper2.o - $(FC) $(FFLAGS) simple.o -o simpleF.x liblammps_fortran.a $(LAMMPS_SRC)/liblammps_mvapich.a -lstdc++ /usr/local/tools/fftw/lib/libfftw.a + $(FC) $(FFLAGS) simple.o -o simpleF.x liblammps_fortran.a $(LAMMPS_SRC)/liblammps_mvapich.a -lstdc++ /usr/lib64/libfftw3.a liblammps_fortran.a : LAMMPS.o LAMMPS-wrapper.o LAMMPS-wrapper2.o $(AR) rs $@ $^ diff --git a/examples/COUPLE/fortran3/simple.f90 b/examples/COUPLE/fortran_dftb/simple.f90 similarity index 91% rename from examples/COUPLE/fortran3/simple.f90 rename to examples/COUPLE/fortran_dftb/simple.f90 index 40f8bf8b86..4604b4e4a9 100644 --- a/examples/COUPLE/fortran3/simple.f90 +++ b/examples/COUPLE/fortran_dftb/simple.f90 @@ -13,7 +13,7 @@ type(c_ptr) :: c_pos, c_fext, c_ids double precision, pointer :: fext(:,:), pos(:,:) integer, intent(in) :: ids(nlocal) - real (C_double), dimension(:), pointer :: virial => NULL() + real(C_double) :: virial(6) real (C_double) :: etot real(C_double), pointer :: ts_lmp double precision :: stress(3,3), ts_dftb @@ -61,26 
+61,21 @@ read(10,*)stress(i,:) enddo stress (:,:) = stress(:,:)*autoatm - etot = etot*econv - call lammps_extract_global(ts_lmp, lmp, 'TS_dftb') - ts_lmp = ts_dftb - do i = 1, nlocal - read(10,*)fext(:,ids(i)) - fext(:,ids(i)) = fext(:,ids(i))*fconv - enddo - close(10) - call lammps_set_user_energy (lmp, etot) - call lammps_extract_atom (virial, lmp, 'virial') - if (.not. associated(virial)) then - print*,'virial pointer not associated.' - STOP - endif virial(1) = stress(1,1)/(nktv2p/volume) virial(2) = stress(2,2)/(nktv2p/volume) virial(3) = stress(3,3)/(nktv2p/volume) virial(4) = stress(1,2)/(nktv2p/volume) virial(5) = stress(1,3)/(nktv2p/volume) virial(6) = stress(2,3)/(nktv2p/volume) + etot = etot*econv + call lammps_set_external_vector(lmp,1,ts_dftb*econv) + do i = 1, nlocal + read(10,*)fext(:,ids(i)) + fext(:,ids(i)) = fext(:,ids(i))*fconv + enddo + close(10) + call lammps_set_user_energy (lmp, etot) + call lammps_set_user_virial (lmp, virial) end subroutine end module callback @@ -103,6 +98,7 @@ program simple_fortran_callback call lammps_open_no_mpi ('lmp -log log.simple', lmp) call lammps_file (lmp, 'in.simple') call lammps_set_callback(lmp) + call lammps_set_external_vector_length(lmp,2) call lammps_command (lmp, 'run 10') call lammps_close (lmp) diff --git a/examples/README b/examples/README index 090ed733ac..dc622ef7c4 100644 --- a/examples/README +++ b/examples/README @@ -37,9 +37,8 @@ produce dump snapshots of the running simulation in any of 3 formats. If you uncomment the dump command in the input script, a text dump file will be produced, which can be animated by various visualization -programs (see http://lammps.sandia.gov/viz.html) such as VMD or -AtomEye. It can also be animated using the xmovie tool described in -the Additional Tools section of the LAMMPS documentation. +programs (see http://lammps.sandia.gov/viz.html) such as Ovito, VMD, +or AtomEye. 
If you uncomment the dump image command in the input script, and assuming you have built LAMMPS with a JPG library, JPG snapshot images @@ -59,6 +58,7 @@ These are the sample problems and their output in the various sub-directories: accelerate: use of all the various accelerator packages +airebo: polyethylene with AIREBO potential balance: dynamic load balancing, 2d system body: body particles, 2d system cmap: CMAP 5-body contributions to CHARMM force field @@ -106,20 +106,11 @@ tad: temperature-accelerated dynamics of vacancy diffusion in bulk Si vashishta: models using the Vashishta potential voronoi: Voronoi tesselation via compute voronoi/atom command -Here is a src/Make.py command which will perform a parallel build of a -LAMMPS executable "lmp_mpi" with all the packages needed by all the -examples, with the exception of the accelerate sub-directory. See the -accelerate/README for Make.py commands suitable for its example -scripts. - -cd src -Make.py -j 16 -p none std no-lib reax meam poems reaxc orig -a lib-all mpi - Here is how you might run and visualize one of the sample problems: cd indent cp ../../src/lmp_mpi . # copy LAMMPS executable to this dir -lmp_mpi < in.indent # run the problem +lmp_mpi -in in.indent # run the problem Running the simulation produces the files {dump.indent} and {log.lammps}. 
You can visualize the dump file as follows: diff --git a/examples/USER/misc/ees/Data_region b/examples/USER/misc/ees/Data_region new file mode 100644 index 0000000000..e48c5342d0 --- /dev/null +++ b/examples/USER/misc/ees/Data_region @@ -0,0 +1,28 @@ + +3 atoms +1 atom types +3 ellipsoids +0 60 xlo xhi +0 60 zlo zhi +0 60 ylo yhi + +Atoms +atom-ID atom-type ellipsoidflag density x y z +1 1 1 1 10 30 30 +2 1 1 1 30 10 30 +3 1 1 1 30 30 10 + + +Ellipsoids +atom-ID shapex shapey shapez quatw quati quatj quatk +1 14 6 8 0.89453 0.44700 0 0 +2 14 6 8 0.25755 0 0.96626 0 +3 14 6 8 0.95009 0 0 0.31197 + + +Velocities + +1 1.3 0 0 0 0 50 +2 0 .5 0 .1 3 10 +3 0 0 .9 .5 61 1 + diff --git a/examples/USER/misc/ees/Data_wall b/examples/USER/misc/ees/Data_wall new file mode 100644 index 0000000000..c4693e33fb --- /dev/null +++ b/examples/USER/misc/ees/Data_wall @@ -0,0 +1,22 @@ + +1 atoms +1 atom types +1 ellipsoids +0 60 xlo xhi +0 60 zlo zhi +0 60 ylo yhi + +Atoms +atom-ID atom-type ellipsoidflag density x y z +1 1 1 1 30 30 50 + + +Ellipsoids +atom-ID shapex shapey shapez quatw quati quatj quatk +1 14 6 8 0.44700 0 0.89453 0 + + +Velocities + +1 0 0 1 1 3 5 + diff --git a/examples/USER/misc/ees/README b/examples/USER/misc/ees/README new file mode 100644 index 0000000000..9f4cb4f159 --- /dev/null +++ b/examples/USER/misc/ees/README @@ -0,0 +1,13 @@ +Here one may find simple examples showing how "fix wall/ees" and "fix wall/region/ees" work. + + +--in.fix_wall_region: + + This input uses "Data_region" to set up a system of three particles colliding with a + cubic region whose walls interact with the particles via the EES potential. For details + of how to set the parameters of "fix wall/region/ees", see the documentation. + +--in.fix_wall + + This input uses "Data_wall" to confine an ellipsoidal particle between two EES walls. + For more details, look up the "fix wall/ees" command in the LAMMPS documentation. 
diff --git a/examples/USER/misc/ees/in.fix_wall b/examples/USER/misc/ees/in.fix_wall new file mode 100644 index 0000000000..42f03fb1a5 --- /dev/null +++ b/examples/USER/misc/ees/in.fix_wall @@ -0,0 +1,30 @@ +units lj +atom_style ellipsoid +boundary p p f +read_data Data_wall +#------------------------------------# +pair_style resquared 1 +pair_coeff 1 1 10.0 1.0 0.5 0.5 4 0.5 0.5 4 1 +#------------------------------------# +timestep 0.0002 +#------------------------------------# + +compute temp all temp/asphere +thermo_modify temp temp + +fix EES_substrate all wall/ees zhi EDGE 10 1 10 zlo EDGE 10 1 10 +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# + +thermo_style custom step temp press etotal f_EES_substrate f_EES_substrate[1] + +fix NVE all nve/asphere +#------------------------------------# +compute qw all property/atom quatw +compute qi all property/atom quati +compute qj all property/atom quatj +compute qk all property/atom quatk +#------------------------------------# +thermo 500 +#dump 1 all custom 1000 dump_substrate id type x y z c_qw c_qi c_qj c_qk +run 40000 + diff --git a/examples/USER/misc/ees/in.fix_wall_region b/examples/USER/misc/ees/in.fix_wall_region new file mode 100644 index 0000000000..c3a2ea2488 --- /dev/null +++ b/examples/USER/misc/ees/in.fix_wall_region @@ -0,0 +1,29 @@ +units lj +atom_style ellipsoid +boundary p p p +read_data Data_region +#------------------------------------# +region the_wall block 20. 40. 20. 40. 20. 40. side out +#------------------------------------# +pair_style resquared 1 +pair_coeff 1 1 10.0 1.0 0.5 0.5 4 0.5 0.5 4 1 +#------------------------------------# +timestep 0.0005 +#------------------------------------# + +fix EES_block all wall/region/ees the_wall 10. 1. 
20 +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# + +thermo_style custom step temp press etotal f_EES_block[1] f_EES_block[3] + +fix NVE all nve/asphere +#------------------------------------# +compute qw all property/atom quatw +compute qi all property/atom quati +compute qj all property/atom quatj +compute qk all property/atom quatk +#------------------------------------# +thermo 500 +#dump 1 all custom 1000 dump_region id type x y z c_qw c_qi c_qj c_qk +run 50000 + diff --git a/examples/USER/misc/ees/log.23Jun17.fix_wall.g++.1 b/examples/USER/misc/ees/log.23Jun17.fix_wall.g++.1 new file mode 100644 index 0000000000..91e39687bb --- /dev/null +++ b/examples/USER/misc/ees/log.23Jun17.fix_wall.g++.1 @@ -0,0 +1,162 @@ +LAMMPS (23 Jun 2017) + using 1 OpenMP thread(s) per MPI task +units lj +atom_style ellipsoid +boundary p p f +read_data Data_wall + orthogonal box = (0 0 0) to (60 60 60) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 1 atoms + 1 ellipsoids + reading velocities ... + 1 velocities +#------------------------------------# +pair_style resquared 1 +pair_coeff 1 1 10.0 1.0 0.5 0.5 4 0.5 0.5 4 1 +#------------------------------------# +timestep 0.0002 +#------------------------------------# + +compute temp all temp/asphere +thermo_modify temp temp + +fix EES_substrate all wall/ees zhi EDGE 10 1 10 zlo EDGE 10 1 10 +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# + +thermo_style custom step temp press etotal f_EES_substrate f_EES_substrate[1] +WARNING: New thermo_style command, previous thermo_modify settings will be lost (../output.cpp:705) + +fix NVE all nve/asphere +#------------------------------------# +compute qw all property/atom quatw +compute qi all property/atom quati +compute qj all property/atom quatj +compute qk all property/atom quatk +#------------------------------------# +thermo 500 +#dump 1 all custom 1000 dump_substrate id type x y z c_qw c_qi c_qj c_qk +run 40000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.3 + ghost atom cutoff = 1.3 + binsize = 0.65, bins = 93 93 93 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair resquared, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 7.95 | 7.95 | 7.95 Mbytes +Step Temp Press TotEng f_EES_substrate f_EES_substrate[1] + 0 0 0.00054301475 0 0 0 + 500 0 0.00054301501 0 -0.002011167 -0.00089853601 + 1000 0 0.0005430153 0 -0.0021039425 -0.00095953758 + 1500 0 0.00054301561 0 -0.0022030914 -0.0010262478 + 2000 0 0.00054301593 0 -0.002309218 -0.0010993652 + 2500 0 0.00054301628 0 -0.0024230015 -0.0011796956 + 3000 0 0.00054301666 0 -0.0025452078 -0.0012681725 + 3500 0 0.00054301707 0 -0.0026767034 -0.0013658817 + 4000 0 0.00054301751 0 -0.0028184722 -0.0014740918 + 4500 0 0.00054301798 0 -0.0029716352 -0.0015942917 + 5000 0 0.00054301849 0 -0.0031374752 -0.0017282378 + 5500 0 0.00054301905 0 -0.0033174662 -0.0018780129 + 6000 0 0.00054301965 0 -0.0035133093 -0.0020461007 + 6500 0 0.00054302031 0 -0.0037269778 -0.0022354811 + 7000 0 0.00054302103 0 -0.0039607721 -0.0024497521 + 7500 0 0.00054302182 0 -0.0042173892 -0.0026932881 + 8000 0 0.0005430227 0 -0.0045000102 -0.0029714471 + 8500 0 0.00054302366 0 -0.0048124114 -0.003290844 + 9000 0 0.00054302473 0 -0.0051591071 -0.0036597154 + 9500 0 0.00054302592 0 -0.0055455349 -0.0040884113 + 10000 0 0.00054302726 0 -0.0059782985 -0.0045900652 + 10500 0 0.00054302876 0 -0.0064654891 -0.0051815166 + 11000 0 0.00054303046 0 -0.0070171161 -0.0058845936 + 11500 0 0.0005430324 0 -0.0076456899 -0.0067279075 + 12000 0 0.00054303463 0 -0.0083670175 -0.0077493697 + 12500 0 0.00054303721 0 -0.0092012967 -0.0089996821 + 13000 0 0.00054304021 0 -0.010174616 -0.010546991 + 13500 0 0.00054304375 0 -0.011320967 -0.012482357 + 14000 0 
0.00054304796 0 -0.012684757 -0.01492338 + 14500 0 0.00054305301 0 -0.014323176 -0.01800425 + 15000 0 0.00054305913 0 -0.016305242 -0.021804766 + 15500 0 0.0005430665 0 -0.018693849 -0.026019991 + 16000 0 0.00054307501 0 -0.021450982 -0.028460977 + 16500 0 0.0005430828 0 -0.023974925 -0.017549988 + 17000 0 0.00054307849 0 -0.022577692 0.07296284 + 17500 0 0.00054298744 0 0.0069237358 0.72962844 + 18000 0 0.00054212125 0 0.28756839 7.5171061 + 18500 0 0.00052809177 0 4.8331004 159.56814 + 19000 0 0.00019717774 0 112.04947 5692.3379 + 19500 0 0.00051978321 0 7.5250598 262.38764 + 20000 0 0.00054179603 0 0.39293697 10.289153 + 20500 0 0.00054296932 0 0.01279406 0.89377639 + 21000 0 0.00054308425 0 -0.02444466 0.081890707 + 21500 0 0.0005430907 0 -0.026532401 -0.021386086 + 22000 0 0.00054308271 0 -0.023944983 -0.032642459 + 22500 0 0.00054307381 0 -0.02106205 -0.029524272 + 23000 0 0.00054306612 0 -0.018569361 -0.024753431 + 23500 0 0.00054305976 0 -0.01650866 -0.020566675 + 24000 0 0.00054305452 0 -0.014811253 -0.017216347 + 24500 0 0.00054305017 0 -0.013402896 -0.014581066 + 25000 0 0.00054304653 0 -0.012222687 -0.01250069 + 25500 0 0.00054304345 0 -0.011223677 -0.0108418 + 26000 0 0.00054304081 0 -0.010370111 -0.0095034766 + 26500 0 0.00054303854 0 -0.0096346546 -0.0084112161 + 27000 0 0.00054303657 0 -0.0089962072 -0.0075100751 + 27500 0 0.00054303485 0 -0.0084382935 -0.0067592209 + 28000 0 0.00054303334 0 -0.0079478992 -0.0061279726 + 28500 0 0.000543032 0 -0.0075146283 -0.0055930001 + 29000 0 0.00054303081 0 -0.0071300893 -0.0051363504 + 29500 0 0.00054302976 0 -0.0067874426 -0.00474405 + 30000 0 0.00054302881 0 -0.0064810641 -0.0044051051 + 30500 0 0.00054302796 0 -0.0062062911 -0.0041107799 + 31000 0 0.0005430272 0 -0.0059592289 -0.0038540677 + 31500 0 0.00054302651 0 -0.0057366023 -0.0036293011 + 32000 0 0.00054302589 0 -0.0055356393 -0.0034318592 + 32500 0 0.00054302533 0 -0.0053539804 -0.0032579475 + 33000 0 0.00054302482 0 -0.0051896066 -0.0031044289 + 
33500 0 0.00054302436 0 -0.0050407818 -0.0029686945 + 34000 0 0.00054302395 0 -0.0049060063 -0.002848562 + 34500 0 0.00054302357 0 -0.0047839795 -0.002742197 + 35000 0 0.00054302323 0 -0.0046735688 -0.0026480503 + 35500 0 0.00054302292 0 -0.0045737849 -0.0025648085 + 36000 0 0.00054302264 0 -0.0044837605 -0.0024913535 + 36500 0 0.00054302239 0 -0.0044027327 -0.0024267309 + 37000 0 0.00054302217 0 -0.0043300292 -0.0023701236 + 37500 0 0.00054302197 0 -0.0042650554 -0.0023208304 + 38000 0 0.00054302179 0 -0.0042072838 -0.0022782486 + 38500 0 0.00054302163 0 -0.0041562461 -0.0022418592 + 39000 0 0.0005430215 0 -0.0041115244 -0.0022112152 + 39500 0 0.00054302138 0 -0.0040727453 -0.0021859312 + 40000 0 0.00054302127 0 -0.0040395743 -0.0021656748 +Loop time of 0.111517 on 1 procs for 40000 steps with 1 atoms + +Performance: 6198147.516 tau/day, 358689.092 timesteps/s +98.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.0082421 | 0.0082421 | 0.0082421 | 0.0 | 7.39 +Neigh | 0.021163 | 0.021163 | 0.021163 | 0.0 | 18.98 +Comm | 0.045411 | 0.045411 | 0.045411 | 0.0 | 40.72 +Output | 0.0012326 | 0.0012326 | 0.0012326 | 0.0 | 1.11 +Modify | 0.022813 | 0.022813 | 0.022813 | 0.0 | 20.46 +Other | | 0.01265 | | | 11.35 + +Nlocal: 1 ave 1 max 1 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 0 +Ave neighs/atom = 0 +Neighbor list builds = 33 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/ees/log.23Jun17.fix_wall.g++.4 b/examples/USER/misc/ees/log.23Jun17.fix_wall.g++.4 new file mode 100644 index 0000000000..6c69ce3e72 --- /dev/null +++ b/examples/USER/misc/ees/log.23Jun17.fix_wall.g++.4 @@ -0,0 +1,162 @@ +LAMMPS (23 Jun 2017) + using 1 OpenMP thread(s) per MPI 
task +units lj +atom_style ellipsoid +boundary p p f +read_data Data_wall + orthogonal box = (0 0 0) to (60 60 60) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 1 atoms + 1 ellipsoids + reading velocities ... + 1 velocities +#------------------------------------# +pair_style resquared 1 +pair_coeff 1 1 10.0 1.0 0.5 0.5 4 0.5 0.5 4 1 +#------------------------------------# +timestep 0.0002 +#------------------------------------# + +compute temp all temp/asphere +thermo_modify temp temp + +fix EES_substrate all wall/ees zhi EDGE 10 1 10 zlo EDGE 10 1 10 +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# + +thermo_style custom step temp press etotal f_EES_substrate f_EES_substrate[1] +WARNING: New thermo_style command, previous thermo_modify settings will be lost (../output.cpp:705) + +fix NVE all nve/asphere +#------------------------------------# +compute qw all property/atom quatw +compute qi all property/atom quati +compute qj all property/atom quatj +compute qk all property/atom quatk +#------------------------------------# +thermo 500 +#dump 1 all custom 1000 dump_substrate id type x y z c_qw c_qi c_qj c_qk +run 40000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.3 + ghost atom cutoff = 1.3 + binsize = 0.65, bins = 93 93 93 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair resquared, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.128 | 5.41 | 5.753 Mbytes +Step Temp Press TotEng f_EES_substrate f_EES_substrate[1] + 0 0 0.00054301475 0 0 0 + 500 0 0.00054301501 0 -0.002011167 -0.00089853601 + 1000 0 0.0005430153 0 -0.0021039425 -0.00095953758 + 1500 0 0.00054301561 0 -0.0022030914 -0.0010262478 + 2000 0 0.00054301593 0 -0.002309218 -0.0010993652 + 2500 0 0.00054301628 0 -0.0024230015 -0.0011796956 + 3000 0 0.00054301666 0 -0.0025452078 -0.0012681725 + 3500 0 0.00054301707 0 -0.0026767034 -0.0013658817 + 4000 0 0.00054301751 0 -0.0028184722 -0.0014740918 + 4500 0 0.00054301798 0 -0.0029716352 -0.0015942917 + 5000 0 0.00054301849 0 -0.0031374752 -0.0017282378 + 5500 0 0.00054301905 0 -0.0033174662 -0.0018780129 + 6000 0 0.00054301965 0 -0.0035133093 -0.0020461007 + 6500 0 0.00054302031 0 -0.0037269778 -0.0022354811 + 7000 0 0.00054302103 0 -0.0039607721 -0.0024497521 + 7500 0 0.00054302182 0 -0.0042173892 -0.0026932881 + 8000 0 0.0005430227 0 -0.0045000102 -0.0029714471 + 8500 0 0.00054302366 0 -0.0048124114 -0.003290844 + 9000 0 0.00054302473 0 -0.0051591071 -0.0036597154 + 9500 0 0.00054302592 0 -0.0055455349 -0.0040884113 + 10000 0 0.00054302726 0 -0.0059782985 -0.0045900652 + 10500 0 0.00054302876 0 -0.0064654891 -0.0051815166 + 11000 0 0.00054303046 0 -0.0070171161 -0.0058845936 + 11500 0 0.0005430324 0 -0.0076456899 -0.0067279075 + 12000 0 0.00054303463 0 -0.0083670175 -0.0077493697 + 12500 0 0.00054303721 0 -0.0092012967 -0.0089996821 + 13000 0 0.00054304021 0 -0.010174616 -0.010546991 + 13500 0 0.00054304375 0 -0.011320967 -0.012482357 + 14000 0 
0.00054304796 0 -0.012684757 -0.01492338 + 14500 0 0.00054305301 0 -0.014323176 -0.01800425 + 15000 0 0.00054305913 0 -0.016305242 -0.021804766 + 15500 0 0.0005430665 0 -0.018693849 -0.026019991 + 16000 0 0.00054307501 0 -0.021450982 -0.028460977 + 16500 0 0.0005430828 0 -0.023974925 -0.017549988 + 17000 0 0.00054307849 0 -0.022577692 0.07296284 + 17500 0 0.00054298744 0 0.0069237358 0.72962844 + 18000 0 0.00054212125 0 0.28756839 7.5171061 + 18500 0 0.00052809177 0 4.8331004 159.56814 + 19000 0 0.00019717774 0 112.04947 5692.3379 + 19500 0 0.00051978321 0 7.5250598 262.38764 + 20000 0 0.00054179603 0 0.39293697 10.289153 + 20500 0 0.00054296932 0 0.01279406 0.89377639 + 21000 0 0.00054308425 0 -0.02444466 0.081890707 + 21500 0 0.0005430907 0 -0.026532401 -0.021386086 + 22000 0 0.00054308271 0 -0.023944983 -0.032642459 + 22500 0 0.00054307381 0 -0.02106205 -0.029524272 + 23000 0 0.00054306612 0 -0.018569361 -0.024753431 + 23500 0 0.00054305976 0 -0.01650866 -0.020566675 + 24000 0 0.00054305452 0 -0.014811253 -0.017216347 + 24500 0 0.00054305017 0 -0.013402896 -0.014581066 + 25000 0 0.00054304653 0 -0.012222687 -0.01250069 + 25500 0 0.00054304345 0 -0.011223677 -0.0108418 + 26000 0 0.00054304081 0 -0.010370111 -0.0095034766 + 26500 0 0.00054303854 0 -0.0096346546 -0.0084112161 + 27000 0 0.00054303657 0 -0.0089962072 -0.0075100751 + 27500 0 0.00054303485 0 -0.0084382935 -0.0067592209 + 28000 0 0.00054303334 0 -0.0079478992 -0.0061279726 + 28500 0 0.000543032 0 -0.0075146283 -0.0055930001 + 29000 0 0.00054303081 0 -0.0071300893 -0.0051363504 + 29500 0 0.00054302976 0 -0.0067874426 -0.00474405 + 30000 0 0.00054302881 0 -0.0064810641 -0.0044051051 + 30500 0 0.00054302796 0 -0.0062062911 -0.0041107799 + 31000 0 0.0005430272 0 -0.0059592289 -0.0038540677 + 31500 0 0.00054302651 0 -0.0057366023 -0.0036293011 + 32000 0 0.00054302589 0 -0.0055356393 -0.0034318592 + 32500 0 0.00054302533 0 -0.0053539804 -0.0032579475 + 33000 0 0.00054302482 0 -0.0051896066 -0.0031044289 + 
33500 0 0.00054302436 0 -0.0050407818 -0.0029686945 + 34000 0 0.00054302395 0 -0.0049060063 -0.002848562 + 34500 0 0.00054302357 0 -0.0047839795 -0.002742197 + 35000 0 0.00054302323 0 -0.0046735688 -0.0026480503 + 35500 0 0.00054302292 0 -0.0045737849 -0.0025648085 + 36000 0 0.00054302264 0 -0.0044837605 -0.0024913535 + 36500 0 0.00054302239 0 -0.0044027327 -0.0024267309 + 37000 0 0.00054302217 0 -0.0043300292 -0.0023701236 + 37500 0 0.00054302197 0 -0.0042650554 -0.0023208304 + 38000 0 0.00054302179 0 -0.0042072838 -0.0022782486 + 38500 0 0.00054302163 0 -0.0041562461 -0.0022418592 + 39000 0 0.0005430215 0 -0.0041115244 -0.0022112152 + 39500 0 0.00054302138 0 -0.0040727453 -0.0021859312 + 40000 0 0.00054302127 0 -0.0040395743 -0.0021656748 +Loop time of 0.216115 on 4 procs for 40000 steps with 1 atoms + +Performance: 3198303.409 tau/day, 185087.003 timesteps/s +98.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.0020442 | 0.0047204 | 0.012008 | 6.1 | 2.18 +Neigh | 0.0069654 | 0.0072649 | 0.0074701 | 0.2 | 3.36 +Comm | 0.024762 | 0.039833 | 0.056166 | 7.4 | 18.43 +Output | 0.0020285 | 0.0023268 | 0.0026891 | 0.5 | 1.08 +Modify | 0.0081856 | 0.013537 | 0.029052 | 7.7 | 6.26 +Other | | 0.1484 | | | 68.68 + +Nlocal: 0.25 ave 1 max 0 min +Histogram: 3 0 0 0 0 0 0 0 0 1 +Nghost: 0.25 ave 1 max 0 min +Histogram: 3 0 0 0 0 0 0 0 0 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 0 +Ave neighs/atom = 0 +Neighbor list builds = 33 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/ees/log.23Jun17.fix_wall_region.g++.1 b/examples/USER/misc/ees/log.23Jun17.fix_wall_region.g++.1 new file mode 100644 index 0000000000..f4bd804127 --- /dev/null +++ b/examples/USER/misc/ees/log.23Jun17.fix_wall_region.g++.1 @@ -0,0 +1,180 @@ +LAMMPS (23 Jun 2017) + using 
1 OpenMP thread(s) per MPI task +units lj +atom_style ellipsoid +boundary p p p +read_data Data_region + orthogonal box = (0 0 0) to (60 60 60) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 3 atoms + 3 ellipsoids + reading velocities ... + 3 velocities +#------------------------------------# +region the_wall block 20. 40. 20. 40. 20. 40. side out +#------------------------------------# +pair_style resquared 1 +pair_coeff 1 1 10.0 1.0 0.5 0.5 4 0.5 0.5 4 1 +#------------------------------------# +timestep 0.0005 +#------------------------------------# + +fix EES_block all wall/region/ees the_wall 10. 1. 20 +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# + +thermo_style custom step temp press etotal f_EES_block[1] f_EES_block[3] + +fix NVE all nve/asphere +#------------------------------------# +compute qw all property/atom quatw +compute qi all property/atom quati +compute qj all property/atom quatj +compute qk all property/atom quatk +#------------------------------------# +thermo 500 +#dump 1 all custom 1000 dump_region id type x y z c_qw c_qi c_qj c_qk +run 50000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.3 + ghost atom cutoff = 1.3 + binsize = 0.65, bins = 93 93 93 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair resquared, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 7.958 | 7.958 | 7.958 Mbytes +Step Temp Press TotEng f_EES_block[1] f_EES_block[3] + 0 161.26842 0.0014932261 161.26842 -0.00042715909 -0.00015747012 + 500 161.26864 0.0014932281 161.26864 -0.00055836679 -0.00017557792 + 1000 161.26891 0.0014932306 161.26891 -0.00075239934 -0.00019646897 + 1500 161.26926 0.0014932339 161.26926 -0.0010543331 -0.0002206925 + 2000 161.26975 0.0014932385 161.26975 -0.0015566164 -0.00024893301 + 2500 161.27047 0.0014932451 161.27047 -0.0024700842 -0.00028205104 + 3000 161.27163 0.0014932558 161.27163 -0.0043191186 -0.00032113859 + 3500 161.27364 0.0014932745 161.27364 -0.0073109231 -0.00036759584 + 4000 161.26391 0.0014931843 161.26391 0.2453813 -0.00042323837 + 4500 77.783029 0.00072021324 77.783029 4908.2333 -0.00049044991 + 5000 160.23852 0.00148369 160.23852 0.13220034 -0.00057240356 + 5500 160.2431 0.0014837324 160.2431 -0.0072005112 -0.00067338844 + 6000 160.24148 0.0014837174 160.24148 -0.0040896209 -0.0007993028 + 6500 160.24071 0.0014837103 160.24071 -0.0023574992 -0.00095841662 + 7000 160.24038 0.0014837072 160.24038 -0.001495267 -0.001162584 + 7500 160.24031 0.0014837066 160.24031 -0.0010172907 -0.0014292316 + 8000 160.24043 0.0014837077 160.24043 -0.00072823316 -0.0017847384 + 8500 160.24073 0.0014837105 160.24073 -0.00054165121 -0.0022704187 + 9000 160.24121 0.0014837149 160.24121 -0.00041506183 -0.0029536182 + 9500 160.24192 0.0014837215 160.24192 -0.00032574317 -0.0039493769 + 10000 160.24293 0.0014837308 160.24293 -0.00026069929 -0.0054649542 + 10500 160.2444 0.0014837444 160.2444 
-0.00021208476 -0.0078936604 + 11000 160.2466 0.0014837648 160.2466 -0.00017494913 -0.011981095 + 11500 160.25001 0.0014837964 160.25001 -0.00014605132 -0.018414768 + 12000 160.25411 0.0014838343 160.25411 -0.00012320207 -0.0069059119 + 12500 160.18929 0.0014832342 160.18929 -0.00010488251 1.4672359 + 13000 127.86814 0.0011839642 127.86814 -9.0014128e-05 1420.4476 + 13500 154.09961 0.0014268483 154.09961 -7.7815401e-05 5.4703004 + 14000 154.31359 0.0014288295 154.31359 -6.7709777e-05 0.025351973 + 14500 154.3112 0.0014288074 154.3112 -5.9265083e-05 -0.020243217 + 15000 154.30773 0.0014287753 154.30773 -5.2152714e-05 -0.013791198 + 15500 154.30551 0.0014287547 154.30551 -4.6119584e-05 -0.0090829354 + 16000 154.30409 0.0014287415 154.30409 -4.0968492e-05 -0.0062748728 + 16500 154.30315 0.0014287329 154.30315 -3.6544144e-05 -0.004532774 + 17000 154.30254 0.0014287272 154.30254 -3.2723062e-05 -0.003394041 + 17500 154.30216 0.0014287237 154.30216 -2.9406189e-05 -0.0026153428 + 18000 154.30195 0.0014287218 154.30195 -2.6513408e-05 -0.0020627306 + 18500 154.30188 0.0014287211 154.30188 -2.397943e-05 -0.0016584214 + 19000 154.30194 0.0014287216 154.30194 -2.1750674e-05 -0.0013550174 + 19500 154.3021 0.0014287232 154.3021 -1.9782885e-05 -0.0011224153 + 20000 154.30239 0.0014287258 154.30239 -1.8039282e-05 -0.00094080826 + 20500 154.30279 0.0014287295 154.30279 -1.6489128e-05 -0.00079676335 + 21000 154.30332 0.0014287345 154.30332 -1.5106598e-05 -0.00068092925 + 21500 154.30401 0.0014287409 154.30401 -1.3869884e-05 -0.000586646 + 22000 154.30489 0.001428749 154.30489 -1.2760487e-05 -0.00050907464 + 22500 154.30601 0.0014287593 154.30601 -1.1762643e-05 -0.00044463657 + 23000 154.30743 0.0014287725 154.30743 -1.0862863e-05 -0.00039064328 + 23500 154.30924 0.0014287893 154.30924 -1.004956e-05 -0.00034504622 + 24000 154.31159 0.001428811 154.31159 -9.3127419e-06 -0.0003062645 + 24500 154.31464 0.0014288393 154.31464 8.7817413e-06 -0.00027306395 + 25000 154.31848 0.0014288748 
154.31848 9.4348998e-06 -0.00024447093 + 25500 154.32222 0.0014289094 154.32222 1.0150613e-05 -0.00021970994 + 26000 154.31667 0.0014288581 154.31667 1.0936298e-05 -0.0001981578 + 26500 154.19679 0.0014277481 154.19679 1.1800434e-05 -0.00017930967 + 27000 151.70582 0.0014046835 151.70582 1.2752738e-05 -0.00016275349 + 27500 144.06864 0.0013339689 144.06864 1.3804382e-05 -0.00014815061 + 28000 153.30039 0.0014194481 153.30039 1.4968247e-05 -0.00013522085 + 28500 153.70626 0.0014232061 153.70626 1.6259237e-05 -0.00012373107 + 29000 153.73143 0.0014234392 153.73143 1.7694652e-05 -0.00011348611 + 29500 153.72942 0.0014234205 153.72942 1.9294649e-05 -0.0001043218 + 30000 153.72536 0.001423383 153.72536 2.1082798e-05 -9.6099303e-05 + 30500 153.72189 0.0014233508 153.72189 2.3086777e-05 -8.8700684e-05 + 31000 153.71915 0.0014233255 153.71915 2.533922e-05 -8.2025314e-05 + 31500 153.71701 0.0014233056 153.71701 2.7878775e-05 -7.5986974e-05 + 32000 153.7153 0.0014232898 153.7153 3.0751438e-05 -7.05115e-05 + 32500 153.71392 0.0014232771 153.71392 3.4012214e-05 -6.5534861e-05 + 33000 153.7128 0.0014232667 153.7128 3.7727241e-05 -6.1001578e-05 + 33500 153.71187 0.001423258 153.71187 4.1976497e-05 -5.686342e-05 + 34000 153.71109 0.0014232508 153.71109 4.6857282e-05 -5.3078322e-05 + 34500 153.71043 0.0014232447 153.71043 5.2488748e-05 -4.9609488e-05 + 35000 153.70987 0.0014232395 153.70987 5.9017833e-05 -4.6424634e-05 + 35500 153.70939 0.0014232351 153.70939 6.6627108e-05 -4.3495356e-05 + 36000 153.70898 0.0014232313 153.70898 7.5545279e-05 -4.0796599e-05 + 36500 153.70863 0.001423228 153.70863 8.6061387e-05 -3.8306204e-05 + 37000 153.70832 0.0014232252 153.70832 9.8544264e-05 -3.6004526e-05 + 37500 153.70806 0.0014232227 153.70806 0.00011346953 -3.3874109e-05 + 38000 153.70783 0.0014232207 153.70783 0.00013145761 -3.1899404e-05 + 38500 153.70764 0.0014232189 153.70764 0.00015332826 -3.0066532e-05 + 39000 153.70748 0.0014232174 153.70748 0.00018017988 -2.836308e-05 + 39500 
153.70736 0.0014232163 153.70736 0.00021350768 -2.6777922e-05 + 40000 153.70726 0.0014232154 153.70726 0.00025538329 -2.5301066e-05 + 40500 153.70719 0.0014232147 153.70719 0.00030873482 -2.3923522e-05 + 41000 153.70716 0.0014232145 153.70716 0.00037779644 -2.2637186e-05 + 41500 153.70717 0.0014232145 153.70717 0.00046885357 -2.1434741e-05 + 42000 153.70722 0.001423215 153.70722 0.00059152584 -2.0309568e-05 + 42500 153.70733 0.001423216 153.70733 0.00076107465 -1.9255668e-05 + 43000 153.70751 0.0014232177 153.70751 0.0010027741 -1.82676e-05 + 43500 153.7078 0.0014232203 153.7078 0.0013607156 -1.7340414e-05 + 44000 153.70823 0.0014232244 153.70823 0.0019168919 -1.6469607e-05 + 44500 153.70891 0.0014232306 153.70891 0.0028362183 -1.5651071e-05 + 45000 153.70999 0.0014232407 153.70999 0.0044814624 -1.4881056e-05 + 45500 153.71183 0.0014232577 153.71183 0.0076783372 -1.4156133e-05 + 46000 153.71504 0.0014232874 153.71504 0.012021529 -1.347316e-05 + 46500 153.70337 0.0014231794 153.70337 -0.27386631 -1.2829258e-05 + 47000 109.96863 0.0010182281 109.96863 -1552.3264 -1.2221783e-05 + 47500 56.442204 0.000522613 56.442204 -0.62595366 -1.1648303e-05 + 48000 56.439532 0.00052258826 56.439532 0.015282177 -1.1106581e-05 + 48500 56.439907 0.00052259173 56.439907 0.01178542 -1.0594552e-05 + 49000 56.44015 0.00052259399 56.44015 0.0080268131 -1.0110314e-05 + 49500 56.440316 0.00052259552 56.440316 0.0061338692 -9.6521057e-06 + 50000 56.440444 0.0005225967 56.440444 0.005195231 -9.2183009e-06 +Loop time of 0.344104 on 1 procs for 50000 steps with 3 atoms + +Performance: 6277171.077 tau/day, 145304.886 timesteps/s +98.2% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.023412 | 0.023412 | 0.023412 | 0.0 | 6.80 +Neigh | 0.13182 | 0.13182 | 0.13182 | 0.0 | 38.31 +Comm | 0.084006 | 0.084006 | 0.084006 | 0.0 | 24.41 +Output | 
0.0023429 | 0.0023429 | 0.0023429 | 0.0 | 0.68 +Modify | 0.083383 | 0.083383 | 0.083383 | 0.0 | 24.23 +Other | | 0.01914 | | | 5.56 + +Nlocal: 3 ave 3 max 3 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1 ave 1 max 1 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 0 +Ave neighs/atom = 0 +Neighbor list builds = 210 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/ees/log.23Jun17.fix_wall_region.g++.4 b/examples/USER/misc/ees/log.23Jun17.fix_wall_region.g++.4 new file mode 100644 index 0000000000..6a5f6fca19 --- /dev/null +++ b/examples/USER/misc/ees/log.23Jun17.fix_wall_region.g++.4 @@ -0,0 +1,180 @@ +LAMMPS (23 Jun 2017) + using 1 OpenMP thread(s) per MPI task +units lj +atom_style ellipsoid +boundary p p p +read_data Data_region + orthogonal box = (0 0 0) to (60 60 60) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 3 atoms + 3 ellipsoids + reading velocities ... + 3 velocities +#------------------------------------# +region the_wall block 20. 40. 20. 40. 20. 40. side out +#------------------------------------# +pair_style resquared 1 +pair_coeff 1 1 10.0 1.0 0.5 0.5 4 0.5 0.5 4 1 +#------------------------------------# +timestep 0.0005 +#------------------------------------# + +fix EES_block all wall/region/ees the_wall 10. 1. 20 +#^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^# + +thermo_style custom step temp press etotal f_EES_block[1] f_EES_block[3] + +fix NVE all nve/asphere +#------------------------------------# +compute qw all property/atom quatw +compute qi all property/atom quati +compute qj all property/atom quatj +compute qk all property/atom quatk +#------------------------------------# +thermo 500 +#dump 1 all custom 1000 dump_region id type x y z c_qw c_qi c_qj c_qk +run 50000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 1.3 + ghost atom cutoff = 1.3 + binsize = 0.65, bins = 93 93 93 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair resquared, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 5.629 | 5.722 | 5.754 Mbytes +Step Temp Press TotEng f_EES_block[1] f_EES_block[3] + 0 161.26842 0.0014932261 161.26842 -0.00042715909 -0.00015747012 + 500 161.26864 0.0014932281 161.26864 -0.00055836679 -0.00017557792 + 1000 161.26891 0.0014932306 161.26891 -0.00075239934 -0.00019646897 + 1500 161.26926 0.0014932339 161.26926 -0.0010543331 -0.0002206925 + 2000 161.26975 0.0014932385 161.26975 -0.0015566164 -0.00024893301 + 2500 161.27047 0.0014932451 161.27047 -0.0024700842 -0.00028205104 + 3000 161.27163 0.0014932558 161.27163 -0.0043191186 -0.00032113859 + 3500 161.27364 0.0014932745 161.27364 -0.0073109231 -0.00036759584 + 4000 161.26391 0.0014931843 161.26391 0.2453813 -0.00042323837 + 4500 77.783029 0.00072021324 77.783029 4908.2333 -0.00049044991 + 5000 160.23852 0.00148369 160.23852 0.13220034 -0.00057240356 + 5500 160.2431 0.0014837324 160.2431 -0.0072005112 -0.00067338844 + 6000 160.24148 0.0014837174 160.24148 -0.0040896209 -0.0007993028 + 6500 160.24071 0.0014837103 160.24071 -0.0023574992 -0.00095841662 + 7000 160.24038 0.0014837072 160.24038 -0.001495267 -0.001162584 + 7500 160.24031 0.0014837066 160.24031 -0.0010172907 -0.0014292316 + 8000 160.24043 0.0014837077 160.24043 -0.00072823316 -0.0017847384 + 8500 160.24073 0.0014837105 160.24073 -0.00054165121 -0.0022704187 + 9000 160.24121 0.0014837149 160.24121 -0.00041506183 -0.0029536182 + 9500 160.24192 0.0014837215 160.24192 -0.00032574317 -0.0039493769 + 10000 160.24293 0.0014837308 160.24293 -0.00026069929 -0.0054649542 + 10500 160.2444 0.0014837444 160.2444 
-0.00021208476 -0.0078936604 + 11000 160.2466 0.0014837648 160.2466 -0.00017494913 -0.011981095 + 11500 160.25001 0.0014837964 160.25001 -0.00014605132 -0.018414768 + 12000 160.25411 0.0014838343 160.25411 -0.00012320207 -0.0069059119 + 12500 160.18929 0.0014832342 160.18929 -0.00010488251 1.4672359 + 13000 127.86814 0.0011839642 127.86814 -9.0014128e-05 1420.4476 + 13500 154.09961 0.0014268483 154.09961 -7.7815401e-05 5.4703004 + 14000 154.31359 0.0014288295 154.31359 -6.7709777e-05 0.025351973 + 14500 154.3112 0.0014288074 154.3112 -5.9265083e-05 -0.020243217 + 15000 154.30773 0.0014287753 154.30773 -5.2152714e-05 -0.013791198 + 15500 154.30551 0.0014287547 154.30551 -4.6119584e-05 -0.0090829354 + 16000 154.30409 0.0014287415 154.30409 -4.0968492e-05 -0.0062748728 + 16500 154.30315 0.0014287329 154.30315 -3.6544144e-05 -0.004532774 + 17000 154.30254 0.0014287272 154.30254 -3.2723062e-05 -0.003394041 + 17500 154.30216 0.0014287237 154.30216 -2.9406189e-05 -0.0026153428 + 18000 154.30195 0.0014287218 154.30195 -2.6513408e-05 -0.0020627306 + 18500 154.30188 0.0014287211 154.30188 -2.397943e-05 -0.0016584214 + 19000 154.30194 0.0014287216 154.30194 -2.1750674e-05 -0.0013550174 + 19500 154.3021 0.0014287232 154.3021 -1.9782885e-05 -0.0011224153 + 20000 154.30239 0.0014287258 154.30239 -1.8039282e-05 -0.00094080826 + 20500 154.30279 0.0014287295 154.30279 -1.6489128e-05 -0.00079676335 + 21000 154.30332 0.0014287345 154.30332 -1.5106598e-05 -0.00068092925 + 21500 154.30401 0.0014287409 154.30401 -1.3869884e-05 -0.000586646 + 22000 154.30489 0.001428749 154.30489 -1.2760487e-05 -0.00050907464 + 22500 154.30601 0.0014287593 154.30601 -1.1762643e-05 -0.00044463657 + 23000 154.30743 0.0014287725 154.30743 -1.0862863e-05 -0.00039064328 + 23500 154.30924 0.0014287893 154.30924 -1.004956e-05 -0.00034504622 + 24000 154.31159 0.001428811 154.31159 -9.3127419e-06 -0.0003062645 + 24500 154.31464 0.0014288393 154.31464 8.7817413e-06 -0.00027306395 + 25000 154.31848 0.0014288748 
154.31848 9.4348998e-06 -0.00024447093 + 25500 154.32222 0.0014289094 154.32222 1.0150613e-05 -0.00021970994 + 26000 154.31667 0.0014288581 154.31667 1.0936298e-05 -0.0001981578 + 26500 154.19679 0.0014277481 154.19679 1.1800434e-05 -0.00017930967 + 27000 151.70582 0.0014046835 151.70582 1.2752738e-05 -0.00016275349 + 27500 144.06864 0.0013339689 144.06864 1.3804382e-05 -0.00014815061 + 28000 153.30039 0.0014194481 153.30039 1.4968247e-05 -0.00013522085 + 28500 153.70626 0.0014232061 153.70626 1.6259237e-05 -0.00012373107 + 29000 153.73143 0.0014234392 153.73143 1.7694652e-05 -0.00011348611 + 29500 153.72942 0.0014234205 153.72942 1.9294649e-05 -0.0001043218 + 30000 153.72536 0.001423383 153.72536 2.1082798e-05 -9.6099303e-05 + 30500 153.72189 0.0014233508 153.72189 2.3086777e-05 -8.8700684e-05 + 31000 153.71915 0.0014233255 153.71915 2.533922e-05 -8.2025314e-05 + 31500 153.71701 0.0014233056 153.71701 2.7878775e-05 -7.5986974e-05 + 32000 153.7153 0.0014232898 153.7153 3.0751438e-05 -7.05115e-05 + 32500 153.71392 0.0014232771 153.71392 3.4012214e-05 -6.5534861e-05 + 33000 153.7128 0.0014232667 153.7128 3.7727241e-05 -6.1001578e-05 + 33500 153.71187 0.001423258 153.71187 4.1976497e-05 -5.686342e-05 + 34000 153.71109 0.0014232508 153.71109 4.6857282e-05 -5.3078322e-05 + 34500 153.71043 0.0014232447 153.71043 5.2488748e-05 -4.9609488e-05 + 35000 153.70987 0.0014232395 153.70987 5.9017833e-05 -4.6424634e-05 + 35500 153.70939 0.0014232351 153.70939 6.6627108e-05 -4.3495356e-05 + 36000 153.70898 0.0014232313 153.70898 7.5545279e-05 -4.0796599e-05 + 36500 153.70863 0.001423228 153.70863 8.6061387e-05 -3.8306204e-05 + 37000 153.70832 0.0014232252 153.70832 9.8544264e-05 -3.6004526e-05 + 37500 153.70806 0.0014232227 153.70806 0.00011346953 -3.3874109e-05 + 38000 153.70783 0.0014232207 153.70783 0.00013145761 -3.1899404e-05 + 38500 153.70764 0.0014232189 153.70764 0.00015332826 -3.0066532e-05 + 39000 153.70748 0.0014232174 153.70748 0.00018017988 -2.836308e-05 + 39500 
153.70736 0.0014232163 153.70736 0.00021350768 -2.6777922e-05 + 40000 153.70726 0.0014232154 153.70726 0.00025538329 -2.5301066e-05 + 40500 153.70719 0.0014232147 153.70719 0.00030873482 -2.3923522e-05 + 41000 153.70716 0.0014232145 153.70716 0.00037779644 -2.2637186e-05 + 41500 153.70717 0.0014232145 153.70717 0.00046885357 -2.1434741e-05 + 42000 153.70722 0.001423215 153.70722 0.00059152584 -2.0309568e-05 + 42500 153.70733 0.001423216 153.70733 0.00076107465 -1.9255668e-05 + 43000 153.70751 0.0014232177 153.70751 0.0010027741 -1.82676e-05 + 43500 153.7078 0.0014232203 153.7078 0.0013607156 -1.7340414e-05 + 44000 153.70823 0.0014232244 153.70823 0.0019168919 -1.6469607e-05 + 44500 153.70891 0.0014232306 153.70891 0.0028362183 -1.5651071e-05 + 45000 153.70999 0.0014232407 153.70999 0.0044814624 -1.4881056e-05 + 45500 153.71183 0.0014232577 153.71183 0.0076783372 -1.4156133e-05 + 46000 153.71504 0.0014232874 153.71504 0.012021529 -1.347316e-05 + 46500 153.70337 0.0014231794 153.70337 -0.27386631 -1.2829258e-05 + 47000 109.96863 0.0010182281 109.96863 -1552.3264 -1.2221783e-05 + 47500 56.442204 0.000522613 56.442204 -0.62595366 -1.1648303e-05 + 48000 56.439532 0.00052258826 56.439532 0.015282177 -1.1106581e-05 + 48500 56.439907 0.00052259173 56.439907 0.01178542 -1.0594552e-05 + 49000 56.44015 0.00052259399 56.44015 0.0080268131 -1.0110314e-05 + 49500 56.440316 0.00052259552 56.440316 0.0061338692 -9.6521057e-06 + 50000 56.440444 0.0005225967 56.440444 0.005195231 -9.2183009e-06 +Loop time of 0.483531 on 4 procs for 50000 steps with 3 atoms + +Performance: 4467138.628 tau/day, 103405.987 timesteps/s +97.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.0032809 | 0.010859 | 0.013737 | 4.2 | 2.25 +Neigh | 0.037744 | 0.039156 | 0.042488 | 1.0 | 8.10 +Comm | 0.19775 | 0.2088 | 0.21768 | 1.8 | 43.18 +Output | 
0.0028036 | 0.0030343 | 0.0035536 | 0.6 | 0.63 +Modify | 0.011325 | 0.032141 | 0.039636 | 6.7 | 6.65 +Other | | 0.1895 | | | 39.20 + +Nlocal: 0.75 ave 1 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 3 +Nghost: 1.75 ave 3 max 1 min +Histogram: 2 0 0 0 0 1 0 0 0 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 0 +Ave neighs/atom = 0 +Neighbor list builds = 210 +Dangerous builds = 0 + +Total wall time: 0:00:00 diff --git a/examples/USER/misc/flow_gauss/README b/examples/USER/misc/flow_gauss/README index 4966cd2dc9..ef7cc82d96 100644 --- a/examples/USER/misc/flow_gauss/README +++ b/examples/USER/misc/flow_gauss/README @@ -1,45 +1,45 @@ The input script in.GD is an example simulation using Gaussian dynamics (GD). The simulation is of a simple 2d Lennard-Jones fluid flowing through a pipe. -For details see online LAMMPS documentation and +For details see online LAMMPS documentation and Strong and Eaves, J. Phys. Chem. Lett. 7(10) 2016, p. 1907. -Note that the run times and box size are chosen to allow a fast example run. -They are not adequate for a real simulation. +Note that the run times and box size are chosen to allow a fast example run. +They are not adequate for a real simulation. The script has the following parts: 1) initialize variables - These can be modified to customize the simulation. Note that if the - pipe dimensions L or d are changed, the geometry should be checked - by visualizing the coordinates in all.init.lammpstrj. + These can be modified to customize the simulation. Note that if the + pipe dimensions L or d are changed, the geometry should be checked + by visualizing the coordinates in all.init.lammpstrj. 2) create box - + 3) set up potential 4) create atoms 5) set up profile-unbiased thermostat (PUT) - see Evans and Morriss, Phys. Rev. Lett. 56(20) 1986, p. 2172 - By default, this uses boxes which contain on average 8 molecules. + see Evans and Morriss, Phys. Rev. Lett. 56(20) 1986, p. 
2172 + By default, this uses boxes which contain on average 8 molecules. 6) equilibrate without GD - + 7) initialize the center-of-mass velocity and run to achieve steady-state - The system is initialized with a uniform velocity profile, which - relaxes over the course of the simulation. + The system is initialized with a uniform velocity profile, which + relaxes over the course of the simulation. 8) collect data - The data is output in several files: - GD.out contains the force that GD applies, and the flux in the x- and - y- directions. The output Jx should be equal to the value of - J set in section 1, which is 0.1 by default. - x_profiles contains the velocity, density, and pressure profiles in - the x-direction. The pressure profile is given by - (-1/2V)*(c_spa[1] + c_spa[2]), where V is the volume of a - slice. The pressure profile is computed with IK1, see - Todd, Evans, and Davis, Phys. Rev. E 52(2) 1995, p. 1627. - Note that to compare with the pump method, or to - compute a pressure drop, you must correct this pressure - profile as described in Strong 2016 above. - Vy_profile is the velocity profile inside the pipe along the - y-direction, u_x(y). + The data is output in several files: + GD.out contains the force that GD applies, and the flux in the x- and + y- directions. The output Jx should be equal to the value of + J set in section 1, which is 0.1 by default. + x_profiles contains the velocity, density, and pressure profiles in + the x-direction. The pressure profile is given by + (-1/2V)*(c_spa[1] + c_spa[2]), where V is the volume of a + slice. The pressure profile is computed with IK1, see + Todd, Evans, and Davis, Phys. Rev. E 52(2) 1995, p. 1627. + Note that to compare with the pump method, or to + compute a pressure drop, you must correct this pressure + profile as described in Strong 2016 above. + Vy_profile is the velocity profile inside the pipe along the + y-direction, u_x(y). 
diff --git a/examples/USER/misc/flow_gauss/in.GD b/examples/USER/misc/flow_gauss/in.GD old mode 100755 new mode 100644 index 8117715c12..bcff4d4c57 --- a/examples/USER/misc/flow_gauss/in.GD +++ b/examples/USER/misc/flow_gauss/in.GD @@ -7,83 +7,85 @@ clear #frequency for outputting info (timesteps) -variable dump_rate equal 50 -variable thermo_rate equal 10 +variable dump_rate equal 50 +variable thermo_rate equal 10 #equilibration time (timesteps) -variable equil equal 1000 +variable equil equal 1000 #stabilization time (timesteps to reach steady-state) -variable stabil equal 1000 +variable stabil equal 1000 #data collection time (timesteps) -variable run equal 2000 +variable run equal 2000 #length of pipe -variable L equal 30 +variable L equal 30 #width of pipe -variable d equal 20 +variable d equal 20 #flux (mass/sigma*tau) -variable J equal 0.1 +variable J equal 0.1 #simulation box dimensions -variable Lx equal 100 -variable Ly equal 40 +variable Lx equal 100 +variable Ly equal 40 #bulk fluid density -variable dens equal 0.8 +variable dens equal 0.8 #lattice spacing for wall atoms -variable aWall equal 1.0 #1.7472 +variable aWall equal 1.0 #1.7472 #timestep -variable ts equal 0.001 +variable ts equal 0.001 #temperature -variable T equal 2.0 +variable T equal 2.0 #thermostat damping constant -variable tdamp equal ${ts}*100 +variable tdamp equal ${ts}*100 -units lj -dimension 2 -atom_style atomic +units lj +dimension 2 +atom_style atomic ############################################################################### #create box #create lattice with the spacing aWall -variable rhoWall equal ${aWall}^(-2) -lattice sq ${rhoWall} +variable rhoWall equal ${aWall}^(-2) +lattice sq ${rhoWall} #modify input dimensions to be multiples of aWall -variable L1 equal round($L/${aWall})*${aWall} -variable d1 equal round($d/${aWall})*${aWall} -variable Ly1 equal round(${Ly}/${aWall})*${aWall} -variable Lx1 equal round(${Lx}/${aWall})*${aWall} +variable L1 equal 
round($L/${aWall})*${aWall} +variable d1 equal round($d/${aWall})*${aWall} +variable Ly1 equal round(${Ly}/${aWall})*${aWall} +variable Lx1 equal round(${Lx}/${aWall})*${aWall} #create simulation box -variable lx2 equal ${Lx1}/2 -variable ly2 equal ${Ly1}/2 -region simbox block -${lx2} ${lx2} -${ly2} ${ly2} 0 0.1 units box -create_box 2 simbox +variable lx2 equal ${Lx1}/2 +variable ly2 equal ${Ly1}/2 +region simbox block -${lx2} ${lx2} -${ly2} ${ly2} 0 0.1 units box +create_box 2 simbox ##################################################################### #set up potential -mass 1 1.0 #fluid atoms -mass 2 1.0 #wall atoms +mass 1 1.0 #fluid atoms +mass 2 1.0 #wall atoms -pair_style lj/cut 2.5 -pair_modify shift yes -pair_coeff 1 1 1.0 1.0 2.5 -pair_coeff 1 2 1.0 1.0 1.12246 -pair_coeff 2 2 0.0 0.0 0.0 +pair_style lj/cut 2.5 +pair_modify shift yes +pair_coeff 1 1 1.0 1.0 2.5 +pair_coeff 1 2 1.0 1.0 1.12246 +pair_coeff 2 2 0.0 0.0 -timestep ${ts} +neigh_modify exclude type 2 2 + +timestep ${ts} ##################################################################### #create atoms @@ -92,167 +94,169 @@ timestep ${ts} create_atoms 2 box #define region which is "walled off" -variable dhalf equal ${d1}/2 -variable Lhalf equal ${L1}/2 -region walltop block -${Lhalf} ${Lhalf} ${dhalf} EDGE -0.1 0.1 & - units box -region wallbot block -${Lhalf} ${Lhalf} EDGE -${dhalf} -0.1 0.1 & - units box -region outsidewall union 2 walltop wallbot side out +variable dhalf equal ${d1}/2 +variable Lhalf equal ${L1}/2 +region walltop block -${Lhalf} ${Lhalf} ${dhalf} EDGE -0.1 0.1 & + units box +region wallbot block -${Lhalf} ${Lhalf} EDGE -${dhalf} -0.1 0.1 & + units box +region outsidewall union 2 walltop wallbot side out #remove wall atoms outside wall region -group outside region outsidewall -delete_atoms group outside +group outside region outsidewall +delete_atoms group outside #remove wall atoms that aren't on edge of wall region -variable x1 equal ${Lhalf}-${aWall} -variable y1 equal 
${dhalf}+${aWall} -region insideTop block -${x1} ${x1} ${y1} EDGE -0.1 0.1 units box -region insideBot block -${x1} ${x1} EDGE -${y1} -0.1 0.1 units box -region insideWall union 2 insideTop insideBot -group insideWall region insideWall -delete_atoms group insideWall +variable x1 equal ${Lhalf}-${aWall} +variable y1 equal ${dhalf}+${aWall} +region insideTop block -${x1} ${x1} ${y1} EDGE -0.1 0.1 units box +region insideBot block -${x1} ${x1} EDGE -${y1} -0.1 0.1 units box +region insideWall union 2 insideTop insideBot +group insideWall region insideWall +delete_atoms group insideWall #define new lattice, to give correct fluid density #y lattice const must be a multiple of aWall -variable atrue equal ${dens}^(-1/2) -variable ay equal round(${atrue}/${aWall})*${aWall} +variable atrue equal ${dens}^(-1/2) +variable ay equal round(${atrue}/${aWall})*${aWall} #choose x lattice const to give correct density -variable ax equal (${ay}*${dens})^(-1) +variable ax equal (${ay}*${dens})^(-1) #change Lx to be multiple of ax -variable Lx1 equal round(${Lx}/${ax})*${ax} -variable lx2 equal ${Lx1}/2 -change_box all x final -${lx2} ${lx2} units box +variable Lx1 equal round(${Lx}/${ax})*${ax} +variable lx2 equal ${Lx1}/2 +change_box all x final -${lx2} ${lx2} units box #define new lattice -lattice custom ${dens} & - a1 ${ax} 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 & - basis 0.0 0.0 0.0 +lattice custom ${dens} & + a1 ${ax} 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 & + basis 0.0 0.0 0.0 #fill in rest of box with bulk particles -variable delta equal 0.001 -variable Ldelt equal ${Lhalf}+${delta} -variable dDelt equal ${dhalf}-${delta} -region left block EDGE -${Ldelt} EDGE EDGE -0.1 0.1 units box -region right block ${Ldelt} EDGE EDGE EDGE -0.1 0.1 units box -region pipe block -${Ldelt} ${Ldelt} -${dDelt} ${dDelt} -0.1 0.1 & - units box +variable delta equal 0.001 +variable Ldelt equal ${Lhalf}+${delta} +variable dDelt equal ${dhalf}-${delta} +region left block EDGE -${Ldelt} EDGE EDGE 
-0.1 0.1 units box +region right block ${Ldelt} EDGE EDGE EDGE -0.1 0.1 units box +region pipe block -${Ldelt} ${Ldelt} -${dDelt} ${dDelt} -0.1 0.1 & + units box -region bulk union 3 left pipe right -create_atoms 1 region bulk +region bulk union 3 left pipe right +create_atoms 1 region bulk -group bulk type 1 -group wall type 2 +group bulk type 1 +group wall type 2 #remove atoms that are too close to wall delete_atoms overlap 0.9 bulk wall -neighbor 0.3 bin -neigh_modify delay 0 every 1 check yes +neighbor 0.3 bin +neigh_modify delay 0 every 1 check yes neigh_modify exclude group wall wall -velocity bulk create $T 78915 dist gaussian rot yes mom yes loop geom +velocity bulk create $T 78915 dist gaussian rot yes mom yes loop geom ##################################################################### #set up PUT #see Evans and Morriss, Phys. Rev. Lett. 56(20) 1986, p. 2172 #average number of particles per box, Evans and Morriss used 2.0 -variable NperBox equal 8.0 +variable NperBox equal 8.0 #calculate box sizes -variable boxSide equal sqrt(${NperBox}/${dens}) -variable nX equal round(lx/${boxSide}) -variable nY equal round(ly/${boxSide}) -variable dX equal lx/${nX} -variable dY equal ly/${nY} +variable boxSide equal sqrt(${NperBox}/${dens}) +variable nX equal round(lx/${boxSide}) +variable nY equal round(ly/${boxSide}) +variable dX equal lx/${nX} +variable dY equal ly/${nY} #temperature of fluid (excluding wall) -compute myT bulk temp +compute myT bulk temp #profile-unbiased temperature of fluid -compute myTp bulk temp/profile 1 1 0 xy ${nX} ${nY} +compute myTp bulk temp/profile 1 1 0 xy ${nX} ${nY} #thermo setup -thermo ${thermo_rate} -thermo_style custom step c_myT c_myTp etotal press +thermo ${thermo_rate} +thermo_style custom step c_myT c_myTp etotal press #dump initial configuration -dump 55 all custom 1 all.init.lammpstrj id type x y z vx vy vz -dump 56 wall custom 1 wall.init.lammpstrj id type x y z -dump_modify 55 sort id -dump_modify 56 sort id -run 0 
-undump 55 -undump 56 +# dump 55 all custom 1 all.init.lammpstrj id type x y z vx vy vz +# dump 56 wall custom 1 wall.init.lammpstrj id type x y z +# dump_modify 55 sort id +# dump_modify 56 sort id +run 0 +# undump 55 +# undump 56 ##################################################################### #equilibrate without GD -fix nvt bulk nvt temp $T $T ${tdamp} -fix_modify nvt temp myTp -fix 2 bulk enforce2d +fix nvt bulk nvt temp $T $T ${tdamp} +fix_modify nvt temp myTp +fix 2 bulk enforce2d -run ${equil} +run ${equil} ##################################################################### #initialize the COM velocity and run to achieve steady-state #calculate velocity to add: V=J/rho_total -variable Vadd equal $J*lx*ly/count(bulk) +variable Vadd equal $J*lx*ly/count(bulk) #first remove any COM velocity, then add back the streaming velocity velocity bulk zero linear -velocity bulk set ${Vadd} 0.0 0.0 units box sum yes mom no +velocity bulk set ${Vadd} 0.0 0.0 units box sum yes mom no -fix GD bulk flow/gauss 1 0 0 #energy yes -#fix_modify GD energy yes +fix GD bulk flow/gauss 1 0 0 #energy yes +#fix_modify GD energy yes -run ${stabil} +run ${stabil} ##################################################################### #collect data #print the applied force and total flux to ensure conservation of Jx -variable Fapp equal f_GD[1] -compute vxBulk bulk reduce sum vx -compute vyBulk bulk reduce sum vy +variable Fapp equal f_GD[1] +compute vxBulk bulk reduce sum vx +compute vyBulk bulk reduce sum vy variable invVol equal 1.0/(lx*ly) -variable jx equal c_vxBulk*${invVol} -variable jy equal c_vyBulk*${invVol} -variable curr_step equal step -fix print_vCOM all print ${dump_rate} & - "${curr_step} ${Fapp} ${jx} ${jy}" file GD.out screen no & - title "timestep Fapp Jx Jy" +variable jx equal c_vxBulk*${invVol} +variable jy equal c_vyBulk*${invVol} +variable curr_step equal step +variable p_Fapp format Fapp %.3f +variable p_jx format jx %.5g +variable p_jy format jy %.5g +fix 
print_vCOM all print ${dump_rate} & + "${curr_step} ${p_Fapp} ${p_jx} ${p_jy}" file GD.out screen no & + title "timestep Fapp Jx Jy" -#compute IK1 pressure profile +#compute IK1 pressure profile #see Todd, Evans, and Davis, Phys. Rev. E 52(2) 1995, p. 1627 #use profile-unbiased temperature to remove the streaming velocity #from the kinetic part of the pressure -compute spa bulk stress/atom myTp +compute spa bulk stress/atom myTp #for the pressure profile, use the same grid as the PUT -compute chunkX bulk chunk/atom bin/1d x lower ${dX} units box +compute chunkX bulk chunk/atom bin/1d x lower ${dX} units box #output pressure profile and other profiles #the pressure profile is (-1/2V)*(c_spa[1] + c_spa[2]), where #V is the volume of a slice -fix profiles bulk ave/chunk 1 1 ${dump_rate} chunkX & - vx density/mass c_spa[1] c_spa[2] & - file x_profiles ave running overwrite +fix profiles bulk ave/chunk 1 1 ${dump_rate} chunkX & + vx density/mass c_spa[1] c_spa[2] & + file x_profiles ave running overwrite #compute velocity profile across the pipe with a finer grid -variable dYnew equal ${dY}/10 -compute chunkY bulk chunk/atom bin/1d y center ${dYnew} units box & - region pipe -fix velYprof bulk ave/chunk 1 1 ${dump_rate} chunkY & - vx file Vy_profile ave running overwrite +variable dYnew equal ${dY}/10 +compute chunkY bulk chunk/atom bin/1d y center ${dYnew} units box & + region pipe +fix velYprof bulk ave/chunk 1 1 ${dump_rate} chunkY & + vx file Vy_profile ave running overwrite #full trajectory -dump 7 bulk custom ${dump_rate} bulk.lammpstrj & - id type x y z -dump_modify 7 sort id +# dump 7 bulk custom ${dump_rate} bulk.lammpstrj id type x y z +# dump_modify 7 sort id -run ${run} +run ${run} diff --git a/examples/USER/misc/flow_gauss/log.6Jul17.GD.g++.1 b/examples/USER/misc/flow_gauss/log.6Jul17.GD.g++.1 new file mode 100644 index 0000000000..bb9167f490 --- /dev/null +++ b/examples/USER/misc/flow_gauss/log.6Jul17.GD.g++.1 @@ -0,0 +1,909 @@ +LAMMPS (6 Jul 2017) + using 
1 OpenMP thread(s) per MPI task +#LAMMPS input script +#in.GD +#see README for details + +############################################################################### +#initialize variables +clear + using 1 OpenMP thread(s) per MPI task + +#frequency for outputting info (timesteps) +variable dump_rate equal 50 +variable thermo_rate equal 10 + +#equilibration time (timesteps) +variable equil equal 1000 + +#stabilization time (timesteps to reach steady-state) +variable stabil equal 1000 + +#data collection time (timesteps) +variable run equal 2000 + +#length of pipe +variable L equal 30 + +#width of pipe +variable d equal 20 + +#flux (mass/sigma*tau) +variable J equal 0.1 + +#simulation box dimensions +variable Lx equal 100 +variable Ly equal 40 + +#bulk fluid density +variable dens equal 0.8 + +#lattice spacing for wall atoms +variable aWall equal 1.0 #1.7472 + +#timestep +variable ts equal 0.001 + +#temperature +variable T equal 2.0 + +#thermostat damping constant +variable tdamp equal ${ts}*100 +variable tdamp equal 0.001*100 + +units lj +dimension 2 +atom_style atomic + + +############################################################################### +#create box + +#create lattice with the spacing aWall +variable rhoWall equal ${aWall}^(-2) +variable rhoWall equal 1^(-2) +lattice sq ${rhoWall} +lattice sq 1 +Lattice spacing in x,y,z = 1 1 1 + +#modify input dimensions to be multiples of aWall +variable L1 equal round($L/${aWall})*${aWall} +variable L1 equal round(30/${aWall})*${aWall} +variable L1 equal round(30/1)*${aWall} +variable L1 equal round(30/1)*1 +variable d1 equal round($d/${aWall})*${aWall} +variable d1 equal round(20/${aWall})*${aWall} +variable d1 equal round(20/1)*${aWall} +variable d1 equal round(20/1)*1 +variable Ly1 equal round(${Ly}/${aWall})*${aWall} +variable Ly1 equal round(40/${aWall})*${aWall} +variable Ly1 equal round(40/1)*${aWall} +variable Ly1 equal round(40/1)*1 +variable Lx1 equal round(${Lx}/${aWall})*${aWall} +variable Lx1 
equal round(100/${aWall})*${aWall} +variable Lx1 equal round(100/1)*${aWall} +variable Lx1 equal round(100/1)*1 + +#create simulation box +variable lx2 equal ${Lx1}/2 +variable lx2 equal 100/2 +variable ly2 equal ${Ly1}/2 +variable ly2 equal 40/2 +region simbox block -${lx2} ${lx2} -${ly2} ${ly2} 0 0.1 units box +region simbox block -50 ${lx2} -${ly2} ${ly2} 0 0.1 units box +region simbox block -50 50 -${ly2} ${ly2} 0 0.1 units box +region simbox block -50 50 -20 ${ly2} 0 0.1 units box +region simbox block -50 50 -20 20 0 0.1 units box +create_box 2 simbox +Created orthogonal box = (-50 -20 0) to (50 20 0.1) + 1 by 1 by 1 MPI processor grid + +##################################################################### +#set up potential + +mass 1 1.0 #fluid atoms +mass 2 1.0 #wall atoms + +pair_style lj/cut 2.5 +pair_modify shift yes +pair_coeff 1 1 1.0 1.0 2.5 +pair_coeff 1 2 1.0 1.0 1.12246 +pair_coeff 2 2 0.0 0.0 + +neigh_modify exclude type 2 2 + +timestep ${ts} +timestep 0.001 + +##################################################################### +#create atoms + +#create wall atoms everywhere +create_atoms 2 box +Created 4000 atoms + +#define region which is "walled off" +variable dhalf equal ${d1}/2 +variable dhalf equal 20/2 +variable Lhalf equal ${L1}/2 +variable Lhalf equal 30/2 +region walltop block -${Lhalf} ${Lhalf} ${dhalf} EDGE -0.1 0.1 units box +region walltop block -15 ${Lhalf} ${dhalf} EDGE -0.1 0.1 units box +region walltop block -15 15 ${dhalf} EDGE -0.1 0.1 units box +region walltop block -15 15 10 EDGE -0.1 0.1 units box +region wallbot block -${Lhalf} ${Lhalf} EDGE -${dhalf} -0.1 0.1 units box +region wallbot block -15 ${Lhalf} EDGE -${dhalf} -0.1 0.1 units box +region wallbot block -15 15 EDGE -${dhalf} -0.1 0.1 units box +region wallbot block -15 15 EDGE -10 -0.1 0.1 units box +region outsidewall union 2 walltop wallbot side out + +#remove wall atoms outside wall region +group outside region outsidewall +3349 atoms in group outside 
+delete_atoms group outside +Deleted 3349 atoms, new total = 651 + +#remove wall atoms that aren't on edge of wall region +variable x1 equal ${Lhalf}-${aWall} +variable x1 equal 15-${aWall} +variable x1 equal 15-1 +variable y1 equal ${dhalf}+${aWall} +variable y1 equal 10+${aWall} +variable y1 equal 10+1 +region insideTop block -${x1} ${x1} ${y1} EDGE -0.1 0.1 units box +region insideTop block -14 ${x1} ${y1} EDGE -0.1 0.1 units box +region insideTop block -14 14 ${y1} EDGE -0.1 0.1 units box +region insideTop block -14 14 11 EDGE -0.1 0.1 units box +region insideBot block -${x1} ${x1} EDGE -${y1} -0.1 0.1 units box +region insideBot block -14 ${x1} EDGE -${y1} -0.1 0.1 units box +region insideBot block -14 14 EDGE -${y1} -0.1 0.1 units box +region insideBot block -14 14 EDGE -11 -0.1 0.1 units box +region insideWall union 2 insideTop insideBot +group insideWall region insideWall +551 atoms in group insideWall +delete_atoms group insideWall +Deleted 551 atoms, new total = 100 + +#define new lattice, to give correct fluid density +#y lattice const must be a multiple of aWall +variable atrue equal ${dens}^(-1/2) +variable atrue equal 0.8^(-1/2) +variable ay equal round(${atrue}/${aWall})*${aWall} +variable ay equal round(1.11803398874989/${aWall})*${aWall} +variable ay equal round(1.11803398874989/1)*${aWall} +variable ay equal round(1.11803398874989/1)*1 + +#choose x lattice const to give correct density +variable ax equal (${ay}*${dens})^(-1) +variable ax equal (1*${dens})^(-1) +variable ax equal (1*0.8)^(-1) + +#change Lx to be multiple of ax +variable Lx1 equal round(${Lx}/${ax})*${ax} +variable Lx1 equal round(100/${ax})*${ax} +variable Lx1 equal round(100/1.25)*${ax} +variable Lx1 equal round(100/1.25)*1.25 +variable lx2 equal ${Lx1}/2 +variable lx2 equal 100/2 +change_box all x final -${lx2} ${lx2} units box +change_box all x final -50 ${lx2} units box +change_box all x final -50 50 units box + orthogonal box = (-50 -20 0) to (50 20 0.1) + +#define new lattice 
+lattice custom ${dens} a1 ${ax} 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +lattice custom 0.8 a1 ${ax} 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +lattice custom 0.8 a1 1.25 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +lattice custom 0.8 a1 1.25 0.0 0.0 a2 0.0 1 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +Lattice spacing in x,y,z = 1.25 1 1 + +#fill in rest of box with bulk particles +variable delta equal 0.001 +variable Ldelt equal ${Lhalf}+${delta} +variable Ldelt equal 15+${delta} +variable Ldelt equal 15+0.001 +variable dDelt equal ${dhalf}-${delta} +variable dDelt equal 10-${delta} +variable dDelt equal 10-0.001 +region left block EDGE -${Ldelt} EDGE EDGE -0.1 0.1 units box +region left block EDGE -15.001 EDGE EDGE -0.1 0.1 units box +region right block ${Ldelt} EDGE EDGE EDGE -0.1 0.1 units box +region right block 15.001 EDGE EDGE EDGE -0.1 0.1 units box +region pipe block -${Ldelt} ${Ldelt} -${dDelt} ${dDelt} -0.1 0.1 units box +region pipe block -15.001 ${Ldelt} -${dDelt} ${dDelt} -0.1 0.1 units box +region pipe block -15.001 15.001 -${dDelt} ${dDelt} -0.1 0.1 units box +region pipe block -15.001 15.001 -9.999 ${dDelt} -0.1 0.1 units box +region pipe block -15.001 15.001 -9.999 9.999 -0.1 0.1 units box + +region bulk union 3 left pipe right +create_atoms 1 region bulk +Created 2675 atoms + +group bulk type 1 +2675 atoms in group bulk +group wall type 2 +100 atoms in group wall + +#remove atoms that are too close to wall +delete_atoms overlap 0.9 bulk wall +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 72 29 1 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) command delete_atoms, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/2d + bin: standard + (2) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/2d/newton + bin: standard +Deleted 0 atoms, new total = 2775 + +neighbor 0.3 bin +neigh_modify delay 0 every 1 check yes +neigh_modify exclude group wall wall + +velocity bulk create $T 78915 dist gaussian rot yes mom yes loop geom +velocity bulk create 2 78915 dist gaussian rot yes mom yes loop geom + +##################################################################### +#set up PUT +#see Evans and Morriss, Phys. Rev. Lett. 56(20) 1986, p. 2172 + +#average number of particles per box, Evans and Morriss used 2.0 +variable NperBox equal 8.0 + +#calculate box sizes +variable boxSide equal sqrt(${NperBox}/${dens}) +variable boxSide equal sqrt(8/${dens}) +variable boxSide equal sqrt(8/0.8) +variable nX equal round(lx/${boxSide}) +variable nX equal round(lx/3.16227766016838) +variable nY equal round(ly/${boxSide}) +variable nY equal round(ly/3.16227766016838) +variable dX equal lx/${nX} +variable dX equal lx/32 +variable dY equal ly/${nY} +variable dY equal ly/13 + +#temperature of fluid (excluding wall) +compute myT bulk temp + +#profile-unbiased temperature of fluid +compute myTp bulk temp/profile 1 1 0 xy ${nX} ${nY} +compute myTp bulk temp/profile 1 1 0 xy 32 ${nY} +compute myTp bulk temp/profile 1 1 0 xy 32 13 + +#thermo setup +thermo ${thermo_rate} +thermo 10 +thermo_style custom step c_myT c_myTp etotal press + +#dump initial configuration +# dump 55 all custom 1 all.init.lammpstrj id type x y z vx vy vz +# dump 56 wall custom 1 wall.init.lammpstrj id type x y z +# 
dump_modify 55 sort id +# dump_modify 56 sort id +run 0 +WARNING: No fixes defined, atoms won't move (../verlet.cpp:55) +Neighbor list info ... + update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 72 29 1 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/2d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.103 | 3.103 | 3.103 Mbytes +Step c_myT c_myTp TotEng Press + 0 2 2.0555109 0.77892922 7.3417096 +Loop time of 9.53674e-07 on 1 procs for 0 steps with 2775 atoms + +314.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0 | 0 | 0 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0 | 0 | 0 | 0.0 | 0.00 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 9.537e-07 | | |100.00 + +Nlocal: 2775 ave 2775 max 2775 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 510 ave 510 max 510 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 26406 ave 26406 max 26406 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 26406 +Ave neighs/atom = 9.51568 +Neighbor list builds = 0 +Dangerous builds = 0 +# undump 55 +# undump 56 + +##################################################################### +#equilibrate without GD + +fix nvt bulk nvt temp $T $T ${tdamp} +fix nvt bulk nvt temp 2 $T ${tdamp} +fix nvt bulk nvt temp 2 2 ${tdamp} +fix nvt bulk nvt temp 2 2 0.1 +fix_modify nvt temp myTp +WARNING: Temperature for fix modify is not for group all (../fix_nh.cpp:1395) +fix 2 bulk enforce2d + +run ${equil} +run 1000 +Per MPI rank memory allocation (min/avg/max) = 3.166 | 3.166 | 3.166 Mbytes +Step c_myT c_myTp TotEng Press + 0 2 
2.0555109 0.77892922 7.3417096 + 10 1.9173594 1.9390034 0.77876976 7.6702228 + 20 1.7033394 1.6974676 0.77977799 8.5614784 + 30 1.5026161 1.4723993 0.78456655 9.4308258 + 40 1.4880481 1.4591602 0.79486693 9.6134304 + 50 1.6192437 1.6150635 0.81109069 9.2592835 + 60 1.7404087 1.7583444 0.82955456 8.952392 + 70 1.7757591 1.8006606 0.8452778 8.9717917 + 80 1.7573847 1.7813629 0.85769389 9.1936368 + 90 1.7491183 1.7726908 0.86882429 9.3712357 + 100 1.7798944 1.8079583 0.88029084 9.3871755 + 110 1.8440582 1.8793133 0.89259397 9.2582848 + 120 1.9191606 1.9673434 0.90533438 9.0680574 + 130 1.9883299 2.0484299 0.91755461 8.88117 + 140 2.0463366 2.1111872 0.92818114 8.7184178 + 150 2.0953769 2.167849 0.93639789 8.5713408 + 160 2.1442147 2.2216228 0.94145082 8.4082835 + 170 2.1797848 2.2631458 0.94246877 8.2767903 + 180 2.1863476 2.2700986 0.93873326 8.2311689 + 190 2.1832866 2.2710551 0.93003012 8.1959062 + 200 2.1937154 2.2868403 0.91642537 8.0842007 + 210 2.2022708 2.2915142 0.89824533 7.9575312 + 220 2.1884715 2.2770564 0.87677613 7.9000591 + 230 2.1671124 2.2496063 0.85409501 7.8673156 + 240 2.1560417 2.2379998 0.83167878 7.8003228 + 250 2.1421449 2.2240624 0.81004723 7.7491508 + 260 2.1172164 2.1971044 0.78931978 7.7457415 + 270 2.0856847 2.1672998 0.76956352 7.7719788 + 280 2.0670685 2.1449303 0.75073364 7.7524614 + 290 2.0639481 2.1428374 0.73258016 7.6727716 + 300 2.055776 2.1361719 0.7147669 7.6095248 + 310 2.038425 2.1209353 0.69722853 7.5797085 + 320 2.0203023 2.1066031 0.68006634 7.5521081 + 330 2.0118478 2.1039797 0.66330302 7.4877535 + 340 2.0159442 2.1096258 0.64673694 7.3761703 + 350 2.0166408 2.1075061 0.63020017 7.2788 + 360 2.0059407 2.0806316 0.61387618 7.2263941 + 370 1.9964281 2.0642074 0.59814148 7.1728041 + 380 1.9918446 2.0567527 0.58303017 7.101597 + 390 1.992835 2.0548138 0.56852431 7.0084774 + 400 2.0012934 2.0615016 0.55438401 6.8865948 + 410 2.0084291 2.073418 0.54034073 6.7697478 + 420 2.007464 2.0786717 0.52617041 6.6849032 + 430 1.9983712 
2.0704366 0.51188183 6.6323103 + 440 1.9884651 2.0588515 0.49765394 6.5868356 + 450 1.982221 2.0467396 0.4837102 6.5311681 + 460 1.9738673 2.031238 0.47021649 6.4882783 + 470 1.9574246 2.0060447 0.45740021 6.4814923 + 480 1.9361065 1.9734507 0.44557947 6.4995199 + 490 1.9251024 1.9562469 0.43506067 6.4858343 + 500 1.9279545 1.9572145 0.42577835 6.4274765 + 510 1.9267504 1.9570246 0.41755013 6.3927027 + 520 1.9093405 1.9393872 0.41031829 6.4281888 + 530 1.8820555 1.9060756 0.40432569 6.5099401 + 540 1.86537 1.8912682 0.3999087 6.55843 + 550 1.8694252 1.9043192 0.39717519 6.5337875 + 560 1.8835224 1.9294105 0.39589322 6.4760141 + 570 1.8898719 1.9462433 0.39573596 6.4520041 + 580 1.8887698 1.9472764 0.39649878 6.4602989 + 590 1.8945125 1.9550624 0.39810844 6.4470226 + 600 1.9106571 1.9735939 0.40045321 6.3971026 + 610 1.9273243 1.98509 0.40330026 6.3474421 + 620 1.9351802 1.9888986 0.4064498 6.3340566 + 630 1.9337889 1.9846794 0.40981479 6.3610556 + 640 1.9257018 1.9757153 0.4134641 6.4184721 + 650 1.9204429 1.9718256 0.41750942 6.4679594 + 660 1.9220449 1.9701963 0.42202455 6.4919724 + 670 1.9230578 1.9707406 0.4270412 6.5178484 + 680 1.9204554 1.9740485 0.43255127 6.5572507 + 690 1.9201811 1.9762854 0.43847123 6.5869126 + 700 1.9271511 1.9867455 0.44474356 6.5882669 + 710 1.9418851 2.0042477 0.45120727 6.558573 + 720 1.9544547 2.0186724 0.4576061 6.5338329 + 730 1.9687971 2.0326169 0.46367507 6.4988775 + 740 1.9830308 2.0466267 0.46920367 6.4618136 + 750 1.9936981 2.0526606 0.47397868 6.4367349 + 760 2.0008431 2.0535449 0.47786748 6.4249001 + 770 1.9982133 2.0483219 0.48085757 6.4504786 + 780 1.9841544 2.0311693 0.48306488 6.5200512 + 790 1.9683122 2.0158738 0.48475632 6.5959263 + 800 1.9604618 2.003224 0.48619405 6.6392559 + 810 1.9629155 2.0075077 0.48756075 6.6406486 + 820 1.9683056 2.0110554 0.48883443 6.6269424 + 830 1.975409 2.0189161 0.48995399 6.6030215 + 840 1.9897264 2.035016 0.4907852 6.5485575 + 850 2.0094338 2.0555358 0.49104505 6.4719926 + 860 
2.0217589 2.0643603 0.49040437 6.4233305 + 870 2.0147718 2.0641627 0.48866908 6.4491964 + 880 1.9883859 2.0324092 0.48592007 6.5488061 + 890 1.9625853 2.0028776 0.48263002 6.6452734 + 900 1.9520401 1.9889124 0.47925524 6.6808078 + 910 1.9559583 1.9952984 0.47597346 6.6573059 + 920 1.9657244 2.0083503 0.47268726 6.6073704 + 930 1.969288 2.0152339 0.4692054 6.5780416 + 940 1.9652206 2.0116384 0.4654438 6.5769812 + 950 1.9567495 1.9960693 0.46147541 6.5942022 + 960 1.9418452 1.980858 0.45753557 6.6369454 + 970 1.9247196 1.9585585 0.45390337 6.6888821 + 980 1.9128262 1.9481721 0.45090045 6.7198221 + 990 1.9167211 1.9451096 0.44869731 6.6912394 + 1000 1.935529 1.9662384 0.44728238 6.6079829 +Loop time of 1.307 on 1 procs for 1000 steps with 2775 atoms + +Performance: 66105.601 tau/day, 765.111 timesteps/s +98.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.7676 | 0.7676 | 0.7676 | 0.0 | 58.73 +Neigh | 0.088947 | 0.088947 | 0.088947 | 0.0 | 6.81 +Comm | 0.0094135 | 0.0094135 | 0.0094135 | 0.0 | 0.72 +Output | 0.019547 | 0.019547 | 0.019547 | 0.0 | 1.50 +Modify | 0.39755 | 0.39755 | 0.39755 | 0.0 | 30.42 +Other | | 0.02394 | | | 1.83 + +Nlocal: 2775 ave 2775 max 2775 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 527 ave 527 max 527 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 24332 ave 24332 max 24332 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 24332 +Ave neighs/atom = 8.76829 +Neighbor list builds = 38 +Dangerous builds = 0 + +##################################################################### +#initialize the COM velocity and run to achieve steady-state + +#calculate velocity to add: V=J/rho_total +variable Vadd equal $J*lx*ly/count(bulk) +variable Vadd equal 0.1*lx*ly/count(bulk) + +#first remove any COM velocity, then add back the streaming velocity +velocity bulk zero linear +velocity 
bulk set ${Vadd} 0.0 0.0 units box sum yes mom no +velocity bulk set 0.149532710280374 0.0 0.0 units box sum yes mom no + +fix GD bulk flow/gauss 1 0 0 #energy yes +#fix_modify GD energy yes + +run ${stabil} +run 1000 +Per MPI rank memory allocation (min/avg/max) = 3.166 | 3.166 | 3.166 Mbytes +Step c_myT c_myTp TotEng Press + 1000 1.9466974 1.9662384 0.45804438 6.615449 + 1010 1.9605467 1.9815754 0.45717241 6.5545496 + 1020 1.9560139 1.9823875 0.45660431 6.5672421 + 1030 1.9348326 1.9691606 0.45633148 6.6463667 + 1040 1.9167809 1.9449522 0.45657707 6.7139486 + 1050 1.9193541 1.943342 0.45767968 6.7014054 + 1060 1.9410751 1.9720491 0.45967742 6.6150379 + 1070 1.9658493 1.9964883 0.46221539 6.5178418 + 1080 1.9767205 2.0074304 0.46491236 6.4768594 + 1090 1.9714544 2.0003054 0.46759126 6.5026957 + 1100 1.9647035 1.9927455 0.4703109 6.5400181 + 1110 1.9657667 1.9959656 0.47317481 6.5519094 + 1120 1.9706062 1.9980802 0.476185 6.5512675 + 1130 1.9747655 2.0062292 0.47932281 6.554091 + 1140 1.9761245 2.0075076 0.48248327 6.5670381 + 1150 1.9744197 2.0073027 0.48562483 6.5914441 + 1160 1.9722698 2.0046687 0.48874207 6.6165575 + 1170 1.9692145 2.0013845 0.49187442 6.6438115 + 1180 1.9665609 1.9970724 0.49508053 6.6693821 + 1190 1.9625031 1.9908427 0.49843816 6.7002606 + 1200 1.960528 1.993084 0.50203044 6.7237076 + 1210 1.9649156 1.9981485 0.50587066 6.7217755 + 1220 1.9788059 2.0134511 0.50987442 6.6833452 + 1230 1.9952283 2.0343101 0.51379781 6.6340278 + 1240 2.0039391 2.0494196 0.51730872 6.6129751 + 1250 2.0019006 2.0526773 0.52014603 6.6320217 + 1260 1.9974025 2.0528914 0.52221385 6.6601786 + 1270 1.9953949 2.0561121 0.5234754 6.6796142 + 1280 1.9893864 2.0470375 0.5238632 6.7140134 + 1290 1.9694951 2.019253 0.5235093 6.798442 + 1300 1.9473901 1.9965919 0.52280384 6.8863369 + 1310 1.9511151 2.006161 0.52203882 6.8700917 + 1320 1.979341 2.0388959 0.52106938 6.7529595 + 1330 2.0073235 2.0720045 0.51935291 6.6297731 + 1340 2.0202482 2.0841419 0.51624273 6.55803 + 1350 
2.0177489 2.0669046 0.51142591 6.5401753 + 1360 2.0069274 2.04717 0.50505824 6.5506533 + 1370 1.994854 2.0311383 0.49743042 6.5633001 + 1380 1.9793176 2.0077184 0.48890503 6.5859072 + 1390 1.9580907 1.9839831 0.48004316 6.6288992 + 1400 1.9415542 1.9594192 0.47143599 6.6534105 + 1410 1.9405188 1.9591825 0.46353105 6.620549 + 1420 1.9504784 1.9730647 0.45640199 6.5471784 + 1430 1.9594158 1.9819854 0.44995052 6.4802874 + 1440 1.9615108 1.9863792 0.44406411 6.44391 + 1450 1.9544127 1.9806249 0.43873409 6.4484818 + 1460 1.9384927 1.9614953 0.43408605 6.4905259 + 1470 1.9214711 1.9425515 0.43035972 6.5390434 + 1480 1.9170761 1.9300809 0.42775046 6.5409502 + 1490 1.9242904 1.9385731 0.42631007 6.5005057 + 1500 1.9307133 1.9446119 0.4258836 6.4660754 + 1510 1.9303576 1.9435389 0.42633976 6.4616415 + 1520 1.9248382 1.9408306 0.42765441 6.4832059 + 1530 1.9120794 1.9278123 0.42986958 6.5380951 + 1540 1.899122 1.9125029 0.4331459 6.5987181 + 1550 1.9030956 1.9187821 0.43765067 6.6012019 + 1560 1.9182961 1.9453782 0.44330842 6.5674222 + 1570 1.9272863 1.9613129 0.44971962 6.5619794 + 1580 1.931679 1.9698134 0.45643436 6.5780809 + 1590 1.9336692 1.9728684 0.46314752 6.6035675 + 1600 1.938895 1.9823104 0.46964519 6.6138411 + 1610 1.9510838 1.9937914 0.47568807 6.5916989 + 1620 1.9685387 2.0087314 0.48102339 6.5424432 + 1630 1.9894416 2.0295715 0.48539861 6.4757743 + 1640 1.9982699 2.0426949 0.48860411 6.4512418 + 1650 1.9901677 2.0363837 0.49062424 6.4879985 + 1660 1.9814216 2.0291326 0.49172203 6.5248034 + 1670 1.9812111 2.0293629 0.49218297 6.5253876 + 1680 1.9903906 2.0408376 0.49211747 6.4852787 + 1690 2.0015983 2.0538843 0.4914581 6.4325081 + 1700 2.009727 2.0503407 0.49011163 6.3878577 + 1710 2.0167822 2.0531002 0.4881688 6.3477054 + 1720 2.0189021 2.0445033 0.48564798 6.3273063 + 1730 2.0129713 2.0354734 0.48270666 6.3385541 + 1740 2.0048763 2.0199836 0.47950943 6.3587586 + 1750 1.9994843 2.0085942 0.47624908 6.3694119 + 1760 1.9940025 2.0072098 0.47305283 6.3816295 + 
1770 1.9817431 1.9974066 0.46994486 6.4224295 + 1780 1.965171 1.9805421 0.4670779 6.4832371 + 1790 1.9474078 1.9662605 0.46466823 6.5516524 + 1800 1.9286009 1.9507751 0.46292015 6.6263366 + 1810 1.9168087 1.9437961 0.46199899 6.6759834 + 1820 1.9107555 1.9306323 0.46204129 6.7029857 + 1830 1.9135569 1.930819 0.46316484 6.6949737 + 1840 1.9345342 1.9553413 0.46532704 6.6178988 + 1850 1.9630349 1.9929548 0.46822932 6.5137866 + 1860 1.9820746 2.0188839 0.47135068 6.4489028 + 1870 1.9834959 2.0217145 0.47427805 6.4552721 + 1880 1.9731564 2.0120293 0.47692755 6.5100251 + 1890 1.9653605 2.0070624 0.47943307 6.5594235 + 1900 1.9630631 2.0095488 0.48192185 6.5912876 + 1910 1.9556778 2.0035006 0.48443107 6.6437189 + 1920 1.9408788 1.9828296 0.48710124 6.7228731 + 1930 1.9292393 1.9732376 0.49025327 6.7880112 + 1940 1.9263081 1.9708942 0.49416086 6.8162477 + 1950 1.9358375 1.976323 0.49899895 6.7946964 + 1960 1.9520543 1.9936542 0.50485961 6.7467481 + 1970 1.9709064 2.0108957 0.51165586 6.6909455 + 1980 1.9940026 2.0375428 0.51918913 6.6250463 + 1990 2.0171261 2.0646948 0.52705638 6.5649879 + 2000 2.0302713 2.0802515 0.53472229 6.5470853 +Loop time of 1.34877 on 1 procs for 1000 steps with 2775 atoms + +Performance: 64058.154 tau/day, 741.414 timesteps/s +98.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.77091 | 0.77091 | 0.77091 | 0.0 | 57.16 +Neigh | 0.085835 | 0.085835 | 0.085835 | 0.0 | 6.36 +Comm | 0.0093472 | 0.0093472 | 0.0093472 | 0.0 | 0.69 +Output | 0.019047 | 0.019047 | 0.019047 | 0.0 | 1.41 +Modify | 0.43949 | 0.43949 | 0.43949 | 0.0 | 32.58 +Other | | 0.02415 | | | 1.79 + +Nlocal: 2775 ave 2775 max 2775 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 530 ave 530 max 530 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 24404 ave 24404 max 24404 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 
24404 +Ave neighs/atom = 8.79423 +Neighbor list builds = 36 +Dangerous builds = 0 + +##################################################################### +#collect data + +#print the applied force and total flux to ensure conservation of Jx +variable Fapp equal f_GD[1] +compute vxBulk bulk reduce sum vx +compute vyBulk bulk reduce sum vy +variable invVol equal 1.0/(lx*ly) +variable jx equal c_vxBulk*${invVol} +variable jx equal c_vxBulk*0.00025 +variable jy equal c_vyBulk*${invVol} +variable jy equal c_vyBulk*0.00025 +variable curr_step equal step +variable p_Fapp format Fapp %.3f +variable p_jx format jx %.5g +variable p_jy format jy %.5g +fix print_vCOM all print ${dump_rate} "${curr_step} ${p_Fapp} ${p_jx} ${p_jy}" file GD.out screen no title "timestep Fapp Jx Jy" +fix print_vCOM all print 50 "${curr_step} ${p_Fapp} ${p_jx} ${p_jy}" file GD.out screen no title "timestep Fapp Jx Jy" + +#compute IK1 pressure profile +#see Todd, Evans, and Davis, Phys. Rev. E 52(2) 1995, p. 1627 +#use profile-unbiased temperature to remove the streaming velocity +#from the kinetic part of the pressure +compute spa bulk stress/atom myTp + +#for the pressure profile, use the same grid as the PUT +compute chunkX bulk chunk/atom bin/1d x lower ${dX} units box +compute chunkX bulk chunk/atom bin/1d x lower 3.125 units box + +#output pressure profile and other profiles +#the pressure profile is (-1/2V)*(c_spa[1] + c_spa[2]), where +#V is the volume of a slice +fix profiles bulk ave/chunk 1 1 ${dump_rate} chunkX vx density/mass c_spa[1] c_spa[2] file x_profiles ave running overwrite +fix profiles bulk ave/chunk 1 1 50 chunkX vx density/mass c_spa[1] c_spa[2] file x_profiles ave running overwrite + +#compute velocity profile across the pipe with a finer grid +variable dYnew equal ${dY}/10 +variable dYnew equal 3.07692307692308/10 +compute chunkY bulk chunk/atom bin/1d y center ${dYnew} units box region pipe +compute chunkY bulk chunk/atom bin/1d y center 0.307692307692308 units box region 
pipe +fix velYprof bulk ave/chunk 1 1 ${dump_rate} chunkY vx file Vy_profile ave running overwrite +fix velYprof bulk ave/chunk 1 1 50 chunkY vx file Vy_profile ave running overwrite + +#full trajectory +# dump 7 bulk custom ${dump_rate} bulk.lammpstrj id type x y z +# dump_modify 7 sort id + +run ${run} +run 2000 +Per MPI rank memory allocation (min/avg/max) = 5.174 | 5.174 | 5.174 Mbytes +Step c_myT c_myTp TotEng Press + 2000 2.0302713 2.0802515 0.53472229 6.5470853 + 2010 2.0303419 2.0806129 0.54177821 6.5808527 + 2020 2.0245167 2.0792991 0.54803523 6.6381758 + 2030 2.0169072 2.065404 0.55345227 6.7008962 + 2040 2.0052526 2.0513817 0.55818432 6.7755868 + 2050 1.9953625 2.0366564 0.56245299 6.8382569 + 2060 2.0003667 2.0462109 0.56649798 6.8390557 + 2070 2.0238288 2.0834553 0.57023651 6.7637821 + 2080 2.045765 2.1173867 0.5730944 6.6861321 + 2090 2.0563925 2.1370313 0.57430831 6.6422581 + 2100 2.0620437 2.1480293 0.57319824 6.6080678 + 2110 2.0584437 2.1473173 0.56913597 6.5969671 + 2120 2.0532825 2.1393006 0.56154606 6.5799417 + 2130 2.0450143 2.1234905 0.55009479 6.5616931 + 2140 2.0229537 2.1004507 0.53511912 6.5854627 + 2150 1.9832556 2.0554119 0.51812599 6.6700591 + 2160 1.9444027 2.0110758 0.50163049 6.7534263 + 2170 1.9267473 1.9904528 0.48759542 6.76469 + 2180 1.9262232 1.9809353 0.47662199 6.7188048 + 2190 1.9359331 1.9854626 0.46836289 6.6406985 + 2200 1.9530728 1.9971865 0.4620366 6.5409943 + 2210 1.9657099 2.0056761 0.45692542 6.4639397 + 2220 1.9661008 2.0046161 0.45253504 6.4388081 + 2230 1.9574696 1.9947839 0.44864257 6.4528687 + 2240 1.9522284 1.9922663 0.44518111 6.4584458 + 2250 1.9518203 1.9950044 0.44206844 6.4491722 + 2260 1.9527908 1.9989603 0.4391804 6.4377912 + 2270 1.9452231 1.9932538 0.43643529 6.4607516 + 2280 1.9249341 1.9759145 0.43392742 6.5320897 + 2290 1.9087464 1.960985 0.43186869 6.5875176 + 2300 1.9103289 1.964731 0.43039882 6.5765021 + 2310 1.9182062 1.9783814 0.4294628 6.5434488 + 2320 1.9204281 1.9796609 0.42889381 6.5351629 
+ 2330 1.916279 1.9720659 0.42866391 6.5562619 + 2340 1.9062866 1.9587628 0.42890166 6.6033936 + 2350 1.9024117 1.9566812 0.42979475 6.6297969 + 2360 1.908153 1.960687 0.43141898 6.6215148 + 2370 1.9115944 1.9663337 0.43376668 6.6236491 + 2380 1.9086193 1.9637867 0.4367911 6.6529568 + 2390 1.9039907 1.9610268 0.44053991 6.6926343 + 2400 1.9034944 1.9609406 0.44508818 6.7193441 + 2410 1.9151521 1.9753641 0.4504458 6.7015957 + 2420 1.9314517 1.9925924 0.45644382 6.6669864 + 2430 1.9433933 2.0062001 0.46277215 6.6481527 + 2440 1.9504631 2.0087015 0.46917209 6.6475757 + 2450 1.9550092 2.0094957 0.47550077 6.6556459 + 2460 1.9609689 2.0147997 0.48170141 6.6568282 + 2470 1.9730726 2.0328127 0.48763131 6.6337545 + 2480 1.9838562 2.0466643 0.49303443 6.6143423 + 2490 1.9862031 2.0473388 0.49767532 6.6245587 + 2500 1.9817565 2.0455432 0.50152131 6.6573893 + 2510 1.9785788 2.0423176 0.50460561 6.6808042 + 2520 1.9823006 2.0505106 0.50696374 6.6726698 + 2530 1.9907178 2.0553736 0.50852885 6.6402082 + 2540 2.0005205 2.0690408 0.50919421 6.5966469 + 2550 2.0079727 2.0809816 0.50872954 6.5568419 + 2560 2.0133128 2.096271 0.50682742 6.5199915 + 2570 2.0141298 2.0990846 0.50314491 6.4951991 + 2580 2.0048768 2.0874319 0.49750096 6.5025454 + 2590 1.9876498 2.0638834 0.4900201 6.5333038 + 2600 1.9720479 2.0474479 0.48105263 6.5527157 + 2610 1.9596324 2.0355764 0.4710001 6.5547867 + 2620 1.9439039 2.0106405 0.46046644 6.5646889 + 2630 1.9321714 1.9924346 0.45021207 6.5589454 + 2640 1.9349378 1.9923889 0.44082833 6.5012762 + 2650 1.9448459 2.0069955 0.43251999 6.4228945 + 2660 1.9446852 2.0050346 0.42525857 6.3921645 + 2670 1.9325594 1.9884937 0.41913362 6.4169726 + 2680 1.9121687 1.9606084 0.41434428 6.4821267 + 2690 1.8923613 1.9339385 0.41105831 6.5517615 + 2700 1.8807238 1.9191801 0.40933203 6.5949447 + 2710 1.8797367 1.918758 0.40906826 6.6001309 + 2720 1.8852961 1.9225996 0.41005611 6.58191 + 2730 1.8937478 1.9357751 0.41204348 6.5541946 + 2740 1.9019279 1.9449374 0.41476104 
6.5278575 + 2750 1.9134396 1.9614415 0.41800066 6.4890769 + 2760 1.9339551 1.9913779 0.42150554 6.4159805 + 2770 1.9597826 2.0220988 0.42487614 6.3232273 + 2780 1.9753466 2.0414907 0.42771704 6.2715489 + 2790 1.9720423 2.0402016 0.42976012 6.2949288 + 2800 1.9512893 2.0172711 0.43109201 6.3878056 + 2810 1.9232302 1.9870212 0.4320928 6.5101822 + 2820 1.9026913 1.959286 0.43326424 6.6024967 + 2830 1.9033802 1.9621601 0.43500785 6.6114274 + 2840 1.9214292 1.9833838 0.43733454 6.5508757 + 2850 1.9440563 2.0087358 0.43995473 6.4713496 + 2860 1.9589136 2.0211107 0.44250821 6.4232961 + 2870 1.9588429 2.022232 0.44477492 6.4355861 + 2880 1.9456751 2.0009513 0.44676532 6.5021746 + 2890 1.9269155 1.9782929 0.44877858 6.5926531 + 2900 1.9125262 1.9554653 0.45121196 6.6657808 + 2910 1.9187855 1.9572583 0.45438665 6.6589954 + 2920 1.9416112 1.9784518 0.45839212 6.5888253 + 2930 1.9613579 1.9975032 0.46305788 6.5317424 + 2940 1.9711529 2.0102501 0.46812715 6.5148943 + 2950 1.9707865 2.0133283 0.47345305 6.5389543 + 2960 1.9732526 2.0170219 0.47898306 6.5537092 + 2970 1.9871126 2.0282309 0.48465048 6.5273492 + 2980 1.9953449 2.0404164 0.49032615 6.5227325 + 2990 1.9909136 2.037246 0.49581423 6.5664662 + 3000 1.9872474 2.0307896 0.5011051 6.6060698 + 3010 1.9944885 2.0457308 0.5062755 6.6031811 + 3020 2.0103461 2.0599491 0.51116655 6.5654871 + 3030 2.0240275 2.077342 0.5154921 6.5358852 + 3040 2.0205953 2.0704954 0.51898871 6.5708937 + 3050 2.0032184 2.0463036 0.52167438 6.657741 + 3060 1.9889341 2.0265284 0.52385964 6.7329171 + 3070 1.9795143 2.0201081 0.52588914 6.7881407 + 3080 1.9713362 2.0123964 0.52797238 6.8362858 + 3090 1.9692592 2.0106467 0.53025538 6.8616268 + 3100 1.9722487 2.0259566 0.53277635 6.8689898 + 3110 1.9703322 2.0314028 0.53541462 6.895271 + 3120 1.9594359 2.0217586 0.53808512 6.954362 + 3130 1.9524729 2.0148628 0.5409094 6.9965233 + 3140 1.9630381 2.0260807 0.54400259 6.968082 + 3150 1.9902598 2.0549364 0.54720142 6.8698796 + 3160 2.029715 2.0923999 
0.54995378 6.7193678 + 3170 2.0581544 2.1137995 0.55150021 6.6053728 + 3180 2.0590739 2.1156535 0.55123668 6.5919337 + 3190 2.0400682 2.0904721 0.54894762 6.6505757 + 3200 2.0211594 2.0682597 0.54484887 6.7046468 + 3210 2.012712 2.0573114 0.53922056 6.7130909 + 3220 2.0102377 2.0554701 0.53219251 6.6919068 + 3230 2.0017671 2.0505068 0.52386898 6.6867054 + 3240 1.9854941 2.0308454 0.51458791 6.7051085 + 3250 1.9767009 2.0187664 0.50486784 6.6916859 + 3260 1.9771733 2.0186148 0.49510721 6.6424305 + 3270 1.974003 2.0136039 0.48556818 6.6078903 + 3280 1.9627665 1.9989122 0.47654147 6.6067904 + 3290 1.9491247 1.9826247 0.46834865 6.6186709 + 3300 1.9414093 1.9724941 0.4612122 6.6119543 + 3310 1.9433901 1.9715482 0.45518879 6.570612 + 3320 1.9518837 1.9872717 0.45010165 6.5057947 + 3330 1.9603874 1.9957995 0.44566728 6.4428221 + 3340 1.9615962 1.9945224 0.44167201 6.4099339 + 3350 1.955918 1.9882866 0.4380303 6.4070811 + 3360 1.9463445 1.9763654 0.43480086 6.4241178 + 3370 1.9411187 1.9683081 0.4320639 6.4296577 + 3380 1.9407224 1.9580074 0.42991627 6.4210217 + 3390 1.9402479 1.9530447 0.42850635 6.4170536 + 3400 1.9451337 1.9555771 0.42787382 6.3990336 + 3410 1.9475586 1.9612432 0.42797178 6.3953251 + 3420 1.9434927 1.960532 0.4286887 6.4210681 + 3430 1.9339054 1.9516935 0.43003682 6.4707071 + 3440 1.9234014 1.9464343 0.43214965 6.5248205 + 3450 1.9191846 1.9444777 0.43516361 6.5558451 + 3460 1.923218 1.9594606 0.43915611 6.5549213 + 3470 1.9328953 1.9792053 0.44397878 6.5327637 + 3480 1.9466227 1.9997841 0.44940599 6.4954965 + 3490 1.9672374 2.0323219 0.45511091 6.4358811 + 3500 1.9799622 2.0479841 0.46061029 6.4100217 + 3510 1.97942 2.0493411 0.46551964 6.4368108 + 3520 1.9725674 2.0389602 0.46976379 6.4892049 + 3530 1.9716429 2.0389798 0.47344292 6.5200899 + 3540 1.9789254 2.0486162 0.47659268 6.5198212 + 3550 1.9872455 2.0577517 0.47908145 6.5144586 + 3560 1.9808834 2.0545963 0.48076562 6.5633282 + 3570 1.9637165 2.0335394 0.4816783 6.6519124 + 3580 1.9407948 
2.0067763 0.48212406 6.7605224 + 3590 1.9226532 1.9825887 0.482523 6.8486041 + 3600 1.9135067 1.9700999 0.48328349 6.8977859 + 3610 1.9157516 1.9720028 0.48470695 6.8977759 + 3620 1.9328644 2.0001154 0.48688778 6.8361569 + 3630 1.9568208 2.0243053 0.48963934 6.7442107 + 3640 1.9824587 2.0569223 0.49259174 6.6452535 + 3650 1.9934906 2.0686357 0.49529039 6.6020218 + 3660 1.9996281 2.0747054 0.49732231 6.5808905 + 3670 2.0038801 2.0772777 0.49838834 6.5691351 + 3680 1.9941342 2.0712365 0.49826732 6.6088108 + 3690 1.9762631 2.0486045 0.49689109 6.6739003 + 3700 1.9667284 2.034939 0.49438991 6.7010266 + 3710 1.9615089 2.0168112 0.49093736 6.7040385 + 3720 1.9613068 2.014749 0.48673789 6.6813041 + 3730 1.9731234 2.0290151 0.48175562 6.6096756 + 3740 1.9829764 2.0461907 0.47575174 6.5424752 + 3750 1.9792839 2.0454423 0.4685271 6.5237752 + 3760 1.9599692 2.0287015 0.46022485 6.5616271 + 3770 1.935975 2.0000948 0.45138017 6.6136471 + 3780 1.9236713 1.9834802 0.44262437 6.6187463 + 3790 1.9268004 1.9875324 0.43430113 6.5632772 + 3800 1.932601 1.9872595 0.42649564 6.4984765 + 3810 1.9322506 1.9814946 0.41928856 6.4617054 + 3820 1.9245737 1.9712821 0.4128224 6.461378 + 3830 1.9148568 1.9555602 0.40721003 6.4774474 + 3840 1.9049961 1.9457058 0.4026118 6.5029211 + 3850 1.8915137 1.9265199 0.39914962 6.5483592 + 3860 1.8784768 1.9058055 0.39700153 6.5962113 + 3870 1.8755236 1.9045158 0.39632769 6.6079033 + 3880 1.8841415 1.9140314 0.39710038 6.5777071 + 3890 1.8958027 1.9331148 0.39918951 6.5359786 + 3900 1.9064085 1.948805 0.40238576 6.4998591 + 3910 1.9185092 1.9675732 0.40647523 6.4610682 + 3920 1.9342595 1.9933225 0.41115392 6.4122308 + 3930 1.9482664 2.007614 0.41603495 6.373684 + 3940 1.9557759 2.0161573 0.42084462 6.3636707 + 3950 1.9573687 2.016612 0.42540421 6.3804123 + 3960 1.9486354 1.9998027 0.42974612 6.4404943 + 3970 1.936214 1.980721 0.43412037 6.5176787 + 3980 1.9274292 1.9595259 0.43885103 6.5846211 + 3990 1.9233082 1.953436 0.44425085 6.6354275 + 4000 1.9289165 
1.9522097 0.45042645 6.6513836 +Loop time of 2.49114 on 1 procs for 2000 steps with 2775 atoms + +Performance: 69365.902 tau/day, 802.846 timesteps/s +98.9% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.4257 | 1.4257 | 1.4257 | 0.0 | 57.23 +Neigh | 0.15501 | 0.15501 | 0.15501 | 0.0 | 6.22 +Comm | 0.017206 | 0.017206 | 0.017206 | 0.0 | 0.69 +Output | 0.034183 | 0.034183 | 0.034183 | 0.0 | 1.37 +Modify | 0.81531 | 0.81531 | 0.81531 | 0.0 | 32.73 +Other | | 0.04374 | | | 1.76 + +Nlocal: 2775 ave 2775 max 2775 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 517 ave 517 max 517 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 24366 ave 24366 max 24366 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 24366 +Ave neighs/atom = 8.78054 +Neighbor list builds = 72 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:05 diff --git a/examples/USER/misc/flow_gauss/log.6Jul17.GD.g++.4 b/examples/USER/misc/flow_gauss/log.6Jul17.GD.g++.4 new file mode 100644 index 0000000000..6171c0da5c --- /dev/null +++ b/examples/USER/misc/flow_gauss/log.6Jul17.GD.g++.4 @@ -0,0 +1,909 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +#LAMMPS input script +#in.GD +#see README for details + +############################################################################### +#initialize variables +clear + using 1 OpenMP thread(s) per MPI task + +#frequency for outputting info (timesteps) +variable dump_rate equal 50 +variable thermo_rate equal 10 + +#equilibration time (timesteps) +variable equil equal 1000 + +#stabilization time (timesteps to reach steady-state) +variable stabil equal 1000 + +#data collection time (timesteps) +variable run equal 2000 + +#length of pipe +variable L equal 30 + +#width of pipe +variable d equal 20 + +#flux (mass/sigma*tau) 
+variable J equal 0.1 + +#simulation box dimensions +variable Lx equal 100 +variable Ly equal 40 + +#bulk fluid density +variable dens equal 0.8 + +#lattice spacing for wall atoms +variable aWall equal 1.0 #1.7472 + +#timestep +variable ts equal 0.001 + +#temperature +variable T equal 2.0 + +#thermostat damping constant +variable tdamp equal ${ts}*100 +variable tdamp equal 0.001*100 + +units lj +dimension 2 +atom_style atomic + + +############################################################################### +#create box + +#create lattice with the spacing aWall +variable rhoWall equal ${aWall}^(-2) +variable rhoWall equal 1^(-2) +lattice sq ${rhoWall} +lattice sq 1 +Lattice spacing in x,y,z = 1 1 1 + +#modify input dimensions to be multiples of aWall +variable L1 equal round($L/${aWall})*${aWall} +variable L1 equal round(30/${aWall})*${aWall} +variable L1 equal round(30/1)*${aWall} +variable L1 equal round(30/1)*1 +variable d1 equal round($d/${aWall})*${aWall} +variable d1 equal round(20/${aWall})*${aWall} +variable d1 equal round(20/1)*${aWall} +variable d1 equal round(20/1)*1 +variable Ly1 equal round(${Ly}/${aWall})*${aWall} +variable Ly1 equal round(40/${aWall})*${aWall} +variable Ly1 equal round(40/1)*${aWall} +variable Ly1 equal round(40/1)*1 +variable Lx1 equal round(${Lx}/${aWall})*${aWall} +variable Lx1 equal round(100/${aWall})*${aWall} +variable Lx1 equal round(100/1)*${aWall} +variable Lx1 equal round(100/1)*1 + +#create simulation box +variable lx2 equal ${Lx1}/2 +variable lx2 equal 100/2 +variable ly2 equal ${Ly1}/2 +variable ly2 equal 40/2 +region simbox block -${lx2} ${lx2} -${ly2} ${ly2} 0 0.1 units box +region simbox block -50 ${lx2} -${ly2} ${ly2} 0 0.1 units box +region simbox block -50 50 -${ly2} ${ly2} 0 0.1 units box +region simbox block -50 50 -20 ${ly2} 0 0.1 units box +region simbox block -50 50 -20 20 0 0.1 units box +create_box 2 simbox +Created orthogonal box = (-50 -20 0) to (50 20 0.1) + 4 by 1 by 1 MPI processor grid + 
+##################################################################### +#set up potential + +mass 1 1.0 #fluid atoms +mass 2 1.0 #wall atoms + +pair_style lj/cut 2.5 +pair_modify shift yes +pair_coeff 1 1 1.0 1.0 2.5 +pair_coeff 1 2 1.0 1.0 1.12246 +pair_coeff 2 2 0.0 0.0 + +neigh_modify exclude type 2 2 + +timestep ${ts} +timestep 0.001 + +##################################################################### +#create atoms + +#create wall atoms everywhere +create_atoms 2 box +Created 4000 atoms + +#define region which is "walled off" +variable dhalf equal ${d1}/2 +variable dhalf equal 20/2 +variable Lhalf equal ${L1}/2 +variable Lhalf equal 30/2 +region walltop block -${Lhalf} ${Lhalf} ${dhalf} EDGE -0.1 0.1 units box +region walltop block -15 ${Lhalf} ${dhalf} EDGE -0.1 0.1 units box +region walltop block -15 15 ${dhalf} EDGE -0.1 0.1 units box +region walltop block -15 15 10 EDGE -0.1 0.1 units box +region wallbot block -${Lhalf} ${Lhalf} EDGE -${dhalf} -0.1 0.1 units box +region wallbot block -15 ${Lhalf} EDGE -${dhalf} -0.1 0.1 units box +region wallbot block -15 15 EDGE -${dhalf} -0.1 0.1 units box +region wallbot block -15 15 EDGE -10 -0.1 0.1 units box +region outsidewall union 2 walltop wallbot side out + +#remove wall atoms outside wall region +group outside region outsidewall +3349 atoms in group outside +delete_atoms group outside +Deleted 3349 atoms, new total = 651 + +#remove wall atoms that aren't on edge of wall region +variable x1 equal ${Lhalf}-${aWall} +variable x1 equal 15-${aWall} +variable x1 equal 15-1 +variable y1 equal ${dhalf}+${aWall} +variable y1 equal 10+${aWall} +variable y1 equal 10+1 +region insideTop block -${x1} ${x1} ${y1} EDGE -0.1 0.1 units box +region insideTop block -14 ${x1} ${y1} EDGE -0.1 0.1 units box +region insideTop block -14 14 ${y1} EDGE -0.1 0.1 units box +region insideTop block -14 14 11 EDGE -0.1 0.1 units box +region insideBot block -${x1} ${x1} EDGE -${y1} -0.1 0.1 units box +region insideBot block -14 ${x1} EDGE 
-${y1} -0.1 0.1 units box +region insideBot block -14 14 EDGE -${y1} -0.1 0.1 units box +region insideBot block -14 14 EDGE -11 -0.1 0.1 units box +region insideWall union 2 insideTop insideBot +group insideWall region insideWall +551 atoms in group insideWall +delete_atoms group insideWall +Deleted 551 atoms, new total = 100 + +#define new lattice, to give correct fluid density +#y lattice const must be a multiple of aWall +variable atrue equal ${dens}^(-1/2) +variable atrue equal 0.8^(-1/2) +variable ay equal round(${atrue}/${aWall})*${aWall} +variable ay equal round(1.11803398874989/${aWall})*${aWall} +variable ay equal round(1.11803398874989/1)*${aWall} +variable ay equal round(1.11803398874989/1)*1 + +#choose x lattice const to give correct density +variable ax equal (${ay}*${dens})^(-1) +variable ax equal (1*${dens})^(-1) +variable ax equal (1*0.8)^(-1) + +#change Lx to be multiple of ax +variable Lx1 equal round(${Lx}/${ax})*${ax} +variable Lx1 equal round(100/${ax})*${ax} +variable Lx1 equal round(100/1.25)*${ax} +variable Lx1 equal round(100/1.25)*1.25 +variable lx2 equal ${Lx1}/2 +variable lx2 equal 100/2 +change_box all x final -${lx2} ${lx2} units box +change_box all x final -50 ${lx2} units box +change_box all x final -50 50 units box + orthogonal box = (-50 -20 0) to (50 20 0.1) + +#define new lattice +lattice custom ${dens} a1 ${ax} 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +lattice custom 0.8 a1 ${ax} 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +lattice custom 0.8 a1 1.25 0.0 0.0 a2 0.0 ${ay} 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +lattice custom 0.8 a1 1.25 0.0 0.0 a2 0.0 1 0.0 a3 0.0 0.0 1.0 basis 0.0 0.0 0.0 +Lattice spacing in x,y,z = 1.25 1 1 + +#fill in rest of box with bulk particles +variable delta equal 0.001 +variable Ldelt equal ${Lhalf}+${delta} +variable Ldelt equal 15+${delta} +variable Ldelt equal 15+0.001 +variable dDelt equal ${dhalf}-${delta} +variable dDelt equal 10-${delta} +variable dDelt equal 
10-0.001 +region left block EDGE -${Ldelt} EDGE EDGE -0.1 0.1 units box +region left block EDGE -15.001 EDGE EDGE -0.1 0.1 units box +region right block ${Ldelt} EDGE EDGE EDGE -0.1 0.1 units box +region right block 15.001 EDGE EDGE EDGE -0.1 0.1 units box +region pipe block -${Ldelt} ${Ldelt} -${dDelt} ${dDelt} -0.1 0.1 units box +region pipe block -15.001 ${Ldelt} -${dDelt} ${dDelt} -0.1 0.1 units box +region pipe block -15.001 15.001 -${dDelt} ${dDelt} -0.1 0.1 units box +region pipe block -15.001 15.001 -9.999 ${dDelt} -0.1 0.1 units box +region pipe block -15.001 15.001 -9.999 9.999 -0.1 0.1 units box + +region bulk union 3 left pipe right +create_atoms 1 region bulk +Created 2675 atoms + +group bulk type 1 +2675 atoms in group bulk +group wall type 2 +100 atoms in group wall + +#remove atoms that are too close to wall +delete_atoms overlap 0.9 bulk wall +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 72 29 1 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) command delete_atoms, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/2d + bin: standard + (2) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/2d/newton + bin: standard +Deleted 0 atoms, new total = 2775 + +neighbor 0.3 bin +neigh_modify delay 0 every 1 check yes +neigh_modify exclude group wall wall + +velocity bulk create $T 78915 dist gaussian rot yes mom yes loop geom +velocity bulk create 2 78915 dist gaussian rot yes mom yes loop geom + +##################################################################### +#set up PUT +#see Evans and Morriss, Phys. Rev. Lett. 56(20) 1986, p. 
2172 + +#average number of particles per box, Evans and Morriss used 2.0 +variable NperBox equal 8.0 + +#calculate box sizes +variable boxSide equal sqrt(${NperBox}/${dens}) +variable boxSide equal sqrt(8/${dens}) +variable boxSide equal sqrt(8/0.8) +variable nX equal round(lx/${boxSide}) +variable nX equal round(lx/3.16227766016838) +variable nY equal round(ly/${boxSide}) +variable nY equal round(ly/3.16227766016838) +variable dX equal lx/${nX} +variable dX equal lx/32 +variable dY equal ly/${nY} +variable dY equal ly/13 + +#temperature of fluid (excluding wall) +compute myT bulk temp + +#profile-unbiased temperature of fluid +compute myTp bulk temp/profile 1 1 0 xy ${nX} ${nY} +compute myTp bulk temp/profile 1 1 0 xy 32 ${nY} +compute myTp bulk temp/profile 1 1 0 xy 32 13 + +#thermo setup +thermo ${thermo_rate} +thermo 10 +thermo_style custom step c_myT c_myTp etotal press + +#dump initial configuration +# dump 55 all custom 1 all.init.lammpstrj id type x y z vx vy vz +# dump 56 wall custom 1 wall.init.lammpstrj id type x y z +# dump_modify 55 sort id +# dump_modify 56 sort id +run 0 +WARNING: No fixes defined, atoms won't move (../verlet.cpp:55) +Neighbor list info ... 
+ update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 2.8 + ghost atom cutoff = 2.8 + binsize = 1.4, bins = 72 29 1 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/2d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 3.067 | 3.068 | 3.07 Mbytes +Step c_myT c_myTp TotEng Press + 0 2 2.0555109 0.77892922 7.3417096 +Loop time of 4.35114e-06 on 4 procs for 0 steps with 2775 atoms + +114.9% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0 | 0 | 0 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0 | 0 | 0 | 0.0 | 0.00 +Output | 0 | 0 | 0 | 0.0 | 0.00 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 4.351e-06 | | |100.00 + +Nlocal: 693.75 ave 800 max 578 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Nghost: 266.25 ave 325 max 198 min +Histogram: 1 1 0 0 0 0 0 0 0 2 +Neighs: 6601.5 ave 8000 max 5147 min +Histogram: 2 0 0 0 0 0 0 0 0 2 + +Total # of neighbors = 26406 +Ave neighs/atom = 9.51568 +Neighbor list builds = 0 +Dangerous builds = 0 +# undump 55 +# undump 56 + +##################################################################### +#equilibrate without GD + +fix nvt bulk nvt temp $T $T ${tdamp} +fix nvt bulk nvt temp 2 $T ${tdamp} +fix nvt bulk nvt temp 2 2 ${tdamp} +fix nvt bulk nvt temp 2 2 0.1 +fix_modify nvt temp myTp +WARNING: Temperature for fix modify is not for group all (../fix_nh.cpp:1395) +fix 2 bulk enforce2d + +run ${equil} +run 1000 +Per MPI rank memory allocation (min/avg/max) = 3.13 | 3.131 | 3.132 Mbytes +Step c_myT c_myTp TotEng Press + 0 2 2.0555109 0.77892922 7.3417096 + 10 1.9173594 1.9390034 0.77876976 7.6702228 + 20 1.7033394 1.6974676 0.77977799 8.5614784 + 30 1.5026161 1.4723993 
0.78456655 9.4308258 + 40 1.4880481 1.4591602 0.79486693 9.6134304 + 50 1.6192437 1.6150635 0.81109069 9.2592835 + 60 1.7404087 1.7583444 0.82955456 8.952392 + 70 1.7757591 1.8006606 0.8452778 8.9717917 + 80 1.7573847 1.7813629 0.85769389 9.1936368 + 90 1.7491183 1.7726908 0.86882429 9.3712357 + 100 1.7798944 1.8079583 0.88029084 9.3871755 + 110 1.8440582 1.8793133 0.89259397 9.2582848 + 120 1.9191606 1.9673434 0.90533438 9.0680574 + 130 1.9883299 2.0484299 0.91755461 8.88117 + 140 2.0463366 2.1111872 0.92818114 8.7184178 + 150 2.0953769 2.167849 0.93639789 8.5713408 + 160 2.1442147 2.2216228 0.94145082 8.4082835 + 170 2.1797848 2.2631458 0.94246877 8.2767903 + 180 2.1863476 2.2700986 0.93873326 8.2311689 + 190 2.1832866 2.2710551 0.93003012 8.1959062 + 200 2.1937154 2.2868403 0.91642537 8.0842007 + 210 2.2022708 2.2915142 0.89824533 7.9575312 + 220 2.1884715 2.2770564 0.87677613 7.9000591 + 230 2.1671124 2.2496063 0.85409501 7.8673156 + 240 2.1560417 2.2379998 0.83167878 7.8003228 + 250 2.1421449 2.2240624 0.81004723 7.7491508 + 260 2.1172164 2.1971044 0.78931978 7.7457415 + 270 2.0856847 2.1672998 0.76956352 7.7719788 + 280 2.0670685 2.1449303 0.75073364 7.7524614 + 290 2.0639481 2.1428374 0.73258016 7.6727716 + 300 2.055776 2.1361719 0.7147669 7.6095248 + 310 2.038425 2.1209353 0.69722853 7.5797085 + 320 2.0203023 2.1066031 0.68006634 7.5521081 + 330 2.0118478 2.1039797 0.66330302 7.4877535 + 340 2.0159442 2.1096258 0.64673694 7.3761703 + 350 2.0166408 2.1075061 0.63020017 7.2788 + 360 2.0059407 2.0806316 0.61387618 7.2263941 + 370 1.9964281 2.0642074 0.59814148 7.1728041 + 380 1.9918446 2.0567527 0.58303017 7.101597 + 390 1.992835 2.0548138 0.56852431 7.0084774 + 400 2.0012934 2.0615016 0.55438401 6.8865948 + 410 2.0084291 2.073418 0.54034073 6.7697478 + 420 2.007464 2.0786717 0.52617041 6.6849032 + 430 1.9983712 2.0704366 0.51188183 6.6323103 + 440 1.9884651 2.0588515 0.49765394 6.5868356 + 450 1.982221 2.0467396 0.4837102 6.5311681 + 460 1.9738673 2.031238 
0.47021649 6.4882783 + 470 1.9574246 2.0060447 0.45740021 6.4814923 + 480 1.9361065 1.9734507 0.44557947 6.4995199 + 490 1.9251024 1.9562469 0.43506067 6.4858343 + 500 1.9279545 1.9572145 0.42577835 6.4274765 + 510 1.9267504 1.9570246 0.41755013 6.3927027 + 520 1.9093405 1.9393872 0.41031829 6.4281888 + 530 1.8820555 1.9060756 0.40432569 6.5099401 + 540 1.86537 1.8912682 0.3999087 6.55843 + 550 1.8694252 1.9043192 0.39717519 6.5337875 + 560 1.8835224 1.9294105 0.39589322 6.4760141 + 570 1.8898719 1.9462433 0.39573596 6.4520041 + 580 1.8887698 1.9472764 0.39649878 6.4602989 + 590 1.8945125 1.9550624 0.39810844 6.4470226 + 600 1.9106571 1.9735939 0.40045321 6.3971026 + 610 1.9273243 1.98509 0.40330026 6.3474421 + 620 1.9351802 1.9888986 0.4064498 6.3340566 + 630 1.9337889 1.9846794 0.40981479 6.3610556 + 640 1.9257018 1.9757153 0.4134641 6.4184721 + 650 1.9204429 1.9718256 0.41750942 6.4679594 + 660 1.9220449 1.9701963 0.42202455 6.4919724 + 670 1.9230578 1.9707406 0.4270412 6.5178484 + 680 1.9204554 1.9740485 0.43255127 6.5572507 + 690 1.9201811 1.9762854 0.43847123 6.5869126 + 700 1.9271511 1.9867455 0.44474356 6.5882669 + 710 1.9418851 2.0042477 0.45120727 6.558573 + 720 1.9544547 2.0186724 0.4576061 6.5338329 + 730 1.9687971 2.0326169 0.46367507 6.4988775 + 740 1.9830308 2.0466267 0.46920367 6.4618136 + 750 1.9936981 2.0526606 0.47397868 6.4367349 + 760 2.0008431 2.0535449 0.47786748 6.4249001 + 770 1.9982133 2.0483219 0.48085757 6.4504786 + 780 1.9841544 2.0311693 0.48306488 6.5200512 + 790 1.9683122 2.0158738 0.48475632 6.5959263 + 800 1.9604618 2.003224 0.48619405 6.6392559 + 810 1.9629155 2.0075077 0.48756075 6.6406486 + 820 1.9683056 2.0110554 0.48883443 6.6269424 + 830 1.975409 2.0189161 0.48995399 6.6030215 + 840 1.9897264 2.035016 0.4907852 6.5485575 + 850 2.0094338 2.0555358 0.49104505 6.4719926 + 860 2.0217589 2.0643603 0.49040437 6.4233305 + 870 2.0147718 2.0641627 0.48866908 6.4491964 + 880 1.9883859 2.0324092 0.48592007 6.5488061 + 890 1.9625853 
2.0028776 0.48263002 6.6452734 + 900 1.9520401 1.9889124 0.47925524 6.6808078 + 910 1.9559583 1.9952984 0.47597346 6.6573059 + 920 1.9657244 2.0083503 0.47268726 6.6073704 + 930 1.969288 2.0152339 0.4692054 6.5780416 + 940 1.9652206 2.0116384 0.4654438 6.5769812 + 950 1.9567495 1.9960693 0.46147541 6.5942022 + 960 1.9418452 1.980858 0.45753557 6.6369454 + 970 1.9247196 1.9585585 0.45390337 6.6888821 + 980 1.9128262 1.9481721 0.45090045 6.7198221 + 990 1.9167211 1.9451096 0.44869731 6.6912394 + 1000 1.935529 1.9662384 0.44728238 6.6079829 +Loop time of 0.474418 on 4 procs for 1000 steps with 2775 atoms + +Performance: 182118.045 tau/day, 2107.848 timesteps/s +98.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.13953 | 0.19068 | 0.23764 | 10.4 | 40.19 +Neigh | 0.016439 | 0.022345 | 0.027069 | 3.2 | 4.71 +Comm | 0.018215 | 0.068071 | 0.12178 | 18.6 | 14.35 +Output | 0.011982 | 0.012633 | 0.013047 | 0.4 | 2.66 +Modify | 0.14494 | 0.15597 | 0.16628 | 2.4 | 32.88 +Other | | 0.02472 | | | 5.21 + +Nlocal: 693.75 ave 800 max 584 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Nghost: 255.5 ave 323 max 192 min +Histogram: 2 0 0 0 0 0 0 0 1 1 +Neighs: 6083 ave 7384 max 4742 min +Histogram: 2 0 0 0 0 0 0 0 0 2 + +Total # of neighbors = 24332 +Ave neighs/atom = 8.76829 +Neighbor list builds = 38 +Dangerous builds = 0 + +##################################################################### +#initialize the COM velocity and run to achieve steady-state + +#calculate velocity to add: V=J/rho_total +variable Vadd equal $J*lx*ly/count(bulk) +variable Vadd equal 0.1*lx*ly/count(bulk) + +#first remove any COM velocity, then add back the streaming velocity +velocity bulk zero linear +velocity bulk set ${Vadd} 0.0 0.0 units box sum yes mom no +velocity bulk set 0.149532710280374 0.0 0.0 units box sum yes mom no + +fix GD bulk flow/gauss 1 
0 0 #energy yes +#fix_modify GD energy yes + +run ${stabil} +run 1000 +Per MPI rank memory allocation (min/avg/max) = 3.13 | 3.131 | 3.132 Mbytes +Step c_myT c_myTp TotEng Press + 1000 1.9466974 1.9662384 0.45804438 6.615449 + 1010 1.9605467 1.9815754 0.45717241 6.5545496 + 1020 1.9560139 1.9823875 0.45660431 6.5672421 + 1030 1.9348326 1.9691606 0.45633148 6.6463667 + 1040 1.9167809 1.9449522 0.45657707 6.7139486 + 1050 1.9193541 1.943342 0.45767968 6.7014054 + 1060 1.9410751 1.9720491 0.45967742 6.6150379 + 1070 1.9658493 1.9964883 0.46221539 6.5178418 + 1080 1.9767205 2.0074304 0.46491236 6.4768594 + 1090 1.9714544 2.0003054 0.46759126 6.5026957 + 1100 1.9647035 1.9927455 0.4703109 6.5400181 + 1110 1.9657667 1.9959656 0.47317481 6.5519094 + 1120 1.9706062 1.9980802 0.476185 6.5512675 + 1130 1.9747655 2.0062292 0.47932281 6.554091 + 1140 1.9761245 2.0075076 0.48248327 6.5670381 + 1150 1.9744197 2.0073027 0.48562483 6.5914441 + 1160 1.9722698 2.0046687 0.48874207 6.6165575 + 1170 1.9692145 2.0013845 0.49187442 6.6438115 + 1180 1.9665609 1.9970724 0.49508053 6.6693821 + 1190 1.9625031 1.9908427 0.49843816 6.7002606 + 1200 1.960528 1.993084 0.50203044 6.7237076 + 1210 1.9649156 1.9981485 0.50587066 6.7217755 + 1220 1.9788059 2.0134511 0.50987442 6.6833452 + 1230 1.9952283 2.0343101 0.51379781 6.6340278 + 1240 2.0039391 2.0494196 0.51730872 6.6129751 + 1250 2.0019006 2.0526773 0.52014603 6.6320217 + 1260 1.9974025 2.0528914 0.52221385 6.6601786 + 1270 1.9953949 2.0561121 0.5234754 6.6796142 + 1280 1.9893864 2.0470375 0.5238632 6.7140134 + 1290 1.9694951 2.019253 0.5235093 6.798442 + 1300 1.9473901 1.9965919 0.52280384 6.8863369 + 1310 1.9511151 2.006161 0.52203882 6.8700917 + 1320 1.979341 2.0388959 0.52106938 6.7529595 + 1330 2.0073235 2.0720045 0.51935291 6.6297731 + 1340 2.0202482 2.0841419 0.51624273 6.55803 + 1350 2.0177489 2.0669046 0.51142591 6.5401753 + 1360 2.0069274 2.04717 0.50505824 6.5506533 + 1370 1.994854 2.0311383 0.49743042 6.5633001 + 1380 1.9793176 
2.0077184 0.48890503 6.5859072 + 1390 1.9580907 1.9839831 0.48004316 6.6288992 + 1400 1.9415542 1.9594192 0.47143599 6.6534105 + 1410 1.9405188 1.9591825 0.46353105 6.620549 + 1420 1.9504784 1.9730647 0.45640199 6.5471784 + 1430 1.9594158 1.9819854 0.44995052 6.4802874 + 1440 1.9615108 1.9863792 0.44406411 6.44391 + 1450 1.9544127 1.9806249 0.43873409 6.4484818 + 1460 1.9384927 1.9614953 0.43408605 6.4905259 + 1470 1.9214711 1.9425515 0.43035972 6.5390434 + 1480 1.9170761 1.9300809 0.42775046 6.5409502 + 1490 1.9242904 1.9385731 0.42631007 6.5005057 + 1500 1.9307133 1.9446119 0.4258836 6.4660754 + 1510 1.9303576 1.9435389 0.42633976 6.4616415 + 1520 1.9248382 1.9408306 0.42765441 6.4832059 + 1530 1.9120794 1.9278123 0.42986958 6.5380951 + 1540 1.899122 1.9125029 0.4331459 6.5987181 + 1550 1.9030956 1.9187821 0.43765067 6.6012019 + 1560 1.9182961 1.9453782 0.44330842 6.5674222 + 1570 1.9272863 1.9613129 0.44971962 6.5619794 + 1580 1.931679 1.9698134 0.45643436 6.5780809 + 1590 1.9336692 1.9728684 0.46314752 6.6035675 + 1600 1.938895 1.9823104 0.46964519 6.6138411 + 1610 1.9510838 1.9937914 0.47568807 6.5916989 + 1620 1.9685387 2.0087314 0.48102339 6.5424432 + 1630 1.9894416 2.0295715 0.48539861 6.4757743 + 1640 1.9982699 2.0426949 0.48860411 6.4512418 + 1650 1.9901677 2.0363837 0.49062424 6.4879985 + 1660 1.9814216 2.0291326 0.49172203 6.5248034 + 1670 1.9812111 2.0293629 0.49218297 6.5253876 + 1680 1.9903906 2.0408376 0.49211747 6.4852787 + 1690 2.0015983 2.0538843 0.4914581 6.4325081 + 1700 2.009727 2.0503407 0.49011163 6.3878577 + 1710 2.0167822 2.0531002 0.4881688 6.3477054 + 1720 2.0189021 2.0445033 0.48564798 6.3273063 + 1730 2.0129713 2.0354734 0.48270666 6.3385541 + 1740 2.0048763 2.0199836 0.47950943 6.3587586 + 1750 1.9994843 2.0085942 0.47624908 6.3694119 + 1760 1.9940025 2.0072098 0.47305283 6.3816295 + 1770 1.9817431 1.9974066 0.46994486 6.4224295 + 1780 1.965171 1.9805421 0.4670779 6.4832371 + 1790 1.9474078 1.9662605 0.46466823 6.5516524 + 1800 
1.9286009 1.9507751 0.46292015 6.6263366 + 1810 1.9168087 1.9437961 0.46199899 6.6759834 + 1820 1.9107555 1.9306323 0.46204129 6.7029857 + 1830 1.9135569 1.930819 0.46316484 6.6949737 + 1840 1.9345342 1.9553413 0.46532704 6.6178988 + 1850 1.9630349 1.9929548 0.46822932 6.5137866 + 1860 1.9820746 2.0188839 0.47135068 6.4489028 + 1870 1.9834959 2.0217145 0.47427805 6.4552721 + 1880 1.9731564 2.0120293 0.47692755 6.5100251 + 1890 1.9653605 2.0070624 0.47943307 6.5594235 + 1900 1.9630631 2.0095488 0.48192185 6.5912876 + 1910 1.9556778 2.0035006 0.48443107 6.6437189 + 1920 1.9408788 1.9828296 0.48710124 6.7228731 + 1930 1.9292393 1.9732376 0.49025327 6.7880112 + 1940 1.9263081 1.9708942 0.49416086 6.8162477 + 1950 1.9358375 1.976323 0.49899895 6.7946964 + 1960 1.9520543 1.9936542 0.50485961 6.7467481 + 1970 1.9709064 2.0108957 0.51165586 6.6909455 + 1980 1.9940026 2.0375428 0.51918913 6.6250463 + 1990 2.0171261 2.0646948 0.52705638 6.5649879 + 2000 2.0302713 2.0802515 0.53472229 6.5470853 +Loop time of 0.482133 on 4 procs for 1000 steps with 2775 atoms + +Performance: 179203.608 tau/day, 2074.116 timesteps/s +98.6% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.1081 | 0.18228 | 0.23471 | 12.7 | 37.81 +Neigh | 0.011443 | 0.019967 | 0.025651 | 4.1 | 4.14 +Comm | 0.01639 | 0.073615 | 0.15634 | 21.8 | 15.27 +Output | 0.011851 | 0.012603 | 0.013287 | 0.5 | 2.61 +Modify | 0.14306 | 0.16634 | 0.18018 | 3.6 | 34.50 +Other | | 0.02733 | | | 5.67 + +Nlocal: 693.75 ave 797 max 590 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Nghost: 259 ave 320 max 195 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Neighs: 6101 ave 7360 max 4853 min +Histogram: 2 0 0 0 0 0 0 0 0 2 + +Total # of neighbors = 24404 +Ave neighs/atom = 8.79423 +Neighbor list builds = 36 +Dangerous builds = 0 + 
+##################################################################### +#collect data + +#print the applied force and total flux to ensure conservation of Jx +variable Fapp equal f_GD[1] +compute vxBulk bulk reduce sum vx +compute vyBulk bulk reduce sum vy +variable invVol equal 1.0/(lx*ly) +variable jx equal c_vxBulk*${invVol} +variable jx equal c_vxBulk*0.00025 +variable jy equal c_vyBulk*${invVol} +variable jy equal c_vyBulk*0.00025 +variable curr_step equal step +variable p_Fapp format Fapp %.3f +variable p_jx format jx %.5g +variable p_jy format jy %.5g +fix print_vCOM all print ${dump_rate} "${curr_step} ${p_Fapp} ${p_jx} ${p_jy}" file GD.out screen no title "timestep Fapp Jx Jy" +fix print_vCOM all print 50 "${curr_step} ${p_Fapp} ${p_jx} ${p_jy}" file GD.out screen no title "timestep Fapp Jx Jy" + +#compute IK1 pressure profile +#see Todd, Evans, and Davis, Phys. Rev. E 52(2) 1995, p. 1627 +#use profile-unbiased temperature to remove the streaming velocity +#from the kinetic part of the pressure +compute spa bulk stress/atom myTp + +#for the pressure profile, use the same grid as the PUT +compute chunkX bulk chunk/atom bin/1d x lower ${dX} units box +compute chunkX bulk chunk/atom bin/1d x lower 3.125 units box + +#output pressure profile and other profiles +#the pressure profile is (-1/2V)*(c_spa[1] + c_spa[2]), where +#V is the volume of a slice +fix profiles bulk ave/chunk 1 1 ${dump_rate} chunkX vx density/mass c_spa[1] c_spa[2] file x_profiles ave running overwrite +fix profiles bulk ave/chunk 1 1 50 chunkX vx density/mass c_spa[1] c_spa[2] file x_profiles ave running overwrite + +#compute velocity profile across the pipe with a finer grid +variable dYnew equal ${dY}/10 +variable dYnew equal 3.07692307692308/10 +compute chunkY bulk chunk/atom bin/1d y center ${dYnew} units box region pipe +compute chunkY bulk chunk/atom bin/1d y center 0.307692307692308 units box region pipe +fix velYprof bulk ave/chunk 1 1 ${dump_rate} chunkY vx file Vy_profile ave 
running overwrite +fix velYprof bulk ave/chunk 1 1 50 chunkY vx file Vy_profile ave running overwrite + +#full trajectory +# dump 7 bulk custom ${dump_rate} bulk.lammpstrj id type x y z +# dump_modify 7 sort id + +run ${run} +run 2000 +Per MPI rank memory allocation (min/avg/max) = 5.138 | 5.139 | 5.14 Mbytes +Step c_myT c_myTp TotEng Press + 2000 2.0302713 2.0802515 0.53472229 6.5470853 + 2010 2.0303419 2.0806129 0.54177821 6.5808527 + 2020 2.0245167 2.0792991 0.54803523 6.6381758 + 2030 2.0169072 2.065404 0.55345227 6.7008962 + 2040 2.0052526 2.0513817 0.55818432 6.7755868 + 2050 1.9953625 2.0366564 0.56245299 6.8382569 + 2060 2.0003667 2.0462109 0.56649798 6.8390557 + 2070 2.0238288 2.0834553 0.57023651 6.7637821 + 2080 2.045765 2.1173867 0.5730944 6.6861321 + 2090 2.0563925 2.1370313 0.57430831 6.6422581 + 2100 2.0620437 2.1480293 0.57319824 6.6080678 + 2110 2.0584437 2.1473173 0.56913597 6.5969671 + 2120 2.0532825 2.1393006 0.56154606 6.5799417 + 2130 2.0450143 2.1234905 0.55009479 6.5616931 + 2140 2.0229537 2.1004507 0.53511912 6.5854627 + 2150 1.9832556 2.0554119 0.51812599 6.6700591 + 2160 1.9444027 2.0110758 0.50163049 6.7534263 + 2170 1.9267473 1.9904528 0.48759542 6.76469 + 2180 1.9262232 1.9809353 0.47662199 6.7188048 + 2190 1.9359331 1.9854626 0.46836289 6.6406985 + 2200 1.9530728 1.9971865 0.4620366 6.5409943 + 2210 1.9657099 2.0056761 0.45692542 6.4639397 + 2220 1.9661008 2.0046161 0.45253504 6.4388081 + 2230 1.9574696 1.9947839 0.44864257 6.4528687 + 2240 1.9522284 1.9922663 0.44518111 6.4584458 + 2250 1.9518203 1.9950044 0.44206844 6.4491722 + 2260 1.9527908 1.9989603 0.4391804 6.4377912 + 2270 1.9452231 1.9932538 0.43643529 6.4607516 + 2280 1.9249341 1.9759145 0.43392742 6.5320897 + 2290 1.9087464 1.960985 0.43186869 6.5875176 + 2300 1.9103289 1.964731 0.43039882 6.5765021 + 2310 1.9182062 1.9783814 0.4294628 6.5434488 + 2320 1.9204281 1.9796609 0.42889381 6.5351629 + 2330 1.916279 1.9720659 0.42866391 6.5562619 + 2340 1.9062866 1.9587628 
0.42890166 6.6033936 + 2350 1.9024117 1.9566812 0.42979475 6.6297969 + 2360 1.908153 1.960687 0.43141898 6.6215148 + 2370 1.9115944 1.9663337 0.43376668 6.6236491 + 2380 1.9086193 1.9637867 0.4367911 6.6529568 + 2390 1.9039907 1.9610268 0.44053991 6.6926343 + 2400 1.9034944 1.9609406 0.44508818 6.7193441 + 2410 1.9151521 1.9753641 0.4504458 6.7015957 + 2420 1.9314517 1.9925924 0.45644382 6.6669864 + 2430 1.9433933 2.0062001 0.46277215 6.6481527 + 2440 1.9504631 2.0087015 0.46917209 6.6475757 + 2450 1.9550092 2.0094957 0.47550077 6.6556459 + 2460 1.9609689 2.0147997 0.48170141 6.6568282 + 2470 1.9730726 2.0328127 0.48763131 6.6337545 + 2480 1.9838562 2.0466643 0.49303443 6.6143423 + 2490 1.9862031 2.0473388 0.49767532 6.6245587 + 2500 1.9817565 2.0455432 0.50152131 6.6573893 + 2510 1.9785788 2.0423176 0.50460561 6.6808042 + 2520 1.9823006 2.0505106 0.50696374 6.6726698 + 2530 1.9907178 2.0553736 0.50852885 6.6402082 + 2540 2.0005205 2.0690408 0.50919421 6.5966469 + 2550 2.0079727 2.0809816 0.50872954 6.5568419 + 2560 2.0133128 2.096271 0.50682742 6.5199915 + 2570 2.0141298 2.0990846 0.50314491 6.4951991 + 2580 2.0048768 2.0874319 0.49750096 6.5025454 + 2590 1.9876498 2.0638834 0.4900201 6.5333038 + 2600 1.9720479 2.0474479 0.48105263 6.5527157 + 2610 1.9596324 2.0355764 0.4710001 6.5547867 + 2620 1.9439039 2.0106405 0.46046644 6.5646889 + 2630 1.9321714 1.9924346 0.45021207 6.5589454 + 2640 1.9349378 1.9923889 0.44082833 6.5012762 + 2650 1.9448459 2.0069955 0.43251999 6.4228945 + 2660 1.9446852 2.0050346 0.42525857 6.3921645 + 2670 1.9325594 1.9884937 0.41913362 6.4169726 + 2680 1.9121687 1.9606084 0.41434428 6.4821267 + 2690 1.8923613 1.9339385 0.41105831 6.5517615 + 2700 1.8807238 1.9191801 0.40933203 6.5949447 + 2710 1.8797367 1.918758 0.40906826 6.6001309 + 2720 1.8852961 1.9225996 0.41005611 6.58191 + 2730 1.8937478 1.9357751 0.41204348 6.5541946 + 2740 1.9019279 1.9449374 0.41476104 6.5278575 + 2750 1.9134396 1.9614415 0.41800066 6.4890769 + 2760 1.9339551 
1.9913779 0.42150554 6.4159805 + 2770 1.9597826 2.0220988 0.42487614 6.3232273 + 2780 1.9753466 2.0414907 0.42771704 6.2715489 + 2790 1.9720423 2.0402016 0.42976012 6.2949288 + 2800 1.9512893 2.0172711 0.43109201 6.3878056 + 2810 1.9232302 1.9870212 0.4320928 6.5101822 + 2820 1.9026913 1.959286 0.43326424 6.6024967 + 2830 1.9033802 1.9621601 0.43500785 6.6114274 + 2840 1.9214292 1.9833838 0.43733454 6.5508757 + 2850 1.9440563 2.0087358 0.43995473 6.4713496 + 2860 1.9589136 2.0211107 0.44250821 6.4232961 + 2870 1.9588429 2.022232 0.44477492 6.4355861 + 2880 1.9456751 2.0009513 0.44676532 6.5021746 + 2890 1.9269155 1.9782929 0.44877858 6.5926531 + 2900 1.9125262 1.9554653 0.45121196 6.6657808 + 2910 1.9187855 1.9572583 0.45438665 6.6589954 + 2920 1.9416112 1.9784518 0.45839212 6.5888253 + 2930 1.9613579 1.9975032 0.46305788 6.5317424 + 2940 1.9711529 2.0102501 0.46812715 6.5148943 + 2950 1.9707865 2.0133283 0.47345305 6.5389543 + 2960 1.9732526 2.0170219 0.47898306 6.5537092 + 2970 1.9871126 2.0282309 0.48465048 6.5273492 + 2980 1.9953449 2.0404164 0.49032615 6.5227325 + 2990 1.9909136 2.037246 0.49581423 6.5664662 + 3000 1.9872474 2.0307896 0.50110509 6.6060698 + 3010 1.9944885 2.0457308 0.5062755 6.6031811 + 3020 2.0103461 2.0599491 0.51116655 6.5654871 + 3030 2.0240275 2.077342 0.5154921 6.5358852 + 3040 2.0205953 2.0704954 0.51898871 6.5708937 + 3050 2.0032184 2.0463036 0.52167438 6.657741 + 3060 1.9889341 2.0265284 0.52385964 6.7329171 + 3070 1.9795143 2.0201081 0.52588914 6.7881407 + 3080 1.9713362 2.0123964 0.52797238 6.8362858 + 3090 1.9692592 2.0106467 0.53025538 6.8616268 + 3100 1.9722487 2.0259566 0.53277635 6.8689898 + 3110 1.9703322 2.0314028 0.53541462 6.895271 + 3120 1.9594359 2.0217586 0.53808512 6.954362 + 3130 1.9524729 2.0148628 0.5409094 6.9965233 + 3140 1.9630381 2.0260807 0.54400259 6.968082 + 3150 1.9902598 2.0549364 0.54720142 6.8698796 + 3160 2.029715 2.0923999 0.54995378 6.7193678 + 3170 2.0581544 2.1137995 0.55150021 6.6053728 + 3180 
2.059074 2.1156535 0.55123668 6.5919337 + 3190 2.0400682 2.0904721 0.54894762 6.6505757 + 3200 2.0211594 2.0682597 0.54484887 6.7046468 + 3210 2.012712 2.0573114 0.53922057 6.7130909 + 3220 2.0102377 2.0554701 0.53219251 6.6919069 + 3230 2.0017671 2.0505068 0.52386898 6.6867054 + 3240 1.9854941 2.0308454 0.51458792 6.7051085 + 3250 1.9767009 2.0187664 0.50486785 6.6916859 + 3260 1.9771733 2.0186148 0.49510722 6.6424305 + 3270 1.974003 2.0136039 0.48556819 6.6078903 + 3280 1.9627665 1.9989122 0.47654147 6.6067904 + 3290 1.9491247 1.9826248 0.46834866 6.6186709 + 3300 1.9414093 1.9724941 0.4612122 6.6119543 + 3310 1.9433901 1.9715482 0.45518879 6.570612 + 3320 1.9518837 1.9872717 0.45010165 6.5057947 + 3330 1.9603874 1.9957995 0.44566728 6.4428221 + 3340 1.9615962 1.9945224 0.44167201 6.4099339 + 3350 1.955918 1.9882866 0.4380303 6.4070811 + 3360 1.9463445 1.9763654 0.43480086 6.4241178 + 3370 1.9411187 1.9683081 0.43206391 6.4296577 + 3380 1.9407224 1.9580074 0.42991627 6.4210217 + 3390 1.9402479 1.9530447 0.42850635 6.4170536 + 3400 1.9451337 1.9555771 0.42787382 6.3990336 + 3410 1.9475586 1.9612432 0.42797178 6.3953251 + 3420 1.9434927 1.960532 0.4286887 6.4210681 + 3430 1.9339054 1.9516935 0.43003682 6.4707071 + 3440 1.9234014 1.9464343 0.43214965 6.5248205 + 3450 1.9191846 1.9444777 0.43516361 6.5558451 + 3460 1.923218 1.9594606 0.43915611 6.5549213 + 3470 1.9328953 1.9792053 0.44397878 6.5327637 + 3480 1.9466227 1.9997841 0.44940599 6.4954965 + 3490 1.9672374 2.0323219 0.45511091 6.4358811 + 3500 1.9799622 2.0479841 0.46061029 6.4100217 + 3510 1.97942 2.0493411 0.46551964 6.4368108 + 3520 1.9725674 2.0389602 0.46976378 6.4892049 + 3530 1.9716429 2.0389798 0.47344292 6.5200899 + 3540 1.9789254 2.0486162 0.47659268 6.5198212 + 3550 1.9872455 2.0577517 0.47908145 6.5144586 + 3560 1.9808834 2.0545962 0.48076561 6.5633282 + 3570 1.9637165 2.0335394 0.4816783 6.6519124 + 3580 1.9407948 2.0067763 0.48212405 6.7605224 + 3590 1.9226532 1.9825887 0.48252299 6.8486041 + 
3600 1.9135067 1.9700999 0.48328348 6.8977858 + 3610 1.9157516 1.9720028 0.48470695 6.8977759 + 3620 1.9328644 2.0001154 0.48688777 6.8361569 + 3630 1.9568208 2.0243053 0.48963933 6.7442107 + 3640 1.9824587 2.0569223 0.49259173 6.6452535 + 3650 1.9934906 2.0686356 0.49529038 6.6020218 + 3660 1.9996281 2.0747054 0.4973223 6.5808904 + 3670 2.0038801 2.0772777 0.49838833 6.5691351 + 3680 1.9941342 2.0712365 0.49826732 6.6088107 + 3690 1.9762631 2.0486045 0.49689108 6.6739002 + 3700 1.9667284 2.0349391 0.4943899 6.7010265 + 3710 1.9615089 2.0168112 0.49093735 6.7040384 + 3720 1.9613068 2.0147489 0.48673788 6.6813041 + 3730 1.9731234 2.0290151 0.48175561 6.6096757 + 3740 1.9829764 2.0461907 0.47575173 6.5424752 + 3750 1.9792839 2.0454423 0.46852709 6.5237753 + 3760 1.9599692 2.0287014 0.46022484 6.5616271 + 3770 1.935975 2.0000948 0.45138016 6.6136471 + 3780 1.9236713 1.9834802 0.44262435 6.6187463 + 3790 1.9268004 1.9875324 0.43430112 6.5632772 + 3800 1.932601 1.9872595 0.42649563 6.4984764 + 3810 1.9322506 1.9814946 0.41928855 6.4617054 + 3820 1.9245737 1.9712821 0.4128224 6.4613779 + 3830 1.9148568 1.9555602 0.40721003 6.4774474 + 3840 1.9049961 1.9457058 0.40261179 6.5029211 + 3850 1.8915137 1.9265199 0.39914961 6.5483592 + 3860 1.8784768 1.9058055 0.39700153 6.5962113 + 3870 1.8755236 1.9045158 0.39632768 6.6079033 + 3880 1.8841415 1.9140314 0.39710037 6.577707 + 3890 1.8958027 1.9331149 0.39918951 6.5359785 + 3900 1.9064085 1.948805 0.40238576 6.499859 + 3910 1.9185092 1.9675733 0.40647523 6.4610682 + 3920 1.9342595 1.9933225 0.41115392 6.4122308 + 3930 1.9482664 2.0076139 0.41603495 6.3736841 + 3940 1.9557759 2.0161573 0.42084462 6.3636708 + 3950 1.9573687 2.016612 0.42540421 6.3804124 + 3960 1.9486354 1.9998027 0.42974612 6.4404944 + 3970 1.936214 1.9807209 0.43412037 6.5176788 + 3980 1.9274292 1.9595259 0.43885103 6.5846212 + 3990 1.9233082 1.953436 0.44425085 6.6354276 + 4000 1.9289166 1.9522097 0.45042645 6.6513835 +Loop time of 0.998413 on 4 procs for 2000 
steps with 2775 atoms + +Performance: 173074.634 tau/day, 2003.179 timesteps/s +98.9% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.25646 | 0.3672 | 0.47947 | 15.7 | 36.78 +Neigh | 0.027925 | 0.039163 | 0.050221 | 4.5 | 3.92 +Comm | 0.032807 | 0.14565 | 0.27684 | 25.4 | 14.59 +Output | 0.025572 | 0.032272 | 0.035355 | 2.2 | 3.23 +Modify | 0.31519 | 0.35781 | 0.375 | 4.1 | 35.84 +Other | | 0.05632 | | | 5.64 + +Nlocal: 693.75 ave 805 max 582 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Nghost: 255.5 ave 312 max 199 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +Neighs: 6091.5 ave 7423 max 4780 min +Histogram: 2 0 0 0 0 0 0 0 0 2 + +Total # of neighbors = 24366 +Ave neighs/atom = 8.78054 +Neighbor list builds = 72 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:01 diff --git a/examples/USER/misc/flow_gauss/output-files/GD.out b/examples/USER/misc/flow_gauss/output-files/GD.out new file mode 100644 index 0000000000..e3049830bc --- /dev/null +++ b/examples/USER/misc/flow_gauss/output-files/GD.out @@ -0,0 +1,41 @@ +timestep Fapp Jx Jy +2050 -215.835 0.1 -0.002562 +2100 -220.455 0.1 -0.0019705 +2150 55.212 0.1 -0.0028338 +2200 87.052 0.1 -0.0042335 +2250 -62.998 0.1 -0.0045646 +2300 71.630 0.1 -0.0039858 +2350 43.159 0.1 -0.0029771 +2400 109.930 0.1 -0.0018522 +2450 110.735 0.1 -0.0011188 +2500 107.071 0.1 0.0005978 +2550 335.449 0.1 0.0010164 +2600 159.694 0.1 -0.00015953 +2650 6.532 0.1 -0.0004907 +2700 65.524 0.1 -0.00093116 +2750 79.662 0.1 -0.0033425 +2800 69.846 0.1 -0.0055377 +2850 122.175 0.1 -0.00721 +2900 32.456 0.1 -0.0086166 +2950 -85.137 0.1 -0.01107 +3000 154.735 0.1 -0.011337 +3050 72.979 0.1 -0.0095316 +3100 -24.457 0.1 -0.0098708 +3150 -0.383 0.1 -0.0094961 +3200 132.434 0.1 -0.011524 +3250 48.222 0.1 -0.014966 +3300 -73.186 0.1 -0.016999 
+3350 172.062 0.1 -0.018554 +3400 106.144 0.1 -0.021202 +3450 -22.860 0.1 -0.01949 +3500 22.120 0.1 -0.016033 +3550 -254.920 0.1 -0.012172 +3600 -147.218 0.1 -0.011162 +3650 -12.508 0.1 -0.010255 +3700 81.846 0.1 -0.0085117 +3750 -79.406 0.1 -0.0061294 +3800 -34.994 0.1 -0.0026239 +3850 94.992 0.1 -0.0015312 +3900 -0.345 0.1 -0.0011157 +3950 -88.693 0.1 -0.0018929 +4000 156.029 0.1 -0.0024547 diff --git a/examples/USER/misc/flow_gauss/output-files/Vy_profile b/examples/USER/misc/flow_gauss/output-files/Vy_profile new file mode 100644 index 0000000000..2df7468364 --- /dev/null +++ b/examples/USER/misc/flow_gauss/output-files/Vy_profile @@ -0,0 +1,134 @@ +# Chunk-averaged data for fix velYprof and group file +# Timestep Number-of-chunks Total-count +# Chunk Coord1 Ncount vx +4000 130 18774 + 1 -19.8462 0 0 + 2 -19.5385 0 0 + 3 -19.2308 0 0 + 4 -18.9231 0 0 + 5 -18.6154 0 0 + 6 -18.3077 0 0 + 7 -18 0 0 + 8 -17.6923 0 0 + 9 -17.3846 0 0 + 10 -17.0769 0 0 + 11 -16.7692 0 0 + 12 -16.4615 0 0 + 13 -16.1538 0 0 + 14 -15.8462 0 0 + 15 -15.5385 0 0 + 16 -15.2308 0 0 + 17 -14.9231 0 0 + 18 -14.6154 0 0 + 19 -14.3077 0 0 + 20 -14 0 0 + 21 -13.6923 0 0 + 22 -13.3846 0 0 + 23 -13.0769 0 0 + 24 -12.7692 0 0 + 25 -12.4615 0 0 + 26 -12.1538 0 0 + 27 -11.8462 0 0 + 28 -11.5385 0 0 + 29 -11.2308 0 0 + 30 -10.9231 0 0 + 31 -10.6154 0 0 + 32 -10.3077 0 0 + 33 -10 0 0 + 34 -9.69231 0 0 + 35 -9.38462 0 0 + 36 -9.07692 12.3415 0.126356 + 37 -8.76923 9.14634 0.119194 + 38 -8.46154 3.46341 0.0688559 + 39 -8.15385 7.26829 0.180935 + 40 -7.84615 9.97561 0.114685 + 41 -7.53846 6.14634 0.158317 + 42 -7.23077 7.17073 0.128092 + 43 -6.92308 8.56098 0.30356 + 44 -6.61538 7.7561 0.118822 + 45 -6.30769 6.04878 0.170019 + 46 -6 8.19512 0.146873 + 47 -5.69231 8.4878 0.258003 + 48 -5.38462 7.21951 0.0612577 + 49 -5.07692 7.14634 0.394221 + 50 -4.76923 7.34146 0.214609 + 51 -4.46154 7.90244 0.1583 + 52 -4.15385 6.36585 0.191919 + 53 -3.84615 8.04878 0.202891 + 54 -3.53846 7.2439 -0.00173288 + 55 
-3.23077 7.53659 0.117062 + 56 -2.92308 6.41463 0.324614 + 57 -2.61538 7.60976 0.496272 + 58 -2.30769 8.39024 0.364642 + 59 -2 6.73171 0.292624 + 60 -1.69231 7.02439 0.517913 + 61 -1.38462 8.43902 0.534594 + 62 -1.07692 7.21951 0.497622 + 63 -0.769231 6.95122 0.303701 + 64 -0.461538 8.68293 0.406682 + 65 -0.153846 7.5122 0.218835 + 66 0.153846 6.82927 0.189413 + 67 0.461538 8.26829 0.228409 + 68 0.769231 7.2439 0.506845 + 69 1.07692 7.97561 0.154118 + 70 1.38462 8.26829 0.144882 + 71 1.69231 6.58537 0.192568 + 72 2 7.46341 0.360144 + 73 2.30769 8.95122 0.0112179 + 74 2.61538 6.58537 0.276061 + 75 2.92308 6.53659 0.114354 + 76 3.23077 8.46341 0.0386417 + 77 3.53846 8 0.0711626 + 78 3.84615 6.92683 0.203194 + 79 4.15385 8.4878 0.317789 + 80 4.46154 7.5122 0.268122 + 81 4.76923 6.58537 -0.112372 + 82 5.07692 9.02439 0.115702 + 83 5.38462 7.41463 -0.067424 + 84 5.69231 6.07317 0.0626918 + 85 6 8.34146 -0.0153977 + 86 6.30769 8.21951 0.281342 + 87 6.61538 6.29268 0.359939 + 88 6.92308 8.87805 0.110875 + 89 7.23077 6.09756 0.134999 + 90 7.53846 6.65854 0.0841478 + 91 7.84615 10.8537 0.144519 + 92 8.15385 5.58537 0.309331 + 93 8.46154 5.80488 0.103667 + 94 8.76923 7.60976 0.39288 + 95 9.07692 12.0244 0.462022 + 96 9.38462 0 0 + 97 9.69231 0 0 + 98 10 0 0 + 99 10.3077 0 0 + 100 10.6154 0 0 + 101 10.9231 0 0 + 102 11.2308 0 0 + 103 11.5385 0 0 + 104 11.8462 0 0 + 105 12.1538 0 0 + 106 12.4615 0 0 + 107 12.7692 0 0 + 108 13.0769 0 0 + 109 13.3846 0 0 + 110 13.6923 0 0 + 111 14 0 0 + 112 14.3077 0 0 + 113 14.6154 0 0 + 114 14.9231 0 0 + 115 15.2308 0 0 + 116 15.5385 0 0 + 117 15.8462 0 0 + 118 16.1538 0 0 + 119 16.4615 0 0 + 120 16.7692 0 0 + 121 17.0769 0 0 + 122 17.3846 0 0 + 123 17.6923 0 0 + 124 18 0 0 + 125 18.3077 0 0 + 126 18.6154 0 0 + 127 18.9231 0 0 + 128 19.2308 0 0 + 129 19.5385 0 0 + 130 19.8462 0 0 diff --git a/examples/USER/misc/flow_gauss/output-files/x_profiles b/examples/USER/misc/flow_gauss/output-files/x_profiles new file mode 100644 index 
0000000000..7a761345af --- /dev/null +++ b/examples/USER/misc/flow_gauss/output-files/x_profiles @@ -0,0 +1,36 @@ +# Chunk-averaged data for fix profiles and group density/mass +# Timestep Number-of-chunks Total-count +# Chunk Coord1 Ncount vx density/mass c_spa[1] c_spa[2] +4000 32 109675 + 1 -48.4375 97.7805 0.159561 0.782244 -9.17487 -8.9018 + 2 -45.3125 100.927 0.187846 0.807415 -9.24302 -9.92813 + 3 -42.1875 99.0976 0.227036 0.79278 -9.03415 -9.66032 + 4 -39.0625 101.146 0.243495 0.809171 -8.89515 -9.25314 + 5 -35.9375 98.7805 0.194616 0.790244 -9.13265 -8.52663 + 6 -32.8125 97.8049 0.165768 0.782439 -9.26009 -8.52446 + 7 -29.6875 100.195 0.0758064 0.801561 -9.02933 -8.50733 + 8 -26.5625 98.4878 0.054432 0.787902 -9.61672 -9.24963 + 9 -23.4375 99.9268 0.0740914 0.799415 -9.88959 -9.94984 + 10 -20.3125 99.7561 0.130294 0.798049 -10.2459 -9.39412 + 11 -17.1875 102.463 0.120168 0.819707 -10.6072 -10.254 + 12 -14.0625 47.6341 0.208545 0.381073 -9.85715 -10.0799 + 13 -10.9375 48.1951 0.238051 0.385561 -9.81349 -10.569 + 14 -7.8125 47.439 0.287107 0.379512 -10.0184 -9.63087 + 15 -4.6875 48.2439 0.22506 0.385951 -9.83794 -9.6963 + 16 -1.5625 48.4634 0.208869 0.387707 -9.29366 -10.0114 + 17 1.5625 46.4878 0.19447 0.371902 -10.2409 -9.84627 + 18 4.6875 47.2927 0.168034 0.378341 -10.1523 -11.908 + 19 7.8125 48.6829 0.145552 0.389463 -10.24 -11.0582 + 20 10.9375 48.8293 0.214036 0.390634 -9.27729 -10.1074 + 21 14.0625 46.9756 0.267083 0.375805 -9.24833 -9.83182 + 22 17.1875 97.2683 0.175404 0.778146 -9.64001 -8.61724 + 23 20.3125 101.146 0.10746 0.809171 -9.33416 -9.82308 + 24 23.4375 101.927 0.157503 0.815415 -9.76491 -10.1909 + 25 26.5625 101.024 0.179934 0.808195 -9.72775 -9.98559 + 26 29.6875 100.976 0.180631 0.807805 -9.33871 -10.0228 + 27 32.8125 96.4146 0.144418 0.771317 -9.74826 -9.79723 + 28 35.9375 101.244 0.117224 0.809951 -8.95584 -8.80226 + 29 39.0625 102 0.10507 0.816 -9.15563 -8.98232 + 30 42.1875 101.195 0.040236 0.809561 -9.1499 -8.95112 + 31 45.3125 
96.9512 0.0312252 0.77561 -9.20475 -9.0005 + 32 48.4375 100.244 0.103032 0.801951 -9.16324 -8.77526 diff --git a/examples/USER/quip/in.gap b/examples/USER/quip/in.gap index 37667e39b9..dd049a4737 100644 --- a/examples/USER/quip/in.gap +++ b/examples/USER/quip/in.gap @@ -17,6 +17,6 @@ fix 1 all nve thermo 10 timestep 0.001 -dump 1 all custom 10 dump.gap id fx fy fz +#dump 1 all custom 10 dump.gap id fx fy fz run 40 diff --git a/examples/USER/quip/in.molecular b/examples/USER/quip/in.molecular new file mode 100644 index 0000000000..4253399d7c --- /dev/null +++ b/examples/USER/quip/in.molecular @@ -0,0 +1,47 @@ +units metal +atom_style full +boundary p p p +timestep 0.0001 # 0.1 fs + +read_data methane-box-8.data + +# DISCLAIMER: This potential mixes parameters from methane and silane +# potentials and is NOT intended to be a realistic representation of either +# system. It is meant to demonstrate the use of hybrid QUIP/LAMMPS potentials, +# including the use of separate 'special_bonds' settings. + +pair_style hybrid/overlay lj/cut 8.0 quip + +# exclusion setting for quip; cannot be exactly 1.0 1.0 1.0, +# since that would not flag 1-2, 1-3, and 1-4 pairs in lj/cut +special_bonds lj/coul 0.999999999 0.999999999 0.999999999 + +# Intermolecular: OPLS (JACS 118 (45), p. 
11225 (1996)) +# Coulomb interactions omitted for simplicity +pair_coeff 1 1 lj/cut 0.0028619844 3.5 # CT +pair_coeff 2 2 lj/cut 0.0013009018 2.5 # HC +pair_coeff 1 2 lj/cut 0.0019295487 2.95 +pair_modify shift no +# change exclusion settings for lj/cut only: exclude bonded pairs +pair_modify pair lj/cut special lj/coul 0.0 0.0 0.0 + +# Intramolecular +# Tell QUIP to pretend this is silane (which is covered by the parameter file) +pair_coeff * * quip sw_example.xml "IP SW" 14 1 +bond_style none +angle_style none + +fix 1 all nve + +# Include diagnostics that allow us to compare to a pure QUIP run +compute equip all pair quip +compute evdw all pair lj/cut +compute vir all pressure NULL virial + +thermo_style custom step epair ke etotal temp press c_vir c_evdw c_equip +thermo 1 + +# dump 1 all custom 1 dump.molecular id type x y z fx fy fz +# dump_modify 1 sort id + +run 10 diff --git a/examples/USER/quip/in.sw b/examples/USER/quip/in.sw index c1367ac805..aaa4217b2f 100644 --- a/examples/USER/quip/in.sw +++ b/examples/USER/quip/in.sw @@ -10,6 +10,7 @@ read_data data_sw pair_style quip pair_coeff * * sw_example.xml "IP SW" 14 +velocity all create 10.0 355311 neighbor 0.3 bin neigh_modify delay 10 @@ -17,6 +18,6 @@ fix 1 all nve thermo 10 timestep 0.001 -dump 1 all custom 10 dump.sw id fx fy fz +#dump 1 all custom 10 dump.sw id fx fy fz -run 1 +run 100 diff --git a/examples/USER/quip/log.24Jul17.gap.g++.1 b/examples/USER/quip/log.24Jul17.gap.g++.1 new file mode 100644 index 0000000000..348f2ae0cc --- /dev/null +++ b/examples/USER/quip/log.24Jul17.gap.g++.1 @@ -0,0 +1,76 @@ +LAMMPS (24 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# Test of GAP potential for Si system + +units metal +boundary p p p + +atom_style atomic + +read_data data_gap + orthogonal box = (0 0 0) to (10.9685 10.9685 10.9685) + 1 by 1 by 1 MPI processor grid + reading atoms ... 
+ 64 atoms + +pair_style quip +pair_coeff * * gap_example.xml "Potential xml_label=GAP_2015_2_20_0_10_54_35_765" 14 + +neighbor 0.3 bin +neigh_modify delay 10 + +fix 1 all nve +thermo 10 +timestep 0.001 + +#dump 1 all custom 10 dump.gap id fx fy fz + +run 40 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.3 + ghost atom cutoff = 4.3 + binsize = 2.15, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair quip, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.689 | 2.689 | 2.689 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -10412.677 0 -10412.677 -107490.01 + 10 173.98393 -10414.096 0 -10412.679 -91270.969 + 20 417.38493 -10416.08 0 -10412.681 -42816.133 + 30 434.34789 -10416.217 0 -10412.68 2459.83 + 40 423.05899 -10416.124 0 -10412.679 22936.209 +Loop time of 1.83555 on 1 procs for 40 steps with 64 atoms + +Performance: 1.883 ns/day, 12.747 hours/ns, 21.792 timesteps/s +98.1% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.8349 | 1.8349 | 1.8349 | 0.0 | 99.96 +Neigh | 0.00022817 | 0.00022817 | 0.00022817 | 0.0 | 0.01 +Comm | 0.00013709 | 0.00013709 | 0.00013709 | 0.0 | 0.01 +Output | 9.8228e-05 | 9.8228e-05 | 9.8228e-05 | 0.0 | 0.01 +Modify | 8.6308e-05 | 8.6308e-05 | 8.6308e-05 | 0.0 | 0.00 +Other | | 0.0001223 | | | 0.01 + +Nlocal: 64 ave 64 max 64 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 303 ave 303 max 303 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 1080 ave 1080 max 1080 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1080 +Ave neighs/atom = 16.875 +Neighbor list builds = 2 
+Dangerous builds = 0 +Total wall time: 0:00:01 diff --git a/examples/USER/quip/log.24Jul17.gap.g++.4 b/examples/USER/quip/log.24Jul17.gap.g++.4 new file mode 100644 index 0000000000..a8127148b5 --- /dev/null +++ b/examples/USER/quip/log.24Jul17.gap.g++.4 @@ -0,0 +1,76 @@ +LAMMPS (24 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# Test of GAP potential for Si system + +units metal +boundary p p p + +atom_style atomic + +read_data data_gap + orthogonal box = (0 0 0) to (10.9685 10.9685 10.9685) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 64 atoms + +pair_style quip +pair_coeff * * gap_example.xml "Potential xml_label=GAP_2015_2_20_0_10_54_35_765" 14 + +neighbor 0.3 bin +neigh_modify delay 10 + +fix 1 all nve +thermo 10 +timestep 0.001 + +#dump 1 all custom 10 dump.gap id fx fy fz + +run 40 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.3 + ghost atom cutoff = 4.3 + binsize = 2.15, bins = 6 6 6 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair quip, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.685 | 2.779 | 3.06 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 0 -10412.677 0 -10412.677 -107490.01 + 10 173.98393 -10414.096 0 -10412.679 -91270.969 + 20 417.38493 -10416.08 0 -10412.681 -42816.133 + 30 434.34789 -10416.217 0 -10412.68 2459.83 + 40 423.05899 -10416.124 0 -10412.679 22936.209 +Loop time of 0.837345 on 4 procs for 40 steps with 64 atoms + +Performance: 4.127 ns/day, 5.815 hours/ns, 47.770 timesteps/s +96.0% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.73144 | 0.79214 | 0.83586 | 4.3 | 94.60 +Neigh | 5.7936e-05 | 6.5327e-05 | 7.1049e-05 | 0.0 | 0.01 
+Comm | 0.00085807 | 0.044631 | 0.10532 | 18.0 | 5.33 +Output | 0.00013208 | 0.00013494 | 0.00013733 | 0.0 | 0.02 +Modify | 6.0558e-05 | 7.8678e-05 | 9.5129e-05 | 0.0 | 0.01 +Other | | 0.0002971 | | | 0.04 + +Nlocal: 16 ave 18 max 14 min +Histogram: 1 0 1 0 0 0 0 1 0 1 +Nghost: 174 ave 182 max 167 min +Histogram: 1 0 0 0 2 0 0 0 0 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 270 ave 294 max 237 min +Histogram: 1 0 0 0 1 0 0 0 1 1 + +Total # of neighbors = 1080 +Ave neighs/atom = 16.875 +Neighbor list builds = 2 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/USER/quip/log.24Jul17.molecular.g++.1 b/examples/USER/quip/log.24Jul17.molecular.g++.1 new file mode 100644 index 0000000000..28fc63579b --- /dev/null +++ b/examples/USER/quip/log.24Jul17.molecular.g++.1 @@ -0,0 +1,130 @@ +LAMMPS (24 Jul 2017) + using 1 OpenMP thread(s) per MPI task +units metal +atom_style full +boundary p p p +timestep 0.0001 # 0.1 fs + +read_data methane-box-8.data + orthogonal box = (-0.499095 -0.270629 0.131683) to (8.4109 8.63937 9.04168) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 40 atoms + scanning bonds ... + 4 = max bonds/atom + scanning angles ... + 6 = max angles/atom + reading bonds ... + 32 bonds + reading angles ... + 48 angles + 4 = max # of 1-2 neighbors + 3 = max # of 1-3 neighbors + 3 = max # of 1-4 neighbors + 4 = max # of special neighbors + +# DISCLAIMER: This potential mixes parameters from methane and silane +# potentials and is NOT intended to be a realistic representation of either +# system. It is meant to demonstrate the use of hybrid QUIP/LAMMPS potentials, +# including the use of separate 'special_bonds' settings. 
+ +pair_style hybrid/overlay lj/cut 8.0 quip + +# exclusion setting for quip; cannot be exactly 1.0 1.0 1.0, +# since that would not flag 1-2, 1-3, and 1-4 pairs in lj/cut +special_bonds lj/coul 0.999999999 0.999999999 0.999999999 + 4 = max # of 1-2 neighbors + 3 = max # of 1-3 neighbors + 3 = max # of 1-4 neighbors + 4 = max # of special neighbors + +# Intermolecular: OPLS (JACS 118 (45), p. 11225 (1996)) +# Coulomb interactions ommitted for simplicity +pair_coeff 1 1 lj/cut 0.0028619844 3.5 # CT +pair_coeff 2 2 lj/cut 0.0013009018 2.5 # HC +pair_coeff 1 2 lj/cut 0.0019295487 2.95 +pair_modify shift no +# change exclusion settings for lj/cut only: exclude bonded pairs +pair_modify pair lj/cut special lj/coul 0.0 0.0 0.0 + +# Intramolecular +# Tell QUIP to pretend this is silane (which is covered by the parameter file) +pair_coeff * * quip sw_example.xml "IP SW" 14 1 +bond_style none +angle_style none + +fix 1 all nve + +# Include diagnostics that allow us to compare to a pure QUIP run +compute equip all pair quip +compute evdw all pair lj/cut +compute vir all pressure NULL virial + +thermo_style custom step epair ke etotal temp press c_vir c_evdw c_equip +thermo 1 + +# dump 1 all custom 1 dump.molecular id type x y z fx fy fz +# dump_modify 1 sort id + +run 10 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10 + ghost atom cutoff = 10 + binsize = 5, bins = 2 2 2 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair lj/cut, perpetual, half/full from (2) + attributes: half, newton on + pair build: halffull/newton + stencil: none + bin: none + (2) pair quip, perpetual + attributes: full, newton on + pair build: full/bin + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 8.288 | 8.288 | 8.288 Mbytes +Step E_pair KinEng TotEng Temp Press c_vir c_evdw c_equip + 0 -5.3530213 0 -5.3530213 0 518847.56 518847.56 -0.10904079 -5.2439805 + 1 -5.9384459 0.58384822 -5.3545977 115.81657 517370.5 516488.87 -0.10783656 -5.8306093 + 2 -7.669616 2.3104051 -5.3592109 458.30954 512986.36 509497.58 -0.10422283 -7.5653932 + 3 -10.473314 5.1069211 -5.3663924 1013.0477 505833.04 498121.43 -0.098049469 -10.375264 + 4 -14.234705 8.859182 -5.3755227 1757.3747 496127.44 482749.79 -0.089147485 -14.145557 + 5 -18.806851 13.420941 -5.3859098 2662.28 484148.76 463882.72 -0.077305196 -18.729546 + 6 -24.021727 18.625147 -5.3965797 3694.6259 470219.95 442095.39 -0.06194509 -23.959782 + 7 -29.702647 24.295529 -5.4071176 4819.446 454683.57 417996.56 -0.042859727 -29.659787 + 8 -35.67405 30.257258 -5.4167913 6002.0599 437887.03 392197.62 -0.019248651 -35.654801 + 9 -41.771047 36.345757 -5.4252893 7209.8209 420163.51 365280.27 0.0096063065 -41.780653 + 10 -47.845522 42.413161 -5.4323614 8413.3973 401821.91 337776.7 0.044743702 -47.890266 +Loop time of 0.0537777 on 1 procs for 10 steps with 40 atoms + +Performance: 1.607 ns/day, 14.938 hours/ns, 185.951 timesteps/s +90.3% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.053478 | 0.053478 | 0.053478 | 0.0 | 99.44 +Bond | 1.9073e-06 
| 1.9073e-06 | 1.9073e-06 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 7.7724e-05 | 7.7724e-05 | 7.7724e-05 | 0.0 | 0.14 +Output | 0.00018263 | 0.00018263 | 0.00018263 | 0.0 | 0.34 +Modify | 1.5974e-05 | 1.5974e-05 | 1.5974e-05 | 0.0 | 0.03 +Other | | 2.122e-05 | | | 0.04 + +Nlocal: 40 ave 40 max 40 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1175 ave 1175 max 1175 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 4768 ave 4768 max 4768 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 9536 ave 9536 max 9536 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9536 +Ave neighs/atom = 238.4 +Ave special neighs/atom = 4 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/USER/quip/log.24Jul17.molecular.g++.4 b/examples/USER/quip/log.24Jul17.molecular.g++.4 new file mode 100644 index 0000000000..a8be8e77bb --- /dev/null +++ b/examples/USER/quip/log.24Jul17.molecular.g++.4 @@ -0,0 +1,130 @@ +LAMMPS (24 Jul 2017) + using 1 OpenMP thread(s) per MPI task +units metal +atom_style full +boundary p p p +timestep 0.0001 # 0.1 fs + +read_data methane-box-8.data + orthogonal box = (-0.499095 -0.270629 0.131683) to (8.4109 8.63937 9.04168) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 40 atoms + scanning bonds ... + 4 = max bonds/atom + scanning angles ... + 6 = max angles/atom + reading bonds ... + 32 bonds + reading angles ... + 48 angles + 4 = max # of 1-2 neighbors + 3 = max # of 1-3 neighbors + 3 = max # of 1-4 neighbors + 4 = max # of special neighbors + +# DISCLAIMER: This potential mixes parameters from methane and silane +# potentials and is NOT intended to be a realistic representation of either +# system. It is meant to demonstrate the use of hybrid QUIP/LAMMPS potentials, +# including the use of separate 'special_bonds' settings. 
+ +pair_style hybrid/overlay lj/cut 8.0 quip + +# exclusion setting for quip; cannot be exactly 1.0 1.0 1.0, +# since that would not flag 1-2, 1-3, and 1-4 pairs in lj/cut +special_bonds lj/coul 0.999999999 0.999999999 0.999999999 + 4 = max # of 1-2 neighbors + 3 = max # of 1-3 neighbors + 3 = max # of 1-4 neighbors + 4 = max # of special neighbors + +# Intermolecular: OPLS (JACS 118 (45), p. 11225 (1996)) +# Coulomb interactions ommitted for simplicity +pair_coeff 1 1 lj/cut 0.0028619844 3.5 # CT +pair_coeff 2 2 lj/cut 0.0013009018 2.5 # HC +pair_coeff 1 2 lj/cut 0.0019295487 2.95 +pair_modify shift no +# change exclusion settings for lj/cut only: exclude bonded pairs +pair_modify pair lj/cut special lj/coul 0.0 0.0 0.0 + +# Intramolecular +# Tell QUIP to pretend this is silane (which is covered by the parameter file) +pair_coeff * * quip sw_example.xml "IP SW" 14 1 +bond_style none +angle_style none + +fix 1 all nve + +# Include diagnostics that allow us to compare to a pure QUIP run +compute equip all pair quip +compute evdw all pair lj/cut +compute vir all pressure NULL virial + +thermo_style custom step epair ke etotal temp press c_vir c_evdw c_equip +thermo 1 + +# dump 1 all custom 1 dump.molecular id type x y z fx fy fz +# dump_modify 1 sort id + +run 10 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10 + ghost atom cutoff = 10 + binsize = 5, bins = 2 2 2 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair lj/cut, perpetual, half/full from (2) + attributes: half, newton on + pair build: halffull/newton + stencil: none + bin: none + (2) pair quip, perpetual + attributes: full, newton on + pair build: full/bin + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 8.26 | 8.386 | 8.762 Mbytes +Step E_pair KinEng TotEng Temp Press c_vir c_evdw c_equip + 0 -5.3530213 0 -5.3530213 0 518847.56 518847.56 -0.10904079 -5.2439805 + 1 -5.9384459 0.58384822 -5.3545977 115.81657 517370.5 516488.87 -0.10783656 -5.8306093 + 2 -7.669616 2.3104051 -5.3592109 458.30954 512986.36 509497.58 -0.10422283 -7.5653932 + 3 -10.473314 5.1069211 -5.3663924 1013.0477 505833.04 498121.43 -0.098049469 -10.375264 + 4 -14.234705 8.859182 -5.3755227 1757.3747 496127.44 482749.79 -0.089147485 -14.145557 + 5 -18.806851 13.420941 -5.3859098 2662.28 484148.76 463882.72 -0.077305196 -18.729546 + 6 -24.021727 18.625147 -5.3965797 3694.6259 470219.95 442095.39 -0.06194509 -23.959782 + 7 -29.702647 24.295529 -5.4071176 4819.446 454683.57 417996.56 -0.042859727 -29.659787 + 8 -35.67405 30.257258 -5.4167913 6002.0599 437887.03 392197.62 -0.019248651 -35.654801 + 9 -41.771047 36.345757 -5.4252893 7209.8209 420163.51 365280.27 0.0096063065 -41.780653 + 10 -47.845522 42.413161 -5.4323614 8413.3973 401821.91 337776.7 0.044743702 -47.890266 +Loop time of 0.0506847 on 4 procs for 10 steps with 40 atoms + +Performance: 1.705 ns/day, 14.079 hours/ns, 197.298 timesteps/s +94.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.04216 | 0.045656 | 0.049349 | 1.2 | 90.08 +Bond | 1.9073e-06 | 
2.4438e-06 | 2.861e-06 | 0.0 | 0.00 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.00068545 | 0.004438 | 0.0079191 | 3.9 | 8.76 +Output | 0.00048304 | 0.00053334 | 0.00060964 | 0.0 | 1.05 +Modify | 1.1444e-05 | 1.4424e-05 | 1.9312e-05 | 0.0 | 0.03 +Other | | 4.047e-05 | | | 0.08 + +Nlocal: 10 ave 15 max 6 min +Histogram: 1 0 0 1 1 0 0 0 0 1 +Nghost: 878 ave 948 max 812 min +Histogram: 1 0 1 0 0 0 1 0 0 1 +Neighs: 1192 ave 1764 max 731 min +Histogram: 1 0 0 1 1 0 0 0 0 1 +FullNghs: 2384 ave 3527 max 1439 min +Histogram: 1 0 0 1 1 0 0 0 0 1 + +Total # of neighbors = 9536 +Ave neighs/atom = 238.4 +Ave special neighs/atom = 4 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/USER/quip/log.24Jul17.sw.g++.1 b/examples/USER/quip/log.24Jul17.sw.g++.1 new file mode 100644 index 0000000000..c8115f4cfc --- /dev/null +++ b/examples/USER/quip/log.24Jul17.sw.g++.1 @@ -0,0 +1,83 @@ +LAMMPS (24 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# Test of SW potential for Si system + +units metal +boundary p p p + +atom_style atomic + +read_data data_sw + orthogonal box = (0 0 0) to (5.431 5.431 5.431) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 8 atoms + +pair_style quip +pair_coeff * * sw_example.xml "IP SW" 14 + +velocity all create 10.0 355311 +neighbor 0.3 bin +neigh_modify delay 10 + +fix 1 all nve +thermo 10 +timestep 0.001 + +#dump 1 all custom 10 dump.sw id fx fy fz + +run 100 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.2258 + ghost atom cutoff = 4.2258 + binsize = 2.1129, bins = 3 3 3 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair quip, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.684 | 2.684 | 2.684 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 10 -34.68 0 -34.670952 32.206289 + 10 4.5659178 -34.675073 0 -34.670942 46.253731 + 20 1.606683 -34.672391 0 -34.670937 44.736892 + 30 6.7007748 -34.677011 0 -34.670948 16.403049 + 40 5.682757 -34.676087 0 -34.670945 18.696408 + 50 2.2140716 -34.672942 0 -34.670939 37.592282 + 60 5.0475382 -34.675512 0 -34.670944 37.331666 + 70 7.0990979 -34.677369 0 -34.670946 40.533757 + 80 5.7306189 -34.676128 0 -34.670943 47.748813 + 90 5.0895648 -34.675549 0 -34.670944 38.092721 + 100 4.1070919 -34.674659 0 -34.670943 28.737864 +Loop time of 0.384233 on 1 procs for 100 steps with 8 atoms + +Performance: 22.486 ns/day, 1.067 hours/ns, 260.259 timesteps/s +94.6% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.38365 | 0.38365 | 0.38365 | 0.0 | 99.85 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.00017333 | 0.00017333 | 0.00017333 | 0.0 | 0.05 +Output | 0.00014162 | 0.00014162 | 0.00014162 | 0.0 | 0.04 +Modify | 7.081e-05 | 7.081e-05 | 7.081e-05 | 0.0 | 0.02 +Other | | 0.0001957 | | | 0.05 + +Nlocal: 8 ave 8 max 8 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 162 ave 162 max 162 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 128 ave 128 max 128 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 128 +Ave neighs/atom = 16 +Neighbor list builds = 0 +Dangerous builds = 0 
+Total wall time: 0:00:00 diff --git a/examples/USER/quip/log.24Jul17.sw.g++.4 b/examples/USER/quip/log.24Jul17.sw.g++.4 new file mode 100644 index 0000000000..d7306c7055 --- /dev/null +++ b/examples/USER/quip/log.24Jul17.sw.g++.4 @@ -0,0 +1,83 @@ +LAMMPS (24 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# Test of SW potential for Si system + +units metal +boundary p p p + +atom_style atomic + +read_data data_sw + orthogonal box = (0 0 0) to (5.431 5.431 5.431) + 1 by 2 by 2 MPI processor grid + reading atoms ... + 8 atoms + +pair_style quip +pair_coeff * * sw_example.xml "IP SW" 14 + +velocity all create 10.0 355311 +neighbor 0.3 bin +neigh_modify delay 10 + +fix 1 all nve +thermo 10 +timestep 0.001 + +#dump 1 all custom 10 dump.sw id fx fy fz + +run 100 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 4.2258 + ghost atom cutoff = 4.2258 + binsize = 2.1129, bins = 3 3 3 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair quip, perpetual + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 2.698 | 2.698 | 2.698 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 10 -34.68 0 -34.670952 32.206289 + 10 4.5659178 -34.675073 0 -34.670942 46.253731 + 20 1.606683 -34.672391 0 -34.670937 44.736892 + 30 6.7007748 -34.677011 0 -34.670948 16.403049 + 40 5.682757 -34.676087 0 -34.670945 18.696408 + 50 2.2140716 -34.672942 0 -34.670939 37.592282 + 60 5.0475382 -34.675512 0 -34.670944 37.331666 + 70 7.0990979 -34.677369 0 -34.670946 40.533757 + 80 5.7306189 -34.676128 0 -34.670943 47.748813 + 90 5.0895648 -34.675549 0 -34.670944 38.092721 + 100 4.1070919 -34.674659 0 -34.670943 28.737864 +Loop time of 0.423803 on 4 procs for 100 steps with 8 atoms + +Performance: 20.387 ns/day, 1.177 hours/ns, 235.959 timesteps/s +90.6% CPU use with 4 MPI tasks x 1 OpenMP threads + 
+MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.39332 | 0.40011 | 0.40704 | 0.8 | 94.41 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.015632 | 0.022605 | 0.029425 | 3.3 | 5.33 +Output | 0.00025702 | 0.00028491 | 0.00035429 | 0.0 | 0.07 +Modify | 7.3671e-05 | 8.1897e-05 | 8.9884e-05 | 0.0 | 0.02 +Other | | 0.0007259 | | | 0.17 + +Nlocal: 2 ave 2 max 2 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Nghost: 113 ave 113 max 113 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 32 ave 32 max 32 min +Histogram: 4 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 128 +Ave neighs/atom = 16 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/USER/quip/methane-box-8.data b/examples/USER/quip/methane-box-8.data new file mode 100644 index 0000000000..2a55fcf551 --- /dev/null +++ b/examples/USER/quip/methane-box-8.data @@ -0,0 +1,162 @@ +LAMMPS data file. CGCMM style. atom_style full generated by VMD/TopoTools v1.1 on Sat Oct 22 17:48:43 BST 2016. 
Original generated with Packmol + 40 atoms + 32 bonds + 48 angles + 0 dihedrals + 0 impropers + 2 atom types + 1 bond types + 1 angle types + 0 dihedral types + 0 improper types + -0.499095 8.410905 xlo xhi + -0.270629 8.639371 ylo yhi + 0.131683 9.041683 zlo zhi + +# Pair Coeffs +# +# 1 CT +# 2 HC + +# Bond Coeffs +# +# 1 CT-HC + +# Angle Coeffs +# +# 1 HC-CT-HC + + Masses + + 1 12.011000 # CT + 2 1.008000 # HC + + Atoms + +1 1 1 -0.240000 3.937038 0.677603 7.362249 # CT +2 1 2 0.060000 4.193022 1.709034 7.595834 # HC +3 1 2 0.060000 2.905136 0.486052 7.649386 # HC +4 1 2 0.060000 4.596317 0.007308 7.909996 # HC +5 1 2 0.060000 4.053670 0.507989 6.293814 # HC +6 2 1 -0.240000 6.131801 2.711096 0.901469 # CT +7 2 2 0.060000 6.787439 1.886720 0.628555 # HC +8 2 2 0.060000 5.728610 3.167652 -0.000171 # HC +9 2 2 0.060000 6.696346 3.453106 1.462433 # HC +10 2 2 0.060000 5.314820 2.336948 1.515051 # HC +11 3 1 -0.240000 5.723143 6.225007 1.430856 # CT +12 3 2 0.060000 5.585279 6.712817 2.393651 # HC +13 3 2 0.060000 5.584847 6.951755 0.632938 # HC +14 3 2 0.060000 4.994507 5.424203 1.322354 # HC +15 3 2 0.060000 6.727906 5.811248 1.374455 # HC +16 4 1 -0.240000 5.573754 5.038579 4.999124 # CT +17 4 2 0.060000 4.512787 5.184293 5.191620 # HC +18 4 2 0.060000 6.006150 5.966299 4.629893 # HC +19 4 2 0.060000 5.703088 4.256326 4.253924 # HC +20 4 2 0.060000 6.073008 4.747398 5.921016 # HC +21 5 1 -0.240000 2.108870 2.623461 3.348534 # CT +22 5 2 0.060000 2.886488 2.470897 2.602897 # HC +23 5 2 0.060000 1.382727 3.341833 2.973541 # HC +24 5 2 0.060000 2.554989 3.003606 4.265288 # HC +25 5 2 0.060000 1.611274 1.677549 3.552431 # HC +26 6 1 -0.240000 6.106165 2.015183 5.526875 # CT +27 6 2 0.060000 6.075817 2.038391 4.439456 # HC +28 6 2 0.060000 6.076127 0.982573 5.868599 # HC +29 6 2 0.060000 5.248943 2.554122 5.925227 # HC +30 6 2 0.060000 7.023739 2.485633 5.874240 # HC +31 7 1 -0.240000 0.644265 2.699668 7.212713 # CT +32 7 2 0.060000 0.403413 2.521819 6.166625 # HC +33 
7 2 0.060000 0.098429 1.993976 7.835627 # HC +34 7 2 0.060000 0.361861 3.715309 7.482326 # HC +35 7 2 0.060000 1.713326 2.567585 7.366300 # HC +36 8 1 -0.240000 0.588072 6.428183 7.473536 # CT +37 8 2 0.060000 0.540903 6.363141 6.388417 # HC +38 8 2 0.060000 -0.008121 5.629967 7.910991 # HC +39 8 2 0.060000 0.197701 7.391140 7.796481 # HC +40 8 2 0.060000 1.621770 6.328495 7.798280 # HC + + Bonds + +1 1 1 3 +2 1 1 5 +3 1 1 2 +4 1 1 4 +5 1 6 7 +6 1 6 9 +7 1 6 8 +8 1 6 10 +9 1 11 14 +10 1 11 13 +11 1 11 12 +12 1 11 15 +13 1 16 17 +14 1 16 18 +15 1 16 19 +16 1 16 20 +17 1 21 22 +18 1 21 24 +19 1 21 25 +20 1 21 23 +21 1 26 27 +22 1 26 28 +23 1 26 29 +24 1 26 30 +25 1 31 33 +26 1 31 32 +27 1 31 34 +28 1 31 35 +29 1 36 38 +30 1 36 37 +31 1 36 39 +32 1 36 40 + + Angles + +1 1 3 1 5 +2 1 2 1 3 +3 1 3 1 4 +4 1 2 1 5 +5 1 4 1 5 +6 1 2 1 4 +7 1 7 6 9 +8 1 7 6 8 +9 1 7 6 10 +10 1 8 6 9 +11 1 9 6 10 +12 1 8 6 10 +13 1 13 11 14 +14 1 12 11 14 +15 1 14 11 15 +16 1 12 11 13 +17 1 13 11 15 +18 1 12 11 15 +19 1 17 16 18 +20 1 17 16 19 +21 1 17 16 20 +22 1 18 16 19 +23 1 18 16 20 +24 1 19 16 20 +25 1 22 21 24 +26 1 22 21 25 +27 1 22 21 23 +28 1 24 21 25 +29 1 23 21 24 +30 1 23 21 25 +31 1 27 26 28 +32 1 27 26 29 +33 1 27 26 30 +34 1 28 26 29 +35 1 28 26 30 +36 1 29 26 30 +37 1 32 31 33 +38 1 33 31 34 +39 1 33 31 35 +40 1 32 31 34 +41 1 32 31 35 +42 1 34 31 35 +43 1 37 36 38 +44 1 38 36 39 +45 1 38 36 40 +46 1 37 36 39 +47 1 37 36 40 +48 1 39 36 40 + diff --git a/examples/USER/quip/out.molecular b/examples/USER/quip/out.molecular new file mode 100644 index 0000000000..0e8d07d389 --- /dev/null +++ b/examples/USER/quip/out.molecular @@ -0,0 +1,93 @@ +LAMMPS (6 Jul 2017) +Reading data file ... + orthogonal box = (-0.499095 -0.270629 0.131683) to (8.4109 8.63937 9.04168) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 40 atoms + scanning bonds ... + 4 = max bonds/atom + scanning angles ... + 6 = max angles/atom + reading bonds ... + 32 bonds + reading angles ... 
+ 48 angles +Finding 1-2 1-3 1-4 neighbors ... + special bond factors lj: 0 0 0 + special bond factors coul: 0 0 0 + 4 = max # of 1-2 neighbors + 3 = max # of 1-3 neighbors + 3 = max # of 1-4 neighbors + 4 = max # of special neighbors +Finding 1-2 1-3 1-4 neighbors ... + special bond factors lj: 1 1 1 + special bond factors coul: 1 1 1 + 4 = max # of 1-2 neighbors + 3 = max # of 1-3 neighbors + 3 = max # of 1-4 neighbors + 4 = max # of special neighbors +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10 + ghost atom cutoff = 10 + binsize = 5, bins = 2 2 2 + 2 neighbor lists, perpetual/occasional/extra = 2 0 0 + (1) pair lj/cut, perpetual, half/full from (2) + attributes: half, newton on + pair build: halffull/newton + stencil: none + bin: none + (2) pair quip, perpetual + attributes: full, newton on + pair build: full/bin + stencil: full/bin/3d + bin: standard +Setting up Verlet run ... 
+ Unit style : metal + Current step : 0 + Time step : 0.0001 +Per MPI rank memory allocation (min/avg/max) = 9.543 | 9.543 | 9.543 Mbytes +Step E_pair KinEng TotEng Temp Press c_vir c_evdw c_equip + 0 -5.3530213 0 -5.3530213 0 518847.56 518847.56 -0.10904079 -5.2439805 + 1 -5.9384459 0.58384822 -5.3545977 115.81657 517370.5 516488.87 -0.10783656 -5.8306093 + 2 -7.669616 2.3104051 -5.3592109 458.30954 512986.36 509497.58 -0.10422283 -7.5653932 + 3 -10.473314 5.1069211 -5.3663924 1013.0477 505833.04 498121.43 -0.098049469 -10.375264 + 4 -14.234705 8.859182 -5.3755227 1757.3747 496127.44 482749.79 -0.089147485 -14.145557 + 5 -18.806851 13.420941 -5.3859098 2662.28 484148.76 463882.72 -0.077305196 -18.729546 + 6 -24.021727 18.625147 -5.3965797 3694.6259 470219.95 442095.39 -0.06194509 -23.959782 + 7 -29.702647 24.295529 -5.4071176 4819.446 454683.57 417996.56 -0.042859727 -29.659787 + 8 -35.67405 30.257258 -5.4167913 6002.0599 437887.03 392197.62 -0.019248651 -35.654801 + 9 -41.771047 36.345757 -5.4252893 7209.8209 420163.51 365280.27 0.0096063065 -41.780653 + 10 -47.845522 42.413161 -5.4323614 8413.3973 401821.91 337776.7 0.044743702 -47.890266 +Loop time of 0.131692 on 1 procs for 10 steps with 40 atoms + +Performance: 0.656 ns/day, 36.581 hours/ns, 75.935 timesteps/s +97.2% CPU use with 1 MPI tasks x no OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.12961 | 0.12961 | 0.12961 | 0.0 | 98.42 +Bond | 7.391e-06 | 7.391e-06 | 7.391e-06 | 0.0 | 0.01 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.00013185 | 0.00013185 | 0.00013185 | 0.0 | 0.10 +Output | 0.0018771 | 0.0018771 | 0.0018771 | 0.0 | 1.43 +Modify | 2.5988e-05 | 2.5988e-05 | 2.5988e-05 | 0.0 | 0.02 +Other | | 4.268e-05 | | | 0.03 + +Nlocal: 40 ave 40 max 40 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1175 ave 1175 max 1175 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 4768 ave 4768 max 
4768 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 9536 ave 9536 max 9536 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 9536 +Ave neighs/atom = 238.4 +Ave special neighs/atom = 4 +Neighbor list builds = 0 +Dangerous builds = 0 +Total wall time: 0:00:00 diff --git a/examples/accelerate/README b/examples/accelerate/README index 1fab296a53..c4eb5dcc8d 100644 --- a/examples/accelerate/README +++ b/examples/accelerate/README @@ -1,14 +1,11 @@ These are example scripts that can be run with any of the acclerator packages in LAMMPS: -USER-CUDA, GPU, USER-INTEL, KOKKOS, USER-OMP, OPT +GPU, USER-INTEL, KOKKOS, USER-OMP, OPT The easiest way to build LAMMPS with these packages -is via the src/Make.py tool described in Section 2.4 -of the manual. You can also type "Make.py -h" to see -its options. The easiest way to run these scripts -is by using the appropriate - +is via the flags described in Section 4 of the manual. +The easiest way to run these scripts is by using the appropriate Details on the individual accelerator packages can be found in doc/Section_accelerate.html. @@ -16,21 +13,6 @@ can be found in doc/Section_accelerate.html. Build LAMMPS with one or more of the accelerator packages -The following command will invoke the src/Make.py tool with one of the -command-lines from the Make.list file: - -../../src/Make.py -r Make.list target - -target = one or more of the following: - cpu, omp, opt - cuda_double, cuda_mixed, cuda_single - gpu_double, gpu_mixed, gpu_single - intel_cpu, intel_phi - kokkos_omp, kokkos_cuda, kokkos_phi - -If successful, the build will produce the file lmp_target in this -directory. - Note that in addition to any accelerator packages, these packages also need to be installed to run all of the example scripts: ASPHERE, MOLECULE, KSPACE, RIGID. @@ -38,39 +20,11 @@ MOLECULE, KSPACE, RIGID. 
These two targets will build a single LAMMPS executable with all the CPU accelerator packages installed (USER-INTEL for CPU, KOKKOS for OMP, USER-OMP, OPT) or all the GPU accelerator packages installed -(USER-CUDA, GPU, KOKKOS for CUDA): +(GPU, KOKKOS for CUDA): -target = all_cpu, all_gpu - -Note that the Make.py commands in Make.list assume an MPI environment -exists on your machine and use mpicxx as the wrapper compiler with -whatever underlying compiler it wraps by default. If you add "-cc mpi -wrap=g++" or "-cc mpi wrap=icc" after the target, you can choose the -underlying compiler for mpicxx to invoke. E.g. - -../../src/Make.py -r Make.list intel_cpu -cc mpi wrap=icc - -You should do this for any build that includes the USER-INTEL -package, since it will perform best with the Intel compilers. - -Note that for kokkos_cuda, it needs to be "-cc nvcc" instead of "mpi", -since a KOKKOS for CUDA build requires NVIDIA nvcc as the wrapper -compiler. - -Also note that the Make.py commands in Make.list use the default -FFT support which is via the KISS library. If you want to -build with another FFT library, e.g. FFTW3, then you can add -"-fft fftw3" after the target, e.g. - -../../src/Make.py -r Make.list gpu -fft fftw3 - -For any build with USER-CUDA, GPU, or KOKKOS for CUDA, be sure to set +For any build with GPU, or KOKKOS for CUDA, be sure to set the arch=XX setting to the appropriate value for the GPUs and Cuda -environment on your system. What is defined in the Make.list file is -arch=21 for older Fermi GPUs. This can be overridden as follows, -e.g. for Kepler GPUs: - -../../src/Make.py -r Make.list gpu_double -gpu mode=double arch=35 +environment on your system. --------------------- @@ -118,12 +72,6 @@ Note that when running in.lj.5.0 (which has a long cutoff) with the GPU package, the "-pk tpa" setting should be > 1 (e.g. 8) for best performance. 
-** USER-CUDA package - -lmp_machine -c on -sf cuda < in.lj -mpirun -np 1 lmp_machine -c on -sf cuda < in.lj # 1 MPI, 1 MPI/GPU -mpirun -np 2 lmp_machine -c on -sf cuda -pk cuda 2 < in.lj # 2 MPI, 1 MPI/GPU - ** KOKKOS package for OMP lmp_kokkos_omp -k on t 1 -sf kk -pk kokkos neigh half < in.lj diff --git a/examples/airebo/data.airebo b/examples/airebo/data.airebo new file mode 100644 index 0000000000..90e20b2388 --- /dev/null +++ b/examples/airebo/data.airebo @@ -0,0 +1,78 @@ +LAMMPS data file from restart file: timestep = 1, procs = 1 + +60 atoms + +2 atom types + +-2.1 2.1 xlo xhi +-2.1 2.1 ylo yhi +0 25.5790000000 zlo zhi + +Masses + +1 12.01 +2 1.00794 + +Atoms + +1 1 0.0000000000 0.0000000000 0.0000000000 +2 2 0.9010066786 -0.6310205743 0.0000000000 +3 2 -0.9010066786 -0.6310205743 0.0000000000 +4 1 0.0000000000 0.8470061967 1.2789591482 +5 2 0.9010066786 1.4780267710 1.2789591482 +6 2 -0.9010066786 1.4780267710 1.2789591482 +7 1 0.0000000000 0.0000000000 2.5579182965 +8 2 0.9010066786 -0.6310205743 2.5579182965 +9 2 -0.9010066786 -0.6310205743 2.5579182965 +10 1 0.0000000000 0.8470061967 3.8368774447 +11 2 0.9010066786 1.4780267710 3.8368774447 +12 2 -0.9010066786 1.4780267710 3.8368774447 +13 1 0.0000000000 0.0000000000 5.1158365929 +14 2 0.9010066786 -0.6310205743 5.1158365929 +15 2 -0.9010066786 -0.6310205743 5.1158365929 +16 1 0.0000000000 0.8470061967 6.3947957411 +17 2 0.9010066786 1.4780267710 6.3947957411 +18 2 -0.9010066786 1.4780267710 6.3947957411 +19 1 0.0000000000 0.0000000000 7.6737548894 +20 2 0.9010066786 -0.6310205743 7.6737548894 +21 2 -0.9010066786 -0.6310205743 7.6737548894 +22 1 0.0000000000 0.8470061967 8.9527140376 +23 2 0.9010066786 1.4780267710 8.9527140376 +24 2 -0.9010066786 1.4780267710 8.9527140376 +25 1 0.0000000000 0.0000000000 10.2316731858 +26 2 0.9010066786 -0.6310205743 10.2316731858 +27 2 -0.9010066786 -0.6310205743 10.2316731858 +28 1 0.0000000000 0.8470061967 11.5106323340 +29 2 0.9010066786 1.4780267710 11.5106323340 
+30 2 -0.9010066786 1.4780267710 11.5106323340 +31 1 0.0000000000 0.0000000000 12.7895914823 +32 2 0.9010066786 -0.6310205743 12.7895914823 +33 2 -0.9010066786 -0.6310205743 12.7895914823 +34 1 0.0000000000 0.8470061967 14.0685506305 +35 2 0.9010066786 1.4780267710 14.0685506305 +36 2 -0.9010066786 1.4780267710 14.0685506305 +37 1 0.0000000000 0.0000000000 15.3475097787 +38 2 0.9010066786 -0.6310205743 15.3475097787 +39 2 -0.9010066786 -0.6310205743 15.3475097787 +40 1 0.0000000000 0.8470061967 16.6264689269 +41 2 0.9010066786 1.4780267710 16.6264689269 +42 2 -0.9010066786 1.4780267710 16.6264689269 +43 1 0.0000000000 0.0000000000 17.9054280752 +44 2 0.9010066786 -0.6310205743 17.9054280752 +45 2 -0.9010066786 -0.6310205743 17.9054280752 +46 1 0.0000000000 0.8470061967 19.1843872234 +47 2 0.9010066786 1.4780267710 19.1843872234 +48 2 -0.9010066786 1.4780267710 19.1843872234 +49 1 0.0000000000 0.0000000000 20.4633463716 +50 2 0.9010066786 -0.6310205743 20.4633463716 +51 2 -0.9010066786 -0.6310205743 20.4633463716 +52 1 0.0000000000 0.8470061967 21.7423055198 +53 2 0.9010066786 1.4780267710 21.7423055198 +54 2 -0.9010066786 1.4780267710 21.7423055198 +55 1 0.0000000000 0.0000000000 23.0212646681 +56 2 0.9010066786 -0.6310205743 23.0212646681 +57 2 -0.9010066786 -0.6310205743 23.0212646681 +58 1 0.0000000000 0.8470061967 24.3002238163 +59 2 0.9010066786 1.4780267710 24.3002238163 +60 2 -0.9010066786 1.4780267710 24.3002238163 + diff --git a/examples/airebo/in.airebo b/examples/airebo/in.airebo new file mode 100644 index 0000000000..5b0e36ff4a --- /dev/null +++ b/examples/airebo/in.airebo @@ -0,0 +1,22 @@ +# AIREBO polyethylene benchmark + +units metal +atom_style atomic + +read_data data.airebo + +replicate 17 16 2 + +neighbor 0.5 bin +neigh_modify delay 5 every 1 + +pair_style airebo 3.0 1 1 +pair_coeff * * ../../potentials/CH.airebo C H + +velocity all create 300.0 761341 + +fix 1 all nve +timestep 0.0005 + +thermo 10 +run 100 diff --git
a/examples/airebo/in.airebo-m b/examples/airebo/in.airebo-m new file mode 100644 index 0000000000..3ec29482a7 --- /dev/null +++ b/examples/airebo/in.airebo-m @@ -0,0 +1,22 @@ +# AIREBO polyethylene benchmark + +units metal +atom_style atomic + +read_data data.airebo + +replicate 17 16 2 + +neighbor 0.5 bin +neigh_modify delay 5 every 1 + +pair_style airebo/morse 3.0 1 1 +pair_coeff * * ../../potentials/CH.airebo-m C H + +velocity all create 300.0 761341 + +fix 1 all nve +timestep 0.0005 + +thermo 10 +run 100 diff --git a/examples/airebo/log.23Jun17.airebo-m.g++.1 b/examples/airebo/log.23Jun17.airebo-m.g++.1 new file mode 100644 index 0000000000..1483fcb4a6 --- /dev/null +++ b/examples/airebo/log.23Jun17.airebo-m.g++.1 @@ -0,0 +1,86 @@ +LAMMPS (23 Jun 2017) + using 1 OpenMP thread(s) per MPI task +# AIREBO polyethelene benchmark + +units metal +atom_style atomic + +read_data data.airebo + orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 60 atoms + +replicate 17 16 2 + orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158) + 1 by 1 by 1 MPI processor grid + 32640 atoms + +neighbor 0.5 bin +neigh_modify delay 5 every 1 + +pair_style airebo/morse 3.0 1 1 +pair_coeff * * ../../potentials/CH.airebo-m C H +Reading potential file ../../potentials/CH.airebo-m with DATE: 2016-03-15 + +velocity all create 300.0 761341 + +fix 1 all nve +timestep 0.0005 + +thermo 10 +run 100 +Neighbor list info ... 
+ update every 1 steps, delay 5 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.7 + ghost atom cutoff = 10.7 + binsize = 5.35, bins = 14 13 10 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair airebo/morse, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 106.4 | 106.4 | 106.4 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 300 -139283.82 0 -138018.14 152.25271 + 10 166.76148 -138718.75 0 -138015.19 17412.343 + 20 207.7293 -138891.79 0 -138015.4 -19395.339 + 30 138.54469 -138596.42 0 -138011.92 -11909.248 + 40 153.95239 -138661.7 0 -138012.19 -2448.7701 + 50 126.22907 -138545.12 0 -138012.57 5206.1374 + 60 181.02757 -138778.28 0 -138014.54 22506.777 + 70 185.72779 -138799.18 0 -138015.61 -10803.744 + 80 164.28396 -138709.5 0 -138016.4 -1524.7353 + 90 180.26403 -138776.42 0 -138015.9 -27143.467 + 100 164.05694 -138706.58 0 -138014.44 5157.5516 +Loop time of 117.672 on 1 procs for 100 steps with 32640 atoms + +Performance: 0.037 ns/day, 653.734 hours/ns, 0.850 timesteps/s +99.3% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 108.31 | 108.31 | 108.31 | 0.0 | 92.04 +Neigh | 9.2199 | 9.2199 | 9.2199 | 0.0 | 7.84 +Comm | 0.052942 | 0.052942 | 0.052942 | 0.0 | 0.04 +Output | 0.0015149 | 0.0015149 | 0.0015149 | 0.0 | 0.00 +Modify | 0.060962 | 0.060962 | 0.060962 | 0.0 | 0.05 +Other | | 0.02656 | | | 0.02 + +Nlocal: 32640 ave 32640 max 32640 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 48094 ave 48094 max 48094 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 2.22109e+07 ave 2.22109e+07 max 2.22109e+07 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 22210922 +Ave 
neighs/atom = 680.482 +Neighbor list builds = 8 +Dangerous builds = 0 +Total wall time: 0:02:00 diff --git a/examples/airebo/log.23Jun17.airebo-m.g++.4 b/examples/airebo/log.23Jun17.airebo-m.g++.4 new file mode 100644 index 0000000000..3a3d922bcb --- /dev/null +++ b/examples/airebo/log.23Jun17.airebo-m.g++.4 @@ -0,0 +1,86 @@ +LAMMPS (23 Jun 2017) + using 1 OpenMP thread(s) per MPI task +# AIREBO polyethelene benchmark + +units metal +atom_style atomic + +read_data data.airebo + orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579) + 1 by 1 by 4 MPI processor grid + reading atoms ... + 60 atoms + +replicate 17 16 2 + orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158) + 2 by 2 by 1 MPI processor grid + 32640 atoms + +neighbor 0.5 bin +neigh_modify delay 5 every 1 + +pair_style airebo/morse 3.0 1 1 +pair_coeff * * ../../potentials/CH.airebo-m C H +Reading potential file ../../potentials/CH.airebo-m with DATE: 2016-03-15 + +velocity all create 300.0 761341 + +fix 1 all nve +timestep 0.0005 + +thermo 10 +run 100 +Neighbor list info ... 
+ update every 1 steps, delay 5 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.7 + ghost atom cutoff = 10.7 + binsize = 5.35, bins = 14 13 10 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair airebo/morse, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 29.37 | 29.75 | 30.13 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 300 -139283.82 0 -138018.14 152.25271 + 10 166.76148 -138718.75 0 -138015.19 17412.343 + 20 207.7293 -138891.79 0 -138015.4 -19395.339 + 30 138.54469 -138596.42 0 -138011.92 -11909.248 + 40 153.95239 -138661.7 0 -138012.19 -2448.7701 + 50 126.22907 -138545.12 0 -138012.57 5206.1374 + 60 181.02757 -138778.28 0 -138014.54 22506.777 + 70 185.72779 -138799.18 0 -138015.61 -10803.744 + 80 164.28396 -138709.5 0 -138016.4 -1524.7353 + 90 180.26403 -138776.42 0 -138015.9 -27143.467 + 100 164.05694 -138706.58 0 -138014.44 5157.5516 +Loop time of 32.9268 on 4 procs for 100 steps with 32640 atoms + +Performance: 0.131 ns/day, 182.927 hours/ns, 3.037 timesteps/s +99.4% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 28.045 | 28.537 | 29.42 | 10.4 | 86.67 +Neigh | 3.163 | 3.237 | 3.3761 | 4.7 | 9.83 +Comm | 0.09883 | 1.1206 | 1.6862 | 60.4 | 3.40 +Output | 0.00099325 | 0.0011329 | 0.0012462 | 0.3 | 0.00 +Modify | 0.016013 | 0.016726 | 0.017257 | 0.4 | 0.05 +Other | | 0.01459 | | | 0.04 + +Nlocal: 8160 ave 8167 max 8153 min +Histogram: 1 0 1 0 0 0 0 1 0 1 +Nghost: 22581 ave 22594 max 22569 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 5.55273e+06 ave 5.55908e+06 max 5.54496e+06 min +Histogram: 1 0 0 0 0 1 1 0 0 1 + +Total # of neighbors = 22210922 +Ave 
neighs/atom = 680.482 +Neighbor list builds = 8 +Dangerous builds = 0 +Total wall time: 0:00:33 diff --git a/examples/airebo/log.23Jun17.airebo.g++.1 b/examples/airebo/log.23Jun17.airebo.g++.1 new file mode 100644 index 0000000000..0ef895dc28 --- /dev/null +++ b/examples/airebo/log.23Jun17.airebo.g++.1 @@ -0,0 +1,86 @@ +LAMMPS (23 Jun 2017) + using 1 OpenMP thread(s) per MPI task +# AIREBO polyethelene benchmark + +units metal +atom_style atomic + +read_data data.airebo + orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579) + 1 by 1 by 1 MPI processor grid + reading atoms ... + 60 atoms + +replicate 17 16 2 + orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158) + 1 by 1 by 1 MPI processor grid + 32640 atoms + +neighbor 0.5 bin +neigh_modify delay 5 every 1 + +pair_style airebo 3.0 1 1 +pair_coeff * * ../../potentials/CH.airebo C H +Reading potential file ../../potentials/CH.airebo with DATE: 2011-10-25 + +velocity all create 300.0 761341 + +fix 1 all nve +timestep 0.0005 + +thermo 10 +run 100 +Neighbor list info ... 
+ update every 1 steps, delay 5 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.7 + ghost atom cutoff = 10.7 + binsize = 5.35, bins = 14 13 10 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair airebo, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 106.4 | 106.4 | 106.4 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 300 -139300.72 0 -138035.04 7988.6647 + 10 161.34683 -138712.9 0 -138032.19 33228.921 + 20 208.59504 -138912.79 0 -138032.74 -3211.8806 + 30 139.7513 -138618.85 0 -138029.25 10878.143 + 40 142.14562 -138629.02 0 -138029.32 14601.302 + 50 114.23401 -138510.95 0 -138029 24691.125 + 60 164.92002 -138726 0 -138030.21 35125.541 + 70 162.15256 -138715.9 0 -138031.79 5658.7946 + 80 157.16184 -138695.77 0 -138032.72 19824.698 + 90 196.15907 -138860.65 0 -138033.07 -7950.8462 + 100 178.31875 -138784.89 0 -138032.57 30997.671 +Loop time of 110.107 on 1 procs for 100 steps with 32640 atoms + +Performance: 0.039 ns/day, 611.705 hours/ns, 0.908 timesteps/s +99.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 100.76 | 100.76 | 100.76 | 0.0 | 91.51 +Neigh | 9.1909 | 9.1909 | 9.1909 | 0.0 | 8.35 +Comm | 0.058134 | 0.058134 | 0.058134 | 0.0 | 0.05 +Output | 0.0015941 | 0.0015941 | 0.0015941 | 0.0 | 0.00 +Modify | 0.062212 | 0.062212 | 0.062212 | 0.0 | 0.06 +Other | | 0.03123 | | | 0.03 + +Nlocal: 32640 ave 32640 max 32640 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 48190 ave 48190 max 48190 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 2.22178e+07 ave 2.22178e+07 max 2.22178e+07 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 22217840 +Ave neighs/atom = 
680.694 +Neighbor list builds = 8 +Dangerous builds = 0 +Total wall time: 0:01:52 diff --git a/examples/airebo/log.23Jun17.airebo.g++.4 b/examples/airebo/log.23Jun17.airebo.g++.4 new file mode 100644 index 0000000000..486b48a004 --- /dev/null +++ b/examples/airebo/log.23Jun17.airebo.g++.4 @@ -0,0 +1,86 @@ +LAMMPS (23 Jun 2017) + using 1 OpenMP thread(s) per MPI task +# AIREBO polyethelene benchmark + +units metal +atom_style atomic + +read_data data.airebo + orthogonal box = (-2.1 -2.1 0) to (2.1 2.1 25.579) + 1 by 1 by 4 MPI processor grid + reading atoms ... + 60 atoms + +replicate 17 16 2 + orthogonal box = (-2.1 -2.1 0) to (69.3 65.1 51.158) + 2 by 2 by 1 MPI processor grid + 32640 atoms + +neighbor 0.5 bin +neigh_modify delay 5 every 1 + +pair_style airebo 3.0 1 1 +pair_coeff * * ../../potentials/CH.airebo C H +Reading potential file ../../potentials/CH.airebo with DATE: 2011-10-25 + +velocity all create 300.0 761341 + +fix 1 all nve +timestep 0.0005 + +thermo 10 +run 100 +Neighbor list info ... 
+ update every 1 steps, delay 5 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 10.7 + ghost atom cutoff = 10.7 + binsize = 5.35, bins = 14 13 10 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair airebo, perpetual + attributes: full, newton on, ghost + pair build: full/bin/ghost + stencil: full/ghost/bin/3d + bin: standard +Per MPI rank memory allocation (min/avg/max) = 29.37 | 29.75 | 30.13 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 300 -139300.72 0 -138035.04 7988.6647 + 10 161.34683 -138712.9 0 -138032.19 33228.921 + 20 208.59504 -138912.79 0 -138032.74 -3211.8806 + 30 139.7513 -138618.85 0 -138029.25 10878.143 + 40 142.14562 -138629.02 0 -138029.32 14601.302 + 50 114.23401 -138510.95 0 -138029 24691.125 + 60 164.92002 -138726 0 -138030.21 35125.541 + 70 162.15256 -138715.9 0 -138031.79 5658.7946 + 80 157.16184 -138695.77 0 -138032.72 19824.698 + 90 196.15907 -138860.65 0 -138033.07 -7950.8462 + 100 178.31875 -138784.89 0 -138032.57 30997.671 +Loop time of 30.1916 on 4 procs for 100 steps with 32640 atoms + +Performance: 0.143 ns/day, 167.731 hours/ns, 3.312 timesteps/s +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 26.083 | 26.31 | 26.795 | 5.5 | 87.14 +Neigh | 3.1781 | 3.2134 | 3.2775 | 2.2 | 10.64 +Comm | 0.086296 | 0.63643 | 0.88995 | 40.2 | 2.11 +Output | 0.00074124 | 0.0010698 | 0.0013616 | 0.7 | 0.00 +Modify | 0.015335 | 0.016373 | 0.017565 | 0.8 | 0.05 +Other | | 0.01457 | | | 0.05 + +Nlocal: 8160 ave 8174 max 8146 min +Histogram: 1 0 1 0 0 0 0 1 0 1 +Nghost: 22614.5 ave 22629 max 22601 min +Histogram: 1 1 0 0 0 0 0 1 0 1 +Neighs: 0 ave 0 max 0 min +Histogram: 4 0 0 0 0 0 0 0 0 0 +FullNghs: 5.55446e+06 ave 5.56556e+06 max 5.54192e+06 min +Histogram: 1 0 0 1 0 0 0 1 0 1 + +Total # of neighbors = 22217840 +Ave neighs/atom = 
680.694 +Neighbor list builds = 8 +Dangerous builds = 0 +Total wall time: 0:00:30 diff --git a/examples/gcmc/H2O.txt b/examples/gcmc/H2O.txt new file mode 100644 index 0000000000..b56f869693 --- /dev/null +++ b/examples/gcmc/H2O.txt @@ -0,0 +1,62 @@ +# H2O molecule file. SPC/E model. + +3 atoms +2 bonds +1 angles + +Coords + +1 1.12456 0.09298 1.27452 +2 1.53683 0.75606 1.89928 +3 0.49482 0.56390 0.65678 + +Types + +1 1 +2 2 +3 2 + +Charges + +1 -0.8472 +2 0.4236 +3 0.4236 + +Bonds + +1 1 1 2 +2 1 1 3 + +Angles + +1 1 2 1 3 + +Shake Flags + +1 1 +2 1 +3 1 + +Shake Atoms + +1 1 2 3 +2 1 2 3 +3 1 2 3 + +Shake Bond Types + +1 1 1 1 +2 1 1 1 +3 1 1 1 + +Special Bond Counts + +1 2 0 0 +2 1 1 0 +3 1 1 0 + +Special Bonds + +1 2 3 +2 1 3 +3 1 2 diff --git a/examples/gcmc/in.gcmc.co2 b/examples/gcmc/in.gcmc.co2 index 0961e2b556..d11ef72fdd 100644 --- a/examples/gcmc/in.gcmc.co2 +++ b/examples/gcmc/in.gcmc.co2 @@ -64,7 +64,7 @@ fix_modify myrigidnvt dynamic/dof no # gcmc variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 trans) -fix mygcmc all gcmc 100 100 100 0 54341 ${temp} ${mu} ${disp} mol & +fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol & co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt # output diff --git a/examples/gcmc/in.gcmc.h2o b/examples/gcmc/in.gcmc.h2o new file mode 100644 index 0000000000..7ffaafa975 --- /dev/null +++ b/examples/gcmc/in.gcmc.h2o @@ -0,0 +1,88 @@ +# fix gcmc example with fix shake + +# variables available on command line + +variable mu index -8.1 +variable disp index 0.5 +variable temp index 338.0 +variable lbox index 10.0 +variable spacing index 5.0 + +# global model settings + +units real +atom_style full +boundary p p p +pair_style lj/cut/coul/long 14 +pair_modify mix arithmetic tail yes +kspace_style ewald 0.0001 +bond_style harmonic +angle_style harmonic + +# box, start molecules on simple cubic lattice + +lattice sc ${spacing} +region box block 0 ${lbox} 0 ${lbox} 0 ${lbox} units box +create_box 2 box & + 
bond/types 1 & + angle/types 1 & + extra/bond/per/atom 2 & + extra/angle/per/atom 1 & + extra/special/per/atom 2 + +# we can load multiple molecule templates, but don't have to use them all +molecule co2mol CO2.txt +molecule h2omol H2O.txt +create_atoms 0 box mol h2omol 464563 units box + +# rigid SPC/E water model + +pair_coeff 1 1 0.15535 3.166 +pair_coeff * 2 0.0000 0.0000 + +bond_coeff 1 1000 1.0 +angle_coeff 1 100 109.47 + +# masses + +mass 1 15.9994 +mass 2 1.0 + +# MD settings + +group h2o type 1 2 +neighbor 2.0 bin +neigh_modify every 1 delay 1 check yes +velocity all create ${temp} 54654 +timestep 1.0 + +minimize 0.0 0.0 100 1000 +reset_timestep 0 +# rigid constraints with thermostat + +fix mynvt all nvt temp ${temp} ${temp} 100 +fix wshake all shake 0.0001 50 0 b 1 a 1 mol h2omol +# gcmc + + + +run 1000 + +variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 trans) +fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol & + h2omol tfac_insert ${tfac} group h2o shake wshake + +# output + +variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) +variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) +variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) +variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) +compute_modify thermo_temp dynamic/dof yes +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo 1000 + +# run + +run 20000 + diff --git a/examples/gcmc/log.24Mar17.gcmc.co2.g++.4 b/examples/gcmc/log.24Mar17.gcmc.co2.g++.4 deleted file mode 100644 index 65504b8d46..0000000000 --- a/examples/gcmc/log.24Mar17.gcmc.co2.g++.4 +++ /dev/null @@ -1,179 +0,0 @@ -LAMMPS (17 Mar 2017) -# GCMC for CO2 molecular fluid, rigid/small/nvt dynamics -# Rigid CO2 TraPPE model -# [Potoff and J.I. Siepmann, Vapor-liquid equilibria of -# mixtures containing alkanes, carbon dioxide and -# nitrogen AIChE J., 47,1676-1682 (2001)]. 
- -# variables available on command line - -variable mu index -8.1 -variable disp index 0.5 -variable temp index 338.0 -variable lbox index 10.0 -variable spacing index 5.0 - -# global model settings - -units real -atom_style full -boundary p p p -pair_style lj/cut/coul/long 14 -pair_modify mix arithmetic tail yes -kspace_style ewald 0.0001 -bond_style harmonic -angle_style harmonic - -# box, start molecules on simple cubic lattice - -lattice sc ${spacing} -lattice sc 5.0 -Lattice spacing in x,y,z = 5 5 5 -region box block 0 ${lbox} 0 ${lbox} 0 ${lbox} units box -region box block 0 10.0 0 ${lbox} 0 ${lbox} units box -region box block 0 10.0 0 10.0 0 ${lbox} units box -region box block 0 10.0 0 10.0 0 10.0 units box -create_box 2 box bond/types 1 angle/types 1 extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 -Created orthogonal box = (0 0 0) to (10 10 10) - 1 by 2 by 2 MPI processor grid -molecule co2mol CO2.txt -Read molecule co2mol: - 3 atoms with 2 types - 2 bonds with 1 types - 1 angles with 1 types - 0 dihedrals with 0 types - 0 impropers with 0 types -create_atoms 0 box mol co2mol 464563 units box -Created 24 atoms - -# rigid CO2 TraPPE model - -pair_coeff 1 1 0.053649 2.8 -pair_coeff 2 2 0.156973 3.05 -bond_coeff 1 0 1.16 -angle_coeff 1 0 180 - -# masses - -mass 1 12.0107 -mass 2 15.9994 - -# MD settings - -group co2 type 1 2 -24 atoms in group co2 -neighbor 2.0 bin -neigh_modify every 1 delay 10 check yes -velocity all create ${temp} 54654 -velocity all create 338.0 54654 -timestep 1.0 - -# rigid constraints with thermostat - -fix myrigidnvt all rigid/nvt/small molecule temp ${temp} ${temp} 100 mol co2mol -fix myrigidnvt all rigid/nvt/small molecule temp 338.0 ${temp} 100 mol co2mol -fix myrigidnvt all rigid/nvt/small molecule temp 338.0 338.0 100 mol co2mol -8 rigid bodies with 24 atoms - 1.16 = max distance from body owner to body atom -fix_modify myrigidnvt dynamic/dof no - -# gcmc - -variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 
trans) -fix mygcmc all gcmc 100 100 100 0 54341 ${temp} ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 -8.1 ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert 1.66666666666667 group co2 rigid myrigidnvt - -# output - -variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) -variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) -variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) -variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) -compute_modify thermo_temp dynamic/dof yes -thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc -thermo 1000 - -# run - -run 20000 -Ewald initialization ... -WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) - G vector (1/distance) = 0.164636 - estimated absolute RMS force accuracy = 0.0332064 - estimated relative force accuracy = 0.0001 - KSpace vectors: actual max1d max3d = 16 2 62 - kxmax kymax kzmax = 2 2 2 -WARNING: Fix gcmc using full_energy option (../fix_gcmc.cpp:439) -0 atoms in group FixGCMC:gcmc_exclusion_group:mygcmc -0 atoms in group FixGCMC:rotation_gas_atoms:mygcmc -WARNING: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies (../neighbor.cpp:472) -Neighbor list info ... 
- update every 1 steps, delay 10 steps, check yes - max neighbors/atom: 2000, page size: 100000 - master list distance cutoff = 16 - ghost atom cutoff = 16 - binsize = 8, bins = 2 2 2 - 1 neighbor lists, perpetual/occasional/extra = 1 0 0 - (1) pair lj/cut/coul/long, perpetual - attributes: half, newton on - pair build: half/bin/newton - stencil: half/bin/3d/newton - bin: standard -Per MPI rank memory allocation (min/avg/max) = 15.4 | 15.4 | 15.4 Mbytes -Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc - 0 386.52184 23582.465 -3.2433417 14.209828 0.5846359 24 0 0 0 0 -WARNING: Using kspace solver on system with no charge (../kspace.cpp:289) - 1000 760.80877 -39.270882 -3.5239626 12.851016 0.29231795 12 0.24161633 0.22984103 0.71087092 0.85283311 - 2000 308.0739 -255.061 -20.411926 14.386853 0.73079488 30 0.26075352 0.24898725 0.73128383 0.88590474 - 3000 432.34358 -1361.3278 -12.644057 15.894387 0.5846359 24 0.21121583 0.21051229 0.70036003 0.86735027 - 4000 631.524 -63.488785 -3.6517158 13.804656 0.36539744 15 0.22486443 0.22886173 0.72358173 0.87172606 - 5000 730.61244 -1029.284 -6.2144028 19.600352 0.43847693 18 0.23017182 0.22740779 0.72281887 0.87820845 - 6000 752.43412 503.4547 -3.7053679 16.447663 0.36539744 15 0.22943971 0.226183 0.71450085 0.87447436 - 7000 660.68448 828.51735 -10.592278 21.006666 0.51155641 21 0.24702096 0.24218506 0.71815602 0.8740222 - 8000 331.58822 -621.22187 -5.3705759 7.2482776 0.36539744 15 0.23211903 0.22906813 0.70281376 0.86269411 - 9000 413.91538 869.51669 -11.28701 15.216905 0.5846359 24 0.23246466 0.22923961 0.70832684 0.86244176 - 10000 242.20861 -808.23311 -5.4533937 5.2945044 0.36539744 15 0.22024676 0.22031775 0.70785097 0.85712561 - 11000 348.20046 -372.16895 -3.4663358 7.6114092 0.36539744 15 0.2252033 0.22688969 0.71513402 0.86123263 - 12000 251.99682 303.30092 -18.58289 11.768089 0.73079488 30 0.20916844 0.21068047 0.694787 0.84635875 - 13000 306.83592 -1582.0137 -20.810287 14.329041 0.73079488 
30 0.19494837 0.196527 0.67554784 0.83056119 - 14000 476.57411 268.94927 -14.185859 19.888076 0.65771539 27 0.19779631 0.20016859 0.67957528 0.83375167 - 15000 267.03534 730.71183 -9.3348616 9.8171066 0.5846359 24 0.19468305 0.19814971 0.68032974 0.83810439 - 16000 639.83235 2190.3244 -9.6666503 26.701062 0.65771539 27 0.19520687 0.19848997 0.68514387 0.84100361 - 17000 2237.1203 -222.59057 -0.18248834 4.4456205 0.073079488 3 0.20412446 0.20757814 0.69175318 0.8434939 - 18000 754.44841 205.54694 -10.501943 27.736031 0.5846359 24 0.2129422 0.21508015 0.69665031 0.84758331 - 19000 1610.1148 1293.6003 -0.20849836 3.1996309 0.073079488 3 0.22061668 0.22356929 0.69949369 0.84851405 - 20000 231.61458 -39.696514 -4.6452226 5.0629266 0.36539744 15 0.21984893 0.22246517 0.69914635 0.85058457 -Loop time of 21.1019 on 4 procs for 20000 steps with 15 atoms - -Performance: 81.888 ns/day, 0.293 hours/ns, 947.781 timesteps/s -98.9% CPU use with 4 MPI tasks x no OpenMP threads - -MPI task timing breakdown: -Section | min time | avg time | max time |%varavg| %total ---------------------------------------------------------------- -Pair | 0.31897 | 0.41973 | 0.49748 | 10.1 | 1.99 -Bond | 0.014808 | 0.015063 | 0.015289 | 0.2 | 0.07 -Kspace | 0.3813 | 0.46228 | 0.56585 | 9.8 | 2.19 -Neigh | 0.049173 | 0.050043 | 0.050868 | 0.3 | 0.24 -Comm | 0.9755 | 0.99686 | 1.0205 | 1.9 | 4.72 -Output | 0.0014546 | 0.0015051 | 0.0016098 | 0.2 | 0.01 -Modify | 19.043 | 19.062 | 19.085 | 0.4 | 90.33 -Other | | 0.09438 | | | 0.45 - -Nlocal: 3.75 ave 6 max 3 min -Histogram: 3 0 0 0 0 0 0 0 0 1 -Nghost: 876.5 ave 937 max 818 min -Histogram: 1 1 0 0 0 0 0 0 1 1 -Neighs: 490.5 ave 647 max 363 min -Histogram: 1 0 1 0 0 1 0 0 0 1 - -Total # of neighbors = 1962 -Ave neighs/atom = 130.8 -Ave special neighs/atom = 2 -Neighbor list builds = 40070 -Dangerous builds = 115 - -Total wall time: 0:00:21 diff --git a/examples/gcmc/log.24Mar17.gcmc.co2.g++.1 b/examples/gcmc/log.6Jul17.gcmc.co2.g++.1 similarity index 50% 
rename from examples/gcmc/log.24Mar17.gcmc.co2.g++.1 rename to examples/gcmc/log.6Jul17.gcmc.co2.g++.1 index 7562476bf3..f9e494c43f 100644 --- a/examples/gcmc/log.24Mar17.gcmc.co2.g++.1 +++ b/examples/gcmc/log.6Jul17.gcmc.co2.g++.1 @@ -1,4 +1,5 @@ -LAMMPS (17 Mar 2017) +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task # GCMC for CO2 molecular fluid, rigid/small/nvt dynamics # Rigid CO2 TraPPE model # [Potoff and J.I. Siepmann, Vapor-liquid equilibria of @@ -80,11 +81,11 @@ fix_modify myrigidnvt dynamic/dof no # gcmc variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 trans) -fix mygcmc all gcmc 100 100 100 0 54341 ${temp} ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 -8.1 ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt -fix mygcmc all gcmc 100 100 100 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert 1.66666666666667 group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert 1.66666666666667 group co2 rigid myrigidnvt # output @@ -106,7 +107,7 @@ WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) estimated relative force accuracy = 0.0001 KSpace vectors: actual max1d max3d = 16 2 62 kxmax kymax kzmax = 2 2 2 -WARNING: Fix 
gcmc using full_energy option (../fix_gcmc.cpp:439) +WARNING: Fix gcmc using full_energy option (../fix_gcmc.cpp:445) 0 atoms in group FixGCMC:gcmc_exclusion_group:mygcmc 0 atoms in group FixGCMC:rotation_gas_atoms:mygcmc WARNING: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies (../neighbor.cpp:472) @@ -122,56 +123,58 @@ Neighbor list info ... pair build: half/bin/newton stencil: half/bin/3d/newton bin: standard -Per MPI rank memory allocation (min/avg/max) = 15.61 | 15.61 | 15.61 Mbytes +Per MPI rank memory allocation (min/avg/max) = 15.62 | 15.62 | 15.62 Mbytes Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc 0 364.27579 4238.8631 -9.6809388 13.391989 0.5846359 24 0 0 0 0 - 1000 311.39835 -327.93481 -8.6795381 9.9010062 0.51155641 21 0.13302848 0.12331626 0.6894397 0.90997852 WARNING: Using kspace solver on system with no charge (../kspace.cpp:289) - 2000 905.66812 319.43347 -0.50350961 6.2991241 0.14615898 6 0.20952183 0.20430213 0.71797992 0.92626683 - 3000 275.57393 -719.89718 -26.534978 14.238181 0.80387436 33 0.21291069 0.20460696 0.72899202 0.9133259 - 4000 254.70771 -245.01902 -20.981537 13.160079 0.80387436 33 0.17245726 0.16974613 0.70145764 0.90542759 - 5000 96.073601 -517.98124 -34.019065 5.441166 0.87695385 36 0.14174575 0.13607057 0.6776754 0.90155771 - 6000 397.57265 148.92645 -7.2012893 10.665797 0.43847693 18 0.12299956 0.1202471 0.66165464 0.90274793 - 7000 455.4271 -347.44181 -5.9244703 12.217875 0.43847693 18 0.15182038 0.14791307 0.67904236 0.90560829 - 8000 301.03124 -627.45324 -13.251012 11.066909 0.5846359 24 0.16687346 0.16315516 0.6936719 0.91129375 - 9000 256.5747 -565.67983 -17.814128 11.981874 0.73079488 30 0.15458482 0.15131825 0.68966283 0.90993975 - 10000 443.60076 89.586912 -6.077863 11.900606 0.43847693 18 0.16092552 0.16020353 0.69882461 0.91422145 - 11000 436.43777 64.412921 -6.7128469 11.708443 0.43847693 18 0.17453966 0.17480683 0.70679243 0.91369445 - 12000 594.42207 
849.07743 -3.3708621 10.040536 0.29231795 12 0.17461606 0.17568622 0.71175869 0.91333367 - 13000 426.85849 -1093.1334 -17.524618 17.813377 0.65771539 27 0.17742896 0.17792831 0.71363306 0.91450124 - 14000 317.75995 336.31107 -10.46774 11.681912 0.5846359 24 0.18331181 0.18427921 0.71715557 0.91652256 - 15000 272.65129 317.50536 -26.428336 14.087176 0.80387436 33 0.17449167 0.175957 0.71122398 0.91528038 - 16000 344.28567 -577.91079 -18.177927 16.077919 0.73079488 30 0.1661682 0.16781514 0.70485136 0.91508882 - 17000 134.55928 -193.5668 -30.297136 7.6208177 0.87695385 36 0.15965609 0.1605036 0.69658104 0.9140445 - 18000 231.87302 -446.07671 -14.875027 9.6763722 0.65771539 27 0.15270985 0.15351831 0.69002918 0.91372795 - 19000 328.6835 -280.22365 -20.001303 16.982214 0.80387436 33 0.15201017 0.15272181 0.69023195 0.91272534 - 20000 0 -20.39554 -0.14872889 -0 0 0 0.15600204 0.15750795 0.69503275 0.9138765 -Loop time of 30.9008 on 1 procs for 20000 steps with 0 atoms + 1000 420.43475 1722.4052 -9.6956123 15.456579 0.5846359 24 0.20879341 0.20713005 0 0 + 2000 302.29516 -547.83641 -22.017674 14.11699 0.73079488 30 0.1742478 0.1678018 0 0 + 3000 316.6934 -1080.2672 -8.2218891 10.069364 0.51155641 21 0.13544917 0.13720634 0 0 + 4000 246.81618 -679.83642 -14.577244 10.29997 0.65771539 27 0.1568939 0.15860229 0 0 + 5000 260.22849 -896.29914 -16.097593 10.859684 0.65771539 27 0.13138744 0.13547049 0 0 + 6000 291.70796 -1521.99 -22.303136 13.622574 0.73079488 30 0.12615476 0.12717694 0 0 + 7000 236.02638 -599.92186 -27.580831 13.367447 0.87695385 36 0.119703 0.12145398 0 0 + 8000 321.45341 688.10577 -10.09204 11.817696 0.5846359 24 0.10917411 0.11032646 0 0 + 9000 502.85382 -302.31056 -0.22330142 0.99927447 0.073079488 3 0.1254105 0.12905828 0 0 + 10000 249.98239 -510.0091 -32.815145 15.399767 0.95003334 39 0.1274504 0.12875623 0 0 + 11000 247.59424 -1129.0274 -25.320205 12.792544 0.80387436 33 0.11739076 0.11916784 0 0 + 12000 0 -20.39554 -0.14872889 -0 0 0 0.1254933 
0.12920375 0 0 + 13000 1272.2738 -474.79484 -0.29450485 8.8489483 0.14615898 6 0.13767133 0.14112496 0 0 + 14000 516.54246 -36.296516 -5.0012009 11.291243 0.36539744 15 0.15632744 0.15955377 0 0 + 15000 307.09233 1951.9301 -14.820362 12.815375 0.65771539 27 0.15393544 0.15716192 0 0 + 16000 198.31989 -559.48443 -30.459487 11.231925 0.87695385 36 0.1482565 0.15025652 0 0 + 17000 246.99311 657.85683 -18.579206 11.53442 0.73079488 30 0.14143958 0.14375423 0 0 + 18000 467.13468 167.03738 -1.0945268 5.569759 0.21923846 9 0.13847359 0.14098533 0 0 + 19000 359.54027 -1413.5407 -12.156233 13.217895 0.5846359 24 0.15169146 0.15294205 0 0 + 20000 227.79597 -1204.5652 -23.24144 10.637925 0.73079488 30 0.14917199 0.15022946 0 0 +Loop time of 20.153 on 1 procs for 20000 steps with 30 atoms -Performance: 55.921 ns/day, 0.429 hours/ns, 647.233 timesteps/s -99.8% CPU use with 1 MPI tasks x no OpenMP threads +Performance: 85.744 ns/day, 0.280 hours/ns, 992.408 timesteps/s +99.3% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 2.1985 | 2.1985 | 2.1985 | 0.0 | 7.11 -Bond | 0.029596 | 0.029596 | 0.029596 | 0.0 | 0.10 -Kspace | 0.23123 | 0.23123 | 0.23123 | 0.0 | 0.75 -Neigh | 0.16141 | 0.16141 | 0.16141 | 0.0 | 0.52 -Comm | 0.20628 | 0.20628 | 0.20628 | 0.0 | 0.67 -Output | 0.00068831 | 0.00068831 | 0.00068831 | 0.0 | 0.00 -Modify | 28.022 | 28.022 | 28.022 | 0.0 | 90.69 -Other | | 0.05058 | | | 0.16 +Pair | 2.5352 | 2.5352 | 2.5352 | 0.0 | 12.58 +Bond | 0.026112 | 0.026112 | 0.026112 | 0.0 | 0.13 +Kspace | 0.25 | 0.25 | 0.25 | 0.0 | 1.24 +Neigh | 0.10364 | 0.10364 | 0.10364 | 0.0 | 0.51 +Comm | 0.22907 | 0.22907 | 0.22907 | 0.0 | 1.14 +Output | 0.0013065 | 0.0013065 | 0.0013065 | 0.0 | 0.01 +Modify | 16.957 | 16.957 | 16.957 | 0.0 | 84.14 +Other | | 0.05061 | | | 0.25 -Nlocal: 0 ave 0 max 0 min +Nlocal: 30 ave 30 max 30 min 
Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 0 ave 0 max 0 min +Nghost: 2310 ave 2310 max 2310 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 0 ave 0 max 0 min +Neighs: 7736 ave 7736 max 7736 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Total # of neighbors = 0 -Neighbor list builds = 40367 -Dangerous builds = 118 +Total # of neighbors = 7736 +Ave neighs/atom = 257.867 +Ave special neighs/atom = 2 +Neighbor list builds = 20349 +Dangerous builds = 0 -Total wall time: 0:00:30 +Total wall time: 0:00:20 diff --git a/examples/gcmc/log.6Jul17.gcmc.co2.g++.4 b/examples/gcmc/log.6Jul17.gcmc.co2.g++.4 new file mode 100644 index 0000000000..0df25430d2 --- /dev/null +++ b/examples/gcmc/log.6Jul17.gcmc.co2.g++.4 @@ -0,0 +1,180 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# GCMC for CO2 molecular fluid, rigid/small/nvt dynamics +# Rigid CO2 TraPPE model +# [Potoff and J.I. Siepmann, Vapor-liquid equilibria of +# mixtures containing alkanes, carbon dioxide and +# nitrogen AIChE J., 47,1676-1682 (2001)]. 
+ +# variables available on command line + +variable mu index -8.1 +variable disp index 0.5 +variable temp index 338.0 +variable lbox index 10.0 +variable spacing index 5.0 + +# global model settings + +units real +atom_style full +boundary p p p +pair_style lj/cut/coul/long 14 +pair_modify mix arithmetic tail yes +kspace_style ewald 0.0001 +bond_style harmonic +angle_style harmonic + +# box, start molecules on simple cubic lattice + +lattice sc ${spacing} +lattice sc 5.0 +Lattice spacing in x,y,z = 5 5 5 +region box block 0 ${lbox} 0 ${lbox} 0 ${lbox} units box +region box block 0 10.0 0 ${lbox} 0 ${lbox} units box +region box block 0 10.0 0 10.0 0 ${lbox} units box +region box block 0 10.0 0 10.0 0 10.0 units box +create_box 2 box bond/types 1 angle/types 1 extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 +Created orthogonal box = (0 0 0) to (10 10 10) + 1 by 2 by 2 MPI processor grid +molecule co2mol CO2.txt +Read molecule co2mol: + 3 atoms with 2 types + 2 bonds with 1 types + 1 angles with 1 types + 0 dihedrals with 0 types + 0 impropers with 0 types +create_atoms 0 box mol co2mol 464563 units box +Created 24 atoms + +# rigid CO2 TraPPE model + +pair_coeff 1 1 0.053649 2.8 +pair_coeff 2 2 0.156973 3.05 +bond_coeff 1 0 1.16 +angle_coeff 1 0 180 + +# masses + +mass 1 12.0107 +mass 2 15.9994 + +# MD settings + +group co2 type 1 2 +24 atoms in group co2 +neighbor 2.0 bin +neigh_modify every 1 delay 10 check yes +velocity all create ${temp} 54654 +velocity all create 338.0 54654 +timestep 1.0 + +# rigid constraints with thermostat + +fix myrigidnvt all rigid/nvt/small molecule temp ${temp} ${temp} 100 mol co2mol +fix myrigidnvt all rigid/nvt/small molecule temp 338.0 ${temp} 100 mol co2mol +fix myrigidnvt all rigid/nvt/small molecule temp 338.0 338.0 100 mol co2mol +8 rigid bodies with 24 atoms + 1.16 = max distance from body owner to body atom +fix_modify myrigidnvt dynamic/dof no + +# gcmc + +variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 
trans) +fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 ${mu} ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert ${tfac} group co2 rigid myrigidnvt +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol co2mol tfac_insert 1.66666666666667 group co2 rigid myrigidnvt + +# output + +variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) +variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) +variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) +variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) +compute_modify thermo_temp dynamic/dof yes +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo 1000 + +# run + +run 20000 +Ewald initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.164636 + estimated absolute RMS force accuracy = 0.0332064 + estimated relative force accuracy = 0.0001 + KSpace vectors: actual max1d max3d = 16 2 62 + kxmax kymax kzmax = 2 2 2 +WARNING: Fix gcmc using full_energy option (../fix_gcmc.cpp:445) +0 atoms in group FixGCMC:gcmc_exclusion_group:mygcmc +0 atoms in group FixGCMC:rotation_gas_atoms:mygcmc +WARNING: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies (../neighbor.cpp:472) +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 16 + ghost atom cutoff = 16 + binsize = 8, bins = 2 2 2 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 15.41 | 15.41 | 15.41 Mbytes +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc + 0 386.52184 23582.465 -3.2433417 14.209828 0.5846359 24 0 0 0 0 +WARNING: Using kspace solver on system with no charge (../kspace.cpp:289) + 1000 335.66829 -3.7743052 -4.6268612 7.3374649 0.36539744 15 0.20601899 0.20787963 0 0 + 2000 459.73529 238.91592 -0.42937831 5.4815343 0.21923846 9 0.30392058 0.30105616 0 0 + 3000 255.47773 -479.67802 -36.850434 15.738299 0.95003334 39 0.22220744 0.2197582 0 0 + 4000 182.70803 -1059.2262 -43.044833 12.163134 1.0231128 42 0.16781689 0.16716177 0 0 + 5000 234.00907 -1821.0444 -46.04795 15.578317 1.0231128 42 0.13498091 0.13704201 0 0 + 6000 163.42759 -774.67294 -49.686261 11.691518 1.0961923 45 0.11401677 0.11296973 0 0 + 7000 171.64616 -355.23516 -49.323434 12.27947 1.0961923 45 0.098302308 0.098552065 0 0 + 8000 251.29791 -905.47863 -37.841209 15.480807 0.95003334 39 0.086856972 0.08638658 0 0 + 9000 143.69498 -849.95393 -49.073188 10.279858 1.0961923 45 0.078261061 0.077955243 0 0 + 10000 239.35727 -1158.1879 -43.562047 15.934355 1.0231128 42 0.070789792 0.070807529 0 0 + 11000 169.51213 -1574.7885 -51.125228 12.126803 1.0961923 45 0.065008734 0.06498871 0 0 + 12000 181.39739 160.11631 -46.850937 12.977068 1.0961923 45 0.059648717 0.059514803 0 0 + 13000 164.14601 -1107.7629 -50.726722 11.742914 1.0961923 45 0.055207333 0.055097701 0 0 + 14000 287.26285 418.51463 -41.664766 19.123497 1.0231128 42 0.051346789 0.051222285 0 0 + 15000 256.94593 -532.36615 -41.651618 17.105257 1.0231128 42 0.047870301 
0.047861685 0 0 + 16000 166.92132 151.15933 -39.957018 11.11219 1.0231128 42 0.045205599 0.045042211 0 0 + 17000 163.22452 -1299.8119 -42.677558 10.866089 1.0231128 42 0.043122086 0.042993687 0 0 + 18000 158.01154 475.77329 -48.803162 11.304057 1.0961923 45 0.041016683 0.040647229 0 0 + 19000 138.49297 -1585.1508 -47.517099 9.9077098 1.0961923 45 0.038929287 0.038436764 0 0 + 20000 173.84439 -1362.6301 -53.002743 12.436731 1.0961923 45 0.036973919 0.036523816 0 0 +Loop time of 31.8386 on 4 procs for 20000 steps with 45 atoms + +Performance: 54.274 ns/day, 0.442 hours/ns, 628.168 timesteps/s +98.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 1.1546 | 1.6687 | 2.1338 | 29.5 | 5.24 +Bond | 0.019769 | 0.020369 | 0.02132 | 0.4 | 0.06 +Kspace | 0.53392 | 0.99911 | 1.5116 | 37.8 | 3.14 +Neigh | 0.06737 | 0.067842 | 0.068412 | 0.2 | 0.21 +Comm | 1.9408 | 1.9582 | 1.9733 | 1.1 | 6.15 +Output | 0.0019503 | 0.0020472 | 0.0022476 | 0.3 | 0.01 +Modify | 26.974 | 26.99 | 27.001 | 0.2 | 84.77 +Other | | 0.1322 | | | 0.42 + +Nlocal: 11.25 ave 14 max 8 min +Histogram: 1 0 0 0 0 1 1 0 0 1 +Nghost: 2639.75 ave 2656 max 2617 min +Histogram: 1 0 0 0 0 0 2 0 0 1 +Neighs: 4320 ave 5824 max 2201 min +Histogram: 1 0 0 0 0 0 1 1 0 1 + +Total # of neighbors = 17280 +Ave neighs/atom = 384 +Ave special neighs/atom = 2 +Neighbor list builds = 20394 +Dangerous builds = 0 + +Total wall time: 0:00:31 diff --git a/examples/gcmc/log.6Jul17.gcmc.h2o.g++.1 b/examples/gcmc/log.6Jul17.gcmc.h2o.g++.1 new file mode 100644 index 0000000000..3b1606e65d --- /dev/null +++ b/examples/gcmc/log.6Jul17.gcmc.h2o.g++.1 @@ -0,0 +1,281 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# fix gcmc example with fix shake + +# variables available on command line + +variable mu index -8.1 +variable disp index 0.5 +variable temp index 338.0 
+variable lbox index 10.0 +variable spacing index 5.0 + +# global model settings + +units real +atom_style full +boundary p p p +pair_style lj/cut/coul/long 14 +pair_modify mix arithmetic tail yes +kspace_style ewald 0.0001 +bond_style harmonic +angle_style harmonic + +# box, start molecules on simple cubic lattice + +lattice sc ${spacing} +lattice sc 5.0 +Lattice spacing in x,y,z = 5 5 5 +region box block 0 ${lbox} 0 ${lbox} 0 ${lbox} units box +region box block 0 10.0 0 ${lbox} 0 ${lbox} units box +region box block 0 10.0 0 10.0 0 ${lbox} units box +region box block 0 10.0 0 10.0 0 10.0 units box +create_box 2 box bond/types 1 angle/types 1 extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 +Created orthogonal box = (0 0 0) to (10 10 10) + 1 by 1 by 1 MPI processor grid + +# we can load multiple molecule templates, but don't have to use them all +molecule co2mol CO2.txt +Read molecule co2mol: + 3 atoms with 2 types + 2 bonds with 1 types + 1 angles with 1 types + 0 dihedrals with 0 types + 0 impropers with 0 types +molecule h2omol H2O.txt +Read molecule h2omol: + 3 atoms with 2 types + 2 bonds with 1 types + 1 angles with 1 types + 0 dihedrals with 0 types + 0 impropers with 0 types +create_atoms 0 box mol h2omol 464563 units box +Created 24 atoms + +# rigid SPC/E water model + +pair_coeff 1 1 0.15535 3.166 +pair_coeff * 2 0.0000 0.0000 + +bond_coeff 1 1000 1.0 +angle_coeff 1 100 109.47 + +# masses + +mass 1 15.9994 +mass 2 1.0 + +# MD settings + +group h2o type 1 2 +24 atoms in group h2o +neighbor 2.0 bin +neigh_modify every 1 delay 1 check yes +velocity all create ${temp} 54654 +velocity all create 338.0 54654 +timestep 1.0 + +minimize 0.0 0.0 100 1000 +WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (../min.cpp:168) +Ewald initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.170448 + estimated absolute RMS force accuracy = 0.0332064 + estimated relative force accuracy = 0.0001 + KSpace vectors: actual max1d max3d = 16 2 62 + kxmax kymax kzmax = 2 2 2 +Neighbor list info ... + update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 16 + ghost atom cutoff = 16 + binsize = 8, bins = 2 2 2 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 11.88 | 11.88 | 11.88 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 338 -4.1890564 9.2628112e-06 18.98377 739.06991 + 100 338 -30.182886 0.85607237 -6.1539961 -2535.3207 +Loop time of 0.0525794 on 1 procs for 100 steps with 24 atoms + +99.4% CPU use with 1 MPI tasks x 1 OpenMP threads + +Minimization stats: + Stopping criterion = max iterations + Energy initial, next-to-last, final = + -4.18904713252 -28.9258064504 -29.3268133965 + Force two-norm initial, final = 18.0027 42.4511 + Force max component initial, final = 5.8993 16.0523 + Final line search alpha, max atom move = 0.00353207 0.056698 + Iterations, force evaluations = 100 238 + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.044199 | 0.044199 | 0.044199 | 0.0 | 84.06 +Bond | 0.00049019 | 0.00049019 | 0.00049019 | 0.0 | 0.93 +Kspace | 0.0031631 | 0.0031631 | 0.0031631 | 0.0 | 6.02 +Neigh | 0.00046444 | 0.00046444 | 0.00046444 | 0.0 | 0.88 +Comm | 0.0034101 | 0.0034101 | 0.0034101 | 0.0 | 6.49 +Output | 1.9073e-05 | 1.9073e-05 | 1.9073e-05 | 0.0 | 0.04 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 0.0008333 | | | 1.58 + +Nlocal: 24 ave 24 max 24 min +Histogram: 1 0 0 0 0 0 0 
0 0 0 +Nghost: 2047 ave 2047 max 2047 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 4936 ave 4936 max 4936 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 4936 +Ave neighs/atom = 205.667 +Ave special neighs/atom = 2 +Neighbor list builds = 2 +Dangerous builds = 0 +reset_timestep 0 +# rigid constraints with thermostat + +fix mynvt all nvt temp ${temp} ${temp} 100 +fix mynvt all nvt temp 338.0 ${temp} 100 +fix mynvt all nvt temp 338.0 338.0 100 +fix wshake all shake 0.0001 50 0 b 1 a 1 mol h2omol + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 8 = # of frozen angles +# gcmc + + + +run 1000 +Ewald initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.170448 + estimated absolute RMS force accuracy = 0.0332064 + estimated relative force accuracy = 0.0001 + KSpace vectors: actual max1d max3d = 16 2 62 + kxmax kymax kzmax = 2 2 2 +Per MPI rank memory allocation (min/avg/max) = 11.63 | 11.63 | 11.63 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 518.26667 -30.182886 0 -7.0100684 993.1985 + 1000 326.9865 -62.258445 0 -47.638175 -5.3440813 +Loop time of 0.14263 on 1 procs for 1000 steps with 24 atoms + +Performance: 605.764 ns/day, 0.040 hours/ns, 7011.155 timesteps/s +99.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.10849 | 0.10849 | 0.10849 | 0.0 | 76.07 +Bond | 0.00015426 | 0.00015426 | 0.00015426 | 0.0 | 0.11 +Kspace | 0.01205 | 0.01205 | 0.01205 | 0.0 | 8.45 +Neigh | 0.0046577 | 0.0046577 | 0.0046577 | 0.0 | 3.27 +Comm | 0.011531 | 0.011531 | 0.011531 | 0.0 | 8.08 +Output | 1.6212e-05 | 1.6212e-05 | 1.6212e-05 | 0.0 | 0.01 +Modify | 0.0037699 | 0.0037699 | 0.0037699 | 0.0 | 2.64 +Other | | 0.001957 | | | 1.37 + +Nlocal: 24 ave 24 max 24 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 1660 ave 1660 
max 1660 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 5112 ave 5112 max 5112 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 5112 +Ave neighs/atom = 213 +Ave special neighs/atom = 2 +Neighbor list builds = 25 +Dangerous builds = 0 + +variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 trans) +fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 ${mu} ${disp} mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert 1.66666666666667 group h2o shake wshake + +# output + +variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) +variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) +variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) +variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) +compute_modify thermo_temp dynamic/dof yes +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo 1000 + +# run + +run 20000 +Ewald initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.170448 + estimated absolute RMS force accuracy = 0.0332064 + estimated relative force accuracy = 0.0001 + KSpace vectors: actual max1d max3d = 16 2 62 + kxmax kymax kzmax = 2 2 2 +WARNING: Fix gcmc using full_energy option (../fix_gcmc.cpp:445) +0 atoms in group FixGCMC:gcmc_exclusion_group:mygcmc +0 atoms in group FixGCMC:rotation_gas_atoms:mygcmc +WARNING: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies (../neighbor.cpp:472) +Per MPI rank memory allocation (min/avg/max) = 11.63 | 11.63 | 11.63 Mbytes +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc + 1000 326.9865 -4.3509713 -62.258445 14.62027 0.23910963 24 0 0 0 0 + 2000 116.99793 -5344.1527 -286.61595 17.088682 0.74721761 75 0.048183096 0.013941446 0 0 + 3000 106.86746 -3920.4926 -361.60598 18.794545 0.89666113 90 0.035637919 0.012768883 0 0 + 4000 75.002668 540.46846 -414.8511 14.531966 0.98632724 99 0.025963651 0.0093451705 0 0 + 5000 79.924788 -2131.1173 -437.21216 15.962121 1.0162159 102 0.019879728 0.0070418993 0 0 + 6000 95.552773 -3647.0233 -438.24276 19.083253 1.0162159 102 0.015753613 0.0056885133 0 0 + 7000 79.501736 -2071.5369 -440.77351 15.877631 1.0162159 102 0.01326216 0.0046915318 0 0 + 8000 62.567091 -3102.9616 -442.21884 12.495541 1.0162159 102 0.011305503 0.0040437885 0 0 + 9000 68.324047 -3812.7866 -440.46835 13.645287 1.0162159 102 0.0099549538 0.0035157329 0 0 + 10000 83.857631 -2158.2659 -444.8183 16.747566 1.0162159 102 0.0088200922 0.0031354281 0 0 + 11000 68.350984 -2084.0789 -440.14081 13.650667 1.0162159 102 0.0081331455 0.0030247424 0 0 + 12000 76.867315 -1585.6723 -443.36199 15.3515 1.0162159 102 0.0073845932 0.0027532534 0 0 + 13000 59.74266 -2211.0211 -446.07791 11.931462 1.0162159 102 0.0067756276 0.0025213898 0 0 + 14000 81.154979 -907.0176 -441.53368 16.207808 1.0162159 102 0.0062527642 0.0023280719 0 0 + 15000 
66.814346 -2804.5134 -455.80704 13.7421 1.0461046 105 0.0059590528 0.0021576214 0 0 + 16000 71.42983 -3930.4004 -458.43218 14.691394 1.0461046 105 0.0055547473 0.0020163729 0 0 + 17000 89.624855 -3569.8136 -455.18164 18.433672 1.0461046 105 0.0052173265 0.0018867687 0 0 + 18000 63.519962 -1882.8157 -456.58939 13.064525 1.0461046 105 0.0049082049 0.0017765986 0 0 + 19000 71.872698 -2243.5046 -454.93359 14.782481 1.0461046 105 0.0046439115 0.0016748361 0 0 + 20000 73.660765 -2285.3173 -476.35473 15.589381 1.0759934 108 0.0045124933 0.0015837653 0 0 + 21000 95.675868 987.92089 -475.46736 20.248603 1.0759934 108 0.004285814 0.0015049513 0 0 +Loop time of 226.155 on 1 procs for 20000 steps with 108 atoms + +Performance: 7.641 ns/day, 3.141 hours/ns, 88.435 timesteps/s +99.2% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 38.053 | 38.053 | 38.053 | 0.0 | 16.83 +Bond | 0.089673 | 0.089673 | 0.089673 | 0.0 | 0.04 +Kspace | 0.92778 | 0.92778 | 0.92778 | 0.0 | 0.41 +Neigh | 1.2619 | 1.2619 | 1.2619 | 0.0 | 0.56 +Comm | 0.97483 | 0.97483 | 0.97483 | 0.0 | 0.43 +Output | 0.0013306 | 0.0013306 | 0.0013306 | 0.0 | 0.00 +Modify | 184.68 | 184.68 | 184.68 | 0.0 | 81.66 +Other | | 0.171 | | | 0.08 + +Nlocal: 108 ave 108 max 108 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 7850 ave 7850 max 7850 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 99828 ave 99828 max 99828 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 99828 +Ave neighs/atom = 924.333 +Ave special neighs/atom = 2 +Neighbor list builds = 20439 +Dangerous builds = 0 + +Total wall time: 0:03:46 diff --git a/examples/gcmc/log.6Jul17.gcmc.h2o.g++.4 b/examples/gcmc/log.6Jul17.gcmc.h2o.g++.4 new file mode 100644 index 0000000000..c04b25f45e --- /dev/null +++ b/examples/gcmc/log.6Jul17.gcmc.h2o.g++.4 @@ -0,0 +1,281 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP 
thread(s) per MPI task +# fix gcmc example with fix shake + +# variables available on command line + +variable mu index -8.1 +variable disp index 0.5 +variable temp index 338.0 +variable lbox index 10.0 +variable spacing index 5.0 + +# global model settings + +units real +atom_style full +boundary p p p +pair_style lj/cut/coul/long 14 +pair_modify mix arithmetic tail yes +kspace_style ewald 0.0001 +bond_style harmonic +angle_style harmonic + +# box, start molecules on simple cubic lattice + +lattice sc ${spacing} +lattice sc 5.0 +Lattice spacing in x,y,z = 5 5 5 +region box block 0 ${lbox} 0 ${lbox} 0 ${lbox} units box +region box block 0 10.0 0 ${lbox} 0 ${lbox} units box +region box block 0 10.0 0 10.0 0 ${lbox} units box +region box block 0 10.0 0 10.0 0 10.0 units box +create_box 2 box bond/types 1 angle/types 1 extra/bond/per/atom 2 extra/angle/per/atom 1 extra/special/per/atom 2 +Created orthogonal box = (0 0 0) to (10 10 10) + 1 by 2 by 2 MPI processor grid + +# we can load multiple molecule templates, but don't have to use them all +molecule co2mol CO2.txt +Read molecule co2mol: + 3 atoms with 2 types + 2 bonds with 1 types + 1 angles with 1 types + 0 dihedrals with 0 types + 0 impropers with 0 types +molecule h2omol H2O.txt +Read molecule h2omol: + 3 atoms with 2 types + 2 bonds with 1 types + 1 angles with 1 types + 0 dihedrals with 0 types + 0 impropers with 0 types +create_atoms 0 box mol h2omol 464563 units box +Created 24 atoms + +# rigid SPC/E water model + +pair_coeff 1 1 0.15535 3.166 +pair_coeff * 2 0.0000 0.0000 + +bond_coeff 1 1000 1.0 +angle_coeff 1 100 109.47 + +# masses + +mass 1 15.9994 +mass 2 1.0 + +# MD settings + +group h2o type 1 2 +24 atoms in group h2o +neighbor 2.0 bin +neigh_modify every 1 delay 1 check yes +velocity all create ${temp} 54654 +velocity all create 338.0 54654 +timestep 1.0 + +minimize 0.0 0.0 100 1000 +WARNING: Using 'neigh_modify every 1 delay 0 check yes' setting during minimization (../min.cpp:168) +Ewald 
initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.170448 + estimated absolute RMS force accuracy = 0.0332064 + estimated relative force accuracy = 0.0001 + KSpace vectors: actual max1d max3d = 16 2 62 + kxmax kymax kzmax = 2 2 2 +Neighbor list info ... + update every 1 steps, delay 0 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 16 + ghost atom cutoff = 16 + binsize = 8, bins = 2 2 2 + 1 neighbor lists, perpetual/occasional/extra = 1 0 0 + (1) pair lj/cut/coul/long, perpetual + attributes: half, newton on + pair build: half/bin/newton + stencil: half/bin/3d/newton + bin: standard +Per MPI rank memory allocation (min/avg/max) = 11.85 | 11.85 | 11.85 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 338 -4.9610706 9.2628112e-06 18.211756 730.90791 + 100 338 -15.742442 0.14954269 7.579918 -637.49568 +Loop time of 0.0828406 on 4 procs for 100 steps with 24 atoms + +98.7% CPU use with 4 MPI tasks x 1 OpenMP threads + +Minimization stats: + Stopping criterion = max iterations + Energy initial, next-to-last, final = + -4.96106135393 -15.5388622715 -15.592899346 + Force two-norm initial, final = 15.474 18.1478 + Force max component initial, final = 5.80042 7.56514 + Final line search alpha, max atom move = 0.00151131 0.0114333 + Iterations, force evaluations = 100 328 + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.012844 | 0.025471 | 0.047008 | 8.1 | 30.75 +Bond | 0.00038934 | 0.00046468 | 0.00054336 | 0.0 | 0.56 +Kspace | 0.0061138 | 0.027556 | 0.04014 | 7.8 | 33.26 +Neigh | 0 | 0 | 0 | 0.0 | 0.00 +Comm | 0.023276 | 0.023636 | 0.023804 | 0.1 | 28.53 +Output | 3.171e-05 | 3.3557e-05 | 3.8147e-05 | 0.0 | 0.04 +Modify | 0 | 0 | 0 | 0.0 | 0.00 +Other | | 0.00568 | | | 6.86 + +Nlocal: 6 ave 8 max 3 min +Histogram: 1 0 0 0 1 0 0 0 0 2 +Nghost: 1722 
ave 1725 max 1720 min +Histogram: 2 0 0 0 0 0 1 0 0 1 +Neighs: 1256.75 ave 2101 max 667 min +Histogram: 1 0 1 0 1 0 0 0 0 1 + +Total # of neighbors = 5027 +Ave neighs/atom = 209.458 +Ave special neighs/atom = 2 +Neighbor list builds = 0 +Dangerous builds = 0 +reset_timestep 0 +# rigid constraints with thermostat + +fix mynvt all nvt temp ${temp} ${temp} 100 +fix mynvt all nvt temp 338.0 ${temp} 100 +fix mynvt all nvt temp 338.0 338.0 100 +fix wshake all shake 0.0001 50 0 b 1 a 1 mol h2omol + 0 = # of size 2 clusters + 0 = # of size 3 clusters + 0 = # of size 4 clusters + 8 = # of frozen angles +# gcmc + + + +run 1000 +Ewald initialization ... +WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.170448 + estimated absolute RMS force accuracy = 0.0332064 + estimated relative force accuracy = 0.0001 + KSpace vectors: actual max1d max3d = 16 2 62 + kxmax kymax kzmax = 2 2 2 +Per MPI rank memory allocation (min/avg/max) = 11.6 | 11.6 | 11.6 Mbytes +Step Temp E_pair E_mol TotEng Press + 0 518.26667 -15.742442 0 7.4303753 -613.0781 + 1000 369.81793 -54.202686 0 -37.667331 294.98823 +Loop time of 0.199641 on 4 procs for 1000 steps with 24 atoms + +Performance: 432.777 ns/day, 0.055 hours/ns, 5008.996 timesteps/s +98.5% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 0.017161 | 0.034988 | 0.05833 | 8.0 | 17.53 +Bond | 0.00017357 | 0.00021374 | 0.00027347 | 0.0 | 0.11 +Kspace | 0.018025 | 0.044624 | 0.065613 | 8.4 | 22.35 +Neigh | 0.0029755 | 0.0033154 | 0.0036366 | 0.6 | 1.66 +Comm | 0.059933 | 0.06537 | 0.070709 | 1.5 | 32.74 +Output | 3.4571e-05 | 3.6657e-05 | 4.22e-05 | 0.0 | 0.02 +Modify | 0.043458 | 0.045628 | 0.04767 | 0.9 | 22.86 +Other | | 0.005465 | | | 2.74 + +Nlocal: 6 ave 8 max 3 min +Histogram: 1 0 0 0 0 0 1 0 1 1 +Nghost: 1331.5 ave 1369 max 1290 min 
+Histogram: 1 0 0 0 0 2 0 0 0 1 +Neighs: 1259.75 ave 1642 max 428 min +Histogram: 1 0 0 0 0 0 0 1 0 2 + +Total # of neighbors = 5039 +Ave neighs/atom = 209.958 +Ave special neighs/atom = 2 +Neighbor list builds = 27 +Dangerous builds = 0 + +variable tfac equal 5.0/3.0 # (3 trans + 2 rot)/(3 trans) +fix mygcmc all gcmc 100 100 0 0 54341 ${temp} ${mu} ${disp} mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 ${mu} ${disp} mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 ${disp} mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert ${tfac} group h2o shake wshake +fix mygcmc all gcmc 100 100 0 0 54341 338.0 -8.1 0.5 mol h2omol tfac_insert 1.66666666666667 group h2o shake wshake + +# output + +variable tacc equal f_mygcmc[2]/(f_mygcmc[1]+0.1) +variable iacc equal f_mygcmc[4]/(f_mygcmc[3]+0.1) +variable dacc equal f_mygcmc[6]/(f_mygcmc[5]+0.1) +variable racc equal f_mygcmc[8]/(f_mygcmc[7]+0.1) +compute_modify thermo_temp dynamic/dof yes +thermo_style custom step temp press pe ke density atoms v_iacc v_dacc v_tacc v_racc +thermo 1000 + +# run + +run 20000 +Ewald initialization ... 
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321) + G vector (1/distance) = 0.170448 + estimated absolute RMS force accuracy = 0.0332064 + estimated relative force accuracy = 0.0001 + KSpace vectors: actual max1d max3d = 16 2 62 + kxmax kymax kzmax = 2 2 2 +WARNING: Fix gcmc using full_energy option (../fix_gcmc.cpp:445) +0 atoms in group FixGCMC:gcmc_exclusion_group:mygcmc +0 atoms in group FixGCMC:rotation_gas_atoms:mygcmc +WARNING: Neighbor exclusions used with KSpace solver may give inconsistent Coulombic energies (../neighbor.cpp:472) +Per MPI rank memory allocation (min/avg/max) = 11.6 | 11.6 | 11.6 Mbytes +Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_racc + 1000 369.81793 295.32434 -54.202686 16.535355 0.23910963 24 0 0 0 0 + 2000 84.544466 -2810.9047 -344.81664 14.364627 0.86677242 87 0.052198354 0.0099581757 0 0 + 3000 75.188527 -3688.256 -425.02228 14.567977 0.98632724 99 0.030546787 0.0049111089 0 0 + 4000 75.019396 -5669.3063 -427.69454 14.535207 0.98632724 99 0.019972039 0.0033375609 0 0 + 5000 90.415371 -2141.7596 -434.65925 17.518218 0.98632724 99 0.014909796 0.002514964 0 0 + 6000 78.212628 -943.75125 -428.80584 15.153904 0.98632724 99 0.01181521 0.0020316119 0 0 + 7000 71.754139 -2028.5122 -435.2139 13.902555 0.98632724 99 0.0099466198 0.0016755471 0 0 + 8000 84.446231 -1969.1657 -428.27313 16.361681 0.98632724 99 0.0084791272 0.0014442102 0 0 + 9000 70.952348 -2476.9812 -446.33824 14.170197 1.0162159 102 0.0077150892 0.0012556189 0 0 + 10000 71.418543 -1875.7083 -443.7214 14.263302 1.0162159 102 0.0068355714 0.0011197957 0 0 + 11000 86.094994 -4508.7581 -444.82687 17.194399 1.0162159 102 0.0061494515 0.0010082475 0 0 + 12000 81.906091 -1547.8105 -442.36719 16.357815 1.0162159 102 0.0055834729 0.00091775114 0 0 + 13000 57.221548 -4607.6222 -448.30939 11.42796 1.0162159 102 0.0051230355 0.00084046326 0 0 + 14000 61.288344 -2518.1779 -445.70636 12.240157 1.0162159 102 0.0047276997 0.00077602396 0 0 + 15000 
85.787669 -2407.7111 -443.3834 17.133022 1.0162159 102 0.0043983485 0.00071920715 0 0 + 16000 74.845939 -3288.3403 -445.8247 14.947802 1.0162159 102 0.0042321884 0.00080654918 0 0 + 17000 73.835431 -1926.9566 -445.67476 14.745989 1.0162159 102 0.0039751059 0.00075470749 0 0 + 18000 72.634985 -3997.552 -447.2351 14.506243 1.0162159 102 0.0037395847 0.00071063946 0 0 + 19000 96.776472 -714.44132 -453.65552 19.904587 1.0461046 105 0.0036487876 0.00066993446 0 0 + 20000 75.470786 183.16972 -464.04688 15.522521 1.0461046 105 0.0034630763 0.00063350614 0 0 + 21000 65.658309 -773.41266 -466.27068 13.504331 1.0461046 105 0.003289113 0.00060198052 0 0 +Loop time of 93.8859 on 4 procs for 20000 steps with 105 atoms + +Performance: 18.405 ns/day, 1.304 hours/ns, 213.024 timesteps/s +98.8% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 6.7882 | 10.264 | 14.758 | 93.2 | 10.93 +Bond | 0.028286 | 0.034218 | 0.039038 | 2.5 | 0.04 +Kspace | 0.57255 | 5.2227 | 8.8493 | 133.8 | 5.56 +Neigh | 0.3635 | 0.36915 | 0.37473 | 0.9 | 0.39 +Comm | 2.9961 | 3.2542 | 3.509 | 11.4 | 3.47 +Output | 0.0011675 | 0.0012342 | 0.001375 | 0.2 | 0.00 +Modify | 74.428 | 74.499 | 74.571 | 0.7 | 79.35 +Other | | 0.2411 | | | 0.26 + +Nlocal: 26.25 ave 31 max 22 min +Histogram: 1 0 1 0 0 0 1 0 0 1 +Nghost: 6049.25 ave 6133 max 5962 min +Histogram: 1 0 0 0 1 0 1 0 0 1 +Neighs: 23613 ave 35083 max 14025 min +Histogram: 1 0 0 1 1 0 0 0 0 1 + +Total # of neighbors = 94452 +Ave neighs/atom = 899.543 +Ave special neighs/atom = 2 +Neighbor list builds = 20428 +Dangerous builds = 0 + +Total wall time: 0:01:34 diff --git a/examples/gcmc/log.24Mar17.gcmc.lj.g++.1 b/examples/gcmc/log.6Jul17.gcmc.lj.g++.1 similarity index 88% rename from examples/gcmc/log.24Mar17.gcmc.lj.g++.1 rename to examples/gcmc/log.6Jul17.gcmc.lj.g++.1 index 36a9fe885d..69fc2ede1c 100644 --- 
a/examples/gcmc/log.24Mar17.gcmc.lj.g++.1 +++ b/examples/gcmc/log.6Jul17.gcmc.lj.g++.1 @@ -1,5 +1,4 @@ -LAMMPS (17 Mar 2017) -OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90) +LAMMPS (6 Jul 2017) using 1 OpenMP thread(s) per MPI task # GCMC for LJ simple fluid, no dynamics # T = 2.0 @@ -100,20 +99,20 @@ Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_rhoav v_pav v 8000 2.2175324 1.5897263 -3.078898 3.2759002 0.528 66 0.068180395 0.067899629 0.11332691 0.53928 1.5488388 -0.01075766 9000 1.8610779 1.0396231 -2.923262 2.7465908 0.496 62 0.068346453 0.068028117 0.1134132 0.52912 1.4352871 0.027082544 10000 2.1079271 1.1746643 -2.9112062 3.1091925 0.48 60 0.068352878 0.068054948 0.11335434 0.5316 1.4462327 0.018503094 -Loop time of 13.05 on 1 procs for 10000 steps with 60 atoms +Loop time of 20.6892 on 1 procs for 10000 steps with 60 atoms -Performance: 331035.016 tau/day, 766.285 timesteps/s -100.0% CPU use with 1 MPI tasks x 1 OpenMP threads +Performance: 208804.611 tau/day, 483.344 timesteps/s +99.4% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.37239 | 0.37239 | 0.37239 | 0.0 | 2.85 -Neigh | 0.94764 | 0.94764 | 0.94764 | 0.0 | 7.26 -Comm | 0.092473 | 0.092473 | 0.092473 | 0.0 | 0.71 -Output | 0.00023365 | 0.00023365 | 0.00023365 | 0.0 | 0.00 -Modify | 11.627 | 11.627 | 11.627 | 0.0 | 89.09 -Other | | 0.01054 | | | 0.08 +Pair | 0.47227 | 0.47227 | 0.47227 | 0.0 | 2.28 +Neigh | 1.1729 | 1.1729 | 1.1729 | 0.0 | 5.67 +Comm | 0.17133 | 0.17133 | 0.17133 | 0.0 | 0.83 +Output | 0.00028253 | 0.00028253 | 0.00028253 | 0.0 | 0.00 +Modify | 18.852 | 18.852 | 18.852 | 0.0 | 91.12 +Other | | 0.02063 | | | 0.10 Nlocal: 60 ave 60 max 60 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -126,4 +125,4 @@ Total # of neighbors = 2133 Ave neighs/atom = 35.55 Neighbor list builds = 10000 
Dangerous builds = 0 -Total wall time: 0:00:13 +Total wall time: 0:00:20 diff --git a/examples/gcmc/log.24Mar17.gcmc.lj.g++.4 b/examples/gcmc/log.6Jul17.gcmc.lj.g++.4 similarity index 88% rename from examples/gcmc/log.24Mar17.gcmc.lj.g++.4 rename to examples/gcmc/log.6Jul17.gcmc.lj.g++.4 index 8694d8b95e..6bd3b3189c 100644 --- a/examples/gcmc/log.24Mar17.gcmc.lj.g++.4 +++ b/examples/gcmc/log.6Jul17.gcmc.lj.g++.4 @@ -1,5 +1,4 @@ -LAMMPS (17 Mar 2017) -OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90) +LAMMPS (6 Jul 2017) using 1 OpenMP thread(s) per MPI task # GCMC for LJ simple fluid, no dynamics # T = 2.0 @@ -100,20 +99,20 @@ Step Temp Press PotEng KinEng Density Atoms v_iacc v_dacc v_tacc v_rhoav v_pav v 8000 1.7790467 1.8680568 -2.8028819 2.6275151 0.52 65 0.070454494 0.070172368 0.11736806 0.524 1.4213649 0.047985191 9000 1.7968847 1.3195587 -3.261001 2.6550983 0.536 67 0.069952011 0.069618327 0.11650087 0.53904 1.4624201 -0.01069837 10000 2.1566109 1.1015729 -3.4999837 3.1880335 0.552 69 0.069603309 0.069284134 0.11625548 0.53128 1.3587249 0.02075238 -Loop time of 13.0611 on 4 procs for 10000 steps with 69 atoms +Loop time of 24.9916 on 4 procs for 10000 steps with 69 atoms -Performance: 330753.007 tau/day, 765.632 timesteps/s -99.7% CPU use with 4 MPI tasks x 1 OpenMP threads +Performance: 172857.936 tau/day, 400.134 timesteps/s +98.2% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 0.08888 | 0.09443 | 0.099889 | 1.4 | 0.72 -Neigh | 0.27721 | 0.28532 | 0.29177 | 1.1 | 2.18 -Comm | 0.27648 | 0.28875 | 0.30268 | 1.9 | 2.21 -Output | 0.00029635 | 0.00043058 | 0.00048113 | 0.0 | 0.00 -Modify | 12.384 | 12.384 | 12.384 | 0.0 | 94.82 -Other | | 0.008055 | | | 0.06 +Pair | 0.11696 | 0.12516 | 0.1321 | 1.7 | 0.50 +Neigh | 0.34874 | 0.35644 | 0.36545 | 1.2 | 1.43 +Comm | 0.48531 | 
0.51366 | 0.54755 | 3.8 | 2.06 +Output | 0.0005362 | 0.00069767 | 0.00076008 | 0.0 | 0.00 +Modify | 23.956 | 23.972 | 23.988 | 0.3 | 95.92 +Other | | 0.02376 | | | 0.10 Nlocal: 17.25 ave 23 max 10 min Histogram: 1 0 0 0 0 0 2 0 0 1 @@ -126,4 +125,4 @@ Total # of neighbors = 2823 Ave neighs/atom = 40.913 Neighbor list builds = 10000 Dangerous builds = 0 -Total wall time: 0:00:13 +Total wall time: 0:00:24 diff --git a/examples/peri/in.peri b/examples/peri/in.peri-pmb similarity index 100% rename from examples/peri/in.peri rename to examples/peri/in.peri-pmb diff --git a/examples/peri/in.peri.eps b/examples/peri/in.peri.eps new file mode 100644 index 0000000000..5ddea41722 --- /dev/null +++ b/examples/peri/in.peri.eps @@ -0,0 +1,45 @@ +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +create_atoms 1 region target + +pair_style peri/eps +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 10.0e8 +set group all density 2200 +set group all volume 1.25e-10 +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 diff --git a/examples/peri/in.peri.lps b/examples/peri/in.peri.lps new file mode 100644 index 0000000000..af0462b5d4 --- /dev/null +++ b/examples/peri/in.peri.lps @@ -0,0 +1,45 @@ +# small Peridynamic 
cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +create_atoms 1 region target + +pair_style peri/lps +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 +set group all density 2200 +set group all volume 1.25e-10 +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 diff --git a/examples/peri/in.peri.pmb b/examples/peri/in.peri.pmb new file mode 100644 index 0000000000..f9f5d54231 --- /dev/null +++ b/examples/peri/in.peri.pmb @@ -0,0 +1,45 @@ +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +create_atoms 1 region target + +pair_style peri/pmb +pair_coeff * * 1.6863e22 0.0015001 0.0005 0.25 +set group all density 2200 +set group all volume 1.25e-10 +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + 
+#dump 2 all image 50 image.*.jpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 diff --git a/examples/peri/in.peri.ves b/examples/peri/in.peri.ves new file mode 100644 index 0000000000..3787e676a4 --- /dev/null +++ b/examples/peri/in.peri.ves @@ -0,0 +1,45 @@ +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +create_atoms 1 region target + +pair_style peri/ves +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 0.5 0.001 +set group all density 2200 +set group all volume 1.25e-10 +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type & +# axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 diff --git a/examples/peri/log.6Jul17.peri.eps.g++.1 b/examples/peri/log.6Jul17.peri.eps.g++.1 new file mode 100644 index 0000000000..6aa4314d53 --- /dev/null +++ b/examples/peri/log.6Jul17.peri.eps.g++.1 @@ -0,0 +1,115 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +Lattice spacing in x,y,z = 0.0005 0.0005 0.0005 +region target 
cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +Created orthogonal box = (-0.005 -0.005 -0.005) to (0.005 0 0.005) + 1 by 1 by 1 MPI processor grid +create_atoms 1 region target +Created 3487 atoms + +pair_style peri/eps +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 10.0e8 +set group all density 2200 + 3487 settings made for density +set group all volume 1.25e-10 + 3487 settings made for volume +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 0.0025001 + ghost atom cutoff = 0.0025001 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/eps, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Peridynamic bonds: + total # of bonds = 335966 + bonds/atom = 96.3482 +Per MPI rank memory allocation (min/avg/max) = 50.29 | 50.29 | 50.29 Mbytes +Step Temp E_pair E_mol TotEng Press Volume + 0 0 0 0 0 0 5.0030006e-07 + 100 8.3466308e+24 247103.03 0 849681.45 8.0295601e+11 5.0030006e-07 + 200 1.1784921e+27 1098605.6 0 86178912 1.0246967e+14 5.5353162e-07 + 300 2.6263212e+27 4118581.6 0 1.9372377e+08 1.662415e+14 7.6036043e-07 + 400 3.3085888e+27 9397203.3 0 2.4825816e+08 1.561692e+14 1.0196674e-06 + 500 3.9151799e+27 18408722 0 3.0106204e+08 1.5298661e+14 1.2317127e-06 + 600 6.2936721e+27 11346143 0 4.6571282e+08 1.9645007e+14 1.5419242e-06 + 700 1.2721597e+28 3830223.2 0 9.2225588e+08 3.0235577e+14 2.0250441e-06 + 800 1.3190107e+28 2831668.7 0 9.5508099e+08 2.4853932e+14 2.5542553e-06 + 900 1.3166045e+28 1911868.6 0 9.524241e+08 1.9729649e+14 3.2117896e-06 + 1000 1.3159578e+28 1995827.6 0 9.5204114e+08 1.6722163e+14 3.7875695e-06 +Loop time of 72.5574 on 1 procs for 1000 steps with 3487 atoms + +99.7% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 71.779 | 71.779 | 71.779 | 0.0 | 98.93 +Neigh | 0.5596 | 0.5596 | 0.5596 | 0.0 | 0.77 +Comm | 0.0040631 | 0.0040631 | 0.0040631 | 0.0 | 0.01 +Output | 0.00056624 | 0.00056624 | 0.00056624 | 0.0 | 0.00 +Modify | 0.18403 | 0.18403 | 
0.18403 | 0.0 | 0.25 +Other | | 0.03016 | | | 0.04 + +Nlocal: 3487 ave 3487 max 3487 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 569177 ave 569177 max 569177 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 1.20908e+06 ave 1.20908e+06 max 1.20908e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1209076 +Ave neighs/atom = 346.738 +Neighbor list builds = 40 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:01:12 diff --git a/examples/peri/log.6Jul17.peri.eps.g++.4 b/examples/peri/log.6Jul17.peri.eps.g++.4 new file mode 100644 index 0000000000..1423ec4637 --- /dev/null +++ b/examples/peri/log.6Jul17.peri.eps.g++.4 @@ -0,0 +1,115 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +Lattice spacing in x,y,z = 0.0005 0.0005 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +Created orthogonal box = (-0.005 -0.005 -0.005) to (0.005 0 0.005) + 2 by 1 by 2 MPI processor grid +create_atoms 1 region target +Created 3487 atoms + +pair_style peri/eps +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 10.0e8 +set group all density 2200 + 3487 settings made for density +set group all volume 1.25e-10 + 3487 settings made for volume +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + 
+#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 0.0025001 + ghost atom cutoff = 0.0025001 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/eps, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Peridynamic bonds: + total # of bonds = 335966 + bonds/atom = 96.3482 +Per MPI rank memory allocation (min/avg/max) = 44.77 | 45.04 | 45.14 Mbytes +Step Temp E_pair E_mol TotEng Press Volume + 0 0 0 0 0 0 5.0030006e-07 + 100 8.3466308e+24 247103.03 0 849681.45 8.0295601e+11 5.0030006e-07 + 200 1.1784921e+27 1098605.6 0 86178912 1.0246967e+14 5.5353162e-07 + 300 2.6263212e+27 4118581.6 0 1.9372377e+08 1.662415e+14 7.6036043e-07 + 400 3.3085888e+27 9397203.3 0 2.4825816e+08 1.561692e+14 1.0196674e-06 + 500 3.9151799e+27 18408722 0 3.0106204e+08 1.5298661e+14 1.2317127e-06 + 600 6.2936721e+27 11346143 0 4.6571282e+08 1.9645007e+14 1.5419242e-06 + 700 1.2721597e+28 3830223.2 0 9.2225588e+08 3.0235577e+14 2.0250441e-06 + 800 1.3190107e+28 2831668.7 0 9.5508099e+08 2.4853932e+14 2.5542553e-06 + 900 1.3166045e+28 1911869.3 0 9.524241e+08 1.9729649e+14 3.2117896e-06 + 1000 1.3159578e+28 1995833.9 0 9.5204114e+08 1.6722163e+14 3.7875695e-06 +Loop time of 29.6266 on 4 procs for 1000 steps with 3487 atoms + +98.8% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 25.905 | 26.18 | 26.326 | 3.2 | 88.37 +Neigh | 0.15352 | 0.1872 | 0.22394 | 7.6 | 0.63 
+Comm | 3.0374 | 3.1471 | 3.3731 | 7.5 | 10.62 +Output | 0.00047588 | 0.00062978 | 0.00097752 | 0.0 | 0.00 +Modify | 0.073521 | 0.081854 | 0.093222 | 2.7 | 0.28 +Other | | 0.02989 | | | 0.10 + +Nlocal: 871.75 ave 908 max 838 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Nghost: 1368.25 ave 1402 max 1332 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Neighs: 142294 ave 159233 max 124729 min +Histogram: 2 0 0 0 0 0 0 0 0 2 +FullNghs: 302269 ave 346070 max 260820 min +Histogram: 1 0 0 0 2 0 0 0 0 1 + +Total # of neighbors = 1209076 +Ave neighs/atom = 346.738 +Neighbor list builds = 40 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:29 diff --git a/examples/peri/log.6Jul17.peri.lps.g++.1 b/examples/peri/log.6Jul17.peri.lps.g++.1 new file mode 100644 index 0000000000..4b2ac532d1 --- /dev/null +++ b/examples/peri/log.6Jul17.peri.lps.g++.1 @@ -0,0 +1,115 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +Lattice spacing in x,y,z = 0.0005 0.0005 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +Created orthogonal box = (-0.005 -0.005 -0.005) to (0.005 0 0.005) + 1 by 1 by 1 MPI processor grid +create_atoms 1 region target +Created 3487 atoms + +pair_style peri/lps +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 +set group all density 2200 + 3487 settings made for density +set group all volume 1.25e-10 + 3487 settings made for volume +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id 
type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 0.0025001 + ghost atom cutoff = 0.0025001 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/lps, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Peridynamic bonds: + total # of bonds = 335966 + bonds/atom = 96.3482 +Per MPI rank memory allocation (min/avg/max) = 34.91 | 34.91 | 34.91 Mbytes +Step Temp E_pair E_mol TotEng Press Volume + 0 0 0 0 0 0 5.0030006e-07 + 100 1.684629e+24 133446.65 0 255067.11 1.6206343e+11 5.0030006e-07 + 200 1.1380148e+27 684478.05 0 82842557 9.9178307e+13 5.5225839e-07 + 300 2.5659218e+27 5944645.9 0 1.9118934e+08 1.6231114e+14 7.6086254e-07 + 400 2.9916164e+27 13677434 0 2.2965481e+08 1.4081705e+14 1.0224963e-06 + 500 3.3570343e+27 11130894 0 2.5348933e+08 1.2577633e+14 1.2846002e-06 + 600 3.9506165e+27 6986672.5 0 2.9219831e+08 1.2659956e+14 1.5019096e-06 + 700 7.8366157e+27 11716082 0 5.7747436e+08 1.9480124e+14 1.9361899e-06 + 800 8.2483231e+27 4671647.2 0 6.0015282e+08 1.7040064e+14 2.3297298e-06 + 900 8.2720965e+27 1249680.9 0 5.9844715e+08 1.4117116e+14 2.8202052e-06 + 1000 8.2441462e+27 2278265.6 0 5.9745788e+08 1.234652e+14 3.213751e-06 +Loop time of 62.3833 on 1 procs for 1000 steps with 3487 atoms + +99.5% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total 
+--------------------------------------------------------------- +Pair | 61.608 | 61.608 | 61.608 | 0.0 | 98.76 +Neigh | 0.57177 | 0.57177 | 0.57177 | 0.0 | 0.92 +Comm | 0.0030825 | 0.0030825 | 0.0030825 | 0.0 | 0.00 +Output | 0.00051951 | 0.00051951 | 0.00051951 | 0.0 | 0.00 +Modify | 0.17278 | 0.17278 | 0.17278 | 0.0 | 0.28 +Other | | 0.02745 | | | 0.04 + +Nlocal: 3487 ave 3487 max 3487 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 576568 ave 576568 max 576568 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 1.20908e+06 ave 1.20908e+06 max 1.20908e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1209076 +Ave neighs/atom = 346.738 +Neighbor list builds = 37 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:01:02 diff --git a/examples/peri/log.6Jul17.peri.lps.g++.4 b/examples/peri/log.6Jul17.peri.lps.g++.4 new file mode 100644 index 0000000000..04244f0123 --- /dev/null +++ b/examples/peri/log.6Jul17.peri.lps.g++.4 @@ -0,0 +1,115 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +Lattice spacing in x,y,z = 0.0005 0.0005 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +Created orthogonal box = (-0.005 -0.005 -0.005) to (0.005 0 0.005) + 2 by 1 by 2 MPI processor grid +create_atoms 1 region target +Created 3487 atoms + +pair_style peri/lps +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 +set group all density 2200 + 3487 settings made for density +set group all volume 1.25e-10 + 3487 settings made for volume +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + 
step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 0.0025001 + ghost atom cutoff = 0.0025001 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/lps, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Peridynamic bonds: + total # of bonds = 335966 + bonds/atom = 96.3482 +Per MPI rank memory allocation (min/avg/max) = 29.4 | 29.66 | 29.76 Mbytes +Step Temp E_pair E_mol TotEng Press Volume + 0 0 0 0 0 0 5.0030006e-07 + 100 1.684629e+24 133446.65 0 255067.11 1.6206343e+11 5.0030006e-07 + 200 1.1380148e+27 684478.05 0 82842557 9.9178307e+13 5.5225839e-07 + 300 2.5659218e+27 5944645.9 0 1.9118934e+08 1.6231114e+14 7.6086254e-07 + 400 2.9916164e+27 13677434 0 2.2965481e+08 1.4081705e+14 1.0224963e-06 + 500 3.3570343e+27 11130894 0 2.5348933e+08 1.2577633e+14 1.2846002e-06 + 600 3.9506165e+27 6986672.5 0 2.9219831e+08 1.2659956e+14 1.5019096e-06 + 700 7.8366157e+27 11716082 0 5.7747436e+08 1.9480124e+14 1.9361899e-06 + 800 8.2483231e+27 4671647.2 0 6.0015282e+08 1.7040064e+14 2.3297298e-06 + 900 8.2720965e+27 1249680.9 0 5.9844715e+08 1.4117116e+14 2.8202052e-06 + 1000 8.2441489e+27 2277476.2 0 5.9745729e+08 1.2346524e+14 3.213751e-06 +Loop time of 23.2656 on 4 procs for 1000 steps with 3487 
atoms + +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 20.801 | 21.119 | 21.525 | 6.3 | 90.78 +Neigh | 0.13851 | 0.18557 | 0.22747 | 8.5 | 0.80 +Comm | 1.5175 | 1.8689 | 2.1386 | 18.0 | 8.03 +Output | 0.00049806 | 0.00059026 | 0.00071931 | 0.0 | 0.00 +Modify | 0.063441 | 0.066235 | 0.069135 | 0.9 | 0.28 +Other | | 0.02496 | | | 0.11 + +Nlocal: 871.75 ave 939 max 805 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Nghost: 1343.25 ave 1410 max 1276 min +Histogram: 1 0 0 0 1 1 0 0 0 1 +Neighs: 144142 ave 176488 max 113797 min +Histogram: 1 0 1 0 0 0 1 0 0 1 +FullNghs: 302269 ave 346070 max 260820 min +Histogram: 1 0 0 0 2 0 0 0 0 1 + +Total # of neighbors = 1209076 +Ave neighs/atom = 346.738 +Neighbor list builds = 37 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:23 diff --git a/examples/peri/log.5Oct16.peri.g++.1 b/examples/peri/log.6Jul17.peri.pmb.g++.1 similarity index 62% rename from examples/peri/log.5Oct16.peri.g++.1 rename to examples/peri/log.6Jul17.peri.pmb.g++.1 index 687876f97f..84a439674b 100644 --- a/examples/peri/log.5Oct16.peri.g++.1 +++ b/examples/peri/log.6Jul17.peri.pmb.g++.1 @@ -1,10 +1,11 @@ -LAMMPS (5 Oct 2016) +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task # small Peridynamic cylinder hit by projectile -units si +units si boundary s s s atom_style peri -atom_modify map array +atom_modify map array neighbor 0.0010 bin # small target @@ -41,24 +42,34 @@ thermo 100 #dump 1 all custom 100 dump.peri id type x y z c_1 -#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 -#dump_modify 2 pad 4 +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 -#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 
adiam 0.0006 -#dump_modify 3 pad 4 +#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 run 1000 Neighbor list info ... - 2 neighbor list requests update every 1 steps, delay 10 steps, check yes max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 0.0025001 ghost atom cutoff = 0.0025001 - binsize = 0.00125005 -> bins = 9 5 9 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/pmb, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard Peridynamic bonds: total # of bonds = 335966 bonds/atom = 96.3482 -Memory usage per processor = 26.6858 Mbytes +Per MPI rank memory allocation (min/avg/max) = 34.79 | 34.79 | 34.79 Mbytes Step Temp E_pair E_mol TotEng Press Volume 0 0 0 0 0 0 5.0030006e-07 100 1.7890585e+24 552721.8 0 681881.47 1.7210968e+11 5.0030006e-07 @@ -68,28 +79,28 @@ Step Temp E_pair E_mol TotEng Press Volume 500 4.2580877e+27 20212686 0 3.2762196e+08 1.6249923e+14 1.2611723e-06 600 5.5126512e+27 30861342 0 4.2884284e+08 1.7320038e+14 1.531873e-06 700 1.1807414e+28 23119941 0 8.7554687e+08 2.9477434e+14 1.9278632e-06 - 800 1.2424839e+28 2407361.6 0 8.994088e+08 2.3787786e+14 2.5138992e-06 - 900 1.2358395e+28 4532520.6 0 8.9673706e+08 1.9097312e+14 3.1145903e-06 - 1000 1.2341057e+28 3219939.5 0 8.9417279e+08 1.5968597e+14 3.7196039e-06 -Loop time of 20.3026 on 1 procs for 1000 steps with 3487 atoms + 800 1.2424839e+28 2407365.1 0 8.994088e+08 2.3787786e+14 2.5138992e-06 + 900 1.2358397e+28 4532424.3 0 8.9673716e+08 1.9097316e+14 3.1145903e-06 + 1000 1.2341048e+28 3219355.8 0 8.9417154e+08 1.5968585e+14 3.7196039e-06 +Loop time of 28.565 on 1 procs for 1000 steps with 3487 atoms -99.9% CPU use with 1 MPI tasks x no OpenMP threads 
+99.5% CPU use with 1 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 19.625 | 19.625 | 19.625 | 0.0 | 96.66 -Neigh | 0.57013 | 0.57013 | 0.57013 | 0.0 | 2.81 -Comm | 0.0014448 | 0.0014448 | 0.0014448 | 0.0 | 0.01 -Output | 0.00024772 | 0.00024772 | 0.00024772 | 0.0 | 0.00 -Modify | 0.092173 | 0.092173 | 0.092173 | 0.0 | 0.45 -Other | | 0.01359 | | | 0.07 +Pair | 27.721 | 27.721 | 27.721 | 0.0 | 97.04 +Neigh | 0.66353 | 0.66353 | 0.66353 | 0.0 | 2.32 +Comm | 0.0027969 | 0.0027969 | 0.0027969 | 0.0 | 0.01 +Output | 0.00042295 | 0.00042295 | 0.00042295 | 0.0 | 0.00 +Modify | 0.1566 | 0.1566 | 0.1566 | 0.0 | 0.55 +Other | | 0.02086 | | | 0.07 Nlocal: 3487 ave 3487 max 3487 min Histogram: 1 0 0 0 0 0 0 0 0 0 Nghost: 0 ave 0 max 0 min Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 567140 ave 567140 max 567140 min +Neighs: 567132 ave 567132 max 567132 min Histogram: 1 0 0 0 0 0 0 0 0 0 FullNghs: 1.20908e+06 ave 1.20908e+06 max 1.20908e+06 min Histogram: 1 0 0 0 0 0 0 0 0 0 @@ -101,4 +112,4 @@ Dangerous builds = 0 Please see the log.cite file for references relevant to this simulation -Total wall time: 0:00:20 +Total wall time: 0:00:28 diff --git a/examples/peri/log.5Oct16.peri.g++.4 b/examples/peri/log.6Jul17.peri.pmb.g++.4 similarity index 58% rename from examples/peri/log.5Oct16.peri.g++.4 rename to examples/peri/log.6Jul17.peri.pmb.g++.4 index cb478772af..637b2cc26a 100644 --- a/examples/peri/log.5Oct16.peri.g++.4 +++ b/examples/peri/log.6Jul17.peri.pmb.g++.4 @@ -1,10 +1,11 @@ -LAMMPS (5 Oct 2016) +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task # small Peridynamic cylinder hit by projectile -units si +units si boundary s s s atom_style peri -atom_modify map array +atom_modify map array neighbor 0.0010 bin # small target @@ -41,24 +42,34 @@ thermo 100 #dump 1 all custom 100 dump.peri id type x y z c_1 -#dump 2 all image 50 
image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 -#dump_modify 2 pad 4 +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 -#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 -#dump_modify 3 pad 4 +#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 run 1000 Neighbor list info ... - 2 neighbor list requests update every 1 steps, delay 10 steps, check yes max neighbors/atom: 2000, page size: 100000 master list distance cutoff = 0.0025001 ghost atom cutoff = 0.0025001 - binsize = 0.00125005 -> bins = 9 5 9 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/pmb, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard Peridynamic bonds: total # of bonds = 335966 bonds/atom = 96.3482 -Memory usage per processor = 26.9049 Mbytes +Per MPI rank memory allocation (min/avg/max) = 29.27 | 29.54 | 29.64 Mbytes Step Temp E_pair E_mol TotEng Press Volume 0 0 0 0 0 0 5.0030006e-07 100 1.7890585e+24 552721.8 0 681881.47 1.7210968e+11 5.0030006e-07 @@ -68,29 +79,29 @@ Step Temp E_pair E_mol TotEng Press Volume 500 4.2580877e+27 20212686 0 3.2762196e+08 1.6249923e+14 1.2611723e-06 600 5.5126512e+27 30861342 0 4.2884284e+08 1.7320038e+14 1.531873e-06 700 1.1807414e+28 23119941 0 8.7554687e+08 2.9477434e+14 1.9278632e-06 - 800 1.2424839e+28 2407361.5 0 8.994088e+08 2.3787786e+14 2.5138992e-06 - 900 1.2358395e+28 4532520.1 0 8.9673706e+08 1.9097312e+14 3.1145903e-06 - 1000 1.2341057e+28 3219974.3 0 8.9417286e+08 1.5968598e+14 3.7196039e-06 -Loop time of 5.91321 on 4 procs for 1000 steps with 3487 atoms + 800 1.2424839e+28 2407365.2 0 8.994088e+08 2.3787786e+14 
2.5138992e-06 + 900 1.2358397e+28 4532423 0 8.9673716e+08 1.9097316e+14 3.1145903e-06 + 1000 1.2341048e+28 3219408.7 0 8.9417158e+08 1.5968585e+14 3.7196039e-06 +Loop time of 9.59889 on 4 procs for 1000 steps with 3487 atoms -99.6% CPU use with 4 MPI tasks x no OpenMP threads +99.2% CPU use with 4 MPI tasks x 1 OpenMP threads MPI task timing breakdown: Section | min time | avg time | max time |%varavg| %total --------------------------------------------------------------- -Pair | 4.5763 | 5.0164 | 5.502 | 15.8 | 84.83 -Neigh | 0.11212 | 0.14636 | 0.1811 | 7.8 | 2.48 -Comm | 0.18545 | 0.70922 | 1.1869 | 45.6 | 11.99 -Output | 0.00026011 | 0.00030977 | 0.00038433 | 0.3 | 0.01 -Modify | 0.028668 | 0.029356 | 0.030043 | 0.4 | 0.50 -Other | | 0.01158 | | | 0.20 +Pair | 7.9131 | 8.1341 | 8.3286 | 6.7 | 84.74 +Neigh | 0.19736 | 0.22539 | 0.25643 | 5.6 | 2.35 +Comm | 0.92843 | 1.1536 | 1.402 | 18.4 | 12.02 +Output | 0.00053358 | 0.00059688 | 0.00070548 | 0.0 | 0.01 +Modify | 0.060774 | 0.06358 | 0.068375 | 1.2 | 0.66 +Other | | 0.02165 | | | 0.23 -Nlocal: 871.75 ave 920 max 824 min -Histogram: 1 0 0 1 0 0 1 0 0 1 -Nghost: 1343.25 ave 1391 max 1295 min -Histogram: 1 0 0 1 0 0 1 0 0 1 -Neighs: 141785 ave 170754 max 115891 min -Histogram: 1 1 0 0 0 0 0 1 0 1 +Nlocal: 871.75 ave 920 max 829 min +Histogram: 1 0 0 0 2 0 0 0 0 1 +Nghost: 1343.25 ave 1386 max 1295 min +Histogram: 1 0 0 0 0 2 0 0 0 1 +Neighs: 141783 ave 157099 max 127518 min +Histogram: 2 0 0 0 0 0 0 0 1 1 FullNghs: 302269 ave 346070 max 260820 min Histogram: 1 0 0 0 2 0 0 0 0 1 @@ -101,4 +112,4 @@ Dangerous builds = 0 Please see the log.cite file for references relevant to this simulation -Total wall time: 0:00:05 +Total wall time: 0:00:09 diff --git a/examples/peri/log.6Jul17.peri.ves.g++.1 b/examples/peri/log.6Jul17.peri.ves.g++.1 new file mode 100644 index 0000000000..3d1d156d4a --- /dev/null +++ b/examples/peri/log.6Jul17.peri.ves.g++.1 @@ -0,0 +1,115 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI 
task +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +Lattice spacing in x,y,z = 0.0005 0.0005 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +Created orthogonal box = (-0.005 -0.005 -0.005) to (0.005 0 0.005) + 1 by 1 by 1 MPI processor grid +create_atoms 1 region target +Created 3487 atoms + +pair_style peri/ves +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 0.5 0.001 +set group all density 2200 + 3487 settings made for density +set group all volume 1.25e-10 + 3487 settings made for volume +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 4 + +#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 +Neighbor list info ... 
+ update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 0.0025001 + ghost atom cutoff = 0.0025001 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/ves, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Peridynamic bonds: + total # of bonds = 335966 + bonds/atom = 96.3482 +Per MPI rank memory allocation (min/avg/max) = 65.41 | 65.41 | 65.41 Mbytes +Step Temp E_pair E_mol TotEng Press Volume + 0 0 0 0 0 0 5.0030006e-07 + 100 8.3392177e+24 247040.57 0 849083.8 8.0224286e+11 5.0030006e-07 + 200 1.1849022e+27 1158030.5 0 86701105 1.0301578e+14 5.5359205e-07 + 300 2.6287222e+27 4389155.1 0 1.9416767e+08 1.6636212e+14 7.6050375e-07 + 400 3.2718778e+27 7458219 0 2.4366885e+08 1.5439709e+14 1.0199269e-06 + 500 3.8413187e+27 6151611.4 0 2.8347258e+08 1.5008974e+14 1.2318007e-06 + 600 6.1409926e+27 18424316 0 4.6176842e+08 1.9507512e+14 1.5151227e-06 + 700 1.0046131e+28 11478344 0 7.3675086e+08 2.4228512e+14 1.9956447e-06 + 800 1.0402132e+28 4421233.6 0 7.5539495e+08 2.0512303e+14 2.4407262e-06 + 900 1.0419515e+28 7223261.3 0 7.594519e+08 1.6647307e+14 3.0124137e-06 + 1000 1.0503737e+28 2621490.6 0 7.6093049e+08 1.4315634e+14 3.5313793e-06 +Loop time of 77.2175 on 1 procs for 1000 steps with 3487 atoms + +99.4% CPU use with 1 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 76.421 | 76.421 | 76.421 | 0.0 | 98.97 +Neigh | 0.56616 | 0.56616 | 0.56616 | 0.0 | 0.73 +Comm | 0.0038247 | 0.0038247 | 0.0038247 | 0.0 | 0.00 +Output | 0.00051951 | 0.00051951 | 0.00051951 | 0.0 | 0.00 +Modify | 0.19434 | 0.19434 
| 0.19434 | 0.0 | 0.25 +Other | | 0.03197 | | | 0.04 + +Nlocal: 3487 ave 3487 max 3487 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Nghost: 0 ave 0 max 0 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +Neighs: 561942 ave 561942 max 561942 min +Histogram: 1 0 0 0 0 0 0 0 0 0 +FullNghs: 1.20908e+06 ave 1.20908e+06 max 1.20908e+06 min +Histogram: 1 0 0 0 0 0 0 0 0 0 + +Total # of neighbors = 1209076 +Ave neighs/atom = 346.738 +Neighbor list builds = 37 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:01:17 diff --git a/examples/peri/log.6Jul17.peri.ves.g++.4 b/examples/peri/log.6Jul17.peri.ves.g++.4 new file mode 100644 index 0000000000..bd05d58e9f --- /dev/null +++ b/examples/peri/log.6Jul17.peri.ves.g++.4 @@ -0,0 +1,115 @@ +LAMMPS (6 Jul 2017) + using 1 OpenMP thread(s) per MPI task +# small Peridynamic cylinder hit by projectile + +units si +boundary s s s +atom_style peri +atom_modify map array +neighbor 0.0010 bin + +# small target + +lattice sc 0.0005 +Lattice spacing in x,y,z = 0.0005 0.0005 0.0005 +region target cylinder y 0.0 0.0 0.0050 -0.0050 0.0 units box +create_box 1 target +Created orthogonal box = (-0.005 -0.005 -0.005) to (0.005 0 0.005) + 2 by 1 by 2 MPI processor grid +create_atoms 1 region target +Created 3487 atoms + +pair_style peri/ves +pair_coeff * * 14.9e9 14.9e9 0.0015001 0.0005 0.25 0.5 0.001 +set group all density 2200 + 3487 settings made for density +set group all volume 1.25e-10 + 3487 settings made for volume +velocity all set 0.0 0.0 0.0 sum no units box +fix 1 all nve + +# spherical indenter to shatter target + +variable y0 equal 0.00155 +variable vy equal -100 +variable y equal "v_y0 + step*dt*v_vy" + +fix 2 all indent 1e17 sphere 0.0000 v_y 0.0000 0.0015 units box + +compute 1 all damage/atom +timestep 1.0e-7 +thermo 100 + +#dump 1 all custom 100 dump.peri id type x y z c_1 + +#dump 2 all image 50 image.*.jpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 2 pad 
4 + +#dump 3 all movie 50 movie.mpg type type # axes yes 0.8 0.02 view 80 -30 adiam 0.0006 +#dump_modify 3 pad 4 + +run 1000 +Neighbor list info ... + update every 1 steps, delay 10 steps, check yes + max neighbors/atom: 2000, page size: 100000 + master list distance cutoff = 0.0025001 + ghost atom cutoff = 0.0025001 + binsize = 0.00125005, bins = 9 5 9 + 2 neighbor lists, perpetual/occasional/extra = 1 1 0 + (1) pair peri/ves, perpetual + attributes: half, newton on + pair build: half/bin/atomonly/newton + stencil: half/bin/3d/newton + bin: standard + (2) fix PERI_NEIGH, occasional + attributes: full, newton on + pair build: full/bin/atomonly + stencil: full/bin/3d + bin: standard +Peridynamic bonds: + total # of bonds = 335966 + bonds/atom = 96.3482 +Per MPI rank memory allocation (min/avg/max) = 59.9 | 60.16 | 60.26 Mbytes +Step Temp E_pair E_mol TotEng Press Volume + 0 0 0 0 0 0 5.0030006e-07 + 100 8.3392177e+24 247040.57 0 849083.8 8.0224286e+11 5.0030006e-07 + 200 1.1849022e+27 1158030.5 0 86701105 1.0301578e+14 5.5359205e-07 + 300 2.6287222e+27 4389155.1 0 1.9416767e+08 1.6636212e+14 7.6050375e-07 + 400 3.2718778e+27 7458219 0 2.4366885e+08 1.5439709e+14 1.0199269e-06 + 500 3.8413187e+27 6151611.4 0 2.8347258e+08 1.5008974e+14 1.2318007e-06 + 600 6.1409926e+27 18424316 0 4.6176842e+08 1.9507512e+14 1.5151227e-06 + 700 1.0046131e+28 11478344 0 7.3675086e+08 2.4228512e+14 1.9956447e-06 + 800 1.0402132e+28 4421233.6 0 7.5539495e+08 2.0512303e+14 2.4407262e-06 + 900 1.0419515e+28 7223258.7 0 7.594519e+08 1.6647307e+14 3.0124137e-06 + 1000 1.0503738e+28 2621480.4 0 7.6093057e+08 1.4315636e+14 3.5313793e-06 +Loop time of 25.9768 on 4 procs for 1000 steps with 3487 atoms + +99.1% CPU use with 4 MPI tasks x 1 OpenMP threads + +MPI task timing breakdown: +Section | min time | avg time | max time |%varavg| %total +--------------------------------------------------------------- +Pair | 22.455 | 23.348 | 24.175 | 14.1 | 89.88 +Neigh | 0.14472 | 0.18294 | 0.2299 | 8.6 | 
0.70 +Comm | 1.4715 | 2.3485 | 3.2075 | 44.8 | 9.04 +Output | 0.000489 | 0.00059682 | 0.0007987 | 0.0 | 0.00 +Modify | 0.063634 | 0.071411 | 0.076907 | 1.9 | 0.27 +Other | | 0.02506 | | | 0.10 + +Nlocal: 871.75 ave 896 max 852 min +Histogram: 2 0 0 0 0 0 0 1 0 1 +Nghost: 1293.25 ave 1313 max 1269 min +Histogram: 1 0 1 0 0 0 0 0 0 2 +Neighs: 140486 ave 167239 max 121255 min +Histogram: 2 0 0 0 0 0 1 0 0 1 +FullNghs: 302269 ave 346070 max 260820 min +Histogram: 1 0 0 0 2 0 0 0 0 1 + +Total # of neighbors = 1209076 +Ave neighs/atom = 346.738 +Neighbor list builds = 37 +Dangerous builds = 0 + +Please see the log.cite file for references relevant to this simulation + +Total wall time: 0:00:26 diff --git a/lib/.gitignore b/lib/.gitignore index e153da2c34..4f9ebba6a5 100644 --- a/lib/.gitignore +++ b/lib/.gitignore @@ -1 +1,3 @@ Makefile.lammps +.depend +Makefile.auto diff --git a/lib/Install.py b/lib/Install.py index 18b426f928..d30cbffcf1 100644 --- a/lib/Install.py +++ b/lib/Install.py @@ -4,24 +4,35 @@ # soft linked to by many of the lib/Install.py files # used to automate the steps described in the corresponding lib/README -import sys,commands,os +from __future__ import print_function +import sys,os,subprocess # help message help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter +Syntax from src dir: make lib-libname args="-m machine -e suffix" +Syntax from lib dir: python Install.py -m machine -e suffix + +libname = name of lib dir (e.g. 
atc, h5md, meam, poems, etc) +specify -m and optionally -e, order does not matter + -m = peform a clean followed by "make -f Makefile.machine" machine = suffix of a lib/Makefile.* file -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix does not alter existing Makefile.machine + +Examples: + +make lib-poems args="-m serial" # build POEMS lib with same settings as in the serial Makefile in src +make lib-colvars args="-m mpi" # build USER-COLVARS lib with same settings as in the mpi Makefile in src +make lib-meam args="-m ifort" # build MEAM lib with custom Makefile.ifort (using Intel Fortran) """ # print error message or help def error(str=None): - if not str: print help - else: print "ERROR",str + if not str: print(help) + else: print("ERROR",str) sys.exit() # parse args @@ -38,12 +49,12 @@ while iarg < nargs: if args[iarg] == "-m": if iarg+2 > nargs: error() machine = args[iarg+1] - iarg += 2 + iarg += 2 elif args[iarg] == "-e": if iarg+2 > nargs: error() extraflag = 1 suffix = args[iarg+1] - iarg += 2 + iarg += 2 else: error() # set lib from working dir @@ -53,30 +64,38 @@ lib = os.path.basename(cwd) # create Makefile.auto as copy of Makefile.machine # reset EXTRAMAKE if requested - + if not os.path.exists("Makefile.%s" % machine): error("lib/%s/Makefile.%s does not exist" % (lib,machine)) lines = open("Makefile.%s" % machine,'r').readlines() fp = open("Makefile.auto",'w') +has_extramake = False for line in lines: words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, + if len(words) == 3 and words[0] == "EXTRAMAKE" and words[1] == '=': + has_extramake = True + if extraflag: + line = line.replace(words[2],"Makefile.lammps.%s" % suffix) + fp.write(line) fp.close() -# make the library via Makefile.auto +# make the library via Makefile.auto optionally with parallel make -print "Building lib%s.a ..." 
% lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt +try: + import multiprocessing + n_cpus = multiprocessing.cpu_count() +except: + n_cpus = 1 -if os.path.exists("lib%s.a" % lib): print "Build was successful" +print("Building lib%s.a ..." % lib) +cmd = "make -f Makefile.auto clean; make -f Makefile.auto -j%d" % n_cpus +txt = subprocess.check_output(cmd,shell=True,stderr=subprocess.STDOUT) +print(txt.decode('UTF-8')) + +if os.path.exists("lib%s.a" % lib): print("Build was successful") else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib +if has_extramake and not os.path.exists("Makefile.lammps"): + print("lib/%s/Makefile.lammps was NOT created" % lib) diff --git a/lib/atc/Install.py b/lib/atc/Install.py deleted file mode 100644 index 18b426f928..0000000000 --- a/lib/atc/Install.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file - -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix - does not alter existing Makefile.machine -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None -extraflag = 0 - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - elif args[iarg] == "-e": - if iarg+2 > nargs: 
error() - extraflag = 1 - suffix = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested - -if not os.path.exists("Makefile.%s" % machine): - error("lib/%s/Makefile.%s does not exist" % (lib,machine)) - -lines = open("Makefile.%s" % machine,'r').readlines() -fp = open("Makefile.auto",'w') - -for line in lines: - words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - -fp.close() - -# make the library via Makefile.auto - -print "Building lib%s.a ..." % lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/atc/Install.py b/lib/atc/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/atc/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/atc/Makefile.g++ b/lib/atc/Makefile.g++ index d15e6cb3b8..bb3028392a 100644 --- a/lib/atc/Makefile.g++ +++ b/lib/atc/Makefile.g++ @@ -1,3 +1,4 @@ +# library build -*- makefile -*- SHELL = /bin/sh # which file will be copied to Makefile.lammps @@ -5,6 +6,7 @@ SHELL = /bin/sh EXTRAMAKE = Makefile.lammps.installed # ------ FILES ------ + SRC = $(wildcard *.cpp) INC = $(wildcard *.h) @@ -47,5 +49,9 @@ DEPENDS = $(OBJ:.o=.d) # ------ CLEAN ------ +.PHONY: clean lib + clean: -rm *.o *.d *~ $(LIB) + +sinclude $(DEPENDS) diff --git a/lib/atc/Makefile.mingw32-cross b/lib/atc/Makefile.mingw32-cross deleted file mode 100644 index 8b33540981..0000000000 --- a/lib/atc/Makefile.mingw32-cross +++ 
/dev/null @@ -1,67 +0,0 @@ -# library build -*- makefile -*- -SHELL = /bin/sh - -# which file will be copied to Makefile.lammps -EXTRAMAKE = Makefile.lammps.linalg - -# ------ FILES ------ - -SRC = $(wildcard *.cpp) -INC = $(wildcard *.h) - -# ------ DEFINITIONS ------ - -DIR = Obj_mingw32/ -LIB = $(DIR)libatc.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) - -# ------ SETTINGS ------ - -# include any MPI settings needed for the ATC library to build with -# the same MPI library that LAMMPS is built with - -CC = i686-w64-mingw32-g++ -CCFLAGS = -I../../src -I../../src/STUBS -DMPICH_IGNORE_CXX_SEEK \ - -O3 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -DLAMMPS_SMALLSMALL -Wno-uninitialized -ARCHIVE = i686-w64-mingw32-ar -ARCHFLAG = -rcs -DEPFLAGS = -M -LINK = $(CC) -LINKFLAGS = -O -USRLIB = -SYSLIB = - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.cpp - $(CC) $(CCFLAGS) -c $< -o $@ -$(DIR)%.d:%.cpp - $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ - -# ------ DEPENDENCIES ------ - -DEPENDS = $(OBJ:.o=.d) - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.d *~ $(LIB) - -$(DEPENDS) : $(DIR) -sinclude $(DEPENDS) diff --git a/lib/atc/Makefile.mingw32-cross-mpi b/lib/atc/Makefile.mingw32-cross-mpi deleted file mode 100644 index c5feeca81a..0000000000 --- a/lib/atc/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,68 +0,0 @@ -# library build -*- makefile -*- -SHELL = /bin/sh - -# which file will be copied to Makefile.lammps -EXTRAMAKE = Makefile.lammps.linalg - -# ------ FILES ------ - -SRC = $(wildcard *.cpp) -INC = $(wildcard *.h) - -# ------ DEFINITIONS ------ - -DIR = Obj_mingw32-mpi/ -LIB = $(DIR)libatc.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) - -# ------ SETTINGS ------ - -# include any MPI settings 
needed for the ATC library to build with -# the same MPI library that LAMMPS is built with - -CC = i686-w64-mingw32-g++ -CCFLAGS = -I../../tools/mingw-cross/mpich2-win32/include/ \ - -I../../src -DMPICH_IGNORE_CXX_SEEK \ - -O3 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -DLAMMPS_SMALLSMALL -Wno-uninitialized -ARCHIVE = i686-w64-mingw32-ar -ARCHFLAG = -rcs -DEPFLAGS = -M -LINK = $(CC) -LINKFLAGS = -O -USRLIB = -SYSLIB = - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.cpp - $(CC) $(CCFLAGS) -c $< -o $@ -$(DIR)%.d:%.cpp - $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ - -# ------ DEPENDENCIES ------ - -DEPENDS = $(OBJ:.o=.d) - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.d *~ $(LIB) - -$(DEPENDS) : $(DIR) -sinclude $(DEPENDS) diff --git a/lib/atc/Makefile.mingw64-cross b/lib/atc/Makefile.mingw64-cross deleted file mode 100644 index fbd3a02610..0000000000 --- a/lib/atc/Makefile.mingw64-cross +++ /dev/null @@ -1,67 +0,0 @@ -# library build -*- makefile -*- -SHELL = /bin/sh - -# which file will be copied to Makefile.lammps -EXTRAMAKE = Makefile.lammps.linalg - -# ------ FILES ------ - -SRC = $(wildcard *.cpp) -INC = $(wildcard *.h) - -# ------ DEFINITIONS ------ - -DIR = Obj_mingw64/ -LIB = $(DIR)libatc.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) - -# ------ SETTINGS ------ - -# include any MPI settings needed for the ATC library to build with -# the same MPI library that LAMMPS is built with - -CC = x86_64-w64-mingw32-g++ -CCFLAGS = -I../../src -I../../src/STUBS -DMPICH_IGNORE_CXX_SEEK \ - -O3 -march=core2 -mtune=core2 -mpc64 -msse2 \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -DLAMMPS_SMALLBIG -Wno-uninitialized -ARCHIVE = x86_64-w64-mingw32-ar -ARCHFLAG = -rcs -DEPFLAGS = 
-M -LINK = $(CC) -LINKFLAGS = -O -USRLIB = -SYSLIB = - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.cpp - $(CC) $(CCFLAGS) -c $< -o $@ -$(DIR)%.d:%.cpp - $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ - -# ------ DEPENDENCIES ------ - -DEPENDS = $(OBJ:.o=.d) - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.d *~ $(LIB) - -$(DEPENDS) : $(DIR) -sinclude $(DEPENDS) diff --git a/lib/atc/Makefile.mingw64-cross-mpi b/lib/atc/Makefile.mingw64-cross-mpi deleted file mode 100644 index f8dd64eae3..0000000000 --- a/lib/atc/Makefile.mingw64-cross-mpi +++ /dev/null @@ -1,68 +0,0 @@ -# library build -*- makefile -*- -SHELL = /bin/sh - -# which file will be copied to Makefile.lammps -EXTRAMAKE = Makefile.lammps.linalg - -# ------ FILES ------ - -SRC = $(wildcard *.cpp) -INC = $(wildcard *.h) - -# ------ DEFINITIONS ------ - -DIR = Obj_mingw64-mpi/ -LIB = $(DIR)libatc.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) - -# ------ SETTINGS ------ - -# include any MPI settings needed for the ATC library to build with -# the same MPI library that LAMMPS is built with - -CC = x86_64-w64-mingw32-g++ -CCFLAGS = -I../../tools/mingw-cross/mpich2-win64/include/ \ - -I../../src -DMPICH_IGNORE_CXX_SEEK \ - -O3 -march=core2 -mtune=core2 -mpc64 -msse2 \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -DLAMMPS_SMALLBIG -Wno-uninitialized -ARCHIVE = x86_64-w64-mingw32-ar -ARCHFLAG = -rcs -DEPFLAGS = -M -LINK = $(CC) -LINKFLAGS = -O -USRLIB = -SYSLIB = - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.cpp - $(CC) $(CCFLAGS) -c 
$< -o $@ -$(DIR)%.d:%.cpp - $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ - -# ------ DEPENDENCIES ------ - -DEPENDS = $(OBJ:.o=.d) - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.d *~ $(LIB) - -$(DEPENDS) : $(DIR) -sinclude $(DEPENDS) diff --git a/lib/atc/Makefile.mpi b/lib/atc/Makefile.mpi new file mode 100644 index 0000000000..ec941efdcb --- /dev/null +++ b/lib/atc/Makefile.mpi @@ -0,0 +1,55 @@ +# library build -*- makefile -*- +SHELL = /bin/sh + +# which file will be copied to Makefile.lammps +EXTRAMAKE = Makefile.lammps.linalg + +# ------ FILES ------ + +SRC = $(wildcard *.cpp) +INC = $(wildcard *.h) + +# ------ DEFINITIONS ------ + +LIB = libatc.a +OBJ = $(SRC:.cpp=.o) + +default: lib + +# ------ SETTINGS ------ + +.PHONY: clean lib depend + +# include any MPI settings needed for the ATC library to build with +# must be the same MPI library that LAMMPS is built with + +CC = mpicxx +CCFLAGS = -O3 -Wall -g -fPIC +CPPFLAGS = -I../../src -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 +ARCHIVE = ar +ARCHFLAG = -rc +# ------ MAKE PROCEDURE ------ + +lib: $(OBJ) + $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) + @cp $(EXTRAMAKE) Makefile.lammps + +# ------ COMPILE RULES ------ + +%.o:%.cpp + $(CC) $(CPPFLAGS) $(CCFLAGS) -c $< + +# ------ DEPENDENCIES ------ + +depend .depend : fastdep.exe $(SRC) + @./fastdep.exe $(INCFLAGS) -- $^ > .depend || exit 1 + +fastdep.exe: ../../src/DEPEND/fastdep.c + @cc -O -o $@ $< + +# ------ CLEAN ------ + +clean: + -rm -f *.o *~ .depend $(LIB) fastdep.exe + +sinclude $(DEPENDS) diff --git a/lib/atc/Makefile.mpic++ b/lib/atc/Makefile.mpic++ deleted file mode 100644 index c9dfdb79c9..0000000000 --- a/lib/atc/Makefile.mpic++ +++ /dev/null @@ -1,39 +0,0 @@ -# library build -*- makefile -*- -SHELL = /bin/sh - -# which file will be copied to Makefile.lammps -EXTRAMAKE = Makefile.lammps.installed -# ------ FILES ------ -SRC = $(wildcard *.cpp) -INC = $(wildcard *.h) -# ------ DEFINITIONS ------ -LIB = libatc.a -OBJ = $(SRC:.cpp=.o) -# ------ SETTINGS 
------ - -# include any MPI settings needed for the ATC library to build with -# must be the same MPI library that LAMMPS is built with - -CC = mpic++ -CCFLAGS = -O3 -Wall -g -I../../src -fPIC -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -ARCHIVE = ar -ARCHFLAG = -rc -DEPFLAGS = -M -LINK = $(CC) -LINKFLAGS = -O -# ------ MAKE PROCEDURE ------ -lib: $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps -# ------ COMPILE RULES ------ -%.o:%.cpp - $(CC) $(CCFLAGS) -c $< -%.d:%.cpp - $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ -# ------ DEPENDENCIES ------ -DEPENDS = $(OBJ:.o=.d) -# ------ CLEAN ------ -clean: - -rm *.o *.d *~ $(LIB) - -sinclude $(DEPENDS) diff --git a/lib/atc/Makefile.mpic++ b/lib/atc/Makefile.mpic++ new file mode 120000 index 0000000000..1f5f55d2ad --- /dev/null +++ b/lib/atc/Makefile.mpic++ @@ -0,0 +1 @@ +Makefile.mpi \ No newline at end of file diff --git a/lib/atc/Makefile.serial b/lib/atc/Makefile.serial index 44ce5fd341..70b786a6b8 100644 --- a/lib/atc/Makefile.serial +++ b/lib/atc/Makefile.serial @@ -14,18 +14,20 @@ INC = $(wildcard *.h) LIB = libatc.a OBJ = $(SRC:.cpp=.o) +default: lib + # ------ SETTINGS ------ +.PHONY: clean lib depend + # include any MPI settings needed for the ATC library to build with # must be the same MPI library that LAMMPS is built with CC = g++ -CCFLAGS = -O -g -fPIC -I../../src -I../../src/STUBS +CCFLAGS = -O3 -g -fPIC +CPPFLAGS = -I../../src -I../../src/STUBS ARCHIVE = ar ARCHFLAG = -rc -DEPFLAGS = -M -LINK = $(CC) -LINKFLAGS = -O # ------ MAKE PROCEDURE ------ lib: $(OBJ) @@ -35,17 +37,19 @@ lib: $(OBJ) # ------ COMPILE RULES ------ %.o:%.cpp - $(CC) $(CCFLAGS) -c $< -%.d:%.cpp - $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ + $(CC) $(CPPFLAGS) $(CCFLAGS) -c $< # ------ DEPENDENCIES ------ -DEPENDS = $(OBJ:.o=.d) +depend .depend : fastdep.exe $(SRC) + @./fastdep.exe $(INCFLAGS) -- $^ > .depend || exit 1 + +fastdep.exe: ../../src/DEPEND/fastdep.c + @cc -O -o $@ $< # ------ CLEAN ------ clean: - -rm *.o 
*.d *~ $(LIB) + -rm -f *.o *~ .depend $(LIB) fastdep.exe -sinclude $(DEPENDS) +sinclude .depend diff --git a/lib/awpmd/Install.py b/lib/awpmd/Install.py deleted file mode 100644 index 18b426f928..0000000000 --- a/lib/awpmd/Install.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file - -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix - does not alter existing Makefile.machine -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None -extraflag = 0 - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - elif args[iarg] == "-e": - if iarg+2 > nargs: error() - extraflag = 1 - suffix = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested - -if not os.path.exists("Makefile.%s" % machine): - error("lib/%s/Makefile.%s does not exist" % (lib,machine)) - -lines = open("Makefile.%s" % machine,'r').readlines() -fp = open("Makefile.auto",'w') - -for line in lines: - words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - -fp.close() - -# make the library via Makefile.auto - -print "Building 
lib%s.a ..." % lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/awpmd/Install.py b/lib/awpmd/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/awpmd/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/awpmd/Makefile.mingw32-cross-mpi b/lib/awpmd/Makefile.mingw32-cross-mpi deleted file mode 100644 index cc2a76111a..0000000000 --- a/lib/awpmd/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw32-cross - rm -f Obj_mingw32-mpi - ln -s Obj_mingw32 Obj_mingw32-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw32-cross - rm -f Obj_mingw32-mpi - diff --git a/lib/awpmd/Makefile.mingw64-cross-mpi b/lib/awpmd/Makefile.mingw64-cross-mpi deleted file mode 100644 index 1ec1a0995b..0000000000 --- a/lib/awpmd/Makefile.mingw64-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw64-cross - rm -f Obj_mingw64-mpi - ln -s Obj_mingw64 Obj_mingw64-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw64-cross - rm -f Obj_mingw64-mpi - diff --git a/lib/awpmd/Makefile.mingw64-cross b/lib/awpmd/Makefile.mpi similarity index 55% rename from lib/awpmd/Makefile.mingw64-cross rename to lib/awpmd/Makefile.mpi index 1f3e608129..e4b424e776 100644 --- a/lib/awpmd/Makefile.mingw64-cross +++ b/lib/awpmd/Makefile.mpi @@ -1,4 +1,3 @@ -# library build -*- makefile -*- SHELL = /bin/sh # which file will be copied to Makefile.lammps @@ -7,9 +6,10 @@ EXTRAMAKE = Makefile.lammps.linalg # ------ FILES ------ -SRC = logexc.cpp wpmd.cpp wpmd_split.cpp -vpath 
%.cpp ivutils/src -vpath %.cpp systems/interact/TCP +SRC = \ + ivutils/src/logexc.cpp \ + systems/interact/TCP/wpmd.cpp \ + systems/interact/TCP/wpmd_split.cpp INC = \ cerf.h \ @@ -27,21 +27,21 @@ INC = \ wpmd_split.h # ------ DEFINITIONS ------ -DIR = Obj_mingw64/ -LIB = $(DIR)libawpmd.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) + +LIB = libawpmd.a +OBJ = $(SRC:.cpp=.o) # ------ SETTINGS ------ # include any MPI settings needed for the ATC library to build with # the same MPI library that LAMMPS is built with -CC = x86_64-w64-mingw32-g++ -CCFLAGS = -O3 -march=core2 -mtune=core2 -mpc64 -msse2 \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -Wall -W -Wno-uninitialized -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include -ARCHIVE = x86_64-w64-mingw32-ar -ARCHFLAG = -rscv +CC = mpicxx +CCFLAGS = -O3 -fPIC -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include \ + -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 + +ARCHIVE = ar +ARCHFLAG = -rc DEPFLAGS = -M #LINK = #LINKFLAGS = @@ -50,23 +50,15 @@ SYSLIB = # ------ MAKE PROCEDURE ------ -default: $(DIR) $(LIB) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) +lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) @cp $(EXTRAMAKE) Makefile.lammps # ------ COMPILE RULES ------ -$(DIR)%.o:%.cpp +%.o:%.cpp $(CC) $(CCFLAGS) -c $< -o $@ -$(DIR)%.d:%.cpp +%.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # ------ DEPENDENCIES ------ @@ -76,4 +68,4 @@ DEPENDS = $(OBJ:.o=.d) # ------ CLEAN ------ clean: - -rm *.d *~ $(OBJ) $(LIB) + -rm -f *.d *~ $(OBJ) $(LIB) diff --git a/lib/awpmd/Makefile.mpicc b/lib/awpmd/Makefile.mpicc index 4c289ad88a..5cf6a75bd7 100644 --- a/lib/awpmd/Makefile.mpicc +++ b/lib/awpmd/Makefile.mpicc @@ -36,8 +36,10 @@ OBJ = $(SRC:.cpp=.o) # include any MPI settings needed for the ATC library to build with # the same MPI library that LAMMPS is built with -CC = mpic++ -CCFLAGS = -O -fPIC -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include +CC 
= mpicxx +CCFLAGS = -O3 -fPIC -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include \ + -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 + ARCHIVE = ar ARCHFLAG = -rc DEPFLAGS = -M @@ -66,4 +68,4 @@ DEPENDS = $(OBJ:.o=.d) # ------ CLEAN ------ clean: - -rm *.d *~ $(OBJ) $(LIB) + -rm -f *.d *~ $(OBJ) $(LIB) diff --git a/lib/awpmd/Makefile.mingw32-cross b/lib/awpmd/Makefile.serial similarity index 54% rename from lib/awpmd/Makefile.mingw32-cross rename to lib/awpmd/Makefile.serial index 6a93987173..f51714fc9a 100644 --- a/lib/awpmd/Makefile.mingw32-cross +++ b/lib/awpmd/Makefile.serial @@ -1,4 +1,3 @@ -# library build -*- makefile -*- SHELL = /bin/sh # which file will be copied to Makefile.lammps @@ -7,9 +6,10 @@ EXTRAMAKE = Makefile.lammps.linalg # ------ FILES ------ -SRC = logexc.cpp wpmd.cpp wpmd_split.cpp -vpath %.cpp ivutils/src -vpath %.cpp systems/interact/TCP +SRC = \ + ivutils/src/logexc.cpp \ + systems/interact/TCP/wpmd.cpp \ + systems/interact/TCP/wpmd_split.cpp INC = \ cerf.h \ @@ -27,22 +27,21 @@ INC = \ wpmd_split.h # ------ DEFINITIONS ------ -DIR = Obj_mingw32/ -LIB = $(DIR)libawpmd.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) + +LIB = libawpmd.a +OBJ = $(SRC:.cpp=.o) # ------ SETTINGS ------ # include any MPI settings needed for the ATC library to build with # the same MPI library that LAMMPS is built with -CC = i686-w64-mingw32-g++ -CCFLAGS = -O2 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - -finline-functions \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -Wall -W -Wno-uninitialized -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include -ARCHIVE = i686-w64-mingw32-ar -ARCHFLAG = -rscv +CC = g++ +CCFLAGS = -O3 -fPIC -Isystems/interact/TCP/ -Isystems/interact -Iivutils/include \ + -I../../src/STUBS + +ARCHIVE = ar +ARCHFLAG = -rc DEPFLAGS = -M #LINK = #LINKFLAGS = @@ -51,23 +50,15 @@ SYSLIB = # ------ MAKE PROCEDURE ------ -default: $(DIR) $(LIB) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps 
- -$(LIB): $(OBJ) +lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) @cp $(EXTRAMAKE) Makefile.lammps # ------ COMPILE RULES ------ -$(DIR)%.o:%.cpp +%.o:%.cpp $(CC) $(CCFLAGS) -c $< -o $@ -$(DIR)%.d:%.cpp +%.d:%.cpp $(CC) $(CCFLAGS) $(DEPFLAGS) $< > $@ # ------ DEPENDENCIES ------ @@ -77,4 +68,4 @@ DEPENDS = $(OBJ:.o=.d) # ------ CLEAN ------ clean: - -rm *.d *~ $(OBJ) $(LIB) + -rm -f *.d *~ $(OBJ) $(LIB) diff --git a/lib/colvars/Install.py b/lib/colvars/Install.py index 18b426f928..030644ceb5 100644 --- a/lib/colvars/Install.py +++ b/lib/colvars/Install.py @@ -1,27 +1,34 @@ #!/usr/bin/env python -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README +# Install.py tool to do automate build of Colvars -import sys,commands,os +from __future__ import print_function +import sys,os,subprocess # help message help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file +Syntax from src dir: make lib-colvars args="-m machine -e suffix" +Syntax from lib/colvars dir: python Install.py -m machine -e suffix + +specify -m and optionally -e, order does not matter + + -m = delete all existing objects, followed by "make -f Makefile.machine" + machine = suffix of a lib/colvars/Makefile.* or of a + src/MAKE/MACHINES/Makefile.* file -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix does not alter existing Makefile.machine + +Examples: + +make lib-colvars args="-m mpi" # build COLVARS lib with default mpi compiler wrapper """ # print error message or help def error(str=None): - if not str: print help - else: print "ERROR",str + if not str: print(help) + else: print("ERROR",str) sys.exit() # parse args @@ -31,19 +38,19 @@ nargs = len(args) if nargs == 0: error() machine = None 
-extraflag = 0 +extraflag = False iarg = 0 while iarg < nargs: if args[iarg] == "-m": - if iarg+2 > nargs: error() + if iarg+2 > len(args): error() machine = args[iarg+1] - iarg += 2 + iarg += 2 elif args[iarg] == "-e": - if iarg+2 > nargs: error() - extraflag = 1 + if iarg+2 > len(args): error() + extraflag = True suffix = args[iarg+1] - iarg += 2 + iarg += 2 else: error() # set lib from working dir @@ -51,32 +58,85 @@ while iarg < nargs: cwd = os.getcwd() lib = os.path.basename(cwd) -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested - +def get_lammps_machine_flags(machine): + """Parse Makefile.machine from LAMMPS, return dictionary of compiler flags""" + if not os.path.exists("../../src/MAKE/MACHINES/Makefile.%s" % machine): + error("Cannot locate src/MAKE/MACHINES/Makefile.%s" % machine) + lines = open("../../src/MAKE/MACHINES/Makefile.%s" % machine, + 'r').readlines() + machine_flags = {} + for line in lines: + line = line.partition('#')[0] + line = line.rstrip() + words = line.split() + if (len(words) > 2): + if ((words[0] == 'CC') or (words[0] == 'CCFLAGS') or + (words[0] == 'SHFLAGS') or (words[0] == 'ARCHIVE') or + (words[0] == 'ARFLAGS') or (words[0] == 'SHELL')): + machine_flags[words[0]] = ' '.join(words[2:]) + return machine_flags + +def gen_colvars_makefile_machine(machine, machine_flags): + """Generate Makefile.machine for Colvars given the compiler flags""" + machine_makefile = open("Makefile.%s" % machine, 'w') + machine_makefile.write('''# -*- makefile -*- to build Colvars module with %s + +COLVARS_LIB = libcolvars.a +COLVARS_OBJ_DIR = + +CXX = %s +CXXFLAGS = %s %s +AR = %s +ARFLAGS = %s +SHELL = %s + +include Makefile.common + +.PHONY: default clean + +default: $(COLVARS_LIB) Makefile.lammps + +clean: + -rm -f $(COLVARS_OBJS) $(COLVARS_LIB) +''' % (machine, machine_flags['CC'], + machine_flags['CCFLAGS'], machine_flags['SHFLAGS'] , + machine_flags['ARCHIVE'], machine_flags['ARFLAGS'], + machine_flags['SHELL'])) + 
+if not os.path.exists("Makefile.%s" % machine): + machine_flags = get_lammps_machine_flags(machine) + gen_colvars_makefile_machine(machine, machine_flags) if not os.path.exists("Makefile.%s" % machine): error("lib/%s/Makefile.%s does not exist" % (lib,machine)) +# create Makefile.auto as copy of Makefile.machine +# reset EXTRAMAKE if requested + lines = open("Makefile.%s" % machine,'r').readlines() fp = open("Makefile.auto",'w') - for line in lines: words = line.split() if len(words) == 3 and extraflag and \ words[0] == "EXTRAMAKE" and words[1] == '=': line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - + fp.write(line) fp.close() -# make the library via Makefile.auto +# make the library via Makefile.auto optionally with parallel make -print "Building lib%s.a ..." % lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt +try: + import multiprocessing + n_cpus = multiprocessing.cpu_count() +except: + n_cpus = 1 -if os.path.exists("lib%s.a" % lib): print "Build was successful" +print("Building lib%s.a ..." 
% lib) +cmd = ["make -f Makefile.auto clean"] +print(subprocess.check_output(cmd, shell=True).decode('UTF-8')) +cmd = ["make -f Makefile.auto -j%d" % n_cpus] +print(subprocess.check_output(cmd, shell=True).decode('UTF-8')) + +if os.path.exists("lib%s.a" % lib): print("Build was successful") else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib + print("lib/%s/Makefile.lammps was NOT created" % lib) diff --git a/lib/colvars/Makefile.colvars b/lib/colvars/Makefile.colvars deleted file mode 100644 index d1a2044038..0000000000 --- a/lib/colvars/Makefile.colvars +++ /dev/null @@ -1,119 +0,0 @@ -# library build -*- makefile -*- for colvars module - -# which file will be copied to Makefile.lammps - -EXTRAMAKE = Makefile.lammps.empty - -# ------ SETTINGS ------ - -CXX = g++ -CXXFLAGS = -O2 -g -Wall -fPIC -funroll-loops # -DCOLVARS_DEBUG -ARCHIVE = ar -ARCHFLAG = -rscv -SHELL = /bin/sh - -# ------ DEFINITIONS ------ - -SRC = colvaratoms.cpp colvarbias_abf.cpp colvarbias_alb.cpp colvarbias.cpp \ - colvarbias_histogram.cpp colvarbias_meta.cpp colvarbias_restraint.cpp \ - colvarcomp_angles.cpp colvarcomp_coordnums.cpp colvarcomp.cpp \ - colvarcomp_distances.cpp colvarcomp_protein.cpp colvarcomp_rotations.cpp \ - colvardeps.cpp colvar.cpp colvargrid.cpp colvarmodule.cpp colvarparse.cpp \ - colvarscript.cpp colvartypes.cpp colvarvalue.cpp - -LIB = libcolvars.a -OBJ = $(SRC:.cpp=.o) -EXE = #colvars_standalone - -# ------ MAKE PROCEDURE ------ - -default: $(LIB) $(EXE) Makefile.lammps - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - -colvars_standalone: colvars_main.o colvarproxy_standalone.o $(LIB) - $(CXX) -o $@ $(CXXFLAGS) $^ - -# ------ MAKE FLAGS ------ - -.SUFFIXES: -.SUFFIXES: .cpp .o - -.PHONY: default clean - -# ------ COMPILE RULES ------ - -.cpp.o: - $(CXX) $(CXXFLAGS) -c $< - -# ------ 
DEPENDENCIES ------ -# -colvaratoms.o: colvaratoms.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvaratoms.h -colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarbias_abf.h colvarbias.h colvargrid.h -colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarbias_alb.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_restraint.h colvarbias.h -colvarbias.o: colvarbias.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h -colvarbias_histogram.o: colvarbias_histogram.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_histogram.h colvarbias.h colvargrid.h -colvarbias_meta.o: colvarbias_meta.cpp colvar.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvarbias_meta.h colvarbias.h colvargrid.h -colvarbias_restraint.o: colvarbias_restraint.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias_restraint.h \ - colvarbias.h colvar.h colvarparse.h colvardeps.h -colvarcomp_angles.o: colvarcomp_angles.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarcomp.h colvaratoms.h -colvarcomp_coordnums.o: colvarcomp_coordnums.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvaratoms.h colvar.h colvarcomp.h -colvarcomp.o: colvarcomp.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \ - colvaratoms.h -colvarcomp_distances.o: colvarcomp_distances.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -colvarcomp_protein.o: colvarcomp_protein.cpp colvarmodule.h 
colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarcomp.h colvaratoms.h -colvarcomp_rotations.o: colvarcomp_rotations.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -colvar.o: colvar.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvarscript.h colvarbias.h -colvardeps.o: colvardeps.cpp colvardeps.h colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h -colvargrid.o: colvargrid.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvargrid.h -colvarmodule.o: colvarmodule.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarbias.h colvarbias_abf.h colvargrid.h colvarbias_alb.h \ - colvarbias_restraint.h colvarbias_histogram.h colvarbias_meta.h \ - colvarscript.h -colvarparse.o: colvarparse.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -colvarscript.o: colvarscript.cpp colvarscript.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h colvar.h \ - colvarparse.h colvardeps.h -colvartypes.o: colvartypes.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -colvarvalue.o: colvarvalue.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h - -# ------ CLEAN ------ - -clean: - -rm *.o *~ $(LIB) - diff --git a/lib/colvars/Makefile.common b/lib/colvars/Makefile.common new file mode 100644 index 0000000000..f47403f771 --- /dev/null +++ b/lib/colvars/Makefile.common @@ -0,0 +1,65 @@ +# Shared -*- makefile -*- for multiple architectures + +# # Detect settings from PYTHON package (if defined) +# sinclude ../../src/Makefile.package.settings +# ifeq ($(python_SYSINC),) +# COLVARS_PYTHON_INCFLAGS = +# else +# 
COLVARS_PYTHON_INCFLAGS = -DCOLVARS_PYTHON $(python_SYSINC) +# endif + +# Detect debug settings +ifeq ($(COLVARS_DEBUG),) +COLVARS_DEBUG_INCFLAGS = +else +COLVARS_DEBUG_INCFLAGS= -DCOLVARS_DEBUG +endif + +COLVARS_INCFLAGS = $(COLVARS_DEBUG_INCFLAGS) $(COLVARS_PYTHON_INCFLAGS) + + +.SUFFIXES: +.SUFFIXES: .cpp .o + +COLVARS_SRCS = \ + colvaratoms.cpp \ + colvarbias_abf.cpp \ + colvarbias_alb.cpp \ + colvarbias.cpp \ + colvarbias_histogram.cpp \ + colvarbias_meta.cpp \ + colvarbias_restraint.cpp \ + colvarcomp_angles.cpp \ + colvarcomp_coordnums.cpp \ + colvarcomp.cpp \ + colvarcomp_distances.cpp \ + colvarcomp_protein.cpp \ + colvarcomp_rotations.cpp \ + colvar.cpp \ + colvardeps.cpp \ + colvargrid.cpp \ + colvarmodule.cpp \ + colvarparse.cpp \ + colvarproxy.cpp \ + colvarscript.cpp \ + colvartypes.cpp \ + colvarvalue.cpp + +COLVARS_OBJS = $(COLVARS_SRCS:.cpp=.o) + +.cpp.o: + $(CXX) $(CXXFLAGS) $(COLVARS_INCFLAGS) -c $< + +$(COLVARS_LIB): Makefile.deps $(COLVARS_OBJS) + $(AR) $(ARFLAGS) $(COLVARS_LIB) $(COLVARS_OBJS) + + +Makefile.deps: $(COLVARS_SRCS) + @echo > $@ + @for src in $^ ; do \ + obj=`basename $$src .cpp`.o ; \ + $(CXX) -MM $(COLVARS_INCFLAGS) \ + -MT '$$(COLVARS_OBJ_DIR)'$$obj $$src >> $@ ; \ + done + +include Makefile.deps diff --git a/lib/colvars/Makefile.deps b/lib/colvars/Makefile.deps new file mode 100644 index 0000000000..f463da5f86 --- /dev/null +++ b/lib/colvars/Makefile.deps @@ -0,0 +1,78 @@ + +$(COLVARS_OBJ_DIR)colvaratoms.o: colvaratoms.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ + colvarparse.h colvaratoms.h colvardeps.h +$(COLVARS_OBJ_DIR)colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvar.h \ + colvarparse.h colvardeps.h colvarbias_abf.h colvarbias.h colvargrid.h +$(COLVARS_OBJ_DIR)colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ + colvarbias_alb.h colvar.h 
colvarparse.h colvardeps.h colvarbias.h +$(COLVARS_OBJ_DIR)colvarbias.o: colvarbias.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h \ + colvar.h colvarparse.h colvardeps.h +$(COLVARS_OBJ_DIR)colvarbias_histogram.o: colvarbias_histogram.cpp \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarbias_histogram.h \ + colvarbias.h colvargrid.h +$(COLVARS_OBJ_DIR)colvarbias_meta.o: colvarbias_meta.cpp colvar.h \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarparse.h colvardeps.h colvarbias_meta.h colvarbias.h \ + colvargrid.h +$(COLVARS_OBJ_DIR)colvarbias_restraint.o: colvarbias_restraint.cpp \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarbias_restraint.h colvarbias.h colvar.h colvarparse.h \ + colvardeps.h +$(COLVARS_OBJ_DIR)colvarcomp_angles.o: colvarcomp_angles.cpp \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \ + colvaratoms.h +$(COLVARS_OBJ_DIR)colvarcomp_coordnums.o: colvarcomp_coordnums.cpp \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarparse.h colvaratoms.h colvardeps.h colvar.h \ + colvarcomp.h +$(COLVARS_OBJ_DIR)colvarcomp.o: colvarcomp.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h colvar.h \ + colvarparse.h colvardeps.h colvarcomp.h colvaratoms.h +$(COLVARS_OBJ_DIR)colvarcomp_distances.o: colvarcomp_distances.cpp \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \ + colvaratoms.h +$(COLVARS_OBJ_DIR)colvarcomp_protein.o: colvarcomp_protein.cpp \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \ + colvaratoms.h 
+$(COLVARS_OBJ_DIR)colvarcomp_rotations.o: colvarcomp_rotations.cpp \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarparse.h colvar.h colvardeps.h colvarcomp.h \ + colvaratoms.h +$(COLVARS_OBJ_DIR)colvar.o: colvar.cpp colvarmodule.h colvars_version.h \ + colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvar.h \ + colvardeps.h colvarcomp.h colvaratoms.h colvarscript.h colvarbias.h +$(COLVARS_OBJ_DIR)colvardeps.o: colvardeps.cpp colvardeps.h \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarparse.h +$(COLVARS_OBJ_DIR)colvargrid.o: colvargrid.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ + colvarparse.h colvar.h colvardeps.h colvarcomp.h colvaratoms.h \ + colvargrid.h +$(COLVARS_OBJ_DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ + colvarparse.h colvar.h colvardeps.h colvarbias.h colvarbias_abf.h \ + colvargrid.h colvarbias_alb.h colvarbias_histogram.h colvarbias_meta.h \ + colvarbias_restraint.h colvarscript.h colvaratoms.h +$(COLVARS_OBJ_DIR)colvarparse.o: colvarparse.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ + colvarparse.h +$(COLVARS_OBJ_DIR)colvarproxy.o: colvarproxy.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ + colvarscript.h colvarbias.h colvar.h colvarparse.h colvardeps.h \ + colvaratoms.h +$(COLVARS_OBJ_DIR)colvarscript.o: colvarscript.cpp colvarscript.h \ + colvarmodule.h colvars_version.h colvartypes.h colvarproxy.h \ + colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h +$(COLVARS_OBJ_DIR)colvartypes.o: colvartypes.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h \ + colvarparse.h +$(COLVARS_OBJ_DIR)colvarvalue.o: colvarvalue.cpp colvarmodule.h \ + colvars_version.h colvartypes.h colvarproxy.h colvarvalue.h diff --git 
a/lib/colvars/Makefile.fermi b/lib/colvars/Makefile.fermi deleted file mode 100644 index 906675ae12..0000000000 --- a/lib/colvars/Makefile.fermi +++ /dev/null @@ -1,120 +0,0 @@ -# library build -*- makefile -*- for colvars module - -# which file will be copied to Makefile.lammps - -EXTRAMAKE = Makefile.lammps.empty - -# ------ SETTINGS ------ - -CXX = g++ -CXXFLAGS = -O2 -mpc64 -g -fPIC \ - -Wall -Wno-sign-compare # -DCOLVARS_DEBUG -ARCHIVE = ar -ARCHFLAG = -rscv -SHELL = /bin/sh - -# ------ DEFINITIONS ------ - -SRC = colvaratoms.cpp colvarbias_abf.cpp colvarbias_alb.cpp colvarbias.cpp \ - colvarbias_histogram.cpp colvarbias_meta.cpp colvarbias_restraint.cpp \ - colvarcomp_angles.cpp colvarcomp_coordnums.cpp colvarcomp.cpp \ - colvarcomp_distances.cpp colvarcomp_protein.cpp colvarcomp_rotations.cpp \ - colvardeps.cpp colvar.cpp colvargrid.cpp colvarmodule.cpp colvarparse.cpp \ - colvarscript.cpp colvartypes.cpp colvarvalue.cpp - -LIB = libcolvars.a -OBJ = $(SRC:.cpp=.o) -EXE = #colvars_standalone - -# ------ MAKE PROCEDURE ------ - -default: $(LIB) $(EXE) Makefile.lammps - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - -colvars_standalone: colvars_main.o colvarproxy_standalone.o $(LIB) - $(CXX) -o $@ $(CXXFLAGS) $^ - -# ------ MAKE FLAGS ------ - -.SUFFIXES: -.SUFFIXES: .cpp .o - -.PHONY: default clean - -# ------ COMPILE RULES ------ - -.cpp.o: - $(CXX) $(CXXFLAGS) -c $< - -# ------ DEPENDENCIES ------ -# -colvaratoms.o: colvaratoms.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvaratoms.h -colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarbias_abf.h colvarbias.h colvargrid.h -colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarbias_alb.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_restraint.h 
colvarbias.h -colvarbias.o: colvarbias.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h -colvarbias_histogram.o: colvarbias_histogram.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_histogram.h colvarbias.h colvargrid.h -colvarbias_meta.o: colvarbias_meta.cpp colvar.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvarbias_meta.h colvarbias.h colvargrid.h -colvarbias_restraint.o: colvarbias_restraint.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias_restraint.h \ - colvarbias.h colvar.h colvarparse.h colvardeps.h -colvarcomp_angles.o: colvarcomp_angles.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarcomp.h colvaratoms.h -colvarcomp_coordnums.o: colvarcomp_coordnums.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvaratoms.h colvar.h colvarcomp.h -colvarcomp.o: colvarcomp.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \ - colvaratoms.h -colvarcomp_distances.o: colvarcomp_distances.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -colvarcomp_protein.o: colvarcomp_protein.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarcomp.h colvaratoms.h -colvarcomp_rotations.o: colvarcomp_rotations.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -colvar.o: colvar.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvarscript.h colvarbias.h -colvardeps.o: colvardeps.cpp colvardeps.h colvarmodule.h 
colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h -colvargrid.o: colvargrid.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvargrid.h -colvarmodule.o: colvarmodule.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarbias.h colvarbias_abf.h colvargrid.h colvarbias_alb.h \ - colvarbias_restraint.h colvarbias_histogram.h colvarbias_meta.h \ - colvarscript.h -colvarparse.o: colvarparse.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -colvarscript.o: colvarscript.cpp colvarscript.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h colvar.h \ - colvarparse.h colvardeps.h -colvartypes.o: colvartypes.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -colvarvalue.o: colvarvalue.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h - -# ------ CLEAN ------ - -clean: - -rm *.o *~ $(LIB) - diff --git a/lib/colvars/Makefile.g++ b/lib/colvars/Makefile.g++ index c80fa1065e..556e39d070 100644 --- a/lib/colvars/Makefile.g++ +++ b/lib/colvars/Makefile.g++ @@ -1,119 +1,25 @@ -# library build -*- makefile -*- for colvars module - -# which file will be copied to Makefile.lammps +# -*- makefile -*- to build Colvars module with GNU compiler EXTRAMAKE = Makefile.lammps.empty -# ------ SETTINGS ------ +COLVARS_LIB = libcolvars.a +COLVARS_OBJ_DIR = CXX = g++ -CXXFLAGS = -O2 -g -fPIC -funroll-loops # -DCOLVARS_DEBUG -ARCHIVE = ar -ARCHFLAG = -rscv +CXXFLAGS = -O2 -g -Wall -fPIC -funroll-loops +AR = ar +ARFLAGS = -rscv SHELL = /bin/sh -# ------ DEFINITIONS ------ - -SRC = colvaratoms.cpp colvarbias_abf.cpp colvarbias_alb.cpp colvarbias.cpp \ - colvarbias_histogram.cpp colvarbias_meta.cpp colvarbias_restraint.cpp \ - colvarcomp_angles.cpp colvarcomp_coordnums.cpp colvarcomp.cpp \ - colvarcomp_distances.cpp colvarcomp_protein.cpp 
colvarcomp_rotations.cpp \ - colvardeps.cpp colvar.cpp colvargrid.cpp colvarmodule.cpp colvarparse.cpp \ - colvarscript.cpp colvartypes.cpp colvarvalue.cpp - -LIB = libcolvars.a -OBJ = $(SRC:.cpp=.o) -EXE = #colvars_standalone - -# ------ MAKE PROCEDURE ------ - -default: $(LIB) $(EXE) Makefile.lammps - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - -colvars_standalone: colvars_main.o colvarproxy_standalone.o $(LIB) - $(CXX) -o $@ $(CXXFLAGS) $^ - -# ------ MAKE FLAGS ------ - -.SUFFIXES: -.SUFFIXES: .cpp .o - .PHONY: default clean -# ------ COMPILE RULES ------ +default: $(COLVARS_LIB) Makefile.lammps -.cpp.o: - $(CXX) $(CXXFLAGS) -c $< - -# ------ DEPENDENCIES ------ -# -colvaratoms.o: colvaratoms.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvaratoms.h -colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarbias_abf.h colvarbias.h colvargrid.h -colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarbias_alb.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_restraint.h colvarbias.h -colvarbias.o: colvarbias.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h -colvarbias_histogram.o: colvarbias_histogram.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_histogram.h colvarbias.h colvargrid.h -colvarbias_meta.o: colvarbias_meta.cpp colvar.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvarbias_meta.h colvarbias.h colvargrid.h -colvarbias_restraint.o: colvarbias_restraint.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias_restraint.h \ - colvarbias.h colvar.h colvarparse.h colvardeps.h -colvarcomp_angles.o: 
colvarcomp_angles.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarcomp.h colvaratoms.h -colvarcomp_coordnums.o: colvarcomp_coordnums.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvaratoms.h colvar.h colvarcomp.h -colvarcomp.o: colvarcomp.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \ - colvaratoms.h -colvarcomp_distances.o: colvarcomp_distances.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -colvarcomp_protein.o: colvarcomp_protein.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarcomp.h colvaratoms.h -colvarcomp_rotations.o: colvarcomp_rotations.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -colvar.o: colvar.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvarscript.h colvarbias.h -colvardeps.o: colvardeps.cpp colvardeps.h colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h -colvargrid.o: colvargrid.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvargrid.h -colvarmodule.o: colvarmodule.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarbias.h colvarbias_abf.h colvargrid.h colvarbias_alb.h \ - colvarbias_restraint.h colvarbias_histogram.h colvarbias_meta.h \ - colvarscript.h -colvarparse.o: colvarparse.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -colvarscript.o: colvarscript.cpp colvarscript.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h 
colvar.h \ - colvarparse.h colvardeps.h -colvartypes.o: colvartypes.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -colvarvalue.o: colvarvalue.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h - -# ------ CLEAN ------ +include Makefile.common clean: - -rm *.o *~ $(LIB) + -rm -f $(COLVARS_OBJS) $(COLVARS_LIB) + +Makefile.lammps: + -cp $(EXTRAMAKE) Makefile.lammps diff --git a/lib/colvars/Makefile.g++-debug b/lib/colvars/Makefile.g++-debug new file mode 100644 index 0000000000..a6ca2f8124 --- /dev/null +++ b/lib/colvars/Makefile.g++-debug @@ -0,0 +1,5 @@ +# -*- makefile -*- to build Colvars module with GNU compiler + +COLVARS_DEBUG = "YES" + +include Makefile.g++ diff --git a/lib/colvars/Makefile.lammps.debug b/lib/colvars/Makefile.lammps.debug index 1ef229d58a..1c4399a2cd 100644 --- a/lib/colvars/Makefile.lammps.debug +++ b/lib/colvars/Makefile.lammps.debug @@ -1,5 +1,5 @@ # Settings that the LAMMPS build will import when this package library is used -colvars_SYSINC = # -DCOLVARS_DEBUG +colvars_SYSINC = -DCOLVARS_DEBUG colvars_SYSLIB = colvars_SYSPATH = diff --git a/lib/colvars/Makefile.lammps.empty b/lib/colvars/Makefile.lammps.empty index 1ef229d58a..99f57b050b 100644 --- a/lib/colvars/Makefile.lammps.empty +++ b/lib/colvars/Makefile.lammps.empty @@ -1,5 +1,5 @@ # Settings that the LAMMPS build will import when this package library is used -colvars_SYSINC = # -DCOLVARS_DEBUG +colvars_SYSINC = colvars_SYSLIB = colvars_SYSPATH = diff --git a/lib/colvars/Makefile.mingw32-cross b/lib/colvars/Makefile.mingw32-cross deleted file mode 100644 index eba83c555f..0000000000 --- a/lib/colvars/Makefile.mingw32-cross +++ /dev/null @@ -1,127 +0,0 @@ -# library build -*- makefile -*- for colvars module - -# which file will be copied to Makefile.lammps - -EXTRAMAKE = Makefile.lammps.empty - -# ------ SETTINGS ------ - -CXX = i686-w64-mingw32-g++ -CXXFLAGS = -O2 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - -fno-rtti 
-fno-exceptions -finline-functions \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -Wall -W -Wno-uninitialized -ARCHIVE = i686-w64-mingw32-ar -ARCHFLAG = -rscv -SHELL = /bin/sh - -# ------ DEFINITIONS ------ - -SRC = colvaratoms.cpp colvarbias_abf.cpp colvarbias_alb.cpp colvarbias.cpp \ - colvarbias_histogram.cpp colvarbias_meta.cpp colvarbias_restraint.cpp \ - colvarcomp_angles.cpp colvarcomp_coordnums.cpp colvarcomp.cpp \ - colvarcomp_distances.cpp colvarcomp_protein.cpp colvarcomp_rotations.cpp \ - colvardeps.cpp colvar.cpp colvargrid.cpp colvarmodule.cpp colvarparse.cpp \ - colvarscript.cpp colvartypes.cpp colvarvalue.cpp - -DIR = Obj_mingw32/ -LIB = $(DIR)libcolvars.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) -EXE = #colvars_standalone - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) $(EXE) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(DIR) $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -$(DIR)colvars_standalone: colvars_main.o colvarproxy_standalone.o $(LIB) - $(CXX) -o $@ $(CXXFLAGS) $^ - -# ------ MAKE FLAGS ------ - -.SUFFIXES: -.SUFFIXES: .cpp .o - -.PHONY: default clean - -# ------ COMPILE RULES ------ - -$(DIR)%.o: %.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -# ------ DEPENDENCIES ------ -# -$(DIR)colvaratoms.o: colvaratoms.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvaratoms.h -$(DIR)colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarbias_abf.h colvarbias.h colvargrid.h -$(DIR)colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarbias_alb.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_restraint.h colvarbias.h -$(DIR)colvarbias.o: colvarbias.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h 
-$(DIR)colvarbias_histogram.o: colvarbias_histogram.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_histogram.h colvarbias.h colvargrid.h -$(DIR)colvarbias_meta.o: colvarbias_meta.cpp colvar.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvarbias_meta.h colvarbias.h colvargrid.h -$(DIR)colvarbias_restraint.o: colvarbias_restraint.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias_restraint.h \ - colvarbias.h colvar.h colvarparse.h colvardeps.h -$(DIR)colvarcomp_angles.o: colvarcomp_angles.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarcomp.h colvaratoms.h -$(DIR)colvarcomp_coordnums.o: colvarcomp_coordnums.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvaratoms.h colvar.h colvarcomp.h -$(DIR)colvarcomp.o: colvarcomp.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \ - colvaratoms.h -$(DIR)colvarcomp_distances.o: colvarcomp_distances.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -$(DIR)colvarcomp_protein.o: colvarcomp_protein.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarcomp.h colvaratoms.h -$(DIR)colvarcomp_rotations.o: colvarcomp_rotations.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -$(DIR)colvar.o: colvar.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvarscript.h colvarbias.h -$(DIR)colvardeps.o: colvardeps.cpp colvardeps.h colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h -$(DIR)colvargrid.o: 
colvargrid.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvargrid.h -$(DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarbias.h colvarbias_abf.h colvargrid.h colvarbias_alb.h \ - colvarbias_restraint.h colvarbias_histogram.h colvarbias_meta.h \ - colvarscript.h -$(DIR)colvarparse.o: colvarparse.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -$(DIR)colvarscript.o: colvarscript.cpp colvarscript.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h colvar.h \ - colvarparse.h colvardeps.h -$(DIR)colvartypes.o: colvartypes.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -$(DIR)colvarvalue.o: colvarvalue.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o *~ $(LIB) - -rmdir $(DIR) diff --git a/lib/colvars/Makefile.mingw32-cross-mpi b/lib/colvars/Makefile.mingw32-cross-mpi deleted file mode 100644 index 1e35c5b461..0000000000 --- a/lib/colvars/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw32-cross - -rm -f Obj_mingw32-mpi - ln -s Obj_mingw32 Obj_mingw32-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw32-cross - -rm -f Obj_mingw32-mpi - diff --git a/lib/colvars/Makefile.mingw64-cross b/lib/colvars/Makefile.mingw64-cross deleted file mode 100644 index 1d83b6a0a8..0000000000 --- a/lib/colvars/Makefile.mingw64-cross +++ /dev/null @@ -1,127 +0,0 @@ -# library build -*- makefile -*- for colvars module - -# which file will be copied to Makefile.lammps - -EXTRAMAKE = Makefile.lammps.empty - -# ------ SETTINGS ------ - -CXX = x86_64-w64-mingw32-g++ -CXXFLAGS = -O2 -march=core2 -mtune=core2 -mpc64 -msse2 \ - -fno-rtti -fno-exceptions 
-finline-functions \ - -ffast-math -funroll-loops -fstrict-aliasing \ - -Wall -W -Wno-uninitialized -ARCHIVE = x86_64-w64-mingw32-ar -ARCHFLAG = -rscv -SHELL = /bin/sh - -# ------ DEFINITIONS ------ - -SRC = colvaratoms.cpp colvarbias_abf.cpp colvarbias_alb.cpp colvarbias.cpp \ - colvarbias_histogram.cpp colvarbias_meta.cpp colvarbias_restraint.cpp \ - colvarcomp_angles.cpp colvarcomp_coordnums.cpp colvarcomp.cpp \ - colvarcomp_distances.cpp colvarcomp_protein.cpp colvarcomp_rotations.cpp \ - colvardeps.cpp colvar.cpp colvargrid.cpp colvarmodule.cpp colvarparse.cpp \ - colvarscript.cpp colvartypes.cpp colvarvalue.cpp - -DIR = Obj_mingw64/ -LIB = $(DIR)libcolvars.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) -EXE = #colvars_standalone - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) $(EXE) Makefile.lammps - -$(DIR): - mkdir $(DIR) - -Makefile.lammps: - @cp $(EXTRAMAKE) Makefile.lammps - -$(LIB): $(DIR) $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -$(DIR)colvars_standalone: colvars_main.o colvarproxy_standalone.o $(LIB) - $(CXX) -o $@ $(CXXFLAGS) $^ - -# ------ MAKE FLAGS ------ - -.SUFFIXES: -.SUFFIXES: .cpp .o - -.PHONY: default clean - -# ------ COMPILE RULES ------ - -$(DIR)%.o: %.cpp - $(CXX) $(CXXFLAGS) -c $< -o $@ - -# ------ DEPENDENCIES ------ -# -$(DIR)colvaratoms.o: colvaratoms.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvaratoms.h -$(DIR)colvarbias_abf.o: colvarbias_abf.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarbias_abf.h colvarbias.h colvargrid.h -$(DIR)colvarbias_alb.o: colvarbias_alb.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarbias_alb.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_restraint.h colvarbias.h -$(DIR)colvarbias.o: colvarbias.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarbias.h colvar.h colvarparse.h colvardeps.h 
-$(DIR)colvarbias_histogram.o: colvarbias_histogram.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvar.h colvarparse.h \ - colvardeps.h colvarbias_histogram.h colvarbias.h colvargrid.h -$(DIR)colvarbias_meta.o: colvarbias_meta.cpp colvar.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvarbias_meta.h colvarbias.h colvargrid.h -$(DIR)colvarbias_restraint.o: colvarbias_restraint.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias_restraint.h \ - colvarbias.h colvar.h colvarparse.h colvardeps.h -$(DIR)colvarcomp_angles.o: colvarcomp_angles.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvar.h colvarparse.h colvardeps.h \ - colvarcomp.h colvaratoms.h -$(DIR)colvarcomp_coordnums.o: colvarcomp_coordnums.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvaratoms.h colvar.h colvarcomp.h -$(DIR)colvarcomp.o: colvarcomp.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvar.h colvarparse.h colvardeps.h colvarcomp.h \ - colvaratoms.h -$(DIR)colvarcomp_distances.o: colvarcomp_distances.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -$(DIR)colvarcomp_protein.o: colvarcomp_protein.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarcomp.h colvaratoms.h -$(DIR)colvarcomp_rotations.o: colvarcomp_rotations.cpp colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h \ - colvar.h colvarcomp.h colvaratoms.h -$(DIR)colvar.o: colvar.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvarscript.h colvarbias.h -$(DIR)colvardeps.o: colvardeps.cpp colvardeps.h colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h -$(DIR)colvargrid.o: 
colvargrid.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h colvardeps.h colvar.h colvarcomp.h \ - colvaratoms.h colvargrid.h -$(DIR)colvarmodule.o: colvarmodule.cpp colvarmodule.h colvartypes.h \ - colvarproxy.h colvarvalue.h colvarparse.h colvardeps.h colvar.h \ - colvarbias.h colvarbias_abf.h colvargrid.h colvarbias_alb.h \ - colvarbias_restraint.h colvarbias_histogram.h colvarbias_meta.h \ - colvarscript.h -$(DIR)colvarparse.o: colvarparse.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -$(DIR)colvarscript.o: colvarscript.cpp colvarscript.h colvarmodule.h \ - colvartypes.h colvarproxy.h colvarvalue.h colvarbias.h colvar.h \ - colvarparse.h colvardeps.h -$(DIR)colvartypes.o: colvartypes.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h colvarparse.h -$(DIR)colvarvalue.o: colvarvalue.cpp colvarmodule.h colvartypes.h colvarproxy.h \ - colvarvalue.h - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o *~ $(LIB) - -rmdir $(DIR) diff --git a/lib/colvars/Makefile.mingw64-cross-mpi b/lib/colvars/Makefile.mingw64-cross-mpi deleted file mode 100644 index ca6f4a6d43..0000000000 --- a/lib/colvars/Makefile.mingw64-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw64-cross - -rm -f Obj_mingw64-mpi - ln -s Obj_mingw64 Obj_mingw64-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw64-cross - -rm -f Obj_mingw64-mpi - diff --git a/lib/colvars/Makefile.mpi b/lib/colvars/Makefile.mpi new file mode 100644 index 0000000000..6343ed7c06 --- /dev/null +++ b/lib/colvars/Makefile.mpi @@ -0,0 +1,25 @@ +# -*- makefile -*- to build Colvars module with default MPI compiler wrapper + +EXTRAMAKE = Makefile.lammps.empty + +COLVARS_LIB = libcolvars.a +COLVARS_OBJ_DIR = + +CXX = mpicxx +CXXFLAGS = -O2 -g -Wall -fPIC -funroll-loops +AR = ar +ARFLAGS = -rscv +SHELL = /bin/sh + +.PHONY: default clean + +default: $(COLVARS_LIB) Makefile.lammps 
+ +include Makefile.common + +clean: + -rm -f $(COLVARS_OBJS) $(COLVARS_LIB) + +Makefile.lammps: + -cp $(EXTRAMAKE) Makefile.lammps + diff --git a/lib/colvars/Makefile.serial b/lib/colvars/Makefile.serial new file mode 100644 index 0000000000..556e39d070 --- /dev/null +++ b/lib/colvars/Makefile.serial @@ -0,0 +1,25 @@ +# -*- makefile -*- to build Colvars module with GNU compiler + +EXTRAMAKE = Makefile.lammps.empty + +COLVARS_LIB = libcolvars.a +COLVARS_OBJ_DIR = + +CXX = g++ +CXXFLAGS = -O2 -g -Wall -fPIC -funroll-loops +AR = ar +ARFLAGS = -rscv +SHELL = /bin/sh + +.PHONY: default clean + +default: $(COLVARS_LIB) Makefile.lammps + +include Makefile.common + +clean: + -rm -f $(COLVARS_OBJS) $(COLVARS_LIB) + +Makefile.lammps: + -cp $(EXTRAMAKE) Makefile.lammps + diff --git a/lib/colvars/README b/lib/colvars/README index a5e5938b20..5df9612dfa 100644 --- a/lib/colvars/README +++ b/lib/colvars/README @@ -1,49 +1,39 @@ -This library is the portable "colvars" module, originally interfaced -with the NAMD MD code, to provide an extensible software framework, -that allows enhanced sampling in molecular dynamics simulations. -The module is written to maximize performance, portability, -flexibility of usage for the user, and extensibility for the developer. +## Collective variables module (Colvars) -The development of the colvars library is now hosted on github at: -http://colvars.github.io/ -You can use this site to get access to the latest development sources -and the up-to-date documentation. +A software module for molecular simulation and analysis that provides a +high-performance implementation of sampling algorithms defined on a reduced +space of continuously differentiable functions (aka collective variables). 
-Copy of the specific documentation is also in - doc/PDF/colvars-refman-lammps.pdf +The module itself implements a variety of functions and algorithms, including +free-energy estimators based on thermodynamic forces, non-equilibrium work and +probability distributions. -Please report bugs and request new features at: -https://github.com/colvars/colvars/issues +For a brief description see: + http://colvars.github.io/ + https://github.com/colvars/colvars/ -The following publications describe the principles of -the implementation of this library: - Using collective variables to drive molecular dynamics simulations, - Giacomo Fiorin , Michael L. Klein & Jérôme Hénin (2013): - Molecular Physics DOI:10.1080/00268976.2013.813594 - - Exploring Multidimensional Free Energy Landscapes Using - Time-Dependent Biases on Collective Variables, - J. Hénin, G. Fiorin, C. Chipot, and M. L. Klein, - J. Chem. Theory Comput., 6, 35-47 (2010). - -------------------------------------------------- +## How to build This directory has source files to build a library that LAMMPS links against when using the USER-COLVARS package. -This library must be built with a C++ compiler, before LAMMPS is -built, so LAMMPS can link against it. +This library must be built with a C++ compiler, *before* LAMMPS is built and +*after* packages are configured, so that LAMMPS can link against it. +You can use the provided Makefile.* files or create your own, specific to your +compiler and system. For example: -You can type "make lib-colvars" from the src directory to see help on -how to build this library via make commands, or you can do the same -thing by typing "python Install.py" from within this directory, or you -can do it manually by following the instructions below. + cd src + make yes-user-colvars + cd ../lib/colvars + make -f Makefile.g++ -Build the library using one of the provided Makefile.* files or create -your own, specific to your compiler and system. 
For example: +where Makefile.g++ uses the GNU C++ compiler and is a good template to start. -make -f Makefile.g++ +**Optional**: if you use the Install.py script provided in this folder, you +can give the machine name as the '-m' argument. This can be the suffix of one +of the files from either this folder, or from src/MAKE/MACHINES. +*This is only supported by the Install.py within the lib/colvars folder*. When you are done building this library, two files should exist in this directory: @@ -51,23 +41,42 @@ exist in this directory: libcolvars.a the library LAMMPS will link against Makefile.lammps settings the LAMMPS Makefile will import -Makefile.lammps is created by the make command, by copying one of the -Makefile.lammps.* files. See the EXTRAMAKE setting at the top of the -Makefile.* files. - IMPORTANT: You must examine the final Makefile.lammps to insure it is correct for your system, else the LAMMPS build will likely fail. -Makefile.lammps has settings for 3 variables: - -user-colvars_SYSINC = leave blank for this package unless debugging -user-colvars_SYSLIB = leave blank for this package -user-colvars_SYSPATH = leave blank for this package - -You have several choices for these settings: - -Since they do not normally need to be set, the settings in -Makefile.lammps.empty should work. - If you want to set a debug flag recognized by the library, the -settings in Makefile.lammps.debug should work. +settings in Makefile.common should work. + + +## Documentation + +For the reference manual see: + http://colvars.github.io/colvars-refman-lammps + +A copy of the reference manual is also in: + doc/PDF/colvars-refman-lammps.pdf + +Also available is a Doxygen-based developer documentation: + http://colvars.github.io/doxygen/html/ + +The reference article is: + G. Fiorin, M. L. Klein, and J. Henin, + Molecular Physics 111, 3345 (2013). 
+ http://dx.doi.org/10.1080/00268976.2013.813594 + + +## Updating to the latest version + +To recompile LAMMPS with the most recent version of this module, download the `master` +branch of this repository from GitHub, or clone it via git: + + git clone https://github.com/colvars/colvars.git + +and run the provided `update-colvars-code.sh` script against the unpacked +LAMMPS source tree: + + ./update-colvars-code.sh /path/to/lammps/folder + +Please report bugs and request new features at: +https://github.com/colvars/colvars/issues + diff --git a/lib/colvars/colvar.cpp b/lib/colvars/colvar.cpp index e8c7e88324..d23bd852aa 100644 --- a/lib/colvars/colvar.cpp +++ b/lib/colvars/colvar.cpp @@ -1,5 +1,5 @@ - // -*- c++ -*- + // This file is part of the Collective Variables module (Colvars). // The original version of Colvars and its updates are located at: // https://github.com/colvars/colvars @@ -7,13 +7,14 @@ // If you wish to distribute your changes, please submit them to the // Colvars repository at GitHub. 
- #include "colvarmodule.h" #include "colvarvalue.h" #include "colvarparse.h" #include "colvar.h" #include "colvarcomp.h" #include "colvarscript.h" + +// used in build_atom_list() #include @@ -25,8 +26,10 @@ bool compare(colvar::cvc *i, colvar::cvc *j) { colvar::colvar() + : prev_timestep(-1) { // Initialize static array once and for all + runave_os = NULL; init_cv_requires(); } @@ -66,6 +69,13 @@ int colvar::init(std::string const &conf) size_t i; +#ifdef LEPTON + error_code |= init_custom_function(conf); + if (error_code != COLVARS_OK) { + return cvm::get_error(); + } +#endif + // Setup colvar as scripted function of components if (get_keyval(conf, "scriptedFunction", scripted_function, "", colvarparse::parse_silent)) { @@ -122,7 +132,7 @@ int colvar::init(std::string const &conf) } } - if (!is_enabled(f_cv_scripted)) { + if (!(is_enabled(f_cv_scripted) || is_enabled(f_cv_custom_function))) { colvarvalue const &cvc_value = (cvcs[0])->value(); if (cvm::debug()) cvm::log ("This collective variable is a "+ @@ -141,7 +151,7 @@ int colvar::init(std::string const &conf) // check for linear combinations { - bool lin = !is_enabled(f_cv_scripted); + bool lin = !(is_enabled(f_cv_scripted) || is_enabled(f_cv_custom_function)); for (i = 0; i < cvcs.size(); i++) { // FIXME this is a reverse dependency, ie. cv feature depends on cvc flag @@ -206,7 +216,7 @@ int colvar::init(std::string const &conf) for (i = 0; i < cvcs.size(); i++) { // components may have different types only for scripted functions - if (!is_enabled(f_cv_scripted) && (colvarvalue::check_types(cvcs[i]->value(), + if (!(is_enabled(f_cv_scripted) || is_enabled(f_cv_custom_function)) && (colvarvalue::check_types(cvcs[i]->value(), cvcs[0]->value())) ) { cvm::error("ERROR: you are definining this collective variable " "by using components of different types. 
" @@ -223,7 +233,6 @@ int colvar::init(std::string const &conf) // at this point, the colvar's type is defined f.type(value()); - f_accumulated.type(value()); x_old.type(value()); v_fdiff.type(value()); @@ -239,18 +248,23 @@ int colvar::init(std::string const &conf) reset_bias_force(); + get_keyval(conf, "timeStepFactor", time_step_factor, 1); + if (time_step_factor < 0) { + cvm::error("Error: timeStepFactor must be positive.\n"); + return COLVARS_ERROR; + } + if (time_step_factor != 1) { + enable(f_cv_multiple_ts); + } + // TODO use here information from the CVCs' own natural boundaries error_code |= init_grid_parameters(conf); - get_keyval(conf, "timeStepFactor", time_step_factor, 1); - error_code |= init_extended_Lagrangian(conf); error_code |= init_output_flags(conf); - // Start in active state by default + // Now that the children are defined we can solve dependencies enable(f_cv_active); - // Make sure dependency side-effects are correct - refresh_deps(); if (cvm::b_analysis) parse_analysis(conf); @@ -262,6 +276,158 @@ int colvar::init(std::string const &conf) } +#ifdef LEPTON +int colvar::init_custom_function(std::string const &conf) +{ + std::string expr; + std::vector pexprs; + Lepton::ParsedExpression pexpr; + size_t pos = 0; // current position in config string + double *ref; + + if (!key_lookup(conf, "customFunction", &expr, &pos)) { + return COLVARS_OK; + } + + enable(f_cv_custom_function); + cvm::log("This colvar uses a custom function.\n"); + + do { + if (cvm::debug()) + cvm::log("Parsing expression \"" + expr + "\".\n"); + try { + pexpr = Lepton::Parser::parse(expr); + pexprs.push_back(pexpr); + } + catch (...) { + cvm::error("Error parsing expression \"" + expr + "\".\n", INPUT_ERROR); + return INPUT_ERROR; + } + + try { + value_evaluators.push_back( + new Lepton::CompiledExpression(pexpr.createCompiledExpression())); + // Define variables for cvc values + // Stored in order: expr1, cvc1, cvc2, expr2, cvc1... 
+ for (size_t i = 0; i < cvcs.size(); i++) { + for (size_t j = 0; j < cvcs[i]->value().size(); j++) { + std::string vn = cvcs[i]->name + + (cvcs[i]->value().size() > 1 ? cvm::to_str(j+1) : ""); + try { + ref =&value_evaluators.back()->getVariableReference(vn); + } + catch (...) { // Variable is absent from expression + // To keep the same workflow, we use a pointer to a double here + // that will receive CVC values - even though none was allocated by Lepton + ref = &dev_null; + if (cvm::debug()) + cvm::log("Variable " + vn + " is absent from expression \"" + expr + "\".\n"); + } + value_eval_var_refs.push_back(ref); + } + } + } + catch (...) { + cvm::error("Error compiling expression \"" + expr + "\".\n", INPUT_ERROR); + return INPUT_ERROR; + } + } while (key_lookup(conf, "customFunction", &expr, &pos)); + + + // Now define derivative with respect to each scalar sub-component + for (size_t i = 0; i < cvcs.size(); i++) { + for (size_t j = 0; j < cvcs[i]->value().size(); j++) { + std::string vn = cvcs[i]->name + + (cvcs[i]->value().size() > 1 ? cvm::to_str(j+1) : ""); + // Element ordering: we want the + // gradient vector of derivatives of all elements of the colvar + // wrt to a given element of a cvc ([i][j]) + for (size_t c = 0; c < pexprs.size(); c++) { + gradient_evaluators.push_back( + new Lepton::CompiledExpression(pexprs[c].differentiate(vn).createCompiledExpression())); + // and record the refs to each variable in those expressions + for (size_t k = 0; k < cvcs.size(); k++) { + for (size_t l = 0; l < cvcs[k]->value().size(); l++) { + std::string vvn = cvcs[k]->name + + (cvcs[k]->value().size() > 1 ? cvm::to_str(l+1) : ""); + try { + ref = &gradient_evaluators.back()->getVariableReference(vvn); + } + catch (...) 
{ // Variable is absent from derivative + // To keep the same workflow, we use a pointer to a double here + // that will receive CVC values - even though none was allocated by Lepton + if (cvm::debug()) + cvm::log("Variable " + vvn + " is absent from derivative of \"" + expr + "\" wrt " + vn + ".\n"); + ref = &dev_null; + } + grad_eval_var_refs.push_back(ref); + } + } + } + } + } + + + if (value_evaluators.size() == 0) { + cvm::error("Error: no custom function defined.\n", INPUT_ERROR); + return INPUT_ERROR; + } + + std::string type_str; + bool b_type_specified = get_keyval(conf, "customFunctionType", + type_str, "scalar", parse_silent); + x.type(colvarvalue::type_notset); + int t; + for (t = 0; t < colvarvalue::type_all; t++) { + if (type_str == colvarvalue::type_keyword(colvarvalue::Type(t))) { + x.type(colvarvalue::Type(t)); + break; + } + } + if (x.type() == colvarvalue::type_notset) { + cvm::error("Could not parse custom colvar type.", INPUT_ERROR); + return INPUT_ERROR; + } + + // Guess type based on number of expressions + if (!b_type_specified) { + if (value_evaluators.size() == 1) { + x.type(colvarvalue::type_scalar); + } else { + x.type(colvarvalue::type_vector); + } + } + + if (x.type() == colvarvalue::type_vector) { + x.vector1d_value.resize(value_evaluators.size()); + } + + x_reported.type(x); + cvm::log(std::string("Expecting colvar value of type ") + + colvarvalue::type_desc(x.type()) + + (x.type()==colvarvalue::type_vector ? 
" of size " + cvm::to_str(x.size()) : "") + + ".\n"); + + if (x.size() != value_evaluators.size()) { + cvm::error("Error: based on custom function type, expected " + + cvm::to_str(x.size()) + " scalar expressions, but " + + cvm::to_str(value_evaluators.size() + " were found.\n")); + return INPUT_ERROR; + } + + return COLVARS_OK; +} + +#else + +int colvar::init_custom_function(std::string const &conf) +{ + return COLVARS_OK; +} + +#endif // #ifdef LEPTON + + int colvar::init_grid_parameters(std::string const &conf) { colvarmodule *cv = cvm::main(); @@ -326,7 +492,8 @@ int colvar::init_grid_parameters(std::string const &conf) std::string const walls_conf("\n\ harmonicWalls {\n\ name "+this->name+"w\n\ - colvars "+this->name+"\n"+lw_conf+uw_conf+ + colvars "+this->name+"\n"+lw_conf+uw_conf+"\ + timeStepFactor "+cvm::to_str(time_step_factor)+"\n"+ "}\n"); cv->append_new_config(walls_conf); } @@ -372,17 +539,14 @@ harmonicWalls {\n\ int colvar::init_extended_Lagrangian(std::string const &conf) { - bool b_extended_Lagrangian; - get_keyval(conf, "extendedLagrangian", b_extended_Lagrangian, false); + get_keyval_feature(this, conf, "extendedLagrangian", f_cv_extended_Lagrangian, false); - if (b_extended_Lagrangian) { + if (is_enabled(f_cv_extended_Lagrangian)) { cvm::real temp, tolerance, period; cvm::log("Enabling the extended Lagrangian term for colvar \""+ this->name+"\".\n"); - enable(f_cv_extended_Lagrangian); - xr.type(value()); vr.type(value()); fr.type(value()); @@ -404,7 +568,7 @@ int colvar::init_extended_Lagrangian(std::string const &conf) return INPUT_ERROR; } ext_force_k = cvm::boltzmann() * temp / (tolerance * tolerance); - cvm::log("Computed extended system force constant: " + cvm::to_str(ext_force_k) + " kcal/mol/U^2"); + cvm::log("Computed extended system force constant: " + cvm::to_str(ext_force_k) + " [E]/U^2"); get_keyval(conf, "extendedTimeConstant", period, 200.0); if (period <= 0.0) { @@ -412,7 +576,7 @@ int 
colvar::init_extended_Lagrangian(std::string const &conf) } ext_mass = (cvm::boltzmann() * temp * period * period) / (4.0 * PI * PI * tolerance * tolerance); - cvm::log("Computed fictitious mass: " + cvm::to_str(ext_mass) + " kcal/mol/(U/fs)^2 (U: colvar unit)"); + cvm::log("Computed fictitious mass: " + cvm::to_str(ext_mass) + " [E]/(U/fs)^2 (U: colvar unit)"); { bool b_output_energy; @@ -429,8 +593,9 @@ int colvar::init_extended_Lagrangian(std::string const &conf) } if (ext_gamma != 0.0) { enable(f_cv_Langevin); - ext_gamma *= 1.0e-3; // convert from ps-1 to fs-1 - ext_sigma = std::sqrt(2.0 * cvm::boltzmann() * temp * ext_gamma * ext_mass / cvm::dt()); + ext_gamma *= 1.0e-3; // correct as long as input is required in ps-1 and cvm::dt() is in fs + // Adjust Langevin sigma for slow time step if time_step_factor != 1 + ext_sigma = std::sqrt(2.0 * cvm::boltzmann() * temp * ext_gamma * ext_mass / (cvm::dt() * cvm::real(time_step_factor))); } } @@ -486,8 +651,8 @@ template int colvar::init_components_type(std::string c size_t pos = 0; while ( this->key_lookup(conf, def_config_key, - def_conf, - pos) ) { + &def_conf, + &pos) ) { if (!def_conf.size()) continue; cvm::log("Initializing " "a new \""+std::string(def_config_key)+"\" component"+ @@ -514,6 +679,7 @@ template int colvar::init_components_type(std::string c if ( (cvcp->period != 0.0) || (cvcp->wrap_center != 0.0) ) { if ( (cvcp->function_type != std::string("distance_z")) && (cvcp->function_type != std::string("dihedral")) && + (cvcp->function_type != std::string("polar_phi")) && (cvcp->function_type != std::string("spin_angle")) ) { cvm::error("Error: invalid use of period and/or " "wrapAround in a \""+ @@ -566,6 +732,10 @@ int colvar::init_components(std::string const &conf) "on an axis", "distanceZ"); error_code |= init_components_type(conf, "distance projection " "on a plane", "distanceXY"); + error_code |= init_components_type(conf, "spherical polar angle theta", + "polarTheta"); + error_code |= 
init_components_type(conf, "spherical azimuthal angle phi", + "polarPhi"); error_code |= init_components_type(conf, "average distance " "weighted by inverse power", "distanceInv"); error_code |= init_components_type(conf, "N1xN2-long vector " @@ -618,16 +788,18 @@ int colvar::init_components(std::string const &conf) } -int colvar::refresh_deps() +void colvar::do_feature_side_effects(int id) { - // If enabled features are changed upstream, the features below should be refreshed - if (is_enabled(f_cv_total_force_calc)) { - cvm::request_total_force(); + switch (id) { + case f_cv_total_force_calc: + cvm::request_total_force(); + break; + case f_cv_collect_gradient: + if (atom_ids.size() == 0) { + build_atom_list(); + } + break; } - if (is_enabled(f_cv_collect_gradient) && atom_ids.size() == 0) { - build_atom_list(); - } - return COLVARS_OK; } @@ -688,20 +860,19 @@ int colvar::parse_analysis(std::string const &conf) cvm::error("Error: runAveStride must be commensurate with the restart frequency.\n", INPUT_ERROR); } - std::string runave_outfile; get_keyval(conf, "runAveOutputFile", runave_outfile, std::string(cvm::output_prefix()+"."+ this->name+".runave.traj")); size_t const this_cv_width = x.output_width(cvm::cv_width); - cvm::backup_file(runave_outfile.c_str()); - runave_os.open(runave_outfile.c_str()); - runave_os << "# " << cvm::wrap_string("step", cvm::it_width-2) - << " " - << cvm::wrap_string("running average", this_cv_width) - << " " - << cvm::wrap_string("running stddev", this_cv_width) - << "\n"; + cvm::proxy->backup_file(runave_outfile); + runave_os = cvm::proxy->output_stream(runave_outfile); + *runave_os << "# " << cvm::wrap_string("step", cvm::it_width-2) + << " " + << cvm::wrap_string("running average", this_cv_width) + << " " + << cvm::wrap_string("running stddev", this_cv_width) + << "\n"; } acf_length = 0; @@ -768,6 +939,10 @@ void colvar::setup() { colvar::~colvar() { + // There is no need to call free_children_deps() here + // because the children 
are cvcs and will be deleted + // just below + // Clear references to this colvar's cvcs as children // for dependency purposes remove_all_children(); @@ -792,6 +967,22 @@ colvar::~colvar() break; } } + +#ifdef LEPTON + for (std::vector::iterator cei = value_evaluators.begin(); + cei != value_evaluators.end(); + ++cei) { + if (*cei != NULL) delete (*cei); + } + value_evaluators.clear(); + + for (std::vector::iterator gei = gradient_evaluators.begin(); + gei != gradient_evaluators.end(); + ++gei) { + if (*gei != NULL) delete (*gei); + } + gradient_evaluators.clear(); +#endif } @@ -911,7 +1102,6 @@ int colvar::calc_cvc_values(int first_cvc, size_t num_cvcs) int colvar::collect_cvc_values() { x.reset(); - size_t i; // combine them appropriately, using either a scripted function or a polynomial if (is_enabled(f_cv_scripted)) { @@ -925,9 +1115,26 @@ int colvar::collect_cvc_values() cvm::error("Error running scripted colvar"); return COLVARS_OK; } + +#ifdef LEPTON + } else if (is_enabled(f_cv_custom_function)) { + + size_t l = 0; // index in the vector of variable references + + for (size_t i = 0; i < x.size(); i++) { + // Fill Lepton evaluator variables with CVC values, serialized into scalars + for (size_t j = 0; j < cvcs.size(); j++) { + for (size_t k = 0; k < cvcs[j]->value().size(); k++) { + *(value_eval_var_refs[l++]) = cvcs[j]->value()[k]; + } + } + x[i] = value_evaluators[i]->evaluate(); + } +#endif + } else if (x.type() == colvarvalue::type_scalar) { // polynomial combination allowed - for (i = 0; i < cvcs.size(); i++) { + for (size_t i = 0; i < cvcs.size(); i++) { if (!cvcs[i]->is_enabled()) continue; x += (cvcs[i])->sup_coeff * ( ((cvcs[i])->sup_np != 1) ? 
@@ -935,7 +1142,7 @@ int colvar::collect_cvc_values() (cvcs[i])->value().real_value ); } } else { - for (i = 0; i < cvcs.size(); i++) { + for (size_t i = 0; i < cvcs.size(); i++) { if (!cvcs[i]->is_enabled()) continue; x += (cvcs[i])->sup_coeff * (cvcs[i])->value(); } @@ -984,16 +1191,9 @@ int colvar::calc_cvc_gradients(int first_cvc, size_t num_cvcs) (cvcs[i])->calc_gradients(); // if requested, propagate (via chain rule) the gradients above // to the atoms used to define the roto-translation - // This could be integrated in the CVC base class - for (size_t ig = 0; ig < cvcs[i]->atom_groups.size(); ig++) { - if (cvcs[i]->atom_groups[ig]->b_fit_gradients) - cvcs[i]->atom_groups[ig]->calc_fit_gradients(); - - if (cvcs[i]->is_enabled(f_cvc_debug_gradient)) { - cvm::log("Debugging gradients for " + cvcs[i]->description); - cvcs[i]->debug_gradients(cvcs[i]->atom_groups[ig]); - } - } + (cvcs[i])->calc_fit_gradients(); + if ((cvcs[i])->is_enabled(f_cvc_debug_gradient)) + (cvcs[i])->debug_gradients(); } cvm::decrease_depth(); @@ -1011,13 +1211,6 @@ int colvar::collect_cvc_gradients() size_t i; if (is_enabled(f_cv_collect_gradient)) { - - if (is_enabled(f_cv_scripted)) { - cvm::error("Collecting atomic gradients is not implemented for " - "scripted colvars.", COLVARS_NOT_IMPLEMENTED); - return COLVARS_NOT_IMPLEMENTED; - } - // Collect the atomic gradients inside colvar object for (unsigned int a = 0; a < atomic_gradients.size(); a++) { atomic_gradients[a].reset(); @@ -1214,6 +1407,11 @@ cvm::real colvar::update_forces_energy() // set to zero the applied force f.type(value()); f.reset(); + fr.reset(); + + // If we are not active at this timestep, that's all we have to do + // return with energy == zero + if (!is_enabled(f_cv_active)) return 0.; // add the biases' force, which at this point should already have // been summed over each bias using this colvar @@ -1236,7 +1434,24 @@ cvm::real colvar::update_forces_energy() cvm::log("Updating extended-Lagrangian degree of 
freedom.\n"); } - cvm::real dt = cvm::dt(); + if (prev_timestep > -1) { + // Keep track of slow timestep to integrate MTS colvars + // the colvar checks the interval after waking up twice + int n_timesteps = cvm::step_relative() - prev_timestep; + if (n_timesteps != 0 && n_timesteps != time_step_factor) { + cvm::error("Error: extended-Lagrangian " + description + " has timeStepFactor " + + cvm::to_str(time_step_factor) + ", but was activated after " + cvm::to_str(n_timesteps) + + " steps at timestep " + cvm::to_str(cvm::step_absolute()) + " (relative step: " + + cvm::to_str(cvm::step_relative()) + ").\n" + + "Make sure that this colvar is requested by biases at multiples of timeStepFactor.\n"); + return 0.; + } + } + prev_timestep = cvm::step_relative(); + + // Integrate with slow timestep (if time_step_factor != 1) + cvm::real dt = cvm::dt() * cvm::real(time_step_factor); + colvarvalue f_ext(fr.type()); // force acting on the extended variable f_ext.reset(); @@ -1248,18 +1463,17 @@ cvm::real colvar::update_forces_energy() // - after this code block, colvar force to be applied to atomic coordinates // ie. 
spring force (fb_actual will be added just below) fr = f; - f_ext = f + (-0.5 * ext_force_k) * this->dist2_lgrad(xr, x); - f = (-0.5 * ext_force_k) * this->dist2_rgrad(xr, x); + // External force has been scaled for a 1-timestep impulse, scale it back because we will + // integrate it with the colvar's own timestep factor + f_ext = f / cvm::real(time_step_factor); + f_ext += (-0.5 * ext_force_k) * this->dist2_lgrad(xr, x); + f = (-0.5 * ext_force_k) * this->dist2_rgrad(xr, x); + // Coupling force is a slow force, to be applied to atomic coords impulse-style + f *= cvm::real(time_step_factor); - if (is_enabled(f_cv_subtract_applied_force)) { - // Report a "system" force without the biases on this colvar - // that is, just the spring force - ft_reported = (-0.5 * ext_force_k) * this->dist2_lgrad(xr, x); - } else { - // The total force acting on the extended variable is f_ext - // This will be used in the next timestep - ft_reported = f_ext; - } + // The total force acting on the extended variable is f_ext + // This will be used in the next timestep + ft_reported = f_ext; // leapfrog: starting from x_i, f_i, v_(i-1/2) vr += (0.5 * dt) * f_ext / ext_mass; @@ -1279,13 +1493,10 @@ cvm::real colvar::update_forces_energy() if (this->is_enabled(f_cv_periodic)) this->wrap(xr); } - // Now adding the force on the actual colvar (for those biases who + // Now adding the force on the actual colvar (for those biases that // bypass the extended Lagrangian mass) f += fb_actual; - // Store force to be applied, possibly summed over several timesteps - f_accumulated += f; - if (is_enabled(f_cv_fdiff_velocity)) { // set it for the next step x_old = x; @@ -1306,7 +1517,7 @@ void colvar::communicate_forces() size_t i; if (cvm::debug()) { cvm::log("Communicating forces from colvar \""+this->name+"\".\n"); - cvm::log("Force to be applied: " + cvm::to_str(f_accumulated) + "\n"); + cvm::log("Force to be applied: " + cvm::to_str(f) + "\n"); } if (is_enabled(f_cv_scripted)) { @@ -1333,14 
+1544,42 @@ void colvar::communicate_forces() if (!cvcs[i]->is_enabled()) continue; // cvc force is colvar force times colvar/cvc Jacobian // (vector-matrix product) - (cvcs[i])->apply_force(colvarvalue(f_accumulated.as_vector() * func_grads[grad_index++], + (cvcs[i])->apply_force(colvarvalue(f.as_vector() * func_grads[grad_index++], cvcs[i]->value().type())); } + +#ifdef LEPTON + } else if (is_enabled(f_cv_custom_function)) { + + size_t r = 0; // index in the vector of variable references + size_t e = 0; // index of the gradient evaluator + + for (size_t i = 0; i < cvcs.size(); i++) { // gradient with respect to cvc i + cvm::matrix2d jacobian (x.size(), cvcs[i]->value().size()); + for (size_t j = 0; j < cvcs[i]->value().size(); j++) { // j-th element + for (size_t c = 0; c < x.size(); c++) { // derivative of scalar element c of the colvarvalue + + // Feed cvc values to the evaluator + for (size_t k = 0; k < cvcs.size(); k++) { // + for (size_t l = 0; l < cvcs[k]->value().size(); l++) { + *(grad_eval_var_refs[r++]) = cvcs[k]->value()[l]; + } + } + jacobian[c][j] = gradient_evaluators[e++]->evaluate(); + } + } + // cvc force is colvar force times colvar/cvc Jacobian + // (vector-matrix product) + (cvcs[i])->apply_force(colvarvalue(f.as_vector() * jacobian, + cvcs[i]->value().type())); + } +#endif + } else if (x.type() == colvarvalue::type_scalar) { for (i = 0; i < cvcs.size(); i++) { if (!cvcs[i]->is_enabled()) continue; - (cvcs[i])->apply_force(f_accumulated * (cvcs[i])->sup_coeff * + (cvcs[i])->apply_force(f * (cvcs[i])->sup_coeff * cvm::real((cvcs[i])->sup_np) * (std::pow((cvcs[i])->value().real_value, (cvcs[i])->sup_np-1)) ); @@ -1350,14 +1589,10 @@ void colvar::communicate_forces() for (i = 0; i < cvcs.size(); i++) { if (!cvcs[i]->is_enabled()) continue; - (cvcs[i])->apply_force(f_accumulated * (cvcs[i])->sup_coeff); + (cvcs[i])->apply_force(f * (cvcs[i])->sup_coeff); } } - // Accumulated forces have been applied, impulse-style - // Reset to start accumulating 
again - f_accumulated.reset(); - if (cvm::debug()) cvm::log("Done communicating forces from colvar \""+this->name+"\".\n"); } @@ -1394,7 +1629,7 @@ int colvar::update_cvc_flags() cvm::error("ERROR: All CVCs are disabled for colvar " + this->name +"\n"); return COLVARS_ERROR; } - cvc_flags.resize(0); + cvc_flags.clear(); } return COLVARS_OK; @@ -1744,16 +1979,15 @@ int colvar::write_output_files() cvm::log("Writing acf to file \""+acf_outfile+"\".\n"); cvm::backup_file(acf_outfile.c_str()); - cvm::ofstream acf_os(acf_outfile.c_str()); - if (! acf_os.is_open()) { - cvm::error("Cannot open file \""+acf_outfile+"\".\n", FILE_ERROR); - } - write_acf(acf_os); - acf_os.close(); + std::ostream *acf_os = cvm::proxy->output_stream(acf_outfile); + if (!acf_os) return cvm::get_error(); + write_acf(*acf_os); + cvm::proxy->close_output_stream(acf_outfile); } - if (runave_os.is_open()) { - runave_os.close(); + if (runave_os) { + cvm::proxy->close_output_stream(runave_outfile); + runave_os = NULL; } } return (cvm::get_error() ? 
COLVARS_ERROR : COLVARS_OK); @@ -2031,12 +2265,12 @@ void colvar::calc_runave() } runave_variance *= 1.0 / cvm::real(runave_length-1); - runave_os << std::setw(cvm::it_width) << cvm::step_relative() - << " " - << std::setprecision(cvm::cv_prec) << std::setw(cvm::cv_width) - << runave << " " - << std::setprecision(cvm::cv_prec) << std::setw(cvm::cv_width) - << std::sqrt(runave_variance) << "\n"; + *runave_os << std::setw(cvm::it_width) << cvm::step_relative() + << " " + << std::setprecision(cvm::cv_prec) << std::setw(cvm::cv_width) + << runave << " " + << std::setprecision(cvm::cv_prec) << std::setw(cvm::cv_width) + << std::sqrt(runave_variance) << "\n"; } history_add_value(runave_length, *x_history_p, x); diff --git a/lib/colvars/colvar.h b/lib/colvars/colvar.h index 0cbda450b8..dfa9e093a5 100644 --- a/lib/colvars/colvar.h +++ b/lib/colvars/colvar.h @@ -19,6 +19,9 @@ #include "colvarparse.h" #include "colvardeps.h" +#ifdef LEPTON +#include "Lepton.h" // for runtime custom expressions +#endif /// \brief A collective variable (main class); to be defined, it needs /// at least one object of a derived class of colvar::cvc; it @@ -85,11 +88,19 @@ public: static std::vector cv_features; /// \brief Implementation of the feature list accessor for colvar - std::vector &features() { + virtual const std::vector &features() + { + return cv_features; + } + virtual std::vector &modify_features() + { return cv_features; } - int refresh_deps(); + /// Implements possible actions to be carried out + /// when a given feature is enabled + /// This overloads the base function in colvardeps + void do_feature_side_effects(int id); /// List of biases that depend on this colvar std::vector biases; @@ -235,6 +246,9 @@ public: /// Parse the CVC configuration and allocate their data int init_components(std::string const &conf); + /// Parse parameters for custom function with Lepton + int init_custom_function(std::string const &conf); + /// Init defaults for grid options int 
init_grid_parameters(std::string const &conf); @@ -334,24 +348,13 @@ protected: /// Sum of square coefficients for active cvcs cvm::real active_cvc_square_norm; - /// Time step multiplier (for coarse-time-step colvars) - /// Colvar will only be calculated at those times; biases may ignore the information and - /// always update their own forces (which is typically inexpensive) especially if - /// they rely on other colvars. In this case, the colvar will accumulate forces applied between - /// colvar updates. Alternately they may use it to calculate "impulse" biasing - /// forces at longer intervals. Impulse forces must be multiplied by the timestep factor. - int time_step_factor; - - /// Biasing force collected between updates, to be applied at next update for coarse-time-step colvars - colvarvalue f_accumulated; + /// \brief Absolute timestep number when this colvar was last updated + int prev_timestep; public: /// \brief Return the number of CVC objects with an active flag (as set by update_cvc_flags) inline size_t num_active_cvcs() const { return n_active_cvcs; } - /// \brief returns time_step_factor - inline int get_time_step_factor() const {return time_step_factor;} - /// \brief Use the internal metrics (as from \link cvc /// \endlink objects) to calculate square distances and gradients /// @@ -484,7 +487,9 @@ protected: /// Timesteps to skip between two values in the running average series size_t runave_stride; /// Name of the file to write the running average - cvm::ofstream runave_os; + std::string runave_outfile; + /// File to write the running average + std::ostream *runave_os; /// Current value of the running average colvarvalue runave; /// Current value of the square deviation from the running average @@ -508,6 +513,8 @@ public: class distance; class distance_z; class distance_xy; + class polar_theta; + class polar_phi; class distance_inv; class distance_pairs; class angle; @@ -556,6 +563,21 @@ private: /// when using scriptedFunction std::vector 
sorted_cvc_values; +#ifdef LEPTON + /// Vector of evaluators for custom functions using Lepton + std::vector value_evaluators; + + /// Vector of evaluators for gradients of custom functions + std::vector gradient_evaluators; + + /// Vector of references to cvc values to be passed to Lepton evaluators + std::vector value_eval_var_refs; + std::vector grad_eval_var_refs; + + /// Unused value that is written to when a variable simplifies out of a Lepton expression + double dev_null; +#endif + public: /// \brief Sorted array of (zero-based) IDs for all atoms involved std::vector atom_ids; diff --git a/lib/colvars/colvaratoms.cpp b/lib/colvars/colvaratoms.cpp index 32cfadf3b6..9b4a922e3f 100644 --- a/lib/colvars/colvaratoms.cpp +++ b/lib/colvars/colvaratoms.cpp @@ -67,18 +67,16 @@ cvm::atom::~atom() -// TODO change this arrangement -// Note: "conf" is the configuration of the cvc who is using this atom group; -// "key" is the name of the atom group (e.g. "atoms", "group1", "group2", ...) -cvm::atom_group::atom_group(std::string const &conf, - char const *key_in) +cvm::atom_group::atom_group() +{ + init(); +} + + +cvm::atom_group::atom_group(char const *key_in) { key = key_in; - cvm::log("Defining atom group \"" + key + "\".\n"); init(); - // real work is done by parse - parse(conf); - setup(); } @@ -90,12 +88,6 @@ cvm::atom_group::atom_group(std::vector const &atoms_in) } -cvm::atom_group::atom_group() -{ - init(); -} - - cvm::atom_group::~atom_group() { if (is_enabled(f_ag_scalable) && !b_dummy) { @@ -180,7 +172,7 @@ int cvm::atom_group::init() { if (!key.size()) key = "unnamed"; description = "atom group " + key; - // These will be overwritten by parse(), if initializing from a config string + // These may be overwritten by parse(), if a name is provided atoms.clear(); @@ -193,7 +185,6 @@ int cvm::atom_group::init() b_center = false; b_rotate = false; b_user_defined_fit = false; - b_fit_gradients = false; fitting_group = NULL; noforce = false; @@ -265,34 +256,10 @@ 
void cvm::atom_group::update_total_charge() } -int cvm::atom_group::parse(std::string const &conf) +int cvm::atom_group::parse(std::string const &group_conf) { - std::string group_conf; - - // TODO move this to the cvc class constructor/init - - // save_delimiters is set to false for this call, because "conf" is - // not the config string of this group, but of its parent object - // (which has already taken care of the delimiters) - save_delimiters = false; - key_lookup(conf, key.c_str(), group_conf, dummy_pos); - // restoring the normal value, because we do want keywords checked - // inside "group_conf" - save_delimiters = true; - - if (group_conf.size() == 0) { - cvm::error("Error: atom group \""+key+ - "\" is set, but has no definition.\n", - INPUT_ERROR); - return COLVARS_ERROR; - } - - cvm::increase_depth(); - cvm::log("Initializing atom group \""+key+"\".\n"); - description = "atom group " + key; - // whether or not to include messages in the log // colvarparse::Parse_Mode mode = parse_silent; // { @@ -304,10 +271,53 @@ int cvm::atom_group::parse(std::string const &conf) int parse_error = COLVARS_OK; + // Optional group name will let other groups reuse atom definition + if (get_keyval(group_conf, "name", name)) { + if ((cvm::atom_group_by_name(this->name) != NULL) && + (cvm::atom_group_by_name(this->name) != this)) { + cvm::error("Error: this atom group cannot have the same name, \""+this->name+ + "\", as another atom group.\n", + INPUT_ERROR); + return INPUT_ERROR; + } + cvm::main()->register_named_atom_group(this); + description = "atom group " + name; + } + + // We need to know about fitting to decide whether the group is scalable + // and we need to know about scalability before adding atoms + bool b_defined_center = get_keyval(group_conf, "centerReference", b_center, false); + bool b_defined_rotate = get_keyval(group_conf, "rotateReference", b_rotate, false); + // is the user setting explicit options? 
+ b_user_defined_fit = b_defined_center || b_defined_rotate; + + if (is_available(f_ag_scalable_com) && !b_rotate && !b_center) { + enable(f_ag_scalable_com); + enable(f_ag_scalable); + } + + { + std::string atoms_of = ""; + if (get_keyval(group_conf, "atomsOfGroup", atoms_of)) { + atom_group * ag = atom_group_by_name(atoms_of); + if (ag == NULL) { + cvm::error("Error: cannot find atom group with name " + atoms_of + ".\n"); + return COLVARS_ERROR; + } + parse_error |= add_atoms_of_group(ag); + } + } + +// if (get_keyval(group_conf, "copyOfGroup", source)) { +// // Goal: Initialize this as a full copy +// // for this we'll need an atom_group copy constructor +// return COLVARS_OK; +// } + { std::string numbers_conf = ""; size_t pos = 0; - while (key_lookup(group_conf, "atomNumbers", numbers_conf, pos)) { + while (key_lookup(group_conf, "atomNumbers", &numbers_conf, &pos)) { parse_error |= add_atom_numbers(numbers_conf); numbers_conf = ""; } @@ -325,7 +335,7 @@ int cvm::atom_group::parse(std::string const &conf) std::string range_conf = ""; size_t pos = 0; while (key_lookup(group_conf, "atomNumbersRange", - range_conf, pos)) { + &range_conf, &pos)) { parse_error |= add_atom_numbers_range(range_conf); range_conf = ""; } @@ -347,7 +357,7 @@ int cvm::atom_group::parse(std::string const &conf) size_t range_count = 0; psii = psf_segids.begin(); while (key_lookup(group_conf, "atomNameResidueRange", - range_conf, pos)) { + &range_conf, &pos)) { range_count++; if (psf_segids.size() && (range_count > psf_segids.size())) { cvm::error("Error: more instances of \"atomNameResidueRange\" than " @@ -415,14 +425,9 @@ int cvm::atom_group::parse(std::string const &conf) } } - // We need to know the fitting options to decide whether the group is scalable + // Now that atoms are defined we can parse the detailed fitting options parse_error |= parse_fitting_options(group_conf); - if (is_available(f_ag_scalable_com) && !b_rotate && !b_center) { - enable(f_ag_scalable_com); - 
enable(f_ag_scalable); - } - if (is_enabled(f_ag_scalable) && !b_dummy) { cvm::log("Enabling scalable calculation for group \""+this->key+"\".\n"); index = (cvm::proxy)->init_atom_group(atoms_ids); @@ -431,13 +436,6 @@ int cvm::atom_group::parse(std::string const &conf) bool b_print_atom_ids = false; get_keyval(group_conf, "printAtomIDs", b_print_atom_ids, false, colvarparse::parse_silent); - // TODO move this to colvarparse object - check_keywords(group_conf, key.c_str()); - if (cvm::get_error()) { - cvm::error("Error setting up atom group \""+key+"\"."); - return COLVARS_ERROR; - } - // Calculate all required properties (such as total mass) setup(); @@ -446,7 +444,7 @@ int cvm::atom_group::parse(std::string const &conf) cvm::log("Atom group \""+key+"\" defined, "+ cvm::to_str(atoms_ids.size())+" atoms initialized: total mass = "+ - cvm::to_str(total_mass)+", total charge = "+ + cvm::to_str(total_mass)+", total charge = "+ cvm::to_str(total_charge)+".\n"); if (b_print_atom_ids) { @@ -454,12 +452,41 @@ int cvm::atom_group::parse(std::string const &conf) cvm::log(print_atom_ids()); } - cvm::decrease_depth(); - return (cvm::get_error() ? 
COLVARS_ERROR : COLVARS_OK); } +int cvm::atom_group::add_atoms_of_group(atom_group const * ag) +{ + std::vector const &source_ids = ag->atoms_ids; + + if (source_ids.size()) { + atoms_ids.reserve(atoms_ids.size()+source_ids.size()); + + if (is_enabled(f_ag_scalable)) { + for (size_t i = 0; i < source_ids.size(); i++) { + add_atom_id(source_ids[i]); + } + } else { + atoms.reserve(atoms.size()+source_ids.size()); + for (size_t i = 0; i < source_ids.size(); i++) { + // We could use the atom copy constructor, but only if the source + // group is not scalable - whereas this works in both cases + // atom constructor expects 1-based atom number + add_atom(cvm::atom(source_ids[i] + 1)); + } + } + + if (cvm::get_error()) return COLVARS_ERROR; + } else { + cvm::error("Error: source atom group contains no atoms\".\n", INPUT_ERROR); + return COLVARS_ERROR; + } + + return COLVARS_OK; +} + + int cvm::atom_group::add_atom_numbers(std::string const &numbers_conf) { std::vector atom_indexes; @@ -629,13 +656,6 @@ std::string const cvm::atom_group::print_atom_ids() const int cvm::atom_group::parse_fitting_options(std::string const &group_conf) { - bool b_defined_center = get_keyval(group_conf, "centerReference", b_center, false); - bool b_defined_rotate = get_keyval(group_conf, "rotateReference", b_rotate, false); - // is the user setting explicit options? 
- b_user_defined_fit = b_defined_center || b_defined_rotate; - - get_keyval(group_conf, "enableFitGradients", b_fit_gradients, true); - if (b_center || b_rotate) { if (b_dummy) @@ -643,27 +663,31 @@ int cvm::atom_group::parse_fitting_options(std::string const &group_conf) "cannot be defined for a dummy atom.\n"); bool b_ref_pos_group = false; - if (key_lookup(group_conf, "refPositionsGroup")) { + std::string fitting_group_conf; + if (key_lookup(group_conf, "refPositionsGroup", &fitting_group_conf)) { b_ref_pos_group = true; cvm::log("Warning: keyword \"refPositionsGroup\" is deprecated: please use \"fittingGroup\" instead.\n"); } - if (b_ref_pos_group || key_lookup(group_conf, "fittingGroup")) { + if (b_ref_pos_group || key_lookup(group_conf, "fittingGroup", &fitting_group_conf)) { // instead of this group, define another group to compute the fit if (fitting_group) { cvm::error("Error: the atom group \""+ key+"\" has already a reference group " "for the rototranslational fit, which was communicated by the " "colvar component. You should not use fittingGroup " - "in this case.\n"); + "in this case.\n", INPUT_ERROR); + return INPUT_ERROR; } cvm::log("Within atom group \""+key+"\":\n"); - fitting_group = b_ref_pos_group ? - new atom_group(group_conf, "refPositionsGroup") : - new atom_group(group_conf, "fittingGroup"); - - // regardless of the configuration, fit gradients must be calculated by fittingGroup - fitting_group->b_fit_gradients = this->b_fit_gradients; + fitting_group = new atom_group("fittingGroup"); + if (fitting_group->parse(fitting_group_conf) == COLVARS_OK) { + fitting_group->check_keywords(fitting_group_conf, "fittingGroup"); + if (cvm::get_error()) { + cvm::error("Error setting up atom group \"fittingGroup\".", INPUT_ERROR); + return INPUT_ERROR; + } + } } atom_group *group_for_fit = fitting_group ? 
fitting_group : this; @@ -720,11 +744,6 @@ int cvm::atom_group::parse_fitting_options(std::string const &group_conf) return COLVARS_ERROR; } - if (b_fit_gradients) { - group_for_fit->fit_gradients.assign(group_for_fit->size(), cvm::atom_pos(0.0, 0.0, 0.0)); - rot.request_group1_gradients(group_for_fit->size()); - } - if (b_rotate && !noforce) { cvm::log("Warning: atom group \""+key+ "\" will be aligned to a fixed orientation given by the reference positions provided. " @@ -737,10 +756,37 @@ int cvm::atom_group::parse_fitting_options(std::string const &group_conf) } } + // Enable fit gradient calculation only if necessary, and not disabled by the user + // This must happen after fitting group is defined so that side-effects are performed + // properly (ie. allocating fitting group gradients) + { + bool b_fit_gradients; + get_keyval(group_conf, "enableFitGradients", b_fit_gradients, true); + + if (b_fit_gradients && (b_center || b_rotate)) { + enable(f_ag_fit_gradients); + } + } + return COLVARS_OK; } +void cvm::atom_group::do_feature_side_effects(int id) +{ + // If enabled features are changed upstream, the features below should be refreshed + switch (id) { + case f_ag_fit_gradients: + if (b_center || b_rotate) { + atom_group *group_for_fit = fitting_group ? fitting_group : this; + group_for_fit->fit_gradients.assign(group_for_fit->size(), cvm::atom_pos(0.0, 0.0, 0.0)); + rot.request_group1_gradients(group_for_fit->size()); + } + break; + } +} + + int cvm::atom_group::create_sorted_ids(void) { // Only do the work if the vector is not yet populated @@ -1000,12 +1046,12 @@ void cvm::atom_group::set_weighted_gradient(cvm::rvector const &grad) void cvm::atom_group::calc_fit_gradients() { - if (b_dummy) return; + if (b_dummy || ! is_enabled(f_ag_fit_gradients)) return; if (cvm::debug()) cvm::log("Calculating fit gradients.\n"); - atom_group *group_for_fit = fitting_group ? fitting_group : this; + cvm::atom_group *group_for_fit = fitting_group ? 
fitting_group : this; if (b_center) { // add the center of geometry contribution to the gradients @@ -1190,7 +1236,7 @@ void cvm::atom_group::apply_colvar_force(cvm::real const &force) } } - if ((b_center || b_rotate) && b_fit_gradients) { + if ((b_center || b_rotate) && is_enabled(f_ag_fit_gradients)) { atom_group *group_for_fit = fitting_group ? fitting_group : this; diff --git a/lib/colvars/colvaratoms.h b/lib/colvars/colvaratoms.h index 85f6212951..6113fb38a9 100644 --- a/lib/colvars/colvaratoms.h +++ b/lib/colvars/colvaratoms.h @@ -150,12 +150,21 @@ class colvarmodule::atom_group { public: - /// \brief Initialize the group by looking up its configuration - /// string in conf and parsing it; this is actually done by parse(), - /// which is a member function so that a group can be initialized - /// also after construction - atom_group(std::string const &conf, - char const *key); + + /// \brief Default constructor + atom_group(); + + /// \brief Create a group object, assign a name to it + atom_group(char const *key); + + /// \brief Initialize the group after a (temporary) vector of atoms + atom_group(std::vector const &atoms_in); + + /// \brief Destructor + ~atom_group(); + + /// \brief Optional name to reuse properties of this in other groups + std::string name; /// \brief Keyword used to define the group // TODO Make this field part of the data structures that link a group to a CVC @@ -172,15 +181,13 @@ public: int parse(std::string const &conf); int add_atom_numbers(std::string const &numbers_conf); + int add_atoms_of_group(atom_group const * ag); int add_index_group(std::string const &index_group_name); int add_atom_numbers_range(std::string const &range_conf); int add_atom_name_residue_range(std::string const &psf_segid, std::string const &range_conf); int parse_fitting_options(std::string const &group_conf); - /// \brief Initialize the group after a (temporary) vector of atoms - atom_group(std::vector const &atoms_in); - /// \brief Add an atom object to 
this group int add_atom(cvm::atom const &a); @@ -199,15 +206,14 @@ public: static std::vector ag_features; /// \brief Implementation of the feature list accessor for atom group - virtual std::vector &features() { + virtual const std::vector &features() + { + return ag_features; + } + virtual std::vector &modify_features() + { return ag_features; } - - /// \brief Default constructor - atom_group(); - - /// \brief Destructor - ~atom_group(); protected: @@ -294,10 +300,6 @@ public: /// cvc's (eg rmsd, eigenvector) will not override the user's choice bool b_user_defined_fit; - /// \brief Whether or not the derivatives of the roto-translation - /// should be included when calculating the colvar's gradients (default: yes) - bool b_fit_gradients; - /// \brief use reference coordinates for b_center or b_rotate std::vector ref_pos; @@ -464,6 +466,10 @@ public: /// apply_colvar_force() once that is implemented for non-scalar values void apply_force(cvm::rvector const &force); + /// Implements possible actions to be carried out + /// when a given feature is enabled + /// This overloads the base function in colvardeps + void do_feature_side_effects(int id); }; diff --git a/lib/colvars/colvarbias.cpp b/lib/colvars/colvarbias.cpp index 3779c82aa3..636727ca39 100644 --- a/lib/colvars/colvarbias.cpp +++ b/lib/colvars/colvarbias.cpp @@ -23,9 +23,7 @@ colvarbias::colvarbias(char const *key) b_output_energy = false; reset(); state_file_step = 0; - - // Start in active state by default - enable(f_cvb_active); + description = "uninitialized " + cvm::to_str(key) + " bias"; } @@ -74,7 +72,6 @@ int colvarbias::init(std::string const &conf) cvm::error("Error: no collective variables specified.\n", INPUT_ERROR); return INPUT_ERROR; } - } else { cvm::log("Reinitializing bias \""+name+"\".\n"); } @@ -83,6 +80,16 @@ int colvarbias::init(std::string const &conf) get_keyval(conf, "outputEnergy", b_output_energy, b_output_energy); + get_keyval(conf, "timeStepFactor", time_step_factor, 1); + if 
(time_step_factor < 1) { + cvm::error("Error: timeStepFactor must be 1 or greater.\n"); + return COLVARS_ERROR; + } + + // Now that children are defined, we can solve dependencies + enable(f_cvb_active); + if (cvm::debug()) print_state(); + return COLVARS_OK; } @@ -110,6 +117,8 @@ colvarbias::~colvarbias() int colvarbias::clear() { + free_children_deps(); + // Remove references to this bias from colvars for (std::vector::iterator cvi = colvars.begin(); cvi != colvars.end(); @@ -200,7 +209,12 @@ void colvarbias::communicate_forces() cvm::log("Communicating a force to colvar \""+ variables(i)->name+"\".\n"); } - variables(i)->add_bias_force(colvar_forces[i]); + // Impulse-style multiple timestep + // Note that biases with different values of time_step_factor + // may send forces to the same colvar + // which is why rescaling has to happen now: the colvar is not + // aware of this bias' time_step_factor + variables(i)->add_bias_force(cvm::real(time_step_factor) * colvar_forces[i]); } } @@ -370,6 +384,7 @@ std::ostream & colvarbias::write_traj(std::ostream &os) os << " "; if (b_output_energy) os << " " + << std::setprecision(cvm::en_prec) << std::setw(cvm::en_width) << bias_energy; return os; } diff --git a/lib/colvars/colvarbias.h b/lib/colvars/colvarbias.h index 6d5776d3db..a147cd3210 100644 --- a/lib/colvars/colvarbias.h +++ b/lib/colvars/colvarbias.h @@ -56,7 +56,7 @@ public: /// \brief Compute the energy of the bias with alternative values of the /// collective variables (suitable for bias exchange) - virtual int calc_energy(std::vector const &values = + virtual int calc_energy(std::vector const &values = std::vector(0)) { cvm::error("Error: calc_energy() not implemented.\n", COLVARS_NOT_IMPLEMENTED); @@ -175,7 +175,11 @@ public: static std::vector cvb_features; /// \brief Implementation of the feature list accessor for colvarbias - virtual std::vector &features() + virtual const std::vector &features() + { + return cvb_features; + } + virtual std::vector 
&modify_features() { return cvb_features; } diff --git a/lib/colvars/colvarbias_abf.cpp b/lib/colvars/colvarbias_abf.cpp index d039004f09..a96fc21d64 100644 --- a/lib/colvars/colvarbias_abf.cpp +++ b/lib/colvars/colvarbias_abf.cpp @@ -71,10 +71,17 @@ int colvarbias_abf::init(std::string const &conf) // shared ABF get_keyval(conf, "shared", shared_on, false); if (shared_on) { - if (!cvm::replica_enabled() || cvm::replica_num() <= 1) + if (!cvm::replica_enabled() || cvm::replica_num() <= 1) { cvm::error("Error: shared ABF requires more than one replica."); - else - cvm::log("shared ABF will be applied among "+ cvm::to_str(cvm::replica_num()) + " replicas.\n"); + return COLVARS_ERROR; + } + cvm::log("shared ABF will be applied among "+ cvm::to_str(cvm::replica_num()) + " replicas.\n"); + if (cvm::proxy->smp_enabled() == COLVARS_OK) { + cvm::error("Error: shared ABF is currently not available with SMP parallelism; " + "please set \"SMP off\" at the top of the Colvars configuration file.\n", + COLVARS_NOT_IMPLEMENTED); + return COLVARS_NOT_IMPLEMENTED; + } // If shared_freq is not set, we default to output_freq get_keyval(conf, "sharedFreq", shared_freq, output_freq); @@ -84,11 +91,11 @@ int colvarbias_abf::init(std::string const &conf) if (colvars.size() == 0) { cvm::error("Error: no collective variables specified for the ABF bias.\n"); + return COLVARS_ERROR; } if (update_bias) { - // Request calculation of total force (which also checks for availability) - // TODO - change this to a dependency - needs ABF-specific features + // Request calculation of total force if(enable(f_cvb_get_total_force)) return cvm::get_error(); } @@ -108,6 +115,16 @@ int colvarbias_abf::init(std::string const &conf) if (colvars[i]->is_enabled(f_cv_extended_Lagrangian)) b_extended = true; + // Cannot mix and match coarse time steps with ABF because it gives + // wrong total force averages - total force needs to be averaged over + // every time step + if (colvars[i]->get_time_step_factor() != 
time_step_factor) { + cvm::error("Error: " + colvars[i]->description + " has a value of timeStepFactor (" + + cvm::to_str(colvars[i]->get_time_step_factor()) + ") different from that of " + + description + " (" + cvm::to_str(time_step_factor) + ").\n"); + return COLVARS_ERROR; + } + // Here we could check for orthogonality of the Cartesian coordinates // and make it just a warning if some parameter is set? } @@ -282,12 +299,12 @@ int colvarbias_abf::update() // Compute and apply the new bias, if applicable if (is_enabled(f_cvb_apply_force) && samples->index_ok(bin)) { - size_t count = samples->value(bin); - cvm::real fact = 1.0; + size_t count = samples->value(bin); + cvm::real fact = 1.0; // Factor that ensures smooth introduction of the force if ( count < full_samples ) { - fact = ( count < min_samples) ? 0.0 : + fact = (count < min_samples) ? 0.0 : (cvm::real(count - min_samples)) / (cvm::real(full_samples - min_samples)); } @@ -434,62 +451,57 @@ void colvarbias_abf::write_gradients_samples(const std::string &prefix, bool app std::string gradients_out_name = prefix + ".grad"; std::ios::openmode mode = (append ? 
std::ios::app : std::ios::out); - cvm::ofstream samples_os; - cvm::ofstream gradients_os; - - if (!append) cvm::backup_file(samples_out_name.c_str()); - samples_os.open(samples_out_name.c_str(), mode); - if (!samples_os.is_open()) { + std::ostream *samples_os = + cvm::proxy->output_stream(samples_out_name, mode); + if (!samples_os) { cvm::error("Error opening ABF samples file " + samples_out_name + " for writing"); } - samples->write_multicol(samples_os); - samples_os.close(); + samples->write_multicol(*samples_os); + cvm::proxy->close_output_stream(samples_out_name); - if (!append) cvm::backup_file(gradients_out_name.c_str()); - gradients_os.open(gradients_out_name.c_str(), mode); - if (!gradients_os.is_open()) { + std::ostream *gradients_os = + cvm::proxy->output_stream(gradients_out_name, mode); + if (!gradients_os) { cvm::error("Error opening ABF gradient file " + gradients_out_name + " for writing"); } - gradients->write_multicol(gradients_os); - gradients_os.close(); + gradients->write_multicol(*gradients_os); + cvm::proxy->close_output_stream(gradients_out_name); if (colvars.size() == 1) { - std::string pmf_out_name = prefix + ".pmf"; - if (!append) cvm::backup_file(pmf_out_name.c_str()); - cvm::ofstream pmf_os; // Do numerical integration and output a PMF - pmf_os.open(pmf_out_name.c_str(), mode); - if (!pmf_os.is_open()) cvm::error("Error opening pmf file " + pmf_out_name + " for writing"); - gradients->write_1D_integral(pmf_os); - pmf_os << std::endl; - pmf_os.close(); + std::string pmf_out_name = prefix + ".pmf"; + std::ostream *pmf_os = cvm::proxy->output_stream(pmf_out_name, mode); + if (!pmf_os) { + cvm::error("Error opening pmf file " + pmf_out_name + " for writing"); + } + gradients->write_1D_integral(*pmf_os); + *pmf_os << std::endl; + cvm::proxy->close_output_stream(pmf_out_name); } if (z_gradients) { // Write eABF-related quantities std::string z_samples_out_name = prefix + ".zcount"; - cvm::ofstream z_samples_os; - if (!append) 
cvm::backup_file(z_samples_out_name.c_str()); - z_samples_os.open(z_samples_out_name.c_str(), mode); - if (!z_samples_os.is_open()) { + std::ostream *z_samples_os = + cvm::proxy->output_stream(z_samples_out_name, mode); + if (!z_samples_os) { cvm::error("Error opening eABF z-histogram file " + z_samples_out_name + " for writing"); } - z_samples->write_multicol(z_samples_os); - z_samples_os.close(); + z_samples->write_multicol(*z_samples_os); + cvm::proxy->close_output_stream(z_samples_out_name); if (b_czar_window_file) { std::string z_gradients_out_name = prefix + ".zgrad"; - cvm::ofstream z_gradients_os; - if (!append) cvm::backup_file(z_gradients_out_name.c_str()); - z_gradients_os.open(z_gradients_out_name.c_str(), mode); - if (!z_gradients_os.is_open()) { + std::ostream *z_gradients_os = + cvm::proxy->output_stream(z_gradients_out_name, mode); + if (!z_gradients_os) { cvm::error("Error opening eABF z-gradient file " + z_gradients_out_name + " for writing"); } - z_gradients->write_multicol(z_gradients_os); - z_gradients_os.close(); + z_gradients->write_multicol(*z_gradients_os); + cvm::proxy->close_output_stream(z_gradients_out_name); } // Calculate CZAR estimator of gradients @@ -503,26 +515,24 @@ void colvarbias_abf::write_gradients_samples(const std::string &prefix, bool app } std::string czar_gradients_out_name = prefix + ".czar.grad"; - cvm::ofstream czar_gradients_os; - if (!append) cvm::backup_file(czar_gradients_out_name.c_str()); - czar_gradients_os.open(czar_gradients_out_name.c_str(), mode); - if (!czar_gradients_os.is_open()) { + std::ostream *czar_gradients_os = + cvm::proxy->output_stream(czar_gradients_out_name, mode); + if (!czar_gradients_os) { cvm::error("Error opening CZAR gradient file " + czar_gradients_out_name + " for writing"); } - czar_gradients->write_multicol(czar_gradients_os); - czar_gradients_os.close(); + czar_gradients->write_multicol(*czar_gradients_os); + cvm::proxy->close_output_stream(czar_gradients_out_name); if 
(colvars.size() == 1) { - std::string czar_pmf_out_name = prefix + ".czar.pmf"; - if (!append) cvm::backup_file(czar_pmf_out_name.c_str()); - cvm::ofstream czar_pmf_os; // Do numerical integration and output a PMF - czar_pmf_os.open(czar_pmf_out_name.c_str(), mode); - if (!czar_pmf_os.is_open()) cvm::error("Error opening CZAR pmf file " + czar_pmf_out_name + " for writing"); - czar_gradients->write_1D_integral(czar_pmf_os); - czar_pmf_os << std::endl; - czar_pmf_os.close(); + std::string czar_pmf_out_name = prefix + ".czar.pmf"; + std::ostream *czar_pmf_os = + cvm::proxy->output_stream(czar_pmf_out_name, mode); + if (!czar_pmf_os) cvm::error("Error opening CZAR pmf file " + czar_pmf_out_name + " for writing"); + czar_gradients->write_1D_integral(*czar_pmf_os); + *czar_pmf_os << std::endl; + cvm::proxy->close_output_stream(czar_pmf_out_name); } } return; @@ -570,9 +580,13 @@ void colvarbias_abf::read_gradients_samples() is.clear(); is.open(gradients_in_name.c_str()); - if (!is.is_open()) cvm::error("Error opening ABF gradient file " + gradients_in_name + " for reading"); - gradients->read_multicol(is, true); - is.close(); + if (!is.is_open()) { + cvm::error("Error opening ABF gradient file " + + gradients_in_name + " for reading", INPUT_ERROR); + } else { + gradients->read_multicol(is, true); + is.close(); + } if (z_gradients) { // Read eABF z-averaged data for CZAR diff --git a/lib/colvars/colvarbias_alb.cpp b/lib/colvars/colvarbias_alb.cpp index d096ac3daf..124a15c5da 100644 --- a/lib/colvars/colvarbias_alb.cpp +++ b/lib/colvars/colvarbias_alb.cpp @@ -156,8 +156,8 @@ int colvarbias_alb::update() colvars[i], colvar_centers[i]); bias_energy += restraint_potential(restraint_convert_k(current_coupling[i], colvars[i]->width), - colvars[i], - colvar_centers[i]); + colvars[i], + colvar_centers[i]); if (!b_equilibration) { //Welford, West, and Hanso online variance method @@ -169,26 +169,26 @@ int colvarbias_alb::update() } else { //check if we've reached the setpoint if 
(coupling_rate[i] == 0 || pow(current_coupling[i] - set_coupling[i],2) < pow(coupling_rate[i],2)) { - finished_equil_flag &= 1; //we continue equilibrating as long as we haven't reached all the set points + finished_equil_flag &= 1; //we continue equilibrating as long as we haven't reached all the set points } else { - current_coupling[i] += coupling_rate[i]; - finished_equil_flag = 0; + current_coupling[i] += coupling_rate[i]; + finished_equil_flag = 0; } //update max_coupling_range if (!b_hard_coupling_range && fabs(current_coupling[i]) > max_coupling_range[i]) { - std::ostringstream logStream; - logStream << "Coupling constant for " - << colvars[i]->name - << " has exceeded coupling range of " - << max_coupling_range[i] - << ".\n"; + std::ostringstream logStream; + logStream << "Coupling constant for " + << colvars[i]->name + << " has exceeded coupling range of " + << max_coupling_range[i] + << ".\n"; - max_coupling_range[i] *= 1.25; - logStream << "Expanding coupling range to " << max_coupling_range[i] << ".\n"; - cvm::log(logStream.str()); + max_coupling_range[i] *= 1.25; + logStream << "Expanding coupling range to " << max_coupling_range[i] << ".\n"; + cvm::log(logStream.str()); } @@ -214,23 +214,23 @@ int colvarbias_alb::update() temp = 2. 
* (means[i] / (static_cast (colvar_centers[i])) - 1) * ssd[i] / (update_calls - 1); if (cvm::temperature() > 0) - step_size = temp / (cvm::temperature() * cvm::boltzmann()); + step_size = temp / (cvm::temperature() * cvm::boltzmann()); else - step_size = temp / cvm::boltzmann(); + step_size = temp / cvm::boltzmann(); means[i] = 0; ssd[i] = 0; //stochastic if we do that update or not if (colvars.size() == 1 || rand() < RAND_MAX / ((int) colvars.size())) { - coupling_accum[i] += step_size * step_size; - current_coupling[i] = set_coupling[i]; - set_coupling[i] += max_coupling_range[i] / sqrt(coupling_accum[i]) * step_size; - coupling_rate[i] = (set_coupling[i] - current_coupling[i]) / update_freq; - //set to the minimum rate and then put the sign back on it - coupling_rate[i] = copysign(fmin(fabs(coupling_rate[i]), max_coupling_rate[i]), coupling_rate[i]); + coupling_accum[i] += step_size * step_size; + current_coupling[i] = set_coupling[i]; + set_coupling[i] += max_coupling_range[i] / sqrt(coupling_accum[i]) * step_size; + coupling_rate[i] = (set_coupling[i] - current_coupling[i]) / update_freq; + //set to the minimum rate and then put the sign back on it + coupling_rate[i] = copysign(fmin(fabs(coupling_rate[i]), max_coupling_rate[i]), coupling_rate[i]); } else { - coupling_rate[i] = 0; + coupling_rate[i] = 0; } } @@ -339,14 +339,14 @@ std::ostream & colvarbias_alb::write_traj_label(std::ostream &os) if (b_output_coupling) for (size_t i = 0; i < current_coupling.size(); i++) { os << " ForceConst_" << i - <name, cvm::cv_width - 4); + << cvm::wrap_string(colvars[i]->name, cvm::cv_width - 4); } if (b_output_centers) @@ -372,8 +372,8 @@ std::ostream & colvarbias_alb::write_traj(std::ostream &os) if (b_output_coupling) for (size_t i = 0; i < current_coupling.size(); i++) { os << " " - << std::setprecision(cvm::en_prec) << std::setw(cvm::en_width) - << current_coupling[i]; + << std::setprecision(cvm::en_prec) << std::setw(cvm::en_width) + << current_coupling[i]; } @@ 
-387,8 +387,8 @@ std::ostream & colvarbias_alb::write_traj(std::ostream &os) if (b_output_grad) for (size_t i = 0; i < means.size(); i++) { os << " " - << std::setprecision(cvm::cv_prec) << std::setw(cvm::cv_width) - << -2. * (means[i] / (static_cast (colvar_centers[i])) - 1) * ssd[i] / (fmax(update_calls,2) - 1); + << std::setprecision(cvm::cv_prec) << std::setw(cvm::cv_width) + << -2. * (means[i] / (static_cast (colvar_centers[i])) - 1) * ssd[i] / (fmax(update_calls,2) - 1); } diff --git a/lib/colvars/colvarbias_histogram.cpp b/lib/colvars/colvarbias_histogram.cpp index 502a7455b1..0722e6384d 100644 --- a/lib/colvars/colvarbias_histogram.cpp +++ b/lib/colvars/colvarbias_histogram.cpp @@ -86,8 +86,9 @@ int colvarbias_histogram::init(std::string const &conf) { std::string grid_conf; - if (key_lookup(conf, "histogramGrid", grid_conf)) { + if (key_lookup(conf, "histogramGrid", &grid_conf)) { grid->parse_params(grid_conf); + grid->check_keywords(grid_conf, "histogramGrid"); } } @@ -176,26 +177,27 @@ int colvarbias_histogram::write_output_files() if (out_name.size()) { cvm::log("Writing the histogram file \""+out_name+"\".\n"); cvm::backup_file(out_name.c_str()); - cvm::ofstream grid_os(out_name.c_str()); - if (!grid_os.is_open()) { - cvm::error("Error opening histogram file " + out_name + " for writing.\n", FILE_ERROR); + std::ostream *grid_os = cvm::proxy->output_stream(out_name); + if (!grid_os) { + return cvm::error("Error opening histogram file "+out_name+ + " for writing.\n", FILE_ERROR); } - // TODO add return code here - grid->write_multicol(grid_os); - grid_os.close(); + grid->write_multicol(*grid_os); + cvm::proxy->close_output_stream(out_name); } if (out_name_dx.size()) { cvm::log("Writing the histogram file \""+out_name_dx+"\".\n"); cvm::backup_file(out_name_dx.c_str()); - cvm::ofstream grid_os(out_name_dx.c_str()); - if (!grid_os.is_open()) { - cvm::error("Error opening histogram file " + out_name_dx + " for writing.\n", FILE_ERROR); + std::ostream 
*grid_os = cvm::proxy->output_stream(out_name_dx); + if (!grid_os) { + return cvm::error("Error opening histogram file "+out_name_dx+ + " for writing.\n", FILE_ERROR); } - // TODO add return code here - grid->write_opendx(grid_os); - grid_os.close(); + grid->write_opendx(*grid_os); + cvm::proxy->close_output_stream(out_name_dx); } + return COLVARS_OK; } diff --git a/lib/colvars/colvarbias_meta.cpp b/lib/colvars/colvarbias_meta.cpp index b0acfe974a..66806fc9fc 100644 --- a/lib/colvars/colvarbias_meta.cpp +++ b/lib/colvars/colvarbias_meta.cpp @@ -36,6 +36,8 @@ colvarbias_meta::colvarbias_meta(char const *key) : colvarbias(key) { new_hills_begin = hills.end(); + hills_traj_os = NULL; + replica_hills_os = NULL; } @@ -163,7 +165,6 @@ int colvarbias_meta::init(std::string const &conf) cvm::log("Done initializing the metadynamics bias \""+this->name+"\""+ ((comm != single_replica) ? ", replica \""+replica_id+"\"" : "")+".\n"); - save_delimiters = false; return COLVARS_OK; } @@ -239,11 +240,15 @@ colvarbias_meta::~colvarbias_meta() hills_energy_gradients = NULL; } - if (replica_hills_os.is_open()) - replica_hills_os.close(); + if (replica_hills_os) { + cvm::proxy->close_output_stream(replica_hills_file); + replica_hills_os = NULL; + } - if (hills_traj_os.is_open()) - hills_traj_os.close(); + if (hills_traj_os) { + cvm::proxy->close_output_stream(hills_traj_file_name()); + hills_traj_os = NULL; + } if(target_dist) { delete target_dist; @@ -280,9 +285,9 @@ colvarbias_meta::create_hill(colvarbias_meta::hill const &h) } // output to trajectory (if specified) - if (hills_traj_os.is_open()) { - hills_traj_os << (hills.back()).output_traj(); - hills_traj_os.flush(); + if (hills_traj_os) { + *hills_traj_os << (hills.back()).output_traj(); + cvm::proxy->flush_output_stream(hills_traj_os); } has_data = true; @@ -312,12 +317,12 @@ colvarbias_meta::delete_hill(hill_iter &h) } } - if (hills_traj_os.is_open()) { + if (hills_traj_os) { // output to the trajectory - hills_traj_os << "# 
DELETED this hill: " - << (hills.back()).output_traj() - << "\n"; - hills_traj_os.flush(); + *hills_traj_os << "# DELETED this hill: " + << (hills.back()).output_traj() + << "\n"; + cvm::proxy->flush_output_stream(hills_traj_os); } return hills.erase(h); @@ -501,12 +506,12 @@ int colvarbias_meta::update_bias() case multiple_replicas: create_hill(hill(hill_weight*hills_scale, colvars, hill_width, replica_id)); - if (replica_hills_os.is_open()) { - replica_hills_os << hills.back(); + if (replica_hills_os) { + *replica_hills_os << hills.back(); } else { - cvm::fatal_error("Error: in metadynamics bias \""+this->name+"\""+ - ((comm != single_replica) ? ", replica \""+replica_id+"\"" : "")+ - " while writing hills for the other replicas.\n"); + return cvm::error("Error: in metadynamics bias \""+this->name+"\""+ + ((comm != single_replica) ? ", replica \""+replica_id+"\"" : "")+ + " while writing hills for the other replicas.\n", FILE_ERROR); } break; } @@ -904,8 +909,9 @@ int colvarbias_meta::replica_share() // reread the replicas registry update_replicas_registry(); // empty the output buffer - if (replica_hills_os.is_open()) - replica_hills_os.flush(); + if (replica_hills_os) { + cvm::proxy->flush_output_stream(replica_hills_os); + } read_replica_files(); } return COLVARS_OK; @@ -1421,7 +1427,7 @@ std::istream & colvarbias_meta::read_hill(std::istream &is) // it is safer to read colvarvalue objects one at a time; // TODO: change this it later std::string centers_input; - key_lookup(data, "centers", centers_input); + key_lookup(data, "centers", &centers_input); std::istringstream centers_is(centers_input); for (size_t i = 0; i < num_variables(); i++) { centers_is >> h_centers[i]; @@ -1521,13 +1527,11 @@ int colvarbias_meta::setup_output() // for the others to read // open the "hills" buffer file - if (!replica_hills_os.is_open()) { - cvm::backup_file(replica_hills_file.c_str()); - replica_hills_os.open(replica_hills_file.c_str()); - if (!replica_hills_os.is_open()) - 
cvm::error("Error: in opening file \""+ - replica_hills_file+"\" for writing.\n", FILE_ERROR); - replica_hills_os.setf(std::ios::scientific, std::ios::floatfield); + if (!replica_hills_os) { + cvm::proxy->backup_file(replica_hills_file); + replica_hills_os = cvm::proxy->output_stream(replica_hills_file); + if (!replica_hills_os) return cvm::get_error(); + replica_hills_os->setf(std::ios::scientific, std::ios::floatfield); } // write the state file (so that there is always one available) @@ -1539,46 +1543,52 @@ int colvarbias_meta::setup_output() // if we're running without grids, use a growing list of "hills" files // otherwise, just one state file and one "hills" file as buffer - std::ofstream list_os(replica_list_file.c_str(), - (use_grids ? std::ios::trunc : std::ios::app)); - if (! list_os.is_open()) - cvm::fatal_error("Error: in opening file \""+ - replica_list_file+"\" for writing.\n"); - list_os << "stateFile " << replica_state_file << "\n"; - list_os << "hillsFile " << replica_hills_file << "\n"; - list_os.close(); + std::ostream *list_os = + cvm::proxy->output_stream(replica_list_file, + (use_grids ? std::ios_base::trunc : + std::ios_base::app)); + if (!list_os) { + return cvm::get_error(); + } + *list_os << "stateFile " << replica_state_file << "\n"; + *list_os << "hillsFile " << replica_hills_file << "\n"; + cvm::proxy->close_output_stream(replica_list_file); - // finally, if add a new record for this replica to the registry + // finally, add a new record for this replica to the registry if (! registered_replica) { - std::ofstream reg_os(replicas_registry_file.c_str(), std::ios::app); - if (! 
reg_os.is_open()) - cvm::error("Error: in opening file \""+ - replicas_registry_file+"\" for writing.\n", FILE_ERROR); - reg_os << replica_id << " " << replica_list_file << "\n"; - reg_os.close(); + std::ostream *reg_os = + cvm::proxy->output_stream(replicas_registry_file, + std::ios::app); + if (!reg_os) { + return cvm::get_error(); + } + *reg_os << replica_id << " " << replica_list_file << "\n"; + cvm::proxy->close_output_stream(replicas_registry_file); } } if (b_hills_traj) { - std::string const traj_file_name(cvm::output_prefix()+ - ".colvars."+this->name+ - ( (comm != single_replica) ? - ("."+replica_id) : - ("") )+ - ".hills.traj"); - if (!hills_traj_os.is_open()) { - cvm::backup_file(traj_file_name.c_str()); - hills_traj_os.open(traj_file_name.c_str()); + if (!hills_traj_os) { + hills_traj_os = cvm::proxy->output_stream(hills_traj_file_name()); + if (!hills_traj_os) return cvm::get_error(); } - if (!hills_traj_os.is_open()) - cvm::error("Error: in opening hills output file \"" + - traj_file_name+"\".\n", FILE_ERROR); } return (cvm::get_error() ? COLVARS_ERROR : COLVARS_OK); } +std::string const colvarbias_meta::hills_traj_file_name() const +{ + return std::string(cvm::output_prefix()+ + ".colvars."+this->name+ + ( (comm != single_replica) ? + ("."+replica_id) : + ("") )+ + ".hills.traj"); +} + + std::string const colvarbias_meta::get_state_params() const { std::ostringstream os; @@ -1671,12 +1681,13 @@ void colvarbias_meta::write_pmf() (dump_fes_save ? 
"."+cvm::to_str(cvm::step_absolute()) : "") + ".pmf"); - cvm::backup_file(fes_file_name.c_str()); - cvm::ofstream fes_os(fes_file_name.c_str()); - pmf->write_multicol(fes_os); - fes_os.close(); + cvm::proxy->backup_file(fes_file_name); + std::ostream *fes_os = cvm::proxy->output_stream(fes_file_name); + pmf->write_multicol(*fes_os); + cvm::proxy->close_output_stream(fes_file_name); } } + if (comm != single_replica) { // output the combined PMF from all replicas pmf->reset(); @@ -1695,10 +1706,10 @@ void colvarbias_meta::write_pmf() (dump_fes_save ? "."+cvm::to_str(cvm::step_absolute()) : "") + ".pmf"); - cvm::backup_file(fes_file_name.c_str()); - cvm::ofstream fes_os(fes_file_name.c_str()); - pmf->write_multicol(fes_os); - fes_os.close(); + cvm::proxy->backup_file(fes_file_name); + std::ostream *fes_os = cvm::proxy->output_stream(fes_file_name); + pmf->write_multicol(*fes_os); + cvm::proxy->close_output_stream(fes_file_name); } delete pmf; @@ -1769,13 +1780,11 @@ int colvarbias_meta::write_replica_state_file() // rep_state_os.close(); // reopen the hills file - replica_hills_os.close(); - cvm::backup_file(replica_hills_file.c_str()); - replica_hills_os.open(replica_hills_file.c_str()); - if (!replica_hills_os.is_open()) - cvm::fatal_error("Error: in opening file \""+ - replica_hills_file+"\" for writing.\n"); - replica_hills_os.setf(std::ios::scientific, std::ios::floatfield); + cvm::proxy->close_output_stream(replica_hills_file); + cvm::proxy->backup_file(replica_hills_file); + replica_hills_os = cvm::proxy->output_stream(replica_hills_file); + if (!replica_hills_os) return cvm::get_error(); + replica_hills_os->setf(std::ios::scientific, std::ios::floatfield); return COLVARS_OK; } diff --git a/lib/colvars/colvarbias_meta.h b/lib/colvars/colvarbias_meta.h index 01921eaf64..249f7342bc 100644 --- a/lib/colvars/colvarbias_meta.h +++ b/lib/colvars/colvarbias_meta.h @@ -78,7 +78,10 @@ protected: /// Write the hill logfile bool b_hills_traj; /// Logfile of hill 
management (creation and deletion) - cvm::ofstream hills_traj_os; + std::ostream *hills_traj_os; + + /// Name of the hill logfile + std::string const hills_traj_file_name() const; /// \brief List of hills used on this bias (total); if a grid is /// employed, these don't need to be updated at every time step @@ -241,7 +244,7 @@ protected: std::string replica_hills_file; /// \brief Output stream corresponding to replica_hills_file - cvm::ofstream replica_hills_os; + std::ostream *replica_hills_os; /// Position within replica_hills_file (when reading it) int replica_hills_file_pos; diff --git a/lib/colvars/colvarbias_restraint.cpp b/lib/colvars/colvarbias_restraint.cpp index 159d9eae64..6879190968 100644 --- a/lib/colvars/colvarbias_restraint.cpp +++ b/lib/colvars/colvarbias_restraint.cpp @@ -99,12 +99,9 @@ int colvarbias_restraint_centers::init(std::string const &conf) if (null_centers) { // try to initialize the restraint centers for the first time colvar_centers.resize(num_variables()); - colvar_centers_raw.resize(num_variables()); for (i = 0; i < num_variables(); i++) { colvar_centers[i].type(variables(i)->value()); colvar_centers[i].reset(); - colvar_centers_raw[i].type(variables(i)->value()); - colvar_centers_raw[i].reset(); } } @@ -113,7 +110,6 @@ int colvarbias_restraint_centers::init(std::string const &conf) if (cvm::debug()) { cvm::log("colvarbias_restraint: parsing initial centers, i = "+cvm::to_str(i)+".\n"); } - colvar_centers_raw[i] = colvar_centers[i]; colvar_centers[i].apply_constraints(); } null_centers = false; @@ -141,8 +137,6 @@ int colvarbias_restraint_centers::change_configuration(std::string const &conf) for (size_t i = 0; i < num_variables(); i++) { colvar_centers[i].type(variables(i)->value()); colvar_centers[i].apply_constraints(); - colvar_centers_raw[i].type(variables(i)->value()); - colvar_centers_raw[i] = colvar_centers[i]; } } return COLVARS_OK; @@ -232,7 +226,6 @@ int colvarbias_restraint_moving::set_state_params(std::string const 
&conf) { if (b_chg_centers || b_chg_force_k) { if (target_nstages) { - // cvm::log ("Reading current stage from the restart.\n"); if (!get_keyval(conf, "stage", stage)) cvm::error("Error: current stage is missing from the restart.\n"); } @@ -265,100 +258,127 @@ int colvarbias_restraint_centers_moving::init(std::string const &conf) size_t i; if (get_keyval(conf, "targetCenters", target_centers, colvar_centers)) { - if (colvar_centers.size() != num_variables()) { + if (target_centers.size() != num_variables()) { cvm::error("Error: number of target centers does not match " - "that of collective variables.\n"); + "that of collective variables.\n", INPUT_ERROR); } b_chg_centers = true; for (i = 0; i < target_centers.size(); i++) { target_centers[i].apply_constraints(); + centers_incr.push_back(colvar_centers[i]); + centers_incr[i].reset(); } } if (b_chg_centers) { - // parse moving restraint options + // parse moving schedule options colvarbias_restraint_moving::init(conf); + if (initial_centers.size() == 0) { + // One-time init + initial_centers = colvar_centers; + } + // Call to check that the definition is correct + for (i = 0; i < num_variables(); i++) { + colvarvalue const midpoint = + colvarvalue::interpolate(initial_centers[i], + target_centers[i], + 0.5); + } } else { target_centers.clear(); return COLVARS_OK; } get_keyval(conf, "outputCenters", b_output_centers, b_output_centers); - get_keyval(conf, "outputAccumulatedWork", b_output_acc_work, b_output_acc_work); + get_keyval(conf, "outputAccumulatedWork", b_output_acc_work, + b_output_acc_work); // TODO this conflicts with stages return COLVARS_OK; } +int colvarbias_restraint_centers_moving::update_centers(cvm::real lambda) +{ + if (cvm::debug()) { + cvm::log("Updating centers for the restraint bias \""+ + this->name+"\": "+cvm::to_str(colvar_centers)+".\n"); + } + size_t i; + for (i = 0; i < num_variables(); i++) { + colvarvalue const c_new = colvarvalue::interpolate(initial_centers[i], + target_centers[i], + 
lambda); + centers_incr[i] = (c_new).dist2_grad(colvar_centers[i]); + colvar_centers[i] = c_new; + variables(i)->wrap(colvar_centers[i]); + } + if (cvm::debug()) { + cvm::log("New centers for the restraint bias \""+ + this->name+"\": "+cvm::to_str(colvar_centers)+".\n"); + } + return cvm::get_error(); +} + + int colvarbias_restraint_centers_moving::update() { if (b_chg_centers) { - if (cvm::debug()) { - cvm::log("Updating centers for the restraint bias \""+ - this->name+"\": "+cvm::to_str(colvar_centers)+".\n"); - } - - if (!centers_incr.size()) { - // if this is the first calculation, calculate the advancement - // at each simulation step (or stage, if applicable) - // (take current stage into account: it can be non-zero - // if we are restarting a staged calculation) - centers_incr.resize(num_variables()); - for (size_t i = 0; i < num_variables(); i++) { - centers_incr[i].type(variables(i)->value()); - centers_incr[i] = (target_centers[i] - colvar_centers_raw[i]) / - cvm::real( target_nstages ? 
(target_nstages - stage) : - (target_nsteps - cvm::step_absolute())); - } - if (cvm::debug()) { - cvm::log("Center increment for the restraint bias \""+ - this->name+"\": "+cvm::to_str(centers_incr)+" at stage "+cvm::to_str(stage)+ ".\n"); - } - } - if (target_nstages) { - if ((cvm::step_relative() > 0) - && (cvm::step_absolute() % target_nsteps) == 0 - && stage < target_nstages) { - - for (size_t i = 0; i < num_variables(); i++) { - colvar_centers_raw[i] += centers_incr[i]; - colvar_centers[i] = colvar_centers_raw[i]; - variables(i)->wrap(colvar_centers[i]); - colvar_centers[i].apply_constraints(); + // Staged update + if (stage <= target_nstages) { + if ((cvm::step_relative() > 0) && + ((cvm::step_absolute() % target_nsteps) == 1)) { + cvm::real const lambda = + cvm::real(stage)/cvm::real(target_nstages); + update_centers(lambda); + stage++; + cvm::log("Moving restraint \"" + this->name + + "\" stage " + cvm::to_str(stage) + + " : setting centers to " + cvm::to_str(colvar_centers) + + " at step " + cvm::to_str(cvm::step_absolute())); + } else { + for (size_t i = 0; i < num_variables(); i++) { + centers_incr[i].reset(); + } } - stage++; - cvm::log("Moving restraint \"" + this->name + - "\" stage " + cvm::to_str(stage) + - " : setting centers to " + cvm::to_str(colvar_centers) + - " at step " + cvm::to_str(cvm::step_absolute())); } - } else if ((cvm::step_relative() > 0) && (cvm::step_absolute() <= target_nsteps)) { - // move the restraint centers in the direction of the targets - // (slow growth) + } else { + // Continuous update + if (cvm::step_absolute() <= target_nsteps) { + cvm::real const lambda = + cvm::real(cvm::step_absolute())/cvm::real(target_nsteps); + update_centers(lambda); + } else { + for (size_t i = 0; i < num_variables(); i++) { + centers_incr[i].reset(); + } + } + } + + if (cvm::step_relative() == 0) { for (size_t i = 0; i < num_variables(); i++) { - colvar_centers_raw[i] += centers_incr[i]; - colvar_centers[i] = colvar_centers_raw[i]; - 
variables(i)->wrap(colvar_centers[i]); - colvar_centers[i].apply_constraints(); + // finite differences are undefined when restarting + centers_incr[i].reset(); } } if (cvm::debug()) { - cvm::log("New centers for the restraint bias \""+ - this->name+"\": "+cvm::to_str(colvar_centers)+".\n"); + cvm::log("Center increment for the restraint bias \""+ + this->name+"\": "+cvm::to_str(centers_incr)+ + " at stage "+cvm::to_str(stage)+ ".\n"); } } - return COLVARS_OK; + return cvm::get_error(); } int colvarbias_restraint_centers_moving::update_acc_work() { if (b_output_acc_work) { - if ((cvm::step_relative() > 0) || (cvm::step_absolute() == 0)) { + if ((cvm::step_relative() > 0) && + (cvm::step_absolute() <= target_nsteps)) { for (size_t i = 0; i < num_variables(); i++) { // project forces on the calculated increments at this step acc_work += colvar_forces[i] * centers_incr[i]; @@ -383,13 +403,6 @@ std::string const colvarbias_restraint_centers_moving::get_state_params() const << colvar_centers[i]; } os << "\n"; - os << "centers_raw "; - for (i = 0; i < num_variables(); i++) { - os << " " - << std::setprecision(cvm::cv_prec) << std::setw(cvm::cv_width) - << colvar_centers_raw[i]; - } - os << "\n"; if (b_output_acc_work) { os << "accumulatedWork " @@ -398,7 +411,7 @@ std::string const colvarbias_restraint_centers_moving::get_state_params() const } } - return colvarbias_restraint_moving::get_state_params() + os.str(); + return os.str(); } @@ -410,8 +423,6 @@ int colvarbias_restraint_centers_moving::set_state_params(std::string const &con // cvm::log ("Reading the updated restraint centers from the restart.\n"); if (!get_keyval(conf, "centers", colvar_centers)) cvm::error("Error: restraint centers are missing from the restart.\n"); - if (!get_keyval(conf, "centers_raw", colvar_centers_raw)) - cvm::error("Error: \"raw\" restraint centers are missing from the restart.\n"); if (b_output_acc_work) { if (!get_keyval(conf, "accumulatedWork", acc_work)) cvm::error("Error: 
accumulatedWork is missing from the restart.\n"); @@ -609,7 +620,7 @@ std::string const colvarbias_restraint_k_moving::get_state_params() const << std::setprecision(cvm::en_prec) << std::setw(cvm::en_width) << force_k << "\n"; } - return colvarbias_restraint_moving::get_state_params() + os.str(); + return os.str(); } @@ -770,6 +781,7 @@ cvm::real colvarbias_restraint_harmonic::d_restraint_potential_dk(size_t i) cons std::string const colvarbias_restraint_harmonic::get_state_params() const { return colvarbias_restraint::get_state_params() + + colvarbias_restraint_moving::get_state_params() + colvarbias_restraint_centers_moving::get_state_params() + colvarbias_restraint_k_moving::get_state_params(); } @@ -779,6 +791,7 @@ int colvarbias_restraint_harmonic::set_state_params(std::string const &conf) { int error_code = COLVARS_OK; error_code |= colvarbias_restraint::set_state_params(conf); + error_code |= colvarbias_restraint_moving::set_state_params(conf); error_code |= colvarbias_restraint_centers_moving::set_state_params(conf); error_code |= colvarbias_restraint_k_moving::set_state_params(conf); return error_code; @@ -853,6 +866,21 @@ int colvarbias_restraint_harmonic_walls::init(std::string const &conf) get_keyval(conf, "upperWallConstant", upper_wall_k, (upper_wall_k > 0.0) ? 
upper_wall_k : force_k); + if (lower_wall_k * upper_wall_k > 0.0) { + for (size_t i = 0; i < num_variables(); i++) { + if (variables(i)->width != 1.0) + cvm::log("The lower and upper wall force constants for colvar \""+ + variables(i)->name+ + "\" will be rescaled to "+ + cvm::to_str(lower_wall_k / + (variables(i)->width * variables(i)->width))+ + " and "+ + cvm::to_str(upper_wall_k / + (variables(i)->width * variables(i)->width))+ + " according to the specified width.\n"); + } + } + enable(f_cvb_scalar_variables); size_t i; @@ -869,7 +897,7 @@ int colvarbias_restraint_harmonic_walls::init(std::string const &conf) if (!get_keyval(conf, "lowerWalls", lower_walls, lower_walls) && b_null_lower_walls) { cvm::log("Lower walls were not provided.\n"); - lower_walls.resize(0); + lower_walls.clear(); } bool b_null_upper_walls = false; @@ -884,7 +912,7 @@ int colvarbias_restraint_harmonic_walls::init(std::string const &conf) if (!get_keyval(conf, "upperWalls", upper_walls, upper_walls) && b_null_upper_walls) { cvm::log("Upper walls were not provided.\n"); - upper_walls.resize(0); + upper_walls.clear(); } if ((lower_walls.size() == 0) && (upper_walls.size() == 0)) { @@ -954,7 +982,8 @@ void colvarbias_restraint_harmonic_walls::communicate_forces() cvm::log("Communicating a force to colvar \""+ variables(i)->name+"\".\n"); } - variables(i)->add_bias_force_actual_value(colvar_forces[i]); + // Impulse-style multiple timestep + variables(i)->add_bias_force_actual_value(cvm::real(time_step_factor) * colvar_forces[i]); } } @@ -1021,6 +1050,7 @@ cvm::real colvarbias_restraint_harmonic_walls::d_restraint_potential_dk(size_t i std::string const colvarbias_restraint_harmonic_walls::get_state_params() const { return colvarbias_restraint::get_state_params() + + colvarbias_restraint_moving::get_state_params() + colvarbias_restraint_k_moving::get_state_params(); } @@ -1029,6 +1059,7 @@ int colvarbias_restraint_harmonic_walls::set_state_params(std::string const &con { int error_code = 
COLVARS_OK; error_code |= colvarbias_restraint::set_state_params(conf); + error_code |= colvarbias_restraint_moving::set_state_params(conf); error_code |= colvarbias_restraint_k_moving::set_state_params(conf); return error_code; } @@ -1148,6 +1179,7 @@ cvm::real colvarbias_restraint_linear::d_restraint_potential_dk(size_t i) const std::string const colvarbias_restraint_linear::get_state_params() const { return colvarbias_restraint::get_state_params() + + colvarbias_restraint_moving::get_state_params() + colvarbias_restraint_centers_moving::get_state_params() + colvarbias_restraint_k_moving::get_state_params(); } @@ -1157,6 +1189,7 @@ int colvarbias_restraint_linear::set_state_params(std::string const &conf) { int error_code = COLVARS_OK; error_code |= colvarbias_restraint::set_state_params(conf); + error_code |= colvarbias_restraint_moving::set_state_params(conf); error_code |= colvarbias_restraint_centers_moving::set_state_params(conf); error_code |= colvarbias_restraint_k_moving::set_state_params(conf); return error_code; @@ -1282,9 +1315,9 @@ int colvarbias_restraint_histogram::init(std::string const &conf) colvarbias_restraint_histogram::~colvarbias_restraint_histogram() { - p.resize(0); - ref_p.resize(0); - p_diff.resize(0); + p.clear(); + ref_p.clear(); + p_diff.clear(); } @@ -1382,23 +1415,23 @@ std::ostream & colvarbias_restraint_histogram::write_restart(std::ostream &os) { if (b_write_histogram) { std::string file_name(cvm::output_prefix()+"."+this->name+".hist.dat"); - std::ofstream os(file_name.c_str()); - os << "# " << cvm::wrap_string(variables(0)->name, cvm::cv_width) - << " " << "p(" << cvm::wrap_string(variables(0)->name, cvm::cv_width-3) - << ")\n"; + std::ostream *os = cvm::proxy->output_stream(file_name); + *os << "# " << cvm::wrap_string(variables(0)->name, cvm::cv_width) + << " " << "p(" << cvm::wrap_string(variables(0)->name, cvm::cv_width-3) + << ")\n"; size_t igrid; for (igrid = 0; igrid < p.size(); igrid++) { cvm::real const x_grid = 
(lower_boundary + (igrid+1)*width); - os << " " - << std::setprecision(cvm::cv_prec) - << std::setw(cvm::cv_width) - << x_grid - << " " - << std::setprecision(cvm::cv_prec) - << std::setw(cvm::cv_width) - << p[igrid] << "\n"; + *os << " " + << std::setprecision(cvm::cv_prec) + << std::setw(cvm::cv_width) + << x_grid + << " " + << std::setprecision(cvm::cv_prec) + << std::setw(cvm::cv_width) + << p[igrid] << "\n"; } - os.close(); + cvm::proxy->close_output_stream(file_name); } return os; } diff --git a/lib/colvars/colvarbias_restraint.h b/lib/colvars/colvarbias_restraint.h index 98b967abdb..8c3a1537fc 100644 --- a/lib/colvars/colvarbias_restraint.h +++ b/lib/colvars/colvarbias_restraint.h @@ -74,9 +74,6 @@ protected: /// \brief Restraint centers std::vector<colvarvalue> colvar_centers; - - /// \brief Restraint centers outside the domain of the colvars (no wrapping or constraints applied) - std::vector<colvarvalue> colvar_centers_raw; }; @@ -156,10 +153,16 @@ protected: /// \brief New restraint centers std::vector<colvarvalue> target_centers; + /// \brief Initial value of the restraint centers + std::vector<colvarvalue> initial_centers; + /// \brief Amplitude of the restraint centers' increment at each step - /// (or stage) towards the new values (calculated from target_nsteps) + /// towards the new values (calculated from target_nsteps) std::vector<colvarvalue> centers_incr; + /// \brief Update the centers by interpolating between initial and target + virtual int update_centers(cvm::real lambda); + /// Whether to write the current restraint centers to the trajectory file bool b_output_centers; diff --git a/lib/colvars/colvarcomp.cpp b/lib/colvars/colvarcomp.cpp index 786bc032d2..589de1d32a 100644 --- a/lib/colvars/colvarcomp.cpp +++ b/lib/colvars/colvarcomp.cpp @@ -51,6 +51,17 @@ colvar::cvc::cvc(std::string const &conf) get_keyval_feature((colvarparse *)this, conf, "debugGradients", f_cvc_debug_gradient, false, parse_silent); + { + bool b_no_PBC = false; + get_keyval(conf, "forceNoPBC", b_no_PBC, false); + if (b_no_PBC) { + 
disable(f_cvc_pbc_minimum_image); + } else { + enable(f_cvc_pbc_minimum_image); + } + // this does not use get_keyval_feature() only for backward compatibility + } + // Attempt scalable calculations when in parallel? (By default yes, if available) get_keyval(conf, "scalable", b_try_scalable, true); @@ -94,13 +105,15 @@ cvm::atom_group *colvar::cvc::parse_group(std::string const &conf, bool optional) { cvm::atom_group *group = NULL; + std::string group_conf; - if (key_lookup(conf, group_key)) { - group = new cvm::atom_group; - group->key = group_key; + if (key_lookup(conf, group_key, &group_conf)) { + group = new cvm::atom_group(group_key); if (b_try_scalable) { - if (is_available(f_cvc_scalable_com) && is_enabled(f_cvc_com_based)) { + if (is_available(f_cvc_scalable_com) + && is_enabled(f_cvc_com_based) + && !is_enabled(f_cvc_debug_gradient)) { enable(f_cvc_scalable_com); enable(f_cvc_scalable); // The CVC makes the feature available; @@ -111,44 +124,51 @@ cvm::atom_group *colvar::cvc::parse_group(std::string const &conf, // TODO check for other types of parallelism here } - if (group->parse(conf) == COLVARS_OK) { - atom_groups.push_back(group); - } else { - cvm::error("Error parsing definition for atom group \""+ - std::string(group_key)+"\".\n"); + if (group_conf.size() == 0) { + cvm::error("Error: atom group \""+group->key+ + "\" is set, but has no definition.\n", + INPUT_ERROR); + return group; } + + cvm::increase_depth(); + if (group->parse(group_conf) == COLVARS_OK) { + register_atom_group(group); + } + group->check_keywords(group_conf, group_key); + if (cvm::get_error()) { + cvm::error("Error parsing definition for atom group \""+ + std::string(group_key)+"\"\n.", INPUT_ERROR); + } + cvm::decrease_depth(); + } else { if (! 
optional) { cvm::error("Error: definition for atom group \""+ - std::string(group_key)+"\" not found.\n"); + std::string(group_key)+"\" not found.\n"); } } + return group; } int colvar::cvc::setup() { - size_t i; description = "cvc " + name; - - for (i = 0; i < atom_groups.size(); i++) { - add_child((colvardeps *) atom_groups[i]); - } - return COLVARS_OK; } colvar::cvc::~cvc() { + free_children_deps(); remove_all_children(); for (size_t i = 0; i < atom_groups.size(); i++) { if (atom_groups[i] != NULL) delete atom_groups[i]; } } - void colvar::cvc::read_data() { size_t ig; @@ -187,117 +207,129 @@ void colvar::cvc::calc_Jacobian_derivative() } -void colvar::cvc::debug_gradients(cvm::atom_group *group) +void colvar::cvc::calc_fit_gradients() { - // this function should work for any scalar variable: + for (size_t ig = 0; ig < atom_groups.size(); ig++) { + atom_groups[ig]->calc_fit_gradients(); + } +} + + +void colvar::cvc::debug_gradients() +{ + // this function should work for any scalar cvc: // the only difference will be the name of the atom group (here, "group") // NOTE: this assumes that groups for this cvc are non-overlapping, // since atom coordinates are modified only within the current group - if (group->b_dummy) return; + cvm::log("Debugging gradients for " + description); - cvm::rotation const rot_0 = group->rot; - cvm::rotation const rot_inv = group->rot.inverse(); + for (size_t ig = 0; ig < atom_groups.size(); ig++) { + cvm::atom_group *group = atom_groups[ig]; + if (group->b_dummy) continue; - cvm::real x_0 = x.real_value; - if ((x.type() == colvarvalue::type_vector) && (x.size() == 1)) x_0 = x[0]; + cvm::rotation const rot_0 = group->rot; + cvm::rotation const rot_inv = group->rot.inverse(); - // cvm::log("gradients = "+cvm::to_str (gradients)+"\n"); + cvm::real x_0 = x.real_value; + if ((x.type() == colvarvalue::type_vector) && (x.size() == 1)) x_0 = x[0]; - cvm::atom_group *group_for_fit = group->fitting_group ? 
group->fitting_group : group; - cvm::atom_pos fit_gradient_sum, gradient_sum; + // cvm::log("gradients = "+cvm::to_str (gradients)+"\n"); - // print the values of the fit gradients - if (group->b_rotate || group->b_center) { - if (group->b_fit_gradients) { - size_t j; + cvm::atom_group *group_for_fit = group->fitting_group ? group->fitting_group : group; + cvm::atom_pos fit_gradient_sum, gradient_sum; - // fit_gradients are in the simulation frame: we should print them in the rotated frame - cvm::log("Fit gradients:\n"); - for (j = 0; j < group_for_fit->fit_gradients.size(); j++) { - cvm::log((group->fitting_group ? std::string("refPosGroup") : group->key) + - "[" + cvm::to_str(j) + "] = " + - (group->b_rotate ? - cvm::to_str(rot_0.rotate(group_for_fit->fit_gradients[j])) : - cvm::to_str(group_for_fit->fit_gradients[j]))); + // print the values of the fit gradients + if (group->b_rotate || group->b_center) { + if (group->is_enabled(f_ag_fit_gradients)) { + size_t j; + + // fit_gradients are in the simulation frame: we should print them in the rotated frame + cvm::log("Fit gradients:\n"); + for (j = 0; j < group_for_fit->fit_gradients.size(); j++) { + cvm::log((group->fitting_group ? std::string("refPosGroup") : group->key) + + "[" + cvm::to_str(j) + "] = " + + (group->b_rotate ? + cvm::to_str(rot_0.rotate(group_for_fit->fit_gradients[j])) : + cvm::to_str(group_for_fit->fit_gradients[j]))); + } } } - } - // debug the gradients - for (size_t ia = 0; ia < group->size(); ia++) { + // debug the gradients + for (size_t ia = 0; ia < group->size(); ia++) { - // tests are best conducted in the unrotated (simulation) frame - cvm::rvector const atom_grad = (group->b_rotate ? 
- rot_inv.rotate((*group)[ia].grad) : - (*group)[ia].grad); - gradient_sum += atom_grad; - - for (size_t id = 0; id < 3; id++) { - // (re)read original positions - group->read_positions(); - // change one coordinate - (*group)[ia].pos[id] += cvm::debug_gradients_step_size; - group->calc_required_properties(); - calc_value(); - cvm::real x_1 = x.real_value; - if ((x.type() == colvarvalue::type_vector) && (x.size() == 1)) x_1 = x[0]; - cvm::log("Atom "+cvm::to_str(ia)+", component "+cvm::to_str(id)+":\n"); - cvm::log("dx(actual) = "+cvm::to_str(x_1 - x_0, - 21, 14)+"\n"); - cvm::real const dx_pred = (group->fit_gradients.size()) ? - (cvm::debug_gradients_step_size * (atom_grad[id] + group->fit_gradients[ia][id])) : - (cvm::debug_gradients_step_size * atom_grad[id]); - cvm::log("dx(interp) = "+cvm::to_str(dx_pred, - 21, 14)+"\n"); - cvm::log("|dx(actual) - dx(interp)|/|dx(actual)| = "+ - cvm::to_str(std::fabs(x_1 - x_0 - dx_pred) / - std::fabs(x_1 - x_0), 12, 5)+"\n"); - } - } - - if ((group->b_fit_gradients) && (group->fitting_group != NULL)) { - cvm::atom_group *ref_group = group->fitting_group; - group->read_positions(); - group->calc_required_properties(); - - for (size_t ia = 0; ia < ref_group->size(); ia++) { - - // fit gradients are in the unrotated (simulation) frame - cvm::rvector const atom_grad = ref_group->fit_gradients[ia]; - fit_gradient_sum += atom_grad; + // tests are best conducted in the unrotated (simulation) frame + cvm::rvector const atom_grad = (group->b_rotate ? 
+ rot_inv.rotate((*group)[ia].grad) : + (*group)[ia].grad); + gradient_sum += atom_grad; for (size_t id = 0; id < 3; id++) { // (re)read original positions group->read_positions(); - ref_group->read_positions(); // change one coordinate - (*ref_group)[ia].pos[id] += cvm::debug_gradients_step_size; + (*group)[ia].pos[id] += cvm::debug_gradients_step_size; group->calc_required_properties(); calc_value(); - - cvm::real const x_1 = x.real_value; - cvm::log("refPosGroup atom "+cvm::to_str(ia)+", component "+cvm::to_str (id)+":\n"); - cvm::log("dx(actual) = "+cvm::to_str (x_1 - x_0, - 21, 14)+"\n"); - - cvm::real const dx_pred = cvm::debug_gradients_step_size * atom_grad[id]; - - cvm::log("dx(interp) = "+cvm::to_str (dx_pred, - 21, 14)+"\n"); - cvm::log ("|dx(actual) - dx(interp)|/|dx(actual)| = "+ - cvm::to_str(std::fabs (x_1 - x_0 - dx_pred) / - std::fabs (x_1 - x_0), - 12, 5)+ - ".\n"); + cvm::real x_1 = x.real_value; + if ((x.type() == colvarvalue::type_vector) && (x.size() == 1)) x_1 = x[0]; + cvm::log("Atom "+cvm::to_str(ia)+", component "+cvm::to_str(id)+":\n"); + cvm::log("dx(actual) = "+cvm::to_str(x_1 - x_0, + 21, 14)+"\n"); + cvm::real const dx_pred = (group->fit_gradients.size()) ? 
+ (cvm::debug_gradients_step_size * (atom_grad[id] + group->fit_gradients[ia][id])) : + (cvm::debug_gradients_step_size * atom_grad[id]); + cvm::log("dx(interp) = "+cvm::to_str(dx_pred, + 21, 14)+"\n"); + cvm::log("|dx(actual) - dx(interp)|/|dx(actual)| = "+ + cvm::to_str(std::fabs(x_1 - x_0 - dx_pred) / + std::fabs(x_1 - x_0), 12, 5)+"\n"); } } + + if ((group->is_enabled(f_ag_fit_gradients)) && (group->fitting_group != NULL)) { + cvm::atom_group *ref_group = group->fitting_group; + group->read_positions(); + group->calc_required_properties(); + + for (size_t ia = 0; ia < ref_group->size(); ia++) { + + // fit gradients are in the unrotated (simulation) frame + cvm::rvector const atom_grad = ref_group->fit_gradients[ia]; + fit_gradient_sum += atom_grad; + + for (size_t id = 0; id < 3; id++) { + // (re)read original positions + group->read_positions(); + ref_group->read_positions(); + // change one coordinate + (*ref_group)[ia].pos[id] += cvm::debug_gradients_step_size; + group->calc_required_properties(); + calc_value(); + + cvm::real const x_1 = x.real_value; + cvm::log("refPosGroup atom "+cvm::to_str(ia)+", component "+cvm::to_str (id)+":\n"); + cvm::log("dx(actual) = "+cvm::to_str (x_1 - x_0, + 21, 14)+"\n"); + + cvm::real const dx_pred = cvm::debug_gradients_step_size * atom_grad[id]; + + cvm::log("dx(interp) = "+cvm::to_str (dx_pred, + 21, 14)+"\n"); + cvm::log ("|dx(actual) - dx(interp)|/|dx(actual)| = "+ + cvm::to_str(std::fabs (x_1 - x_0 - dx_pred) / + std::fabs (x_1 - x_0), + 12, 5)+ + ".\n"); + } + } + } + + cvm::log("Gradient sum: " + cvm::to_str(gradient_sum) + + " Fit gradient sum: " + cvm::to_str(fit_gradient_sum) + + " Total " + cvm::to_str(gradient_sum + fit_gradient_sum)); } - - cvm::log("Gradient sum: " + cvm::to_str(gradient_sum) + - " Fit gradient sum: " + cvm::to_str(fit_gradient_sum) + - " Total " + cvm::to_str(gradient_sum + fit_gradient_sum)); - return; } diff --git a/lib/colvars/colvarcomp.h b/lib/colvars/colvarcomp.h index 
ec215cbad1..3c1ec2495c 100644 --- a/lib/colvars/colvarcomp.h +++ b/lib/colvars/colvarcomp.h @@ -132,9 +132,15 @@ public: static std::vector cvc_features; /// \brief Implementation of the feature list accessor for colvar - virtual std::vector &features() { + virtual const std::vector &features() + { return cvc_features; } + virtual std::vector &modify_features() + { + return cvc_features; + } + /// \brief Obtain data needed for the calculation for the backend virtual void read_data(); @@ -146,8 +152,11 @@ public: /// order to apply forces virtual void calc_gradients() = 0; + /// \brief Calculate the atomic fit gradients + void calc_fit_gradients(); + /// \brief Calculate finite-difference gradients alongside the analytical ones, for each Cartesian component - virtual void debug_gradients(cvm::atom_group *group); + virtual void debug_gradients(); /// \brief Calculate the total force from the system using the /// inverse atomic gradients @@ -228,6 +237,12 @@ public: /// e.g. atomic gradients std::vector atom_groups; + /// \brief Store a pointer to new atom group, and list as child for dependencies + inline void register_atom_group(cvm::atom_group *ag) { + atom_groups.push_back(ag); + add_child((colvardeps *) ag); + } + /// \brief Whether or not this CVC will be computed in parallel whenever possible bool b_try_scalable; @@ -427,15 +442,77 @@ public: }; +/// \brief Colvar component: polar coordinate phi of a group +/// (colvarvalue::type_scalar type, range [-180:180]) +class colvar::polar_phi + : public colvar::cvc +{ +public: + polar_phi(std::string const &conf); + polar_phi(); + virtual ~polar_phi() {} +protected: + cvm::atom_group *atoms; + cvm::real r, theta, phi; +public: + virtual void calc_value(); + virtual void calc_gradients(); + virtual void apply_force(colvarvalue const &force); + /// Redefined to handle the 2*PI periodicity + virtual cvm::real dist2(colvarvalue const &x1, + colvarvalue const &x2) const; + /// Redefined to handle the 2*PI periodicity + 
virtual colvarvalue dist2_lgrad(colvarvalue const &x1, + colvarvalue const &x2) const; + /// Redefined to handle the 2*PI periodicity + virtual colvarvalue dist2_rgrad(colvarvalue const &x1, + colvarvalue const &x2) const; + /// Redefined to handle the 2*PI periodicity + virtual void wrap(colvarvalue &x) const; +}; + + +/// \brief Colvar component: polar coordinate theta of a group +/// (colvarvalue::type_scalar type, range [0:180]) +class colvar::polar_theta + : public colvar::cvc +{ +public: + polar_theta(std::string const &conf); + polar_theta(); + virtual ~polar_theta() {} +protected: + cvm::atom_group *atoms; + cvm::real r, theta, phi; +public: + virtual void calc_value(); + virtual void calc_gradients(); + virtual void apply_force(colvarvalue const &force); + /// Redefined to override the distance ones + virtual cvm::real dist2(colvarvalue const &x1, + colvarvalue const &x2) const; + /// Redefined to override the distance ones + virtual colvarvalue dist2_lgrad(colvarvalue const &x1, + colvarvalue const &x2) const; + /// Redefined to override the distance ones + virtual colvarvalue dist2_rgrad(colvarvalue const &x1, + colvarvalue const &x2) const; +}; /// \brief Colvar component: average distance between two groups of atoms, weighted as the sixth power, /// as in NMR refinements(colvarvalue::type_scalar type, range (0:*)) class colvar::distance_inv - : public colvar::distance + : public colvar::cvc { protected: + /// First atom group + cvm::atom_group *group1; + /// Second atom group + cvm::atom_group *group2; /// Components of the distance vector orthogonal to the axis int exponent; + /// Use absolute positions, ignoring PBCs when present + bool b_no_PBC; public: distance_inv(std::string const &conf); distance_inv(); diff --git a/lib/colvars/colvarcomp_angles.cpp b/lib/colvars/colvarcomp_angles.cpp index 0204f3b4b1..9f879a4c41 100644 --- a/lib/colvars/colvarcomp_angles.cpp +++ b/lib/colvars/colvarcomp_angles.cpp @@ -45,9 +45,9 @@ 
colvar::angle::angle(cvm::atom const &a1, group1 = new cvm::atom_group(std::vector(1, a1)); group2 = new cvm::atom_group(std::vector(1, a2)); group3 = new cvm::atom_group(std::vector(1, a3)); - atom_groups.push_back(group1); - atom_groups.push_back(group2); - atom_groups.push_back(group3); + register_atom_group(group1); + register_atom_group(group2); + register_atom_group(group3); x.type(colvarvalue::type_scalar); } @@ -66,12 +66,16 @@ void colvar::angle::calc_value() cvm::atom_pos const g2_pos = group2->center_of_mass(); cvm::atom_pos const g3_pos = group3->center_of_mass(); - r21 = cvm::position_distance(g2_pos, g1_pos); + r21 = is_enabled(f_cvc_pbc_minimum_image) ? + cvm::position_distance(g2_pos, g1_pos) : + g1_pos - g2_pos; r21l = r21.norm(); - r23 = cvm::position_distance(g2_pos, g3_pos); + r23 = is_enabled(f_cvc_pbc_minimum_image) ? + cvm::position_distance(g2_pos, g3_pos) : + g3_pos - g2_pos; r23l = r23.norm(); - cvm::real const cos_theta = (r21*r23)/(r21l*r23l); + cvm::real const cos_theta = (r21*r23)/(r21l*r23l); x.real_value = (180.0/PI) * std::acos(cos_theta); } @@ -166,9 +170,9 @@ colvar::dipole_angle::dipole_angle(cvm::atom const &a1, group1 = new cvm::atom_group(std::vector(1, a1)); group2 = new cvm::atom_group(std::vector(1, a2)); group3 = new cvm::atom_group(std::vector(1, a3)); - atom_groups.push_back(group1); - atom_groups.push_back(group2); - atom_groups.push_back(group3); + register_atom_group(group1); + register_atom_group(group2); + register_atom_group(group3); x.type(colvarvalue::type_scalar); } @@ -191,10 +195,12 @@ void colvar::dipole_angle::calc_value() r21 = group1->dipole(); r21l = r21.norm(); - r23 = cvm::position_distance(g2_pos, g3_pos); + r23 = is_enabled(f_cvc_pbc_minimum_image) ? 
+ cvm::position_distance(g2_pos, g3_pos) : + g3_pos - g2_pos; r23l = r23.norm(); - cvm::real const cos_theta = (r21*r23)/(r21l*r23l); + cvm::real const cos_theta = (r21*r23)/(r21l*r23l); x.real_value = (180.0/PI) * std::acos(cos_theta); } @@ -293,10 +299,10 @@ colvar::dihedral::dihedral(cvm::atom const &a1, group2 = new cvm::atom_group(std::vector(1, a2)); group3 = new cvm::atom_group(std::vector(1, a3)); group4 = new cvm::atom_group(std::vector(1, a4)); - atom_groups.push_back(group1); - atom_groups.push_back(group2); - atom_groups.push_back(group3); - atom_groups.push_back(group4); + register_atom_group(group1); + register_atom_group(group2); + register_atom_group(group3); + register_atom_group(group4); x.type(colvarvalue::type_scalar); @@ -324,9 +330,15 @@ void colvar::dihedral::calc_value() cvm::atom_pos const g4_pos = group4->center_of_mass(); // Usual sign convention: r12 = r2 - r1 - r12 = cvm::position_distance(g1_pos, g2_pos); - r23 = cvm::position_distance(g2_pos, g3_pos); - r34 = cvm::position_distance(g3_pos, g4_pos); + r12 = is_enabled(f_cvc_pbc_minimum_image) ? + cvm::position_distance(g1_pos, g2_pos) : + g2_pos - g1_pos; + r23 = is_enabled(f_cvc_pbc_minimum_image) ? + cvm::position_distance(g2_pos, g3_pos) : + g3_pos - g2_pos; + r34 = is_enabled(f_cvc_pbc_minimum_image) ? 
+ cvm::position_distance(g3_pos, g4_pos) : + g4_pos - g3_pos; cvm::rvector const n1 = cvm::rvector::outer(r12, r23); cvm::rvector const n2 = cvm::rvector::outer(r23, r34); @@ -365,10 +377,10 @@ void colvar::dihedral::calc_gradients() cvm::real const K = (1.0/sin_phi) * (180.0/PI); - f1 = K * cvm::rvector::outer(r23, dcosdA); - f3 = K * cvm::rvector::outer(dcosdB, r23); - f2 = K * (cvm::rvector::outer(dcosdA, r12) - + cvm::rvector::outer(r34, dcosdB)); + f1 = K * cvm::rvector::outer(r23, dcosdA); + f3 = K * cvm::rvector::outer(dcosdB, r23); + f2 = K * (cvm::rvector::outer(dcosdA, r12) + + cvm::rvector::outer(r34, dcosdB)); } else { rC = 1.0/rC; @@ -439,7 +451,7 @@ void colvar::dihedral::calc_force_invgrads() // Default case: use groups 1 and 4 group4->read_total_forces(); ft.real_value = PI/180.0 * 0.5 * (fact1 * (cross1 * group1->total_force()) - + fact4 * (cross4 * group4->total_force())); + + fact4 * (cross4 * group4->total_force())); } } @@ -510,3 +522,148 @@ void colvar::dihedral::wrap(colvarvalue &x) const return; } + + +colvar::polar_theta::polar_theta(std::string const &conf) + : cvc(conf) +{ + function_type = "polar_theta"; + enable(f_cvc_com_based); + + atoms = parse_group(conf, "atoms"); + init_total_force_params(conf); + x.type(colvarvalue::type_scalar); +} + + +colvar::polar_theta::polar_theta() +{ + function_type = "polar_theta"; + x.type(colvarvalue::type_scalar); +} + + +void colvar::polar_theta::calc_value() +{ + cvm::rvector pos = atoms->center_of_mass(); + r = atoms->center_of_mass().norm(); + // Internal values of theta and phi are radians + theta = (r > 0.) ? std::acos(pos.z / r) : 0.; + phi = std::atan2(pos.y, pos.x); + x.real_value = (180.0/PI) * theta; +} + + +void colvar::polar_theta::calc_gradients() +{ + if (r == 0.) 
+ atoms->set_weighted_gradient(cvm::rvector(0., 0., 0.)); + else + atoms->set_weighted_gradient(cvm::rvector( + (180.0/PI) * std::cos(theta) * std::cos(phi) / r, + (180.0/PI) * std::cos(theta) * std::sin(phi) / r, + (180.0/PI) * -std::sin(theta) / r)); +} + + +void colvar::polar_theta::apply_force(colvarvalue const &force) +{ + if (!atoms->noforce) + atoms->apply_colvar_force(force.real_value); +} + + +simple_scalar_dist_functions(polar_theta) + + +colvar::polar_phi::polar_phi(std::string const &conf) + : cvc(conf) +{ + function_type = "polar_phi"; + period = 360.0; + enable(f_cvc_com_based); + + atoms = parse_group(conf, "atoms"); + init_total_force_params(conf); + x.type(colvarvalue::type_scalar); +} + + +colvar::polar_phi::polar_phi() +{ + function_type = "polar_phi"; + period = 360.0; + x.type(colvarvalue::type_scalar); +} + + +void colvar::polar_phi::calc_value() +{ + cvm::rvector pos = atoms->center_of_mass(); + r = atoms->center_of_mass().norm(); + // Internal values of theta and phi are radians + theta = (r > 0.) ? std::acos(pos.z / r) : 0.; + phi = std::atan2(pos.y, pos.x); + x.real_value = (180.0/PI) * phi; +} + + +void colvar::polar_phi::calc_gradients() +{ + atoms->set_weighted_gradient(cvm::rvector( + (180.0/PI) * -std::sin(phi) / (r*std::sin(theta)), + (180.0/PI) * std::cos(phi) / (r*std::sin(theta)), + 0.)); +} + + +void colvar::polar_phi::apply_force(colvarvalue const &force) +{ + if (!atoms->noforce) + atoms->apply_colvar_force(force.real_value); +} + + +// Same as dihedral, for polar_phi + +cvm::real colvar::polar_phi::dist2(colvarvalue const &x1, + colvarvalue const &x2) const +{ + cvm::real diff = x1.real_value - x2.real_value; + diff = (diff < -180.0 ? diff + 360.0 : (diff > 180.0 ? diff - 360.0 : diff)); + return diff * diff; +} + + +colvarvalue colvar::polar_phi::dist2_lgrad(colvarvalue const &x1, + colvarvalue const &x2) const +{ + cvm::real diff = x1.real_value - x2.real_value; + diff = (diff < -180.0 ? diff + 360.0 : (diff > 180.0 ? 
diff - 360.0 : diff)); + return 2.0 * diff; +} + + +colvarvalue colvar::polar_phi::dist2_rgrad(colvarvalue const &x1, + colvarvalue const &x2) const +{ + cvm::real diff = x1.real_value - x2.real_value; + diff = (diff < -180.0 ? diff + 360.0 : (diff > 180.0 ? diff - 360.0 : diff)); + return (-2.0) * diff; +} + + +void colvar::polar_phi::wrap(colvarvalue &x) const +{ + if ((x.real_value - wrap_center) >= 180.0) { + x.real_value -= 360.0; + return; + } + + if ((x.real_value - wrap_center) < -180.0) { + x.real_value += 360.0; + return; + } + + return; +} diff --git a/lib/colvars/colvarcomp_coordnums.cpp b/lib/colvars/colvarcomp_coordnums.cpp index 987a16a816..369d489e27 100644 --- a/lib/colvars/colvarcomp_coordnums.cpp +++ b/lib/colvars/colvarcomp_coordnums.cpp @@ -87,8 +87,10 @@ colvar::coordnum::coordnum(std::string const &conf) group1 = parse_group(conf, "group1"); group2 = parse_group(conf, "group2"); - if (group1->b_dummy) - cvm::fatal_error("Error: only group2 is allowed to be a dummy atom\n"); + if (group1->b_dummy) { + cvm::error("Error: only group2 is allowed to be a dummy atom\n"); + return; + } bool const b_isotropic = get_keyval(conf, "cutoff", r0, cvm::real(4.0 * cvm::unit_angstrom())); @@ -99,6 +101,7 @@ colvar::coordnum::coordnum(std::string const &conf) if (b_isotropic) { cvm::error("Error: cannot specify \"cutoff\" and \"cutoff3\" at the same time.\n", INPUT_ERROR); + return; } b_anisotropic = true; @@ -115,6 +118,10 @@ colvar::coordnum::coordnum(std::string const &conf) cvm::error("Error: odd exponents provided, can only use even ones.\n", INPUT_ERROR); } + if (!is_enabled(f_cvc_pbc_minimum_image)) { + cvm::log("Warning: only minimum-image distances are used by this variable.\n"); + } + get_keyval(conf, "group2CenterOnly", b_group2_center_only, group2->b_dummy); } @@ -228,12 +235,13 @@ colvar::h_bond::h_bond(std::string const &conf) get_keyval(conf, "donor", d_num, -1); if ( (a_num == -1) || (d_num == -1) ) { - cvm::fatal_error("Error: either acceptor 
or donor undefined.\n"); + cvm::error("Error: either acceptor or donor undefined.\n"); + return; } cvm::atom acceptor = cvm::atom(a_num); cvm::atom donor = cvm::atom(d_num); - atom_groups.push_back(new cvm::atom_group); + register_atom_group(new cvm::atom_group); atom_groups[0]->add_atom(acceptor); atom_groups[0]->add_atom(donor); @@ -242,7 +250,8 @@ colvar::h_bond::h_bond(std::string const &conf) get_keyval(conf, "expDenom", ed, 8); if ( (en%2) || (ed%2) ) { - cvm::fatal_error("Error: odd exponents provided, can only use even ones.\n"); + cvm::error("Error: odd exponents provided, can only use even ones.\n"); + return; } if (cvm::debug()) @@ -258,7 +267,7 @@ colvar::h_bond::h_bond(cvm::atom const &acceptor, function_type = "h_bond"; x.type(colvarvalue::type_scalar); - atom_groups.push_back(new cvm::atom_group); + register_atom_group(new cvm::atom_group); atom_groups[0]->add_atom(acceptor); atom_groups[0]->add_atom(donor); } @@ -313,7 +322,12 @@ colvar::selfcoordnum::selfcoordnum(std::string const &conf) get_keyval(conf, "expDenom", ed, int(12)); if ( (en%2) || (ed%2) ) { - cvm::fatal_error("Error: odd exponents provided, can only use even ones.\n"); + cvm::error("Error: odd exponents provided, can only use even ones.\n"); + return; + } + + if (!is_enabled(f_cvc_pbc_minimum_image)) { + cvm::log("Warning: only minimum-image distances are used by this variable.\n"); } } @@ -364,8 +378,10 @@ colvar::groupcoordnum::groupcoordnum(std::string const &conf) x.type(colvarvalue::type_scalar); // group1 and group2 are already initialized by distance() - if (group1->b_dummy || group2->b_dummy) - cvm::fatal_error("Error: neither group can be a dummy atom\n"); + if (group1->b_dummy || group2->b_dummy) { + cvm::error("Error: neither group can be a dummy atom\n"); + return; + } bool const b_scale = get_keyval(conf, "cutoff", r0, cvm::real(4.0 * cvm::unit_angstrom())); @@ -373,9 +389,11 @@ colvar::groupcoordnum::groupcoordnum(std::string const &conf) if (get_keyval(conf, "cutoff3", 
r0_vec, cvm::rvector(4.0, 4.0, 4.0), parse_silent)) { - if (b_scale) - cvm::fatal_error("Error: cannot specify \"scale\" and " + if (b_scale) { + cvm::error("Error: cannot specify \"scale\" and " "\"scale3\" at the same time.\n"); + return; + } b_anisotropic = true; // remove meaningless negative signs if (r0_vec.x < 0.0) r0_vec.x *= -1.0; @@ -387,7 +405,12 @@ colvar::groupcoordnum::groupcoordnum(std::string const &conf) get_keyval(conf, "expDenom", ed, int(12)); if ( (en%2) || (ed%2) ) { - cvm::fatal_error("Error: odd exponents provided, can only use even ones.\n"); + cvm::error("Error: odd exponents provided, can only use even ones.\n"); + return; + } + + if (!is_enabled(f_cvc_pbc_minimum_image)) { + cvm::log("Warning: only minimum-image distances are used by this variable.\n"); } } diff --git a/lib/colvars/colvarcomp_distances.cpp b/lib/colvars/colvarcomp_distances.cpp index f46270246f..18d154515a 100644 --- a/lib/colvars/colvarcomp_distances.cpp +++ b/lib/colvars/colvarcomp_distances.cpp @@ -28,10 +28,6 @@ colvar::distance::distance(std::string const &conf) group1 = parse_group(conf, "group1"); group2 = parse_group(conf, "group2"); - if (get_keyval(conf, "forceNoPBC", b_no_PBC, false)) { - cvm::log("Computing distance using absolute positions (not minimal-image)"); - } - init_total_force_params(conf); x.type(colvarvalue::type_scalar); @@ -45,18 +41,17 @@ colvar::distance::distance() provide(f_cvc_inv_gradient); provide(f_cvc_Jacobian); enable(f_cvc_com_based); - b_no_PBC = false; x.type(colvarvalue::type_scalar); } void colvar::distance::calc_value() { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { dist_v = group2->center_of_mass() - group1->center_of_mass(); } else { dist_v = cvm::position_distance(group1->center_of_mass(), - group2->center_of_mass()); + group2->center_of_mass()); } x.real_value = dist_v.norm(); } @@ -107,6 +102,7 @@ colvar::distance_vec::distance_vec(std::string const &conf) { function_type = "distance_vec"; 
enable(f_cvc_com_based); + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_3vector); } @@ -116,17 +112,18 @@ colvar::distance_vec::distance_vec() { function_type = "distance_vec"; enable(f_cvc_com_based); + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_3vector); } void colvar::distance_vec::calc_value() { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { x.rvector_value = group2->center_of_mass() - group1->center_of_mass(); } else { x.rvector_value = cvm::position_distance(group1->center_of_mass(), - group2->center_of_mass()); + group2->center_of_mass()); } } @@ -214,10 +211,6 @@ colvar::distance_z::distance_z(std::string const &conf) fixed_axis = true; } - if (get_keyval(conf, "forceNoPBC", b_no_PBC, false)) { - cvm::log("Computing distance using absolute positions (not minimal-image)"); - } - init_total_force_params(conf); } @@ -236,22 +229,24 @@ colvar::distance_z::distance_z() void colvar::distance_z::calc_value() { if (fixed_axis) { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { dist_v = main->center_of_mass() - ref1->center_of_mass(); } else { dist_v = cvm::position_distance(ref1->center_of_mass(), - main->center_of_mass()); + main->center_of_mass()); } } else { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { dist_v = main->center_of_mass() - (0.5 * (ref1->center_of_mass() + ref2->center_of_mass())); axis = ref2->center_of_mass() - ref1->center_of_mass(); } else { dist_v = cvm::position_distance(0.5 * (ref1->center_of_mass() + - ref2->center_of_mass()), main->center_of_mass()); - axis = cvm::position_distance(ref1->center_of_mass(), ref2->center_of_mass()); + ref2->center_of_mass()), + main->center_of_mass()); + axis = cvm::position_distance(ref1->center_of_mass(), + ref2->center_of_mass()); } axis_norm = axis.norm(); axis = axis.unit(); @@ -268,16 +263,20 @@ void colvar::distance_z::calc_gradients() if (fixed_axis) { ref1->set_weighted_gradient(-1.0 * axis); } else { - if (b_no_PBC) { - 
ref1->set_weighted_gradient( 1.0 / axis_norm * (main->center_of_mass() - ref2->center_of_mass() - + if (!is_enabled(f_cvc_pbc_minimum_image)) { + ref1->set_weighted_gradient( 1.0 / axis_norm * + (main->center_of_mass() - ref2->center_of_mass() - x.real_value * axis )); - ref2->set_weighted_gradient( 1.0 / axis_norm * (ref1->center_of_mass() - main->center_of_mass() + + ref2->set_weighted_gradient( 1.0 / axis_norm * + (ref1->center_of_mass() - main->center_of_mass() + x.real_value * axis )); } else { ref1->set_weighted_gradient( 1.0 / axis_norm * ( - cvm::position_distance(ref2->center_of_mass(), main->center_of_mass()) - x.real_value * axis )); + cvm::position_distance(ref2->center_of_mass(), + main->center_of_mass()) - x.real_value * axis )); ref2->set_weighted_gradient( 1.0 / axis_norm * ( - cvm::position_distance(main->center_of_mass(), ref1->center_of_mass()) + x.real_value * axis )); + cvm::position_distance(main->center_of_mass(), + ref1->center_of_mass()) + x.real_value * axis )); } } } @@ -390,17 +389,18 @@ colvar::distance_xy::distance_xy() void colvar::distance_xy::calc_value() { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { dist_v = main->center_of_mass() - ref1->center_of_mass(); } else { dist_v = cvm::position_distance(ref1->center_of_mass(), - main->center_of_mass()); + main->center_of_mass()); } if (!fixed_axis) { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { v12 = ref2->center_of_mass() - ref1->center_of_mass(); } else { - v12 = cvm::position_distance(ref1->center_of_mass(), ref2->center_of_mass()); + v12 = cvm::position_distance(ref1->center_of_mass(), + ref2->center_of_mass()); } axis_norm = v12.norm(); axis = v12.unit(); @@ -425,10 +425,11 @@ void colvar::distance_xy::calc_gradients() ref1->set_weighted_gradient(-1.0 * x_inv * dist_v_ortho); main->set_weighted_gradient( x_inv * dist_v_ortho); } else { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { v13 = main->center_of_mass() - 
ref1->center_of_mass(); } else { - v13 = cvm::position_distance(ref1->center_of_mass(), main->center_of_mass()); + v13 = cvm::position_distance(ref1->center_of_mass(), + main->center_of_mass()); } A = (dist_v * axis) / axis_norm; @@ -480,6 +481,7 @@ colvar::distance_dir::distance_dir(std::string const &conf) { function_type = "distance_dir"; enable(f_cvc_com_based); + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_unit3vector); } @@ -489,13 +491,14 @@ colvar::distance_dir::distance_dir() { function_type = "distance_dir"; enable(f_cvc_com_based); + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_unit3vector); } void colvar::distance_dir::calc_value() { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { dist_v = group2->center_of_mass() - group1->center_of_mass(); } else { dist_v = cvm::position_distance(group1->center_of_mass(), @@ -539,22 +542,26 @@ cvm::real colvar::distance_dir::dist2(colvarvalue const &x1, colvarvalue colvar::distance_dir::dist2_lgrad(colvarvalue const &x1, colvarvalue const &x2) const { - return colvarvalue((x1.rvector_value - x2.rvector_value), colvarvalue::type_unit3vector); + return colvarvalue((x1.rvector_value - x2.rvector_value), colvarvalue::type_unit3vectorderiv); } colvarvalue colvar::distance_dir::dist2_rgrad(colvarvalue const &x1, colvarvalue const &x2) const { - return colvarvalue((x2.rvector_value - x1.rvector_value), colvarvalue::type_unit3vector); + return colvarvalue((x2.rvector_value - x1.rvector_value), colvarvalue::type_unit3vectorderiv); } colvar::distance_inv::distance_inv(std::string const &conf) - : distance(conf) + : cvc(conf) { function_type = "distance_inv"; + + group1 = parse_group(conf, "group1"); + group2 = parse_group(conf, "group2"); + get_keyval(conf, "exponent", exponent, 6); if (exponent%2) { cvm::error("Error: odd exponent provided, can only use even ones.\n"); @@ -589,7 +596,7 @@ colvar::distance_inv::distance_inv() void colvar::distance_inv::calc_value() { x.real_value = 
0.0; - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { for (cvm::atom_iter ai1 = group1->begin(); ai1 != group1->end(); ai1++) { for (cvm::atom_iter ai2 = group2->begin(); ai2 != group2->end(); ai2++) { cvm::rvector const dv = ai2->pos - ai1->pos; @@ -655,14 +662,11 @@ colvar::distance_pairs::distance_pairs(std::string const &conf) { function_type = "distance_pairs"; - if (get_keyval(conf, "forceNoPBC", b_no_PBC, false)) { - cvm::log("Computing distance using absolute positions (not minimal-image)"); - } - group1 = parse_group(conf, "group1"); group2 = parse_group(conf, "group2"); x.type(colvarvalue::type_vector); + enable(f_cvc_implicit_gradient); x.vector1d_value.resize(group1->size() * group2->size()); } @@ -670,6 +674,7 @@ colvar::distance_pairs::distance_pairs(std::string const &conf) colvar::distance_pairs::distance_pairs() { function_type = "distance_pairs"; + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_vector); } @@ -678,7 +683,7 @@ void colvar::distance_pairs::calc_value() { x.vector1d_value.resize(group1->size() * group2->size()); - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { size_t i1, i2; for (i1 = 0; i1 < group1->size(); i1++) { for (i2 = 0; i2 < group2->size(); i2++) { @@ -693,7 +698,8 @@ void colvar::distance_pairs::calc_value() size_t i1, i2; for (i1 = 0; i1 < group1->size(); i1++) { for (i2 = 0; i2 < group2->size(); i2++) { - cvm::rvector const dv = cvm::position_distance((*group1)[i1].pos, (*group2)[i2].pos); + cvm::rvector const dv = cvm::position_distance((*group1)[i1].pos, + (*group2)[i2].pos); cvm::real const d = dv.norm(); x.vector1d_value[i1*group2->size() + i2] = d; (*group1)[i1].grad = -1.0 * dv.unit(); @@ -712,7 +718,7 @@ void colvar::distance_pairs::calc_gradients() void colvar::distance_pairs::apply_force(colvarvalue const &force) { - if (b_no_PBC) { + if (!is_enabled(f_cvc_pbc_minimum_image)) { size_t i1, i2; for (i1 = 0; i1 < group1->size(); i1++) { for (i2 = 0; i2 < group2->size(); i2++) 
{ @@ -725,7 +731,8 @@ void colvar::distance_pairs::apply_force(colvarvalue const &force) size_t i1, i2; for (i1 = 0; i1 < group1->size(); i1++) { for (i2 = 0; i2 < group2->size(); i2++) { - cvm::rvector const dv = cvm::position_distance((*group1)[i1].pos, (*group2)[i2].pos); + cvm::rvector const dv = cvm::position_distance((*group1)[i1].pos, + (*group2)[i2].pos); (*group1)[i1].apply_force(force[i1*group2->size() + i2] * (-1.0) * dv.unit()); (*group2)[i2].apply_force(force[i1*group2->size() + i2] * dv.unit()); } @@ -999,7 +1006,7 @@ colvar::rmsd::rmsd(std::string const &conf) cvm::log("This is a standard minimum RMSD, derivatives of the optimal rotation " "will not be computed as they cancel out in the gradients."); - atoms->b_fit_gradients = false; + atoms->disable(f_ag_fit_gradients); // request the calculation of the derivatives of the rotation defined by the atom group atoms->rot.request_group1_gradients(atoms->size()); @@ -1191,8 +1198,8 @@ colvar::eigenvector::eigenvector(std::string const &conf) atoms->b_rotate = true; atoms->ref_pos = ref_pos; atoms->center_ref_pos(); - atoms->b_fit_gradients = false; // cancel out if group is fitted on itself - // and cvc is translationally invariant + atoms->disable(f_ag_fit_gradients); // cancel out if group is fitted on itself + // and cvc is translationally invariant // request the calculation of the derivatives of the rotation defined by the atom group atoms->rot.request_group1_gradients(atoms->size()); @@ -1207,8 +1214,9 @@ colvar::eigenvector::eigenvector(std::string const &conf) if (b_inline) { cvm::log("Using vector components from input file.\n"); if (eigenvec.size() != atoms->size()) { - cvm::fatal_error("Error: vector components do not " + cvm::error("Error: vector components do not " "match the number of requested atoms->\n"); + return; } } @@ -1422,6 +1430,7 @@ colvar::cartesian::cartesian(std::string const &conf) } x.type(colvarvalue::type_vector); + enable(f_cvc_implicit_gradient); 
x.vector1d_value.resize(atoms->size() * axes.size()); } diff --git a/lib/colvars/colvarcomp_protein.cpp b/lib/colvars/colvarcomp_protein.cpp index 393c7dcf9a..b8fc96cfad 100644 --- a/lib/colvars/colvarcomp_protein.cpp +++ b/lib/colvars/colvarcomp_protein.cpp @@ -20,15 +20,6 @@ // alpha component ////////////////////////////////////////////////////////////////////// - // FIXME: this will not make collect_gradients work - // because gradients in individual atom groups - // are those of the sub-cvcs (angle, hb), not those - // of this cvc (alpha) - // This is true of all cvcs with sub-cvcs, and those - // that do not calculate explicit gradients - // SO: we need a flag giving the availability of - // atomic gradients - colvar::alpha_angles::alpha_angles(std::string const &conf) : cvc(conf) { @@ -36,6 +27,7 @@ colvar::alpha_angles::alpha_angles(std::string const &conf) cvm::log("Initializing alpha_angles object.\n"); function_type = "alpha_angles"; + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_scalar); std::string segment_id; @@ -44,7 +36,7 @@ colvar::alpha_angles::alpha_angles(std::string const &conf) std::vector residues; { std::string residues_conf = ""; - key_lookup(conf, "residueRange", residues_conf); + key_lookup(conf, "residueRange", &residues_conf); if (residues_conf.size()) { std::istringstream is(residues_conf); int initial, final; @@ -57,12 +49,14 @@ colvar::alpha_angles::alpha_angles(std::string const &conf) } } } else { - cvm::fatal_error("Error: no residues defined in \"residueRange\".\n"); + cvm::error("Error: no residues defined in \"residueRange\".\n"); + return; } } if (residues.size() < 5) { - cvm::fatal_error("Error: not enough residues defined in \"residueRange\".\n"); + cvm::error("Error: not enough residues defined in \"residueRange\".\n"); + return; } std::string const &sid = segment_id; @@ -71,7 +65,8 @@ colvar::alpha_angles::alpha_angles(std::string const &conf) get_keyval(conf, "hBondCoeff", hb_coeff, 0.5); if ( (hb_coeff < 
0.0) || (hb_coeff > 1.0) ) { - cvm::fatal_error("Error: hBondCoeff must be defined between 0 and 1.\n"); + cvm::error("Error: hBondCoeff must be defined between 0 and 1.\n"); + return; } @@ -84,9 +79,9 @@ colvar::alpha_angles::alpha_angles(std::string const &conf) theta.push_back(new colvar::angle(cvm::atom(r[i ], "CA", sid), cvm::atom(r[i+1], "CA", sid), cvm::atom(r[i+2], "CA", sid))); - atom_groups.push_back(theta.back()->atom_groups[0]); - atom_groups.push_back(theta.back()->atom_groups[1]); - atom_groups.push_back(theta.back()->atom_groups[2]); + register_atom_group(theta.back()->atom_groups[0]); + register_atom_group(theta.back()->atom_groups[1]); + register_atom_group(theta.back()->atom_groups[2]); } } else { @@ -106,7 +101,7 @@ colvar::alpha_angles::alpha_angles(std::string const &conf) hb.push_back(new colvar::h_bond(cvm::atom(r[i ], "O", sid), cvm::atom(r[i+4], "N", sid), r0, en, ed)); - atom_groups.push_back(hb.back()->atom_groups[0]); + register_atom_group(hb.back()->atom_groups[0]); } } else { @@ -123,6 +118,7 @@ colvar::alpha_angles::alpha_angles() : cvc() { function_type = "alpha_angles"; + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_scalar); } @@ -239,15 +235,6 @@ simple_scalar_dist_functions(alpha_angles) // dihedral principal component ////////////////////////////////////////////////////////////////////// - // FIXME: this will not make collect_gradients work - // because gradients in individual atom groups - // are those of the sub-cvcs (dihedral), not those - // of this cvc - // This is true of all cvcs with sub-cvcs, and those - // that do not calculate explicit gradients - // SO: we need a flag giving the availability of - // atomic gradients - colvar::dihedPC::dihedPC(std::string const &conf) : cvc(conf) { @@ -255,6 +242,7 @@ colvar::dihedPC::dihedPC(std::string const &conf) cvm::log("Initializing dihedral PC object.\n"); function_type = "dihedPC"; + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_scalar); std::string 
segment_id; @@ -263,7 +251,7 @@ colvar::dihedPC::dihedPC(std::string const &conf) std::vector residues; { std::string residues_conf = ""; - key_lookup(conf, "residueRange", residues_conf); + key_lookup(conf, "residueRange", &residues_conf); if (residues_conf.size()) { std::istringstream is(residues_conf); int initial, final; @@ -276,12 +264,14 @@ colvar::dihedPC::dihedPC(std::string const &conf) } } } else { - cvm::fatal_error("Error: no residues defined in \"residueRange\".\n"); + cvm::error("Error: no residues defined in \"residueRange\".\n"); + return; } } if (residues.size() < 2) { - cvm::fatal_error("Error: dihedralPC requires at least two residues.\n"); + cvm::error("Error: dihedralPC requires at least two residues.\n"); + return; } std::string const &sid = segment_id; @@ -291,13 +281,16 @@ colvar::dihedPC::dihedPC(std::string const &conf) int vecNumber; if (get_keyval(conf, "vectorFile", vecFileName, vecFileName)) { get_keyval(conf, "vectorNumber", vecNumber, 0); - if (vecNumber < 1) - cvm::fatal_error("A positive value of vectorNumber is required."); + if (vecNumber < 1) { + cvm::error("A positive value of vectorNumber is required."); + return; + } std::ifstream vecFile; vecFile.open(vecFileName.c_str()); - if (!vecFile.good()) - cvm::fatal_error("Error opening dihedral PCA vector file " + vecFileName + " for reading"); + if (!vecFile.good()) { + cvm::error("Error opening dihedral PCA vector file " + vecFileName + " for reading"); + } // TODO: adapt to different formats by setting this flag bool eigenvectors_as_columns = true; @@ -321,8 +314,9 @@ colvar::dihedPC::dihedPC(std::string const &conf) for (int i = 1; iatom_groups[0]); - atom_groups.push_back(theta.back()->atom_groups[1]); - atom_groups.push_back(theta.back()->atom_groups[2]); - atom_groups.push_back(theta.back()->atom_groups[3]); + register_atom_group(theta.back()->atom_groups[0]); + register_atom_group(theta.back()->atom_groups[1]); + register_atom_group(theta.back()->atom_groups[2]); + 
register_atom_group(theta.back()->atom_groups[3]); // Phi (next res) theta.push_back(new colvar::dihedral(cvm::atom(r[i ], "C", sid), cvm::atom(r[i+1], "N", sid), cvm::atom(r[i+1], "CA", sid), cvm::atom(r[i+1], "C", sid))); - atom_groups.push_back(theta.back()->atom_groups[0]); - atom_groups.push_back(theta.back()->atom_groups[1]); - atom_groups.push_back(theta.back()->atom_groups[2]); - atom_groups.push_back(theta.back()->atom_groups[3]); + register_atom_group(theta.back()->atom_groups[0]); + register_atom_group(theta.back()->atom_groups[1]); + register_atom_group(theta.back()->atom_groups[2]); + register_atom_group(theta.back()->atom_groups[3]); } if (cvm::debug()) @@ -377,6 +372,7 @@ colvar::dihedPC::dihedPC() : cvc() { function_type = "dihedPC"; + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_scalar); } diff --git a/lib/colvars/colvarcomp_rotations.cpp b/lib/colvars/colvarcomp_rotations.cpp index 936e770169..2650a9fe18 100644 --- a/lib/colvars/colvarcomp_rotations.cpp +++ b/lib/colvars/colvarcomp_rotations.cpp @@ -22,6 +22,7 @@ colvar::orientation::orientation(std::string const &conf) { function_type = "orientation"; atoms = parse_group(conf, "atoms"); + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_quaternion); ref_pos.reserve(atoms->size()); @@ -29,8 +30,9 @@ colvar::orientation::orientation(std::string const &conf) if (get_keyval(conf, "refPositions", ref_pos, ref_pos)) { cvm::log("Using reference positions from input file.\n"); if (ref_pos.size() != atoms->size()) { - cvm::fatal_error("Error: reference positions do not " + cvm::error("Error: reference positions do not " "match the number of requested atoms.\n"); + return; } } @@ -43,9 +45,11 @@ colvar::orientation::orientation(std::string const &conf) if (get_keyval(conf, "refPositionsCol", file_col, std::string(""))) { // use PDB flags if column is provided bool found = get_keyval(conf, "refPositionsColValue", file_col_value, 0.0); - if (found && file_col_value==0.0) - 
cvm::fatal_error("Error: refPositionsColValue, " + if (found && file_col_value==0.0) { + cvm::error("Error: refPositionsColValue, " "if provided, must be non-zero.\n"); + return; + } } else { // if not, use atom indices atoms->create_sorted_ids(); @@ -56,8 +60,9 @@ colvar::orientation::orientation(std::string const &conf) } if (!ref_pos.size()) { - cvm::fatal_error("Error: must define a set of " + cvm::error("Error: must define a set of " "reference coordinates.\n"); + return; } @@ -88,6 +93,7 @@ colvar::orientation::orientation() : cvc() { function_type = "orientation"; + enable(f_cvc_implicit_gradient); x.type(colvarvalue::type_quaternion); } diff --git a/lib/colvars/colvardeps.cpp b/lib/colvars/colvardeps.cpp index 8252f77e62..8f241a6255 100644 --- a/lib/colvars/colvardeps.cpp +++ b/lib/colvars/colvardeps.cpp @@ -10,24 +10,77 @@ #include "colvardeps.h" +colvardeps::colvardeps() + : time_step_factor (1) {} colvardeps::~colvardeps() { size_t i; - // Do not delete features if it's static -// for (i=0; idescription); } } + + // Do not delete features if it's a static object + // may change in the future though +// for (i=0; irequires_children.size(); i++) { + int g = features()[fid]->requires_children[i]; + for (j=0; jfeatures()[g]->description); + children[j]->decr_ref_count(g); + } + } + } + } + cvm::decrease_depth(); +} + + +// re-enable children features (and increase ref count accordingly) +// So free_children_deps() can be called whenever an object becomes inactive +void colvardeps::restore_children_deps() { + size_t i,j,fid; + + cvm::increase_depth(); + for (fid = 0; fid < feature_states.size(); fid++) { + if (is_enabled(fid)) { + for (i=0; irequires_children.size(); i++) { + int g = features()[fid]->requires_children[i]; + for (j=0; jfeatures()[g]->description); + children[j]->enable(g, false, false); + } + } + } + } + cvm::decrease_depth(); } @@ -37,15 +90,10 @@ void colvardeps::provide(int feature_id, bool truefalse) { void colvardeps::set_enabled(int 
feature_id, bool truefalse) { -// if (!is_static(feature_id)) { -// cvm::error("Cannot set feature " + features()[feature_id]->description + " statically in " + description + ".\n"); -// return; -// } if (truefalse) { - // Resolve dependencies too enable(feature_id); } else { - feature_states[feature_id].enabled = false; + disable(feature_id); } } @@ -56,7 +104,7 @@ bool colvardeps::get_keyval_feature(colvarparse *cvp, colvarparse::Parse_Mode const parse_mode) { if (!is_user(feature_id)) { - cvm::error("Cannot set feature " + features()[feature_id]->description + " from user input in " + description + ".\n"); + cvm::error("Cannot set feature \"" + features()[feature_id]->description + "\" from user input in \"" + description + "\".\n"); return false; } bool value; @@ -83,21 +131,34 @@ int colvardeps::enable(int feature_id, if (cvm::debug()) { cvm::log("DEPS: " + description + - (dry_run ? " testing " : " requiring ") + + (dry_run ? " testing " : " enabling ") + "\"" + f->description +"\""); } if (fs->enabled) { - // Do not try to solve deps if already enabled + if (!(dry_run || toplevel)) { + // This is a dependency + // Prevent disabling this feature as long + // as requirement is enabled + fs->ref_count++; + if (cvm::debug()) + cvm::log("DEPS: bumping ref_count to " + cvm::to_str(fs->ref_count)); + } + // Do not try to further resolve deps return COLVARS_OK; } + std::string feature_type_descr = is_static(feature_id) ? "Static" : + (is_dynamic(feature_id) ? 
"Dynamic" : "User-controlled"); + if (!fs->available) { if (!dry_run) { if (toplevel) { - cvm::error("Error: Feature unavailable: \"" + f->description + "\" in " + description + "."); + cvm::error("Error: " + feature_type_descr + " feature unavailable: \"" + + f->description + "\" in " + description + "."); } else { - cvm::log("Feature unavailable: \"" + f->description + "\" in " + description); + cvm::log(feature_type_descr + " feature unavailable: \"" + + f->description + "\" in " + description + "."); } } return COLVARS_ERROR; @@ -105,21 +166,22 @@ int colvardeps::enable(int feature_id, if (!toplevel && !is_dynamic(feature_id)) { if (!dry_run) { - cvm::log("Non-dynamic feature : \"" + f->description - + "\" in " + description + " may not be enabled as a dependency.\n"); + cvm::log(feature_type_descr + " feature \"" + f->description + + "\" may not be enabled as a dependency in " + description + ".\n"); } return COLVARS_ERROR; } // 1) enforce exclusions + // reminder: exclusions must be mutual for this to work for (i=0; irequires_exclude.size(); i++) { feature *g = features()[f->requires_exclude[i]]; if (cvm::debug()) cvm::log(f->description + " requires exclude " + g->description); if (is_enabled(f->requires_exclude[i])) { if (!dry_run) { - cvm::log("Features \"" + f->description + "\" is incompatible with \"" - + g->description + "\" in " + description); + cvm::log("Feature \"" + f->description + "\" is incompatible with \"" + + g->description + "\" in " + description + "."); if (toplevel) { cvm::error("Error: Failed dependency in " + description + "."); } @@ -156,23 +218,27 @@ int colvardeps::enable(int feature_id, res = enable(g, true, false); // see if available if (res == COLVARS_OK) { ok = true; - if (!dry_run) enable(g, false, false); // Require again, for real + if (!dry_run) { + enable(g, false, false); // Require again, for real + fs->alternate_refs.push_back(g); // We remember we enabled this + // so we can free it if this feature gets disabled + } 
break; } } if (!ok) { if (!dry_run) { - cvm::log("No dependency satisfied among alternates:"); - cvm::log("-----------------------------------------"); + cvm::log("\"" + f->description + "\" in " + description + + " requires one of the following features, none of which can be enabled:\n"); + cvm::log("-----------------------------------------\n"); + cvm::increase_depth(); for (j=0; jrequires_alt[i].size(); j++) { int g = f->requires_alt[i][j]; cvm::log(cvm::to_str(j+1) + ". " + features()[g]->description); - cvm::increase_depth(); enable(g, false, false); // Just for printing error output - cvm::decrease_depth(); } + cvm::decrease_depth(); cvm::log("-----------------------------------------"); - cvm::log("for \"" + f->description + "\" in " + description); if (toplevel) { cvm::error("Error: Failed dependency in " + description + "."); } @@ -182,12 +248,13 @@ int colvardeps::enable(int feature_id, } // 4) solve deps in children + // if the object is inactive, we solve but do not enable: will be enabled + // when the object becomes active + cvm::increase_depth(); for (i=0; irequires_children.size(); i++) { int g = f->requires_children[i]; for (j=0; jenable(g, dry_run, false); - cvm::decrease_depth(); + res = children[j]->enable(g, dry_run || !is_enabled(), false); if (res != COLVARS_OK) { if (!dry_run) { cvm::log("...required by \"" + f->description + "\" in " + description); @@ -198,28 +265,117 @@ int colvardeps::enable(int feature_id, return res; } } - // If we've just touched the features of child objects, refresh them - if (!dry_run && f->requires_children.size() != 0) { - for (j=0; jrefresh_deps(); - } - } } + cvm::decrease_depth(); // Actually enable feature only once everything checks out - if (!dry_run) fs->enabled = true; + if (!dry_run) { + fs->enabled = true; + // This should be the only reference + if (!toplevel) fs->ref_count = 1; + if (feature_id == 0) { + // Waking up this object, enable all deps in children + restore_children_deps(); + } + 
do_feature_side_effects(feature_id); + if (cvm::debug()) + cvm::log("DEPS: feature \"" + f->description + "\" in " + + description + " enabled, ref_count = 1."); + } return COLVARS_OK; } -// disable() { -// -// // we need refs to parents to walk up the deps tree! -// // or refresh -// } +int colvardeps::disable(int feature_id) { + size_t i, j; + feature *f = features()[feature_id]; + feature_state *fs = &feature_states[feature_id]; + + if (cvm::debug()) cvm::log("DEPS: disabling feature \"" + + f->description + "\" in " + description); + + if (fs->enabled == false) { + return COLVARS_OK; + } + + if (fs->ref_count > 1) { + cvm::error("Error: cannot disable feature \"" + f->description + + "\" in " + description + " because of " + cvm::to_str(fs->ref_count-1) + + " remaining references.\n" ); + return COLVARS_ERROR; + } + + // internal deps (self) + for (i=0; irequires_self.size(); i++) { + if (cvm::debug()) cvm::log("DEPS: dereferencing self " + + features()[f->requires_self[i]]->description); + decr_ref_count(f->requires_self[i]); + } + + // alternates + for (i=0; ialternate_refs.size(); i++) { + if (cvm::debug()) cvm::log("DEPS: dereferencing alt " + + features()[fs->alternate_refs[i]]->description); + decr_ref_count(fs->alternate_refs[i]); + } + // Forget these, now that they are dereferenced + fs->alternate_refs.clear(); + + // deps in children + // except if the object is inactive, then children dependencies + // have already been dereferenced by this function + // (or never referenced if feature was enabled while the object + // was inactive) + if (is_enabled()) { + cvm::increase_depth(); + for (i=0; irequires_children.size(); i++) { + int g = f->requires_children[i]; + for (j=0; jfeatures()[g]->description); + children[j]->decr_ref_count(g); + } + } + cvm::decrease_depth(); + } + + fs->enabled = false; + fs->ref_count = 0; + if (feature_id == 0) { + // Putting this object to sleep + free_children_deps(); + } + return COLVARS_OK; +} + +int 
colvardeps::decr_ref_count(int feature_id) { + int &rc = feature_states[feature_id].ref_count; + feature *f = features()[feature_id]; + + if (cvm::debug()) + cvm::log("DEPS: decreasing reference count of \"" + f->description + + "\" in " + description + ".\n"); + + if (rc <= 0) { + cvm::error("Error: cannot decrease reference count of feature \"" + f->description + + "\" in " + description + ", which is " + cvm::to_str(rc) + ".\n"); + return COLVARS_ERROR; + } + + rc--; + if (rc == 0 && f->is_dynamic()) { + // we can auto-disable this feature + if (cvm::debug()) + cvm::log("DEPS will now auto-disable dynamic feature \"" + f->description + + "\" in " + description + ".\n"); + disable(feature_id); + } + return COLVARS_OK; +} + void colvardeps::init_feature(int feature_id, const char *description, feature_type type) { - features()[feature_id]->description = description; - features()[feature_id]->type = type; + modify_features()[feature_id]->description = description; + modify_features()[feature_id]->type = type; } // Shorthand macros for describing dependencies @@ -235,17 +391,25 @@ void colvardeps::init_feature(int feature_id, const char *description, feature_t features()[f]->requires_alt.back()[0] = g; \ features()[f]->requires_alt.back()[1] = h; \ features()[f]->requires_alt.back()[2] = i +#define f_req_alt4(f, g, h, i, j) features()[f]->requires_alt.push_back(std::vector(4));\ + features()[f]->requires_alt.back()[0] = g; \ + features()[f]->requires_alt.back()[1] = h; \ + features()[f]->requires_alt.back()[2] = i; \ + features()[f]->requires_alt.back()[3] = j void colvardeps::init_cvb_requires() { int i; if (features().size() == 0) { for (i = 0; i < f_cvb_ntot; i++) { - features().push_back(new feature); + modify_features().push_back(new feature); } init_feature(f_cvb_active, "active", f_type_dynamic); f_req_children(f_cvb_active, f_cv_active); + init_feature(f_cvb_awake, "awake", f_type_static); + f_req_self(f_cvb_awake, f_cvb_active); + 
init_feature(f_cvb_apply_force, "apply force", f_type_user); f_req_children(f_cvb_apply_force, f_cv_gradient); @@ -274,13 +438,16 @@ void colvardeps::init_cv_requires() { size_t i; if (features().size() == 0) { for (i = 0; i < f_cv_ntot; i++) { - features().push_back(new feature); + modify_features().push_back(new feature); } init_feature(f_cv_active, "active", f_type_dynamic); - f_req_children(f_cv_active, f_cvc_active); - // Colvars must be either a linear combination, or scalar (and polynomial) or scripted - f_req_alt3(f_cv_active, f_cv_scalar, f_cv_linear, f_cv_scripted); + // Do not require f_cvc_active in children, as some components may be disabled + // Colvars must be either a linear combination, or scalar (and polynomial) or scripted/custom + f_req_alt4(f_cv_active, f_cv_scalar, f_cv_linear, f_cv_scripted, f_cv_custom_function); + + init_feature(f_cv_awake, "awake", f_type_static); + f_req_self(f_cv_awake, f_cv_active); init_feature(f_cv_gradient, "gradient", f_type_dynamic); f_req_children(f_cv_gradient, f_cvc_gradient); @@ -288,8 +455,10 @@ void colvardeps::init_cv_requires() { init_feature(f_cv_collect_gradient, "collect gradient", f_type_dynamic); f_req_self(f_cv_collect_gradient, f_cv_gradient); f_req_self(f_cv_collect_gradient, f_cv_scalar); + // The following exlusion could be lifted by implementing the feature + f_req_exclude(f_cv_collect_gradient, f_cv_scripted); - init_feature(f_cv_fdiff_velocity, "fdiff_velocity", f_type_dynamic); + init_feature(f_cv_fdiff_velocity, "velocity from finite differences", f_type_dynamic); // System force: either trivial (spring force); through extended Lagrangian, or calculated explicitly init_feature(f_cv_total_force, "total force", f_type_dynamic); @@ -335,6 +504,9 @@ void colvardeps::init_cv_requires() { init_feature(f_cv_subtract_applied_force, "subtract applied force from total force", f_type_user); f_req_self(f_cv_subtract_applied_force, f_cv_total_force); + // There is no well-defined way to implement 
f_cv_subtract_applied_force + // in the case of extended-Lagrangian colvars + f_req_exclude(f_cv_subtract_applied_force, f_cv_extended_Lagrangian); init_feature(f_cv_lower_boundary, "lower boundary", f_type_user); f_req_self(f_cv_lower_boundary, f_cv_scalar); @@ -350,12 +522,21 @@ void colvardeps::init_cv_requires() { init_feature(f_cv_corrfunc, "correlation function", f_type_user); - init_feature(f_cv_scripted, "scripted", f_type_static); + init_feature(f_cv_scripted, "scripted", f_type_user); + + init_feature(f_cv_custom_function, "custom function", f_type_user); + f_req_exclude(f_cv_custom_function, f_cv_scripted); + init_feature(f_cv_periodic, "periodic", f_type_static); f_req_self(f_cv_periodic, f_cv_homogeneous); init_feature(f_cv_scalar, "scalar", f_type_static); init_feature(f_cv_linear, "linear", f_type_static); init_feature(f_cv_homogeneous, "homogeneous", f_type_static); + + // because total forces are obtained from the previous time step, + // we cannot (currently) have colvar values and total forces for the same timestep + init_feature(f_cv_multiple_ts, "multiple timestep colvar"); + f_req_exclude(f_cv_multiple_ts, f_cv_total_force_calc); } // Initialize feature_states for each instance @@ -365,23 +546,6 @@ void colvardeps::init_cv_requires() { // Most features are available, so we set them so // and list exceptions below } - -// // properties that may NOT be enabled as a dependency -// // This will be deprecated by feature types -// int unavailable_deps[] = { -// f_cv_lower_boundary, -// f_cv_upper_boundary, -// f_cv_extended_Lagrangian, -// f_cv_Langevin, -// f_cv_scripted, -// f_cv_periodic, -// f_cv_scalar, -// f_cv_linear, -// f_cv_homogeneous -// }; -// for (i = 0; i < sizeof(unavailable_deps) / sizeof(unavailable_deps[0]); i++) { -// feature_states[unavailable_deps[i]].available = false; -// } } @@ -390,7 +554,7 @@ void colvardeps::init_cvc_requires() { // Initialize static array once and for all if (features().size() == 0) { for (i = 0; i < 
colvardeps::f_cvc_ntot; i++) { - features().push_back(new feature); + modify_features().push_back(new feature); } init_feature(f_cvc_active, "active", f_type_dynamic); @@ -401,20 +565,26 @@ void colvardeps::init_cvc_requires() { init_feature(f_cvc_gradient, "gradient", f_type_dynamic); + init_feature(f_cvc_implicit_gradient, "implicit gradient", f_type_static); + f_req_children(f_cvc_implicit_gradient, f_ag_implicit_gradient); + init_feature(f_cvc_inv_gradient, "inverse gradient", f_type_dynamic); f_req_self(f_cvc_inv_gradient, f_cvc_gradient); init_feature(f_cvc_debug_gradient, "debug gradient", f_type_user); f_req_self(f_cvc_debug_gradient, f_cvc_gradient); + f_req_exclude(f_cvc_debug_gradient, f_cvc_implicit_gradient); init_feature(f_cvc_Jacobian, "Jacobian derivative", f_type_dynamic); f_req_self(f_cvc_Jacobian, f_cvc_inv_gradient); init_feature(f_cvc_com_based, "depends on group centers of mass", f_type_static); + // init_feature(f_cvc_pbc_minimum_image, "use minimum-image distances with PBCs", f_type_user); + // Compute total force on first site only to avoid unwanted // coupling to other colvars (see e.g. 
Ciccotti et al., 2005) - init_feature(f_cvc_one_site_total_force, "compute total collective force only from one group center", f_type_user); + init_feature(f_cvc_one_site_total_force, "compute total force from one group", f_type_user); f_req_self(f_cvc_one_site_total_force, f_cvc_com_based); init_feature(f_cvc_scalable, "scalable calculation", f_type_static); @@ -438,11 +608,17 @@ void colvardeps::init_cvc_requires() { feature_states.push_back(feature_state(avail, false)); } + // CVCs are enabled from the start - get disabled based on flags + feature_states[f_cvc_active].enabled = true; + // Features that are implemented by all cvcs by default // Each cvc specifies what other features are available feature_states[f_cvc_active].available = true; feature_states[f_cvc_gradient].available = true; + // Use minimum-image distances by default + feature_states[f_cvc_pbc_minimum_image].enabled = true; + // Features that are implemented by default if their requirements are feature_states[f_cvc_one_site_total_force].available = true; @@ -457,15 +633,17 @@ void colvardeps::init_ag_requires() { // Initialize static array once and for all if (features().size() == 0) { for (i = 0; i < f_ag_ntot; i++) { - features().push_back(new feature); + modify_features().push_back(new feature); } init_feature(f_ag_active, "active", f_type_dynamic); init_feature(f_ag_center, "translational fit", f_type_static); init_feature(f_ag_rotate, "rotational fit", f_type_static); init_feature(f_ag_fitting_group, "reference positions group", f_type_static); - init_feature(f_ag_fit_gradient_group, "fit gradient for main group", f_type_static); - init_feature(f_ag_fit_gradient_ref, "fit gradient for reference group", f_type_static); + init_feature(f_ag_implicit_gradient, "implicit atom gradient", f_type_dynamic); + init_feature(f_ag_fit_gradients, "fit gradients", f_type_user); + f_req_exclude(f_ag_fit_gradients, f_ag_implicit_gradient); + init_feature(f_ag_atom_forces, "atomic forces", f_type_dynamic); // 
parallel calculation implies that we have at least a scalable center of mass, @@ -493,29 +671,50 @@ void colvardeps::init_ag_requires() { feature_states[f_ag_scalable_com].available = false; // TODO make f_ag_scalable depend on f_ag_scalable_com (or something else) feature_states[f_ag_scalable].available = true; + feature_states[f_ag_fit_gradients].available = true; + feature_states[f_ag_implicit_gradient].available = true; } void colvardeps::print_state() { size_t i; - cvm::log("Enabled features of " + description); + cvm::log("Enabled features of \"" + description + "\" (with reference count)"); for (i = 0; i < feature_states.size(); i++) { - if (feature_states[i].enabled) - cvm::log("- " + features()[i]->description); + if (is_enabled(i)) + cvm::log("- " + features()[i]->description + " (" + + cvm::to_str(feature_states[i].ref_count) + ")"); } + cvm::increase_depth(); for (i=0; iprint_state(); - cvm::decrease_depth(); } + cvm::decrease_depth(); } void colvardeps::add_child(colvardeps *child) { + children.push_back(child); child->parents.push_back((colvardeps *)this); + + // Solve dependencies of already enabled parent features + // in the new child + + size_t i, fid; + cvm::increase_depth(); + for (fid = 0; fid < feature_states.size(); fid++) { + if (is_enabled(fid)) { + for (i=0; irequires_children.size(); i++) { + int g = features()[fid]->requires_children[i]; + if (cvm::debug()) cvm::log("DEPS: re-enabling children's " + + child->features()[g]->description); + child->enable(g, false, false); + } + } + } + cvm::decrease_depth(); } diff --git a/lib/colvars/colvardeps.h b/lib/colvars/colvardeps.h index fd07cb6457..dfb10d00e4 100644 --- a/lib/colvars/colvardeps.h +++ b/lib/colvars/colvardeps.h @@ -23,10 +23,14 @@ /// 3. Static features are static properties of the object, determined /// programatically at initialization time. /// +/// In all classes, feature 0 is active. 
When an object is inactivated +/// all its children dependencies are dereferenced (free_children_deps) +/// While the object is inactive, no dependency solving is done on children +/// it is done when the object is activated back (restore_children_deps) class colvardeps { public: - colvardeps() {} + colvardeps(); virtual ~colvardeps(); // Subclasses should initialize the following members: @@ -34,9 +38,10 @@ public: std::string description; // reference to object name (cv, cvc etc.) /// This contains the current state of each feature for each object + // since the feature class only contains static properties struct feature_state { feature_state(bool a, bool e) - : available(a), enabled(e) {} + : available(a), enabled(e), ref_count(0) {} /// Feature may be enabled, subject to possible dependencies bool available; @@ -44,9 +49,28 @@ public: /// TODO consider implications for dependency solving: anyone who disables /// it should trigger a refresh of parent objects bool enabled; // see if this should be private depending on implementation + // bool enabledOnce; // this should trigger an update when object is evaluated + + /// Number of features requiring this one as a dependency + /// When it falls to zero: + /// - a dynamic feature is disabled automatically + /// - other features may be disabled statically + int ref_count; + /// List of features that were enabled by this one + /// as part of an alternate requirement (for ref counting purposes) + /// This is necessary because we don't know which feature in the list + /// we enabled, otherwise + std::vector alternate_refs; }; +protected: + /// Time step multiplier (for coarse-timestep biases & colvars) + /// Biases and colvars will only be calculated at those times + /// (f_cvb_awake and f_cv_awake); a + /// Biases use this to apply "impulse" biasing forces at the outer timestep + /// Unused by lower-level objects (cvcs and atom groups) + int time_step_factor; private: /// List of the states of all features @@ -61,10 
+85,13 @@ private: }; public: + /// \brief returns time_step_factor + inline int get_time_step_factor() const {return time_step_factor;} + /// Pair a numerical feature ID with a description and type void init_feature(int feature_id, const char *description, feature_type type = f_type_not_set); - /// Describes a feature and its dependecies + /// Describes a feature and its dependencies /// used in a static array within each subclass class feature { @@ -108,7 +135,8 @@ public: // with a non-static array // Intermediate classes (colvarbias and colvarcomp, which are also base classes) // implement this as virtual to allow overriding - virtual std::vector&features() = 0; + virtual const std::vector&features() = 0; + virtual std::vector&modify_features() = 0; void add_child(colvardeps *child); @@ -120,30 +148,16 @@ public: private: - // pointers to objects this object depends on - // list should be maintained by any code that modifies the object - // this could be secured by making lists of colvars / cvcs / atom groups private and modified through accessor functions + /// pointers to objects this object depends on + /// list should be maintained by any code that modifies the object + /// this could be secured by making lists of colvars / cvcs / atom groups private and modified through accessor functions std::vector children; - // pointers to objects that depend on this object - // the size of this array is in effect a reference counter + /// pointers to objects that depend on this object + /// the size of this array is in effect a reference counter std::vector parents; public: - // disabling a feature f: - // if parents depend on f, tell them to refresh / check that they are ok? 
- // if children provide features to satisfy f ONLY, disable that - - // When the state of this object has changed, recursively tell parents - // to enforce their dependencies -// void refresh_parents() { -// -// } - - // std::vector parents; // Needed to trigger a refresh if capabilities of this object change - - // End of members to be initialized by subclasses - // Checks whether given feature is enabled // Defaults to querying f_*_active inline bool is_enabled(int f = f_cv_active) const { @@ -161,9 +175,7 @@ public: /// dependencies will be checked by enable() void provide(int feature_id, bool truefalse = true); - /// Set the feature's enabled flag, without dependency check or resolution - /// To be used for static properties only - /// Checking for availability is up to the caller + /// Enable or disable, depending on flag value void set_enabled(int feature_id, bool truefalse = true); protected: @@ -178,31 +190,57 @@ protected: public: - int enable(int f, bool dry_run = false, bool toplevel = true); // enable a feature and recursively solve its dependencies - // dry_run is set to true to recursively test if a feature is available, without enabling it -// int disable(int f); + /// enable a feature and recursively solve its dependencies + /// for proper reference counting, one should not add + /// spurious calls to enable() + /// dry_run is set to true to recursively test if a feature is available, without enabling it + int enable(int f, bool dry_run = false, bool toplevel = true); + /// Disable a feature, decrease the reference count of its dependencies + /// and recursively disable them as applicable + int disable(int f); - /// This function is called whenever feature states are changed outside - /// of the object's control, that is, by parents - /// Eventually it may also be used when properties of children change - virtual int refresh_deps() { return COLVARS_OK; } + /// disable all enabled features to free their dependencies + /// to be done when deleting 
the object + /// Cannot be in the base class destructor because it needs the derived class features() + void free_children_deps(); + + /// re-enable children features (to be used when object becomes active) + void restore_children_deps(); + + /// Decrement the reference count of a feature + /// disabling it if it's dynamic and count reaches zero + int decr_ref_count(int f); + + /// Implements possible actions to be carried out + /// when a given feature is enabled + /// Base function does nothing, can be overloaded + virtual void do_feature_side_effects(int id) {} // NOTE that all feature enums should start with f_*_active enum features_biases { /// \brief Bias is active f_cvb_active, - f_cvb_apply_force, // will apply forces - f_cvb_get_total_force, // requires total forces - f_cvb_history_dependent, // depends on simulation history - f_cvb_scalar_variables, // requires scalar colvars - f_cvb_calc_pmf, // whether this bias will compute a PMF + /// \brief Bias is awake (active on its own accord) this timestep + f_cvb_awake, + /// \brief will apply forces + f_cvb_apply_force, + /// \brief requires total forces + f_cvb_get_total_force, + /// \brief depends on simulation history + f_cvb_history_dependent, + /// \brief requires scalar colvars + f_cvb_scalar_variables, + /// \brief whether this bias will compute a PMF + f_cvb_calc_pmf, f_cvb_ntot }; enum features_colvar { /// \brief Calculate colvar f_cv_active, + /// \brief Colvar is awake (active on its own accord) this timestep + f_cv_awake, /// \brief Gradients are calculated and temporarily stored, so /// that external forces can be applied f_cv_gradient, @@ -254,12 +292,16 @@ public: f_cv_corrfunc, /// \brief Value and gradient computed by user script f_cv_scripted, + /// \brief Value and gradient computed by user function through Lepton + f_cv_custom_function, /// \brief Colvar is periodic f_cv_periodic, /// \brief is scalar f_cv_scalar, f_cv_linear, f_cv_homogeneous, + /// \brief multiple timestep through 
time_step_factor + f_cv_multiple_ts, /// \brief Number of colvar features f_cv_ntot }; @@ -268,10 +310,13 @@ public: f_cvc_active, f_cvc_scalar, f_cvc_gradient, + /// \brief CVC doesn't calculate and store explicit atom gradients + f_cvc_implicit_gradient, f_cvc_inv_gradient, /// \brief If enabled, calc_gradients() will call debug_gradients() for every group needed f_cvc_debug_gradient, f_cvc_Jacobian, + f_cvc_pbc_minimum_image, f_cvc_one_site_total_force, f_cvc_com_based, f_cvc_scalable, @@ -287,9 +332,9 @@ public: /// Perform a standard minimum msd fit for given atoms /// ie. not using refpositionsgroup // f_ag_min_msd_fit, - f_ag_fit_gradient_group,// TODO check that these are sometimes needed separately - // maybe for minimum RMSD? - f_ag_fit_gradient_ref, + /// \brief Does not have explicit atom gradients from parent CVC + f_ag_implicit_gradient, + f_ag_fit_gradients, f_ag_atom_forces, f_ag_scalable, f_ag_scalable_com, diff --git a/lib/colvars/colvargrid.cpp b/lib/colvars/colvargrid.cpp index 3b25acd2ef..9016e2c23a 100644 --- a/lib/colvars/colvargrid.cpp +++ b/lib/colvars/colvargrid.cpp @@ -144,7 +144,8 @@ void colvar_grid_gradient::write_1D_integral(std::ostream &os) os << "# xi A(xi)\n"; if ( cv.size() != 1 ) { - cvm::fatal_error("Cannot write integral for multi-dimensional gradient grids."); + cvm::error("Cannot write integral for multi-dimensional gradient grids."); + return; } integral = 0.0; diff --git a/lib/colvars/colvargrid.h b/lib/colvars/colvargrid.h index d4b9295c6e..6f06cb1066 100644 --- a/lib/colvars/colvargrid.h +++ b/lib/colvars/colvargrid.h @@ -198,7 +198,6 @@ public: /// Default constructor colvar_grid() : has_data(false) { - save_delimiters = false; nd = nt = 0; mult = 1; this->setup(); @@ -225,7 +224,6 @@ public: widths(g.widths), has_data(false) { - save_delimiters = false; } /// \brief Constructor from explicit grid sizes \param nx_i Number @@ -237,7 +235,6 @@ public: size_t mult_i = 1) : has_data(false) { - save_delimiters = false; 
this->setup(nx_i, t, mult_i); } @@ -248,7 +245,6 @@ public: bool margin = false) : has_data(false) { - save_delimiters = false; this->init_from_colvars(colvars, t, mult_i, margin); } @@ -840,7 +836,7 @@ public: // reallocate the array in case the grid params have just changed if (new_params) { init_from_boundaries(); - // data.resize(0); // no longer needed: setup calls clear() + // data.clear(); // no longer needed: setup calls clear() return this->setup(nx, T(), mult); } diff --git a/lib/colvars/colvarmodule.cpp b/lib/colvars/colvarmodule.cpp index 10cd3c0e47..780dc28afa 100644 --- a/lib/colvars/colvarmodule.cpp +++ b/lib/colvars/colvarmodule.cpp @@ -21,10 +21,14 @@ #include "colvarbias_meta.h" #include "colvarbias_restraint.h" #include "colvarscript.h" +#include "colvaratoms.h" colvarmodule::colvarmodule(colvarproxy *proxy_in) { + depth_s = 0; + cv_traj_os = NULL; + // pointer to the proxy object if (proxy == NULL) { proxy = proxy_in; @@ -33,12 +37,10 @@ colvarmodule::colvarmodule(colvarproxy *proxy_in) // TODO relax this error to handle multiple molecules in VMD // once the module is not static anymore cvm::error("Error: trying to allocate the collective " - "variable module twice.\n"); + "variable module twice.\n", BUG_ERROR); return; } - depth_s = 0; - cvm::log(cvm::line_marker); cvm::log("Initializing the collective variables module, version "+ cvm::to_str(COLVARS_VERSION)+".\n"); @@ -222,9 +224,9 @@ int colvarmodule::parse_config(std::string &conf) // update any necessary proxy data proxy->setup(); - if (cv_traj_os.is_open()) { + if (cv_traj_os != NULL) { // configuration might have changed, better redo the labels - write_traj_label(cv_traj_os); + write_traj_label(*cv_traj_os); } return get_error(); @@ -295,7 +297,7 @@ int colvarmodule::parse_colvars(std::string const &conf) std::string colvar_conf = ""; size_t pos = 0; - while (parse->key_lookup(conf, "colvar", colvar_conf, pos)) { + while (parse->key_lookup(conf, "colvar", &colvar_conf, &pos)) { if 
(colvar_conf.size()) { cvm::log(cvm::line_marker); @@ -350,7 +352,7 @@ int colvarmodule::parse_biases_type(std::string const &conf, { std::string bias_conf = ""; size_t conf_saved_pos = 0; - while (parse->key_lookup(conf, keyword, bias_conf, conf_saved_pos)) { + while (parse->key_lookup(conf, keyword, &bias_conf, &conf_saved_pos)) { if (bias_conf.size()) { cvm::log(cvm::line_marker); cvm::increase_depth(); @@ -409,12 +411,6 @@ int colvarmodule::parse_biases(std::string const &conf) size_t i; - for (i = 0; i < biases.size(); i++) { - biases[i]->enable(colvardeps::f_cvb_active); - if (cvm::debug()) - biases[i]->print_state(); - } - size_t n_hist_dep_biases = 0; std::vector hist_dep_biases_names; for (i = 0; i < biases.size(); i++) { @@ -487,7 +483,8 @@ int colvarmodule::catch_input_errors(int result) } -colvarbias * colvarmodule::bias_by_name(std::string const &name) { +colvarbias * colvarmodule::bias_by_name(std::string const &name) +{ colvarmodule *cv = cvm::main(); for (std::vector::iterator bi = cv->biases.begin(); bi != cv->biases.end(); @@ -500,7 +497,8 @@ colvarbias * colvarmodule::bias_by_name(std::string const &name) { } -colvar *colvarmodule::colvar_by_name(std::string const &name) { +colvar *colvarmodule::colvar_by_name(std::string const &name) +{ colvarmodule *cv = cvm::main(); for (std::vector::iterator cvi = cv->colvars.begin(); cvi != cv->colvars.end(); @@ -513,6 +511,20 @@ colvar *colvarmodule::colvar_by_name(std::string const &name) { } +cvm::atom_group *colvarmodule::atom_group_by_name(std::string const &name) +{ + colvarmodule *cv = cvm::main(); + for (std::vector::iterator agi = cv->named_atom_groups.begin(); + agi != cv->named_atom_groups.end(); + agi++) { + if ((*agi)->name == name) { + return (*agi); + } + } + return NULL; +} + + int colvarmodule::change_configuration(std::string const &bias_name, std::string const &conf) { @@ -521,7 +533,10 @@ int colvarmodule::change_configuration(std::string const &bias_name, cvm::increase_depth(); 
colvarbias *b; b = bias_by_name(bias_name); - if (b == NULL) { cvm::error("Error: bias not found: " + bias_name); } + if (b == NULL) { + cvm::error("Error: bias not found: " + bias_name); + return COLVARS_ERROR; + } b->change_configuration(conf); cvm::decrease_depth(); return (cvm::get_error() ? COLVARS_ERROR : COLVARS_OK); @@ -534,7 +549,10 @@ std::string colvarmodule::read_colvar(std::string const &name) colvar *c; std::stringstream ss; c = colvar_by_name(name); - if (c == NULL) { cvm::fatal_error("Error: colvar not found: " + name); } + if (c == NULL) { + cvm::error("Error: colvar not found: " + name); + return std::string(); + } ss << c->value(); cvm::decrease_depth(); return ss.str(); @@ -547,7 +565,10 @@ cvm::real colvarmodule::energy_difference(std::string const &bias_name, colvarbias *b; cvm::real energy_diff = 0.; b = bias_by_name(bias_name); - if (b == NULL) { cvm::fatal_error("Error: bias not found: " + bias_name); } + if (b == NULL) { + cvm::error("Error: bias not found: " + bias_name); + return 0.; + } energy_diff = b->energy_difference(conf); cvm::decrease_depth(); return energy_diff; @@ -666,18 +687,36 @@ int colvarmodule::calc_colvars() cvm::log("Calculating collective variables.\n"); // calculate collective variables and their gradients + // First, we need to decide which biases are awake + // so they can activate colvars as needed + std::vector::iterator bi; + for (bi = biases.begin(); bi != biases.end(); bi++) { + int tsf = (*bi)->get_time_step_factor(); + if (tsf > 0 && (step_absolute() % tsf == 0)) { + (*bi)->enable(colvardeps::f_cvb_awake); + } else { + (*bi)->disable(colvardeps::f_cvb_awake); + } + } + int error_code = COLVARS_OK; std::vector::iterator cvi; // Determine which colvars are active at this iteration - variables_active()->resize(0); + variables_active()->clear(); variables_active()->reserve(variables()->size()); for (cvi = variables()->begin(); cvi != variables()->end(); cvi++) { - // This is a dynamic feature - the next call 
should be to enable() - // or disable() when dynamic dependency resolution is fully implemented - (*cvi)->set_enabled(colvardeps::f_cv_active, - step_absolute() % (*cvi)->get_time_step_factor() == 0); - variables_active()->push_back(*cvi); + // Wake up or put to sleep variables + int tsf = (*cvi)->get_time_step_factor(); + if (tsf > 0 && (step_absolute() % tsf == 0)) { + (*cvi)->enable(colvardeps::f_cv_awake); + } else { + (*cvi)->disable(colvardeps::f_cv_awake); + } + + if ((*cvi)->is_enabled()) { + variables_active()->push_back(*cvi); + } } // if SMP support is available, split up the work @@ -685,8 +724,8 @@ int colvarmodule::calc_colvars() // first, calculate how much work (currently, how many active CVCs) each colvar has - variables_active_smp()->resize(0); - variables_active_smp_items()->resize(0); + variables_active_smp()->clear(); + variables_active_smp_items()->clear(); variables_active_smp()->reserve(variables_active()->size()); variables_active_smp_items()->reserve(variables_active()->size()); @@ -748,7 +787,8 @@ int colvarmodule::calc_biases() total_bias_energy = 0.0; // update the list of active biases - biases_active()->resize(0); + // which may have changed based on f_cvb_awake in calc_colvars() + biases_active()->clear(); biases_active()->reserve(biases.size()); for (bi = biases.begin(); bi != biases.end(); bi++) { if ((*bi)->is_enabled()) { @@ -828,8 +868,7 @@ int colvarmodule::update_colvar_forces() "of colvars (if they have any).\n"); cvm::increase_depth(); for (cvi = variables()->begin(); cvi != variables()->end(); cvi++) { - // Here we call even inactive colvars, so they accumulate biasing forces - // as well as update their extended-system dynamics + // Inactive colvars will only reset their forces and return 0 energy total_colvar_energy += (*cvi)->update_forces_energy(); if (cvm::get_error()) { return COLVARS_ERROR; @@ -883,11 +922,13 @@ int colvarmodule::write_restart_files() ((cvm::step_absolute() % restart_out_freq) == 0) ) { 
cvm::log("Writing the state file \""+ restart_out_name+"\".\n"); - proxy->backup_file(restart_out_name.c_str()); - restart_out_os.open(restart_out_name.c_str()); - if (!restart_out_os.is_open() || !write_restart(restart_out_os)) - cvm::error("Error: in writing restart file.\n"); - restart_out_os.close(); + proxy->backup_file(restart_out_name); + std::ostream *restart_out_os = proxy->output_stream(restart_out_name); + if (!restart_out_os) return cvm::get_error(); + if (!write_restart(*restart_out_os)) { + return cvm::error("Error: in writing restart file.\n", FILE_ERROR); + } + proxy->close_output_stream(restart_out_name); } return (cvm::get_error() ? COLVARS_ERROR : COLVARS_OK); @@ -896,26 +937,26 @@ int colvarmodule::write_restart_files() int colvarmodule::write_traj_files() { - if (!cv_traj_os.is_open()) { + if (cv_traj_os == NULL) { open_traj_file(cv_traj_name); } // write labels in the traj file every 1000 lines and at first timestep if ((cvm::step_absolute() % (cv_traj_freq * 1000)) == 0 || cvm::step_relative() == 0) { - write_traj_label(cv_traj_os); + write_traj_label(*cv_traj_os); } if ((cvm::step_absolute() % cv_traj_freq) == 0) { - write_traj(cv_traj_os); + write_traj(*cv_traj_os); } - if (restart_out_freq && cv_traj_os.is_open()) { + if (restart_out_freq && (cv_traj_os != NULL)) { // flush the trajectory file if we are at the restart frequency if ( (cvm::step_relative() > 0) && ((cvm::step_absolute() % restart_out_freq) == 0) ) { cvm::log("Synchronizing (emptying the buffer of) trajectory file \""+ cv_traj_name+"\".\n"); - cv_traj_os.flush(); + proxy->flush_output_stream(cv_traj_os); } } @@ -1003,9 +1044,11 @@ int colvarmodule::reset() index_groups.clear(); index_group_names.clear(); - if (cv_traj_os.is_open()) { + proxy->reset(); + + if (cv_traj_os != NULL) { // Do not close file here, as we might not be done with it yet. - cv_traj_os.flush(); + proxy->flush_output_stream(cv_traj_os); } return (cvm::get_error() ? 
COLVARS_ERROR : COLVARS_OK); @@ -1264,9 +1307,9 @@ int colvarmodule::write_output_files() } cvm::decrease_depth(); - if (cv_traj_os.is_open()) { - // do not close to avoid problems with multiple NAMD runs - cv_traj_os.flush(); + if (cv_traj_os != NULL) { + // do not close, there may be another run command + proxy->flush_output_stream(cv_traj_os); } return (cvm::get_error() ? COLVARS_ERROR : COLVARS_OK); @@ -1380,9 +1423,10 @@ std::ostream & colvarmodule::write_restart(std::ostream &os) return os; } + int colvarmodule::open_traj_file(std::string const &file_name) { - if (cv_traj_os.is_open()) { + if (cv_traj_os != NULL) { return COLVARS_OK; } @@ -1390,36 +1434,35 @@ int colvarmodule::open_traj_file(std::string const &file_name) if (cv_traj_append) { cvm::log("Appending to colvar trajectory file \""+file_name+ "\".\n"); - cv_traj_os.open(file_name.c_str(), std::ios::app); + cv_traj_os = (cvm::proxy)->output_stream(file_name, std::ios::app); } else { cvm::log("Writing to colvar trajectory file \""+file_name+ "\".\n"); proxy->backup_file(file_name.c_str()); - cv_traj_os.open(file_name.c_str()); + cv_traj_os = (cvm::proxy)->output_stream(file_name); } - if (!cv_traj_os.is_open()) { + if (cv_traj_os == NULL) { cvm::error("Error: cannot write to file \""+file_name+"\".\n", FILE_ERROR); } - return (cvm::get_error() ? COLVARS_ERROR : COLVARS_OK); + return cvm::get_error(); } + int colvarmodule::close_traj_file() { - if (cv_traj_os.is_open()) { - cv_traj_os.close(); + if (cv_traj_os != NULL) { + proxy->close_output_stream(cv_traj_name); + cv_traj_os = NULL; } - return (cvm::get_error() ? 
COLVARS_ERROR : COLVARS_OK); + return cvm::get_error(); } + std::ostream & colvarmodule::write_traj_label(std::ostream &os) { - if (!os.good()) { - cvm::error("Cannot write to trajectory file."); - return os; - } os.setf(std::ios::scientific, std::ios::floatfield); os << "# " << cvm::wrap_string("step", cvm::it_width-2) @@ -1437,13 +1480,16 @@ std::ostream & colvarmodule::write_traj_label(std::ostream &os) (*bi)->write_traj_label(os); } os << "\n"; + if (cvm::debug()) { - os.flush(); + proxy->flush_output_stream(&os); } + cvm::decrease_depth(); return os; } + std::ostream & colvarmodule::write_traj(std::ostream &os) { os.setf(std::ios::scientific, std::ios::floatfield); @@ -1463,9 +1509,11 @@ std::ostream & colvarmodule::write_traj(std::ostream &os) (*bi)->write_traj(os); } os << "\n"; + if (cvm::debug()) { - os.flush(); + proxy->flush_output_stream(&os); } + cvm::decrease_depth(); return os; } @@ -1540,25 +1588,19 @@ void colvarmodule::clear_error() } -void cvm::error(std::string const &message, int code) +int colvarmodule::error(std::string const &message, int code) { set_error_bits(code); proxy->error(message); + return get_error(); } -void cvm::fatal_error(std::string const &message) +int colvarmodule::fatal_error(std::string const &message) { - // TODO once all non-fatal errors have been set to be handled by error(), - // set DELETE_COLVARS here for VMD to handle it set_error_bits(FATAL_ERROR); proxy->fatal_error(message); -} - - -void cvm::exit(std::string const &message) -{ - proxy->exit(message); + return get_error(); } diff --git a/lib/colvars/colvarmodule.h b/lib/colvars/colvarmodule.h index b4f80e0b5e..0f6efd14c4 100644 --- a/lib/colvars/colvarmodule.h +++ b/lib/colvars/colvarmodule.h @@ -10,9 +10,7 @@ #ifndef COLVARMODULE_H #define COLVARMODULE_H -#ifndef COLVARS_VERSION -#define COLVARS_VERSION "2017-03-09" -#endif +#include "colvars_version.h" #ifndef COLVARS_DEBUG #define COLVARS_DEBUG false @@ -54,11 +52,6 @@ You can browse the class hierarchy or 
the list of source files. #include #include -#ifdef NAMD_VERSION -// use Lustre-friendly wrapper to POSIX write() -#include "fstream_namd.h" -#endif - class colvarparse; class colvar; class colvarbias; @@ -188,7 +181,13 @@ private: /// Indexes of the items to calculate for each colvar std::vector colvars_smp_items; + /// Array of named atom groups + std::vector named_atom_groups; public: + /// Register a named atom group into named_atom_groups + inline void register_named_atom_group(atom_group * ag) { + named_atom_groups.push_back(ag); + } /// Array of collective variables std::vector *variables(); @@ -319,12 +318,6 @@ public: /// (Re)initialize the output trajectory and state file (does not write it yet) int setup_output(); -#ifdef NAMD_VERSION - typedef ofstream_namd ofstream; -#else - typedef std::ofstream ofstream; -#endif - /// Read the input restart file std::istream & read_restart(std::istream &is); /// Write the output restart file @@ -332,7 +325,7 @@ public: /// Open a trajectory file if requested (and leave it open) int open_traj_file(std::string const &file_name); - /// Close it + /// Close it (note: currently unused) int close_traj_file(); /// Write in the trajectory file std::ostream & write_traj(std::ostream &os); @@ -354,6 +347,9 @@ public: /// Look up a colvar by name; returns NULL if not found static colvar * colvar_by_name(std::string const &name); + /// Look up a named atom group by name; returns NULL if not found + static atom_group * atom_group_by_name(std::string const &name); + /// Load new configuration for the given bias - /// currently works for harmonic (force constant and/or centers) int change_configuration(std::string const &bias_name, std::string const &conf); @@ -452,10 +448,10 @@ public: static void log(std::string const &message); /// Print a message to the main log and exit with error code - static void fatal_error(std::string const &message); + static int fatal_error(std::string const &message); /// Print a message to the main 
log and set global error code - static void error(std::string const &message, int code = COLVARS_ERROR); + static int error(std::string const &message, int code = COLVARS_ERROR); /// Print a message to the main log and exit normally static void exit(std::string const &message); @@ -471,8 +467,7 @@ public: /// \brief Get the distance between two atomic positions with pbcs handled /// correctly static rvector position_distance(atom_pos const &pos1, - atom_pos const &pos2); - + atom_pos const &pos2); /// \brief Get the square distance between two positions (with /// periodic boundary conditions handled transparently) @@ -481,21 +476,7 @@ public: /// an analytical square distance (while taking the square of /// position_distance() would produce leads to a cusp) static real position_dist2(atom_pos const &pos1, - atom_pos const &pos2); - - /// \brief Get the closest periodic image to a reference position - /// \param pos The position to look for the closest periodic image - /// \param ref_pos (optional) The reference position - static void select_closest_image(atom_pos &pos, - atom_pos const &ref_pos); - - /// \brief Perform select_closest_image() on a set of atomic positions - /// - /// After that, distance vectors can then be calculated directly, - /// without using position_distance() - static void select_closest_images(std::vector &pos, - atom_pos const &ref_pos); - + atom_pos const &pos2); /// \brief Names of groups from a Gromacs .ndx file to be read at startup std::list index_group_names; @@ -556,14 +537,11 @@ protected: std::string cv_traj_name; /// Collective variables output trajectory file - colvarmodule::ofstream cv_traj_os; + std::ostream *cv_traj_os; /// Appending to the existing trajectory file? bool cv_traj_append; - /// Output restart file - colvarmodule::ofstream restart_out_os; - private: /// Counter for the current depth in the object hierarchy (useg e.g. 
in output) @@ -704,18 +682,6 @@ inline void cvm::request_total_force() proxy->request_total_force(true); } -inline void cvm::select_closest_image(atom_pos &pos, - atom_pos const &ref_pos) -{ - proxy->select_closest_image(pos, ref_pos); -} - -inline void cvm::select_closest_images(std::vector &pos, - atom_pos const &ref_pos) -{ - proxy->select_closest_images(pos, ref_pos); -} - inline cvm::rvector cvm::position_distance(atom_pos const &pos1, atom_pos const &pos2) { diff --git a/lib/colvars/colvarparse.cpp b/lib/colvars/colvarparse.cpp index 8055d925db..9f333b7b76 100644 --- a/lib/colvars/colvarparse.cpp +++ b/lib/colvars/colvarparse.cpp @@ -17,10 +17,7 @@ // space & tab -std::string const colvarparse::white_space = " \t"; - -std::string colvarparse::dummy_string = ""; -size_t colvarparse::dummy_pos = 0; +char const * const colvarparse::white_space = " \t"; // definition of single-value keyword parsers @@ -37,7 +34,7 @@ template bool colvarparse::_get_keyval_scalar_(std::string const do { std::string data_this = ""; - b_found = key_lookup(conf, key, data_this, save_pos); + b_found = key_lookup(conf, key, &data_this, &save_pos); if (b_found) { if (!b_found_any) b_found_any = true; @@ -92,7 +89,7 @@ bool colvarparse::_get_keyval_scalar_string_(std::string const &conf, do { std::string data_this = ""; - b_found = key_lookup(conf, key, data_this, save_pos); + b_found = key_lookup(conf, key, &data_this, &save_pos); if (b_found) { if (!b_found_any) b_found_any = true; @@ -156,7 +153,7 @@ template bool colvarparse::_get_keyval_vector_(std::string const do { std::string data_this = ""; - b_found = key_lookup(conf, key, data_this, save_pos); + b_found = key_lookup(conf, key, &data_this, &save_pos); if (b_found) { if (!b_found_any) b_found_any = true; @@ -313,7 +310,7 @@ bool colvarparse::get_keyval(std::string const &conf, do { std::string data_this = ""; - b_found = key_lookup(conf, key, data_this, save_pos); + b_found = key_lookup(conf, key, &data_this, &save_pos); if 
(b_found) { if (!b_found_any) b_found_any = true; @@ -552,8 +549,8 @@ std::istream & colvarparse::getline_nocomments(std::istream &is, bool colvarparse::key_lookup(std::string const &conf, char const *key_in, - std::string &data, - size_t &save_pos) + std::string *data, + size_t *save_pos) { if (cvm::debug()) { cvm::log("Looking for the keyword \""+std::string(key_in)+"\" and its value.\n"); @@ -570,14 +567,12 @@ bool colvarparse::key_lookup(std::string const &conf, std::string const conf_lower(to_lower_cppstr(conf)); // by default, there is no value, unless we found one - data = ""; - - // when the function is invoked without save_pos, ensure that we - // start from zero - colvarparse::dummy_pos = 0; + if (data != NULL) { + data->clear(); + } // start from the first occurrence of key - size_t pos = conf_lower.find(key, save_pos); + size_t pos = conf_lower.find(key, (save_pos != NULL) ? *save_pos : 0); // iterate over all instances of the substring until it finds it as isolated keyword while (true) { @@ -593,7 +588,7 @@ bool colvarparse::key_lookup(std::string const &conf, bool b_isolated_left = true, b_isolated_right = true; if (pos > 0) { - if ( std::string("\n"+white_space+ + if ( std::string("\n"+std::string(white_space)+ "}").find(conf[pos-1]) == std::string::npos ) { // none of the valid delimiting characters is on the left of key @@ -602,7 +597,7 @@ bool colvarparse::key_lookup(std::string const &conf, } if (pos < conf.size()-key.size()-1) { - if ( std::string("\n"+white_space+ + if ( std::string("\n"+std::string(white_space)+ "{").find(conf[pos+key.size()]) == std::string::npos ) { // none of the valid delimiting characters is on the right of key @@ -625,9 +620,11 @@ bool colvarparse::key_lookup(std::string const &conf, } } + if (save_pos != NULL) { // save the pointer for a future call (when iterating over multiple // valid instances of the same keyword) - save_pos = pos + key.size(); + *save_pos = pos + key.size(); + } // get the remainder of the line 
size_t pl = conf.rfind("\n", pos); @@ -716,19 +713,21 @@ bool colvarparse::key_lookup(std::string const &conf, data_end) + 1; } - data.append(line, data_begin, (data_end-data_begin)); + if (data != NULL) { + data->append(line, data_begin, (data_end-data_begin)); - if (cvm::debug()) { - cvm::log("Keyword value = \""+data+"\".\n"); - } + if (cvm::debug()) { + cvm::log("Keyword value = \""+*data+"\".\n"); + } - if (data.size() && save_delimiters) { - data_begin_pos.push_back(conf.find(data, pos+key.size())); - data_end_pos.push_back(data_begin_pos.back()+data.size()); + if (data->size()) { + data_begin_pos.push_back(conf.find(*data, pos+key.size())); + data_end_pos.push_back(data_begin_pos.back()+data->size()); + } } } - save_pos = line_end; + if (save_pos != NULL) *save_pos = line_end; return true; } diff --git a/lib/colvars/colvarparse.h b/lib/colvars/colvarparse.h index 9f116caafd..9389bc49da 100644 --- a/lib/colvars/colvarparse.h +++ b/lib/colvars/colvarparse.h @@ -24,7 +24,7 @@ /// need to parse input inherit from this class colvarparse { -protected: +private: /// \brief List of legal keywords for this object: this is updated /// by each call to colvarparse::get_keyval() or @@ -41,14 +41,6 @@ protected: /// values before the keyword check is performed std::list data_end_pos; - /// \brief Whether or not to accumulate data_begin_pos and - /// data_end_pos in key_lookup(); it may be useful to disable - /// this after the constructor is called, because other files may be - /// read (e.g. 
restart) that would mess up the registry; in any - /// case, nothing serious happens until check_keywords() is invoked - /// (which should happen only right after construction) - bool save_delimiters; - /// \brief Add a new valid keyword to the list void add_keyword(char const *key); @@ -62,14 +54,12 @@ public: inline colvarparse() - : save_delimiters(true) { init(); } /// Constructor that stores the object's config string inline colvarparse(const std::string& conf) - : save_delimiters(true) { init(conf); } @@ -115,8 +105,6 @@ public: /// \brief Use this after parsing a config string (note that check_keywords() calls it already) void clear_keyword_registry(); -public: - /// \fn get_keyval bool const get_keyval (std::string const &conf, /// char const *key, _type_ &value, _type_ const &def_value, /// Parse_Mode const parse_mode) \brief Helper function to parse @@ -282,7 +270,7 @@ public: /// Accepted white space delimiters, used in key_lookup() - static std::string const white_space; + static const char * const white_space; /// \brief Low-level function for parsing configuration strings; /// automatically adds the requested keyword to the list of valid @@ -293,13 +281,8 @@ public: /// within "conf", useful when doing multiple calls bool key_lookup(std::string const &conf, char const *key, - std::string &data = dummy_string, - size_t &save_pos = dummy_pos); - - /// Used as a default argument by key_lookup - static std::string dummy_string; - /// Used as a default argument by key_lookup - static size_t dummy_pos; + std::string *data = NULL, + size_t *save_pos = NULL); /// \brief Works as std::getline() but also removes everything /// between a comment character and the following newline diff --git a/lib/colvars/colvarproxy.cpp b/lib/colvars/colvarproxy.cpp new file mode 100644 index 0000000000..fa24091d52 --- /dev/null +++ b/lib/colvars/colvarproxy.cpp @@ -0,0 +1,492 @@ +// -*- c++ -*- + +// This file is part of the Collective Variables module (Colvars). 
+// The original version of Colvars and its updates are located at: +// https://github.com/colvars/colvars +// Please update all Colvars source files before making any changes. +// If you wish to distribute your changes, please submit them to the +// Colvars repository at GitHub. + +#include +#include + +#include "colvarmodule.h" +#include "colvarproxy.h" +#include "colvarscript.h" +#include "colvaratoms.h" + + + +colvarproxy_system::colvarproxy_system() {} + + +colvarproxy_system::~colvarproxy_system() {} + + +void colvarproxy_system::add_energy(cvm::real energy) {} + + +void colvarproxy_system::request_total_force(bool yesno) +{ + if (yesno == true) + cvm::error("Error: total forces are currently not implemented.\n", + COLVARS_NOT_IMPLEMENTED); +} + + +bool colvarproxy_system::total_forces_enabled() const +{ + return false; +} + + +cvm::real colvarproxy_system::position_dist2(cvm::atom_pos const &pos1, + cvm::atom_pos const &pos2) +{ + return (position_distance(pos1, pos2)).norm2(); +} + + + +colvarproxy_atoms::colvarproxy_atoms() {} + + +colvarproxy_atoms::~colvarproxy_atoms() +{ + reset(); +} + + +int colvarproxy_atoms::reset() +{ + atoms_ids.clear(); + atoms_ncopies.clear(); + atoms_masses.clear(); + atoms_charges.clear(); + atoms_positions.clear(); + atoms_total_forces.clear(); + atoms_new_colvar_forces.clear(); + return COLVARS_OK; +} + + +int colvarproxy_atoms::add_atom_slot(int atom_id) +{ + atoms_ids.push_back(atom_id); + atoms_ncopies.push_back(1); + atoms_masses.push_back(1.0); + atoms_charges.push_back(0.0); + atoms_positions.push_back(cvm::rvector(0.0, 0.0, 0.0)); + atoms_total_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); + atoms_new_colvar_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); + return (atoms_ids.size() - 1); +} + + +int colvarproxy_atoms::init_atom(cvm::residue_id const &residue, + std::string const &atom_name, + std::string const &segment_id) +{ + cvm::error("Error: initializing an atom by name and residue number is currently not 
supported.\n", + COLVARS_NOT_IMPLEMENTED); + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_atoms::check_atom_id(cvm::residue_id const &residue, + std::string const &atom_name, + std::string const &segment_id) +{ + colvarproxy_atoms::init_atom(residue, atom_name, segment_id); + return COLVARS_NOT_IMPLEMENTED; +} + + +void colvarproxy_atoms::clear_atom(int index) +{ + if (((size_t) index) >= atoms_ids.size()) { + cvm::error("Error: trying to disable an atom that was not previously requested.\n", + INPUT_ERROR); + } + if (atoms_ncopies[index] > 0) { + atoms_ncopies[index] -= 1; + } +} + + +int colvarproxy_atoms::load_atoms(char const *filename, + cvm::atom_group &atoms, + std::string const &pdb_field, + double const) +{ + return cvm::error("Error: loading atom identifiers from a file " + "is currently not implemented.\n", + COLVARS_NOT_IMPLEMENTED); +} + + +int colvarproxy_atoms::load_coords(char const *filename, + std::vector &pos, + const std::vector &indices, + std::string const &pdb_field, + double const) +{ + return cvm::error("Error: loading atomic coordinates from a file " + "is currently not implemented.\n", + COLVARS_NOT_IMPLEMENTED); +} + + + +colvarproxy_atom_groups::colvarproxy_atom_groups() {} + + +colvarproxy_atom_groups::~colvarproxy_atom_groups() +{ + reset(); +} + + +int colvarproxy_atom_groups::reset() +{ + atom_groups_ids.clear(); + atom_groups_ncopies.clear(); + atom_groups_masses.clear(); + atom_groups_charges.clear(); + atom_groups_coms.clear(); + atom_groups_total_forces.clear(); + atom_groups_new_colvar_forces.clear(); + return COLVARS_OK; +} + + +int colvarproxy_atom_groups::add_atom_group_slot(int atom_group_id) +{ + atom_groups_ids.push_back(atom_group_id); + atom_groups_ncopies.push_back(1); + atom_groups_masses.push_back(1.0); + atom_groups_charges.push_back(0.0); + atom_groups_coms.push_back(cvm::rvector(0.0, 0.0, 0.0)); + atom_groups_total_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); + 
atom_groups_new_colvar_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); + return (atom_groups_ids.size() - 1); +} + + +int colvarproxy_atom_groups::scalable_group_coms() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_atom_groups::init_atom_group(std::vector const &atoms_ids) +{ + cvm::error("Error: initializing a group outside of the Colvars module " + "is currently not supported.\n", + COLVARS_NOT_IMPLEMENTED); + return COLVARS_NOT_IMPLEMENTED; +} + + +void colvarproxy_atom_groups::clear_atom_group(int index) +{ + if (((size_t) index) >= atom_groups_ids.size()) { + cvm::error("Error: trying to disable an atom group " + "that was not previously requested.\n", + INPUT_ERROR); + } + if (atom_groups_ncopies[index] > 0) { + atom_groups_ncopies[index] -= 1; + } +} + + + +colvarproxy_smp::colvarproxy_smp() +{ + b_smp_active = true; +} + + +colvarproxy_smp::~colvarproxy_smp() {} + + +int colvarproxy_smp::smp_enabled() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_smp::smp_colvars_loop() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_smp::smp_biases_loop() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_smp::smp_biases_script_loop() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_smp::smp_thread_id() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_smp::smp_num_threads() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_smp::smp_lock() +{ + return COLVARS_OK; +} + + +int colvarproxy_smp::smp_trylock() +{ + return COLVARS_OK; +} + + +int colvarproxy_smp::smp_unlock() +{ + return COLVARS_OK; +} + + + + +colvarproxy_replicas::colvarproxy_replicas() {} + + +colvarproxy_replicas::~colvarproxy_replicas() {} + + +bool colvarproxy_replicas::replica_enabled() +{ + return false; +} + + +int colvarproxy_replicas::replica_index() +{ + return 0; +} + + +int colvarproxy_replicas::replica_num() +{ + return 1; +} + + +void colvarproxy_replicas::replica_comm_barrier() {} + + +int 
colvarproxy_replicas::replica_comm_recv(char* msg_data, + int buf_len, + int src_rep) +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_replicas::replica_comm_send(char* msg_data, + int msg_len, + int dest_rep) +{ + return COLVARS_NOT_IMPLEMENTED; +} + + + + +colvarproxy_script::colvarproxy_script() +{ + script = NULL; +} + + +colvarproxy_script::~colvarproxy_script() {} + + +char *colvarproxy_script::script_obj_to_str(unsigned char *obj) +{ + return reinterpret_cast(obj); +} + + +int colvarproxy_script::run_force_callback() +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_script::run_colvar_callback( + std::string const &name, + std::vector const &cvcs, + colvarvalue &value) +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_script::run_colvar_gradient_callback( + std::string const &name, + std::vector const &cvcs, + std::vector > &gradient) +{ + return COLVARS_NOT_IMPLEMENTED; +} + + + + +colvarproxy_io::colvarproxy_io() {} + + +colvarproxy_io::~colvarproxy_io() {} + + +int colvarproxy_io::get_frame(long int&) +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +int colvarproxy_io::set_frame(long int) +{ + return COLVARS_NOT_IMPLEMENTED; +} + + +std::ostream * colvarproxy_io::output_stream(std::string const &output_name, + std::ios_base::openmode mode) +{ + if (cvm::debug()) { + cvm::log("Using colvarproxy::output_stream()\n"); + } + std::list::iterator osi = output_files.begin(); + std::list::iterator osni = output_stream_names.begin(); + for ( ; osi != output_files.end(); osi++, osni++) { + if (*osni == output_name) { + return *osi; + } + } + if (!(mode & (std::ios_base::app | std::ios_base::ate))) { + backup_file(output_name); + } + std::ofstream *os = new std::ofstream(output_name.c_str(), mode); + if (!os->is_open()) { + cvm::error("Error: cannot write to file/channel \""+output_name+"\".\n", + FILE_ERROR); + return NULL; + } + output_stream_names.push_back(output_name); + output_files.push_back(os); + return os; +} + + +int 
colvarproxy_io::flush_output_stream(std::ostream *os) +{ + std::list::iterator osi = output_files.begin(); + std::list::iterator osni = output_stream_names.begin(); + for ( ; osi != output_files.end(); osi++, osni++) { + if (*osi == os) { + ((std::ofstream *) (*osi))->flush(); + return COLVARS_OK; + } + } + return cvm::error("Error: trying to flush an output file/channel " + "that wasn't open.\n", BUG_ERROR); +} + + +int colvarproxy_io::close_output_stream(std::string const &output_name) +{ + std::list::iterator osi = output_files.begin(); + std::list::iterator osni = output_stream_names.begin(); + for ( ; osi != output_files.end(); osi++, osni++) { + if (*osni == output_name) { + ((std::ofstream *) (*osi))->close(); + output_files.erase(osi); + output_stream_names.erase(osni); + return COLVARS_OK; + } + } + return cvm::error("Error: trying to close an output file/channel " + "that wasn't open.\n", BUG_ERROR); +} + + +int colvarproxy_io::backup_file(char const *filename) +{ + return COLVARS_NOT_IMPLEMENTED; +} + + + +colvarproxy::colvarproxy() +{ + colvars = NULL; + b_simulation_running = true; +} + + +colvarproxy::~colvarproxy() {} + + +int colvarproxy::reset() +{ + int error_code = COLVARS_OK; + error_code |= colvarproxy_atoms::reset(); + error_code |= colvarproxy_atom_groups::reset(); + return error_code; +} + + +int colvarproxy::setup() +{ + return COLVARS_OK; +} + + +int colvarproxy::update_input() +{ + return COLVARS_OK; +} + + +int colvarproxy::update_output() +{ + return COLVARS_OK; +} + + +size_t colvarproxy::restart_frequency() +{ + return 0; +} + + + + + + + + + + + diff --git a/lib/colvars/colvarproxy.h b/lib/colvars/colvarproxy.h index 5b216c9d41..95d13cd7e0 100644 --- a/lib/colvars/colvarproxy.h +++ b/lib/colvars/colvarproxy.h @@ -16,55 +16,36 @@ #include "colvarmodule.h" #include "colvarvalue.h" + +/// \file colvarproxy.h +/// \brief Colvars proxy classes +/// +/// This file declares the class for the object responsible for interfacing +/// Colvars 
with other codes (MD engines, VMD, Python). The \link colvarproxy +/// \endlink class is a derivative of multiple classes, each devoted to a +/// specific task (e.g. \link colvarproxy_atoms \endlink to access data for +/// individual atoms). +/// +/// To interface to a new MD engine, the simplest solution is to derive a new +/// class from \link colvarproxy \endlink. Currently implemented are: \link +/// colvarproxy_lammps, \endlink, \link colvarproxy_namd, \endlink, \link +/// colvarproxy_vmd, \endlink. + + // forward declarations class colvarscript; -/// \brief Interface between the collective variables module and -/// the simulation or analysis program (NAMD, VMD, LAMMPS...). -/// This is the base class: each interfaced program is supported by a derived class. -/// Only pure virtual functions ("= 0") must be reimplemented to ensure baseline functionality. -class colvarproxy { +/// Methods for accessing the simulation system (PBCs, integrator, etc) +class colvarproxy_system { public: - /// Pointer to the main object - colvarmodule *colvars; - /// Constructor - colvarproxy() - { - colvars = NULL; - b_simulation_running = true; - b_smp_active = true; - script = NULL; - } + colvarproxy_system(); /// Destructor - virtual ~colvarproxy() - {} - - /// (Re)initialize required member data after construction - virtual int setup() - { - return COLVARS_OK; - } - - /// \brief Update data required by the colvars module (e.g. cache atom positions) - /// - /// TODO Break up colvarproxy_namd and colvarproxy_lammps function into these - virtual int update_input() - { - return COLVARS_OK; - } - - /// \brief Update data based from the results of a module update (e.g. 
send forces) - virtual int update_output() - { - return COLVARS_OK; - } - - // **************** SIMULATION PARAMETERS **************** + virtual ~colvarproxy_system(); /// \brief Value of the unit for atomic coordinates with respect to /// angstroms (used by some variables for hard-coded default values) @@ -73,7 +54,7 @@ public: /// \brief Boltzmann constant virtual cvm::real boltzmann() = 0; - /// \brief Temperature of the simulation (K) + /// \brief Target temperature of the simulation (K units) virtual cvm::real temperature() = 0; /// \brief Time step of the simulation (fs) @@ -82,263 +63,9 @@ public: /// \brief Pseudo-random number with Gaussian distribution virtual cvm::real rand_gaussian(void) = 0; - /// \brief Get the current frame number - // Returns error code - virtual int get_frame(long int&) { return COLVARS_NOT_IMPLEMENTED; } - - /// \brief Set the current frame number (as well as colvarmodule::it) - // Returns error code - virtual int set_frame(long int) { return COLVARS_NOT_IMPLEMENTED; } - - /// \brief Prefix to be used for input files (restarts, not - /// configuration) - std::string input_prefix_str, output_prefix_str, restart_output_prefix_str; - - inline std::string & input_prefix() - { - return input_prefix_str; - } - - /// \brief Prefix to be used for output restart files - inline std::string restart_output_prefix() - { - return restart_output_prefix_str; - } - - /// \brief Prefix to be used for output files (final system - /// configuration) - inline std::string output_prefix() - { - return output_prefix_str; - } - - /// \brief Restarts will be written each time this number of steps has passed - virtual size_t restart_frequency() - { - return 0; - } - -protected: - - /// Whether a simulation is running (and try to prevent irrecovarable errors) - bool b_simulation_running; - -public: - - /// Whether a simulation is running (and try to prevent irrecovarable errors) - virtual bool simulation_running() const - { - return b_simulation_running; - } 
- -protected: - - /// \brief Currently opened output files: by default, these are ofstream objects. - /// Allows redefinition to implement different output mechanisms - std::list output_files; - /// \brief Identifiers for output_stream objects: by default, these are the names of the files - std::list output_stream_names; - -public: - - // ***************** SHARED-MEMORY PARALLELIZATION ***************** - - /// Whether threaded parallelization is available (TODO: make this a cvm::deps feature) - virtual int smp_enabled() - { - return COLVARS_NOT_IMPLEMENTED; - } - - /// Whether threaded parallelization should be used (TODO: make this a cvm::deps feature) - bool b_smp_active; - - /// Distribute calculation of colvars (and their components) across threads - virtual int smp_colvars_loop() - { - return COLVARS_NOT_IMPLEMENTED; - } - - /// Distribute calculation of biases across threads - virtual int smp_biases_loop() - { - return COLVARS_NOT_IMPLEMENTED; - } - - /// Distribute calculation of biases across threads 2nd through last, with all scripted biased on 1st thread - virtual int smp_biases_script_loop() - { - return COLVARS_NOT_IMPLEMENTED; - } - - /// Index of this thread - virtual int smp_thread_id() - { - return COLVARS_NOT_IMPLEMENTED; - } - - /// Number of threads sharing this address space - virtual int smp_num_threads() - { - return COLVARS_NOT_IMPLEMENTED; - } - - /// Lock the proxy's shared data for access by a thread, if threads are implemented; if not implemented, does nothing - virtual int smp_lock() - { - return COLVARS_OK; - } - - /// Attempt to lock the proxy's shared data - virtual int smp_trylock() - { - return COLVARS_OK; - } - - /// Release the lock - virtual int smp_unlock() - { - return COLVARS_OK; - } - - // **************** MULTIPLE REPLICAS COMMUNICATION **************** - - // Replica exchange commands: - - /// \brief Indicate if multi-replica support is available and active - virtual bool replica_enabled() { return false; } - - /// \brief 
Index of this replica - virtual int replica_index() { return 0; } - - /// \brief Total number of replica - virtual int replica_num() { return 1; } - - /// \brief Synchronize replica - virtual void replica_comm_barrier() {} - - /// \brief Receive data from other replica - virtual int replica_comm_recv(char* msg_data, int buf_len, int src_rep) { - return COLVARS_NOT_IMPLEMENTED; - } - - /// \brief Send data to other replica - virtual int replica_comm_send(char* msg_data, int msg_len, int dest_rep) { - return COLVARS_NOT_IMPLEMENTED; - } - - - // **************** SCRIPTING INTERFACE **************** - - /// Pointer to the scripting interface object - /// (does not need to be allocated in a new interface) - colvarscript *script; - - /// is a user force script defined? - bool force_script_defined; - - /// Do we have a scripting interface? - bool have_scripts; - - /// Run a user-defined colvar forces script - virtual int run_force_callback() { return COLVARS_NOT_IMPLEMENTED; } - - virtual int run_colvar_callback(std::string const &name, - std::vector const &cvcs, - colvarvalue &value) - { return COLVARS_NOT_IMPLEMENTED; } - - virtual int run_colvar_gradient_callback(std::string const &name, - std::vector const &cvcs, - std::vector > &gradient) - { return COLVARS_NOT_IMPLEMENTED; } - - - // **************** INPUT/OUTPUT **************** - - /// Print a message to the main log - virtual void log(std::string const &message) = 0; - - /// Print a message to the main log and let the rest of the program handle the error - virtual void error(std::string const &message) = 0; - - /// Print a message to the main log and exit with error code - virtual void fatal_error(std::string const &message) = 0; - - /// Print a message to the main log and exit normally - virtual void exit(std::string const &message) - { - cvm::error("Error: exiting without error is not implemented, returning error code.\n", - COLVARS_NOT_IMPLEMENTED); - } - - // TODO the following definitions may be moved to a 
.cpp file - - /// \brief Returns a reference to the given output channel; - /// if this is not open already, then open it - virtual std::ostream * output_stream(std::string const &output_name) - { - std::list::iterator osi = output_files.begin(); - std::list::iterator osni = output_stream_names.begin(); - for ( ; osi != output_files.end(); osi++, osni++) { - if (*osni == output_name) { - return *osi; - } - } - output_stream_names.push_back(output_name); - std::ofstream * os = new std::ofstream(output_name.c_str()); - if (!os->is_open()) { - cvm::error("Error: cannot write to file \""+output_name+"\".\n", - FILE_ERROR); - } - output_files.push_back(os); - return os; - } - - /// \brief Closes the given output channel - virtual int close_output_stream(std::string const &output_name) - { - std::list::iterator osi = output_files.begin(); - std::list::iterator osni = output_stream_names.begin(); - for ( ; osi != output_files.end(); osi++, osni++) { - if (*osni == output_name) { - ((std::ofstream *) (*osi))->close(); - output_files.erase(osi); - output_stream_names.erase(osni); - return COLVARS_OK; - } - } - cvm::error("Error: trying to close an output file or stream that wasn't open.\n", - BUG_ERROR); - return COLVARS_ERROR; - } - - /// \brief Rename the given file, before overwriting it - virtual int backup_file(char const *filename) - { - return COLVARS_NOT_IMPLEMENTED; - } - - - - // **************** ACCESS SYSTEM DATA **************** - /// Pass restraint energy value for current timestep to MD engine virtual void add_energy(cvm::real energy) = 0; - /// Tell the proxy whether total forces are needed (may not always be available) - virtual void request_total_force(bool yesno) - { - if (yesno == true) - cvm::error("Error: total forces are currently not implemented.\n", - COLVARS_NOT_IMPLEMENTED); - } - - /// Are total forces being used? 
- virtual bool total_forces_enabled() const - { - return false; - } - /// \brief Get the PBC-aware distance vector between two positions virtual cvm::rvector position_distance(cvm::atom_pos const &pos1, cvm::atom_pos const &pos2) = 0; @@ -346,107 +73,72 @@ public: /// \brief Get the PBC-aware square distance between two positions; /// may need to be reimplemented independently from position_distance() for optimization purposes virtual cvm::real position_dist2(cvm::atom_pos const &pos1, - cvm::atom_pos const &pos2) - { - return (position_distance(pos1, pos2)).norm2(); - } + cvm::atom_pos const &pos2); - /// \brief Get the closest periodic image to a reference position - /// \param pos The position to look for the closest periodic image - /// \param ref_pos The reference position - virtual void select_closest_image(cvm::atom_pos &pos, - cvm::atom_pos const &ref_pos) - { - pos = position_distance(ref_pos, pos) + ref_pos; - } + /// Tell the proxy whether total forces are needed (may not always be available) + virtual void request_total_force(bool yesno); - /// \brief Perform select_closest_image() on a set of atomic positions - /// - /// After that, distance vectors can then be calculated directly, - /// without using position_distance() - void select_closest_images(std::vector &pos, - cvm::atom_pos const &ref_pos) - { - for (std::vector::iterator pi = pos.begin(); - pi != pos.end(); ++pi) { - select_closest_image(*pi, ref_pos); - } - } + /// Are total forces being used? + virtual bool total_forces_enabled() const; +}; - // **************** ACCESS ATOMIC DATA **************** -protected: - - /// \brief Array of 0-based integers used to uniquely associate atoms - /// within the host program - std::vector atoms_ids; - /// \brief Keep track of how many times each atom is used by a separate colvar object - std::vector atoms_ncopies; - /// \brief Masses of the atoms (allow redefinition during a run, as done e.g. 
in LAMMPS) - std::vector atoms_masses; - /// \brief Charges of the atoms (allow redefinition during a run, as done e.g. in LAMMPS) - std::vector atoms_charges; - /// \brief Current three-dimensional positions of the atoms - std::vector atoms_positions; - /// \brief Most recent total forces on each atom - std::vector atoms_total_forces; - /// \brief Forces applied from colvars, to be communicated to the MD integrator - std::vector atoms_new_colvar_forces; - - /// Used by all init_atom() functions: create a slot for an atom not requested yet - inline int add_atom_slot(int atom_id) - { - atoms_ids.push_back(atom_id); - atoms_ncopies.push_back(1); - atoms_masses.push_back(1.0); - atoms_charges.push_back(0.0); - atoms_positions.push_back(cvm::rvector(0.0, 0.0, 0.0)); - atoms_total_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); - atoms_new_colvar_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); - return (atoms_ids.size() - 1); - } +/// \brief Container of atomic data for processing by Colvars +class colvarproxy_atoms { public: - /// Prepare this atom for collective variables calculation, selecting it by numeric index (1-based) + /// Constructor + colvarproxy_atoms(); + + /// Destructor + virtual ~colvarproxy_atoms(); + + /// Prepare this atom for collective variables calculation, selecting it by + /// numeric index (1-based) virtual int init_atom(int atom_number) = 0; - /// Check that this atom number is valid, but do not initialize the corresponding atom yet + /// Check that this atom number is valid, but do not initialize the + /// corresponding atom yet virtual int check_atom_id(int atom_number) = 0; - /// Select this atom for collective variables calculation, using name and residue number. - /// Not all programs support this: leave this function as is in those cases. + /// Select this atom for collective variables calculation, using name and + /// residue number. Not all programs support this: leave this function as + /// is in those cases. 
virtual int init_atom(cvm::residue_id const &residue, std::string const &atom_name, - std::string const &segment_id) - { - cvm::error("Error: initializing an atom by name and residue number is currently not supported.\n", - COLVARS_NOT_IMPLEMENTED); - return COLVARS_NOT_IMPLEMENTED; - } + std::string const &segment_id); /// Check that this atom is valid, but do not initialize it yet virtual int check_atom_id(cvm::residue_id const &residue, std::string const &atom_name, - std::string const &segment_id) - { - cvm::error("Error: initializing an atom by name and residue number is currently not supported.\n", - COLVARS_NOT_IMPLEMENTED); - return COLVARS_NOT_IMPLEMENTED; - } + std::string const &segment_id); /// \brief Used by the atom class destructor: rather than deleting the array slot /// (costly) set the corresponding atoms_ncopies to zero - virtual void clear_atom(int index) - { - if (((size_t) index) >= atoms_ids.size()) { - cvm::error("Error: trying to disable an atom that was not previously requested.\n", - INPUT_ERROR); - } - if (atoms_ncopies[index] > 0) { - atoms_ncopies[index] -= 1; - } - } + virtual void clear_atom(int index); + + /// \brief Read atom identifiers from a file \param filename name of + /// the file (usually a PDB) \param atoms array to which atoms read + /// from "filename" will be appended \param pdb_field (optiona) if + /// "filename" is a PDB file, use this field to determine which are + /// the atoms to be set + virtual int load_atoms(char const *filename, + cvm::atom_group &atoms, + std::string const &pdb_field, + double const pdb_field_value = 0.0); + + /// \brief Load the coordinates for a group of atoms from a file + /// (usually a PDB); if "pos" is already allocated, the number of its + /// elements must match the number of atoms in "filename" + virtual int load_coords(char const *filename, + std::vector &pos, + const std::vector &indices, + std::string const &pdb_field, + double const pdb_field_value = 0.0); + + /// Clear atomic 
data + int reset(); /// Get the numeric ID of the given atom (for the program) inline int get_atom_id(int index) const @@ -485,120 +177,95 @@ public: } /// Read the current velocity of the given atom - virtual cvm::rvector get_atom_velocity(int index) + inline cvm::rvector get_atom_velocity(int index) { - cvm::error("Error: reading the current velocity of an atom is not yet implemented.\n", + cvm::error("Error: reading the current velocity of an atom " + "is not yet implemented.\n", COLVARS_NOT_IMPLEMENTED); return cvm::rvector(0.0); } - // useful functions for data management outside this class - inline std::vector *modify_atom_ids() { return &atoms_ids; } - inline std::vector *modify_atom_masses() { return &atoms_masses; } - inline std::vector *modify_atom_charges() { return &atoms_charges; } - inline std::vector *modify_atom_positions() { return &atoms_positions; } - inline std::vector *modify_atom_total_forces() { return &atoms_total_forces; } - inline std::vector *modify_atom_new_colvar_forces() { return &atoms_new_colvar_forces; } - - /// \brief Read atom identifiers from a file \param filename name of - /// the file (usually a PDB) \param atoms array to which atoms read - /// from "filename" will be appended \param pdb_field (optiona) if - /// "filename" is a PDB file, use this field to determine which are - /// the atoms to be set - virtual int load_atoms(char const *filename, - cvm::atom_group &atoms, - std::string const &pdb_field, - double const pdb_field_value = 0.0) + inline std::vector *modify_atom_ids() { - cvm::error("Error: loading atom identifiers from a file is currently not implemented.\n", - COLVARS_NOT_IMPLEMENTED); - return COLVARS_NOT_IMPLEMENTED; + return &atoms_ids; } - /// \brief Load the coordinates for a group of atoms from a file - /// (usually a PDB); if "pos" is already allocated, the number of its - /// elements must match the number of atoms in "filename" - virtual int load_coords(char const *filename, - std::vector &pos, - const 
std::vector &indices, - std::string const &pdb_field, - double const pdb_field_value = 0.0) + inline std::vector *modify_atom_masses() { - cvm::error("Error: loading atomic coordinates from a file is currently not implemented.\n"); - return COLVARS_NOT_IMPLEMENTED; + return &atoms_masses; } - // **************** ACCESS GROUP DATA **************** + inline std::vector *modify_atom_charges() + { + return &atoms_charges; + } + + inline std::vector *modify_atom_positions() + { + return &atoms_positions; + } + + inline std::vector *modify_atom_total_forces() + { + return &atoms_total_forces; + } + + inline std::vector *modify_atom_new_colvar_forces() + { + return &atoms_new_colvar_forces; + } protected: - /// \brief Array of 0-based integers used to uniquely associate atom groups + /// \brief Array of 0-based integers used to uniquely associate atoms /// within the host program - std::vector atom_groups_ids; - /// \brief Keep track of how many times each group is used by a separate cvc - std::vector atom_groups_ncopies; - /// \brief Total masses of the atom groups - std::vector atom_groups_masses; - /// \brief Total charges of the atom groups (allow redefinition during a run, as done e.g. in LAMMPS) - std::vector atom_groups_charges; - /// \brief Current centers of mass of the atom groups - std::vector atom_groups_coms; - /// \brief Most recently updated total forces on the com of each group - std::vector atom_groups_total_forces; + std::vector atoms_ids; + /// \brief Keep track of how many times each atom is used by a separate colvar object + std::vector atoms_ncopies; + /// \brief Masses of the atoms (allow redefinition during a run, as done e.g. in LAMMPS) + std::vector atoms_masses; + /// \brief Charges of the atoms (allow redefinition during a run, as done e.g. 
in LAMMPS) + std::vector atoms_charges; + /// \brief Current three-dimensional positions of the atoms + std::vector atoms_positions; + /// \brief Most recent total forces on each atom + std::vector atoms_total_forces; /// \brief Forces applied from colvars, to be communicated to the MD integrator - std::vector atom_groups_new_colvar_forces; + std::vector atoms_new_colvar_forces; - /// TODO Add here containers of handles to cvc objects that are computed in parallel + /// Used by all init_atom() functions: create a slot for an atom not + /// requested yet; returns the index in the arrays + int add_atom_slot(int atom_id); + +}; + + +/// \brief Container of atom group data (allow collection of aggregated atomic +/// data) +class colvarproxy_atom_groups { public: - /// \brief Whether this proxy implementation has capability for scalable groups - virtual int scalable_group_coms() - { - return COLVARS_NOT_IMPLEMENTED; - } + /// Contructor + colvarproxy_atom_groups(); - /// Used by all init_atom_group() functions: create a slot for an atom group not requested yet - // TODO Add a handle to cvc objects - inline int add_atom_group_slot(int atom_group_id) - { - atom_groups_ids.push_back(atom_group_id); - atom_groups_ncopies.push_back(1); - atom_groups_masses.push_back(1.0); - atom_groups_charges.push_back(0.0); - atom_groups_coms.push_back(cvm::rvector(0.0, 0.0, 0.0)); - atom_groups_total_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); - atom_groups_new_colvar_forces.push_back(cvm::rvector(0.0, 0.0, 0.0)); - return (atom_groups_ids.size() - 1); - } + /// Destructor + virtual ~colvarproxy_atom_groups(); + + /// Clear atom group data + int reset(); + + /// \brief Whether this proxy implementation has capability for scalable groups + virtual int scalable_group_coms(); /// Prepare this group for collective variables calculation, selecting atoms by internal ids (0-based) - virtual int init_atom_group(std::vector const &atoms_ids) // TODO Add a handle to cvc objects - { - 
cvm::error("Error: initializing a group outside of the colvars module is currently not supported.\n", - COLVARS_NOT_IMPLEMENTED); - return COLVARS_NOT_IMPLEMENTED; - } + virtual int init_atom_group(std::vector const &atoms_ids); /// \brief Used by the atom_group class destructor - virtual void clear_atom_group(int index) - { - if (cvm::debug()) { - log("Trying to remove/disable atom group number "+cvm::to_str(index)+"\n"); - } - - if (((size_t) index) >= atom_groups_ids.size()) { - cvm::error("Error: trying to disable an atom group that was not previously requested.\n", - INPUT_ERROR); - } - - if (atom_groups_ncopies[index] > 0) { - atom_groups_ncopies[index] -= 1; - } - } + virtual void clear_atom_group(int index); /// Get the numeric ID of the given atom group (for the MD program) - inline cvm::real get_atom_group_id(int index) const + inline int get_atom_group_id(int index) const { return atom_groups_ids[index]; } @@ -634,13 +301,288 @@ public: } /// Read the current velocity of the given atom group - virtual cvm::rvector get_atom_group_velocity(int index) + inline cvm::rvector get_atom_group_velocity(int index) { cvm::error("Error: reading the current velocity of an atom group is not yet implemented.\n", COLVARS_NOT_IMPLEMENTED); return cvm::rvector(0.0); } +protected: + + /// \brief Array of 0-based integers used to uniquely associate atom groups + /// within the host program + std::vector atom_groups_ids; + /// \brief Keep track of how many times each group is used by a separate cvc + std::vector atom_groups_ncopies; + /// \brief Total masses of the atom groups + std::vector atom_groups_masses; + /// \brief Total charges of the atom groups (allow redefinition during a run, as done e.g. 
in LAMMPS) + std::vector atom_groups_charges; + /// \brief Current centers of mass of the atom groups + std::vector atom_groups_coms; + /// \brief Most recently updated total forces on the com of each group + std::vector atom_groups_total_forces; + /// \brief Forces applied from colvars, to be communicated to the MD integrator + std::vector atom_groups_new_colvar_forces; + + /// Used by all init_atom_group() functions: create a slot for an atom group not requested yet + int add_atom_group_slot(int atom_group_id); +}; + + +/// \brief Methods for SMP parallelization +class colvarproxy_smp { + +public: + + /// Constructor + colvarproxy_smp(); + + /// Destructor + virtual ~colvarproxy_smp(); + + /// Whether threaded parallelization should be used (TODO: make this a + /// cvm::deps feature) + bool b_smp_active; + + /// Whether threaded parallelization is available (TODO: make this a cvm::deps feature) + virtual int smp_enabled(); + + /// Distribute calculation of colvars (and their components) across threads + virtual int smp_colvars_loop(); + + /// Distribute calculation of biases across threads + virtual int smp_biases_loop(); + + /// Distribute calculation of biases across threads 2nd through last, with all scripted biased on 1st thread + virtual int smp_biases_script_loop(); + + /// Index of this thread + virtual int smp_thread_id(); + + /// Number of threads sharing this address space + virtual int smp_num_threads(); + + /// Lock the proxy's shared data for access by a thread, if threads are implemented; if not implemented, does nothing + virtual int smp_lock(); + + /// Attempt to lock the proxy's shared data + virtual int smp_trylock(); + + /// Release the lock + virtual int smp_unlock(); +}; + + +/// \brief Methods for multiple-replica communication +class colvarproxy_replicas { + +public: + + /// Constructor + colvarproxy_replicas(); + + /// Destructor + virtual ~colvarproxy_replicas(); + + /// \brief Indicate if multi-replica support is available and active + 
virtual bool replica_enabled(); + + /// \brief Index of this replica + virtual int replica_index(); + + /// \brief Total number of replica + virtual int replica_num(); + + /// \brief Synchronize replica + virtual void replica_comm_barrier(); + + /// \brief Receive data from other replica + virtual int replica_comm_recv(char* msg_data, int buf_len, int src_rep); + + /// \brief Send data to other replica + virtual int replica_comm_send(char* msg_data, int msg_len, int dest_rep); + +}; + + +/// Method for scripting language interface (Tcl or Python) +class colvarproxy_script { + +public: + + /// Constructor + colvarproxy_script(); + + /// Destructor + virtual ~colvarproxy_script(); + + /// Convert a script object (Tcl or Python call argument) to a C string + virtual char *script_obj_to_str(unsigned char *obj); + + /// Pointer to the scripting interface object + /// (does not need to be allocated in a new interface) + colvarscript *script; + + /// is a user force script defined? + bool force_script_defined; + + /// Do we have a scripting interface? 
+ bool have_scripts; + + /// Run a user-defined colvar forces script + virtual int run_force_callback(); + + virtual int run_colvar_callback( + std::string const &name, + std::vector const &cvcs, + colvarvalue &value); + + virtual int run_colvar_gradient_callback( + std::string const &name, + std::vector const &cvcs, + std::vector > &gradient); +}; + + +/// Methods for data input/output +class colvarproxy_io { + +public: + + /// Constructor + colvarproxy_io(); + + /// Destructor + virtual ~colvarproxy_io(); + + /// \brief Save the current frame number in the argument given + // Returns error code + virtual int get_frame(long int &); + + /// \brief Set the current frame number (as well as colvarmodule::it) + // Returns error code + virtual int set_frame(long int); + + /// \brief Returns a reference to the given output channel; + /// if this is not open already, then open it + virtual std::ostream *output_stream(std::string const &output_name, + std::ios_base::openmode mode = + std::ios_base::out); + + /// \brief Flushes the given output channel + virtual int flush_output_stream(std::ostream *os); + + /// \brief Closes the given output channel + virtual int close_output_stream(std::string const &output_name); + + /// \brief Rename the given file, before overwriting it + virtual int backup_file(char const *filename); + + /// \brief Rename the given file, before overwriting it + inline int backup_file(std::string const &filename) + { + return backup_file(filename.c_str()); + } + + /// \brief Prefix of the input state file + inline std::string & input_prefix() + { + return input_prefix_str; + } + + /// \brief Prefix to be used for output restart files + inline std::string & restart_output_prefix() + { + return restart_output_prefix_str; + } + + /// \brief Prefix to be used for output files (final system + /// configuration) + inline std::string & output_prefix() + { + return output_prefix_str; + } + +protected: + + /// \brief Prefix to be used for input files (restarts, 
not + /// configuration) + std::string input_prefix_str, output_prefix_str, restart_output_prefix_str; + + /// \brief Currently opened output files: by default, these are ofstream objects. + /// Allows redefinition to implement different output mechanisms + std::list output_files; + /// \brief Identifiers for output_stream objects: by default, these are the names of the files + std::list output_stream_names; + +}; + + + +/// \brief Interface between the collective variables module and +/// the simulation or analysis program (NAMD, VMD, LAMMPS...). +/// This is the base class: each interfaced program is supported by a derived class. +/// Only pure virtual functions ("= 0") must be reimplemented to ensure baseline functionality. +class colvarproxy + : public colvarproxy_system, + public colvarproxy_atoms, + public colvarproxy_atom_groups, + public colvarproxy_smp, + public colvarproxy_replicas, + public colvarproxy_script, + public colvarproxy_io +{ + +public: + + /// Pointer to the main object + colvarmodule *colvars; + + /// Constructor + colvarproxy(); + + /// Destructor + virtual ~colvarproxy(); + + /// \brief Reset proxy state, e.g. requested atoms + virtual int reset(); + + /// (Re)initialize required member data after construction + virtual int setup(); + + /// \brief Update data required by the colvars module (e.g. cache atom positions) + /// + /// TODO Break up colvarproxy_namd and colvarproxy_lammps function into these + virtual int update_input(); + + /// \brief Update data based from the results of a module update (e.g. 
send forces) + virtual int update_output(); + + /// Print a message to the main log + virtual void log(std::string const &message) = 0; + + /// Print a message to the main log and let the rest of the program handle the error + virtual void error(std::string const &message) = 0; + + /// Print a message to the main log and exit with error code + virtual void fatal_error(std::string const &message) = 0; + + /// \brief Restarts will be written each time this number of steps has passed + virtual size_t restart_frequency(); + + /// Whether a simulation is running (warn against irrecovarable errors) + inline bool simulation_running() const + { + return b_simulation_running; + } + +protected: + + /// Whether a simulation is running (warn against irrecovarable errors) + bool b_simulation_running; + }; diff --git a/lib/colvars/colvars_version.h b/lib/colvars/colvars_version.h new file mode 100644 index 0000000000..312c0fd1a0 --- /dev/null +++ b/lib/colvars/colvars_version.h @@ -0,0 +1,10 @@ +#ifndef COLVARS_VERSION +#define COLVARS_VERSION "2017-08-06" +// This file is part of the Collective Variables module (Colvars). +// The original version of Colvars and its updates are located at: +// https://github.com/colvars/colvars +// Please update all Colvars source files before making any changes. +// If you wish to distribute your changes, please submit them to the +// Colvars repository at GitHub. 
+ +#endif diff --git a/lib/colvars/colvarscript.cpp b/lib/colvars/colvarscript.cpp index f192dcb7c0..89302a16a2 100644 --- a/lib/colvars/colvarscript.cpp +++ b/lib/colvars/colvarscript.cpp @@ -12,6 +12,7 @@ #include #include "colvarscript.h" +#include "colvarproxy.h" #include "colvardeps.h" @@ -27,7 +28,7 @@ extern "C" { // Generic hooks; NAMD and VMD have Tcl-specific versions in the respective proxies - int run_colvarscript_command(int argc, const char **argv) + int run_colvarscript_command(int objc, unsigned char *const objv[]) { colvarproxy *cvp = cvm::proxy; if (!cvp) { @@ -37,7 +38,7 @@ extern "C" { cvm::error("Called run_colvarscript_command without a script object initialized.\n"); return -1; } - return cvp->script->run(argc, argv); + return cvp->script->run(objc, objv); } const char * get_colvarscript_result() @@ -53,30 +54,52 @@ extern "C" { /// Run method based on given arguments -int colvarscript::run(int argc, char const *argv[]) { - - result = ""; +int colvarscript::run(int objc, unsigned char *const objv[]) +{ + result.clear(); if (cvm::debug()) { - cvm::log("Called script run with " + cvm::to_str(argc) + " args"); - for (int i = 0; i < argc; i++) { cvm::log(argv[i]); } + cvm::log("Called script run with " + cvm::to_str(objc) + " args:"); + for (int i = 0; i < objc; i++) { + cvm::log(obj_to_str(objv[i])); + } } - if (argc < 2) { + if (objc < 2) { result = help_string(); return COLVARS_OK; } - std::string cmd = argv[1]; + std::string const cmd(obj_to_str(objv[1])); int error_code = COLVARS_OK; if (cmd == "colvar") { - return proc_colvar(argc-1, &(argv[1])); + if (objc < 3) { + result = "Missing parameters\n" + help_string(); + return COLVARSCRIPT_ERROR; + } + std::string const name(obj_to_str(objv[2])); + colvar *cv = cvm::colvar_by_name(name); + if (cv == NULL) { + result = "Colvar not found: " + name; + return COLVARSCRIPT_ERROR; + } + return proc_colvar(cv, objc-1, &(objv[1])); } if (cmd == "bias") { - return proc_bias(argc-1, &(argv[1])); + if 
(objc < 3) { + result = "Missing parameters\n" + help_string(); + return COLVARSCRIPT_ERROR; + } + std::string const name(obj_to_str(objv[2])); + colvarbias *b = cvm::bias_by_name(name); + if (b == NULL) { + result = "Bias not found: " + name; + return COLVARSCRIPT_ERROR; + } + return proc_bias(b, objc-1, &(objv[1])); } if (cmd == "version") { @@ -102,20 +125,20 @@ int colvarscript::run(int argc, char const *argv[]) { error_code |= colvars->calc(); error_code |= proxy->update_output(); if (error_code) { - result += "Error updating the colvars module.\n"; + result += "Error updating the Colvars module.\n"; } return error_code; } if (cmd == "list") { - if (argc == 2) { + if (objc == 2) { for (std::vector::iterator cvi = colvars->colvars.begin(); cvi != colvars->colvars.end(); ++cvi) { result += (cvi == colvars->colvars.begin() ? "" : " ") + (*cvi)->name; } return COLVARS_OK; - } else if (argc == 3 && !strcmp(argv[2], "biases")) { + } else if (objc == 3 && !strcmp(obj_to_str(objv[2]), "biases")) { for (std::vector::iterator bi = colvars->biases.begin(); bi != colvars->biases.end(); ++bi) { @@ -130,11 +153,11 @@ int colvarscript::run(int argc, char const *argv[]) { /// Parse config from file if (cmd == "configfile") { - if (argc < 3) { + if (objc < 3) { result = "Missing arguments\n" + help_string(); return COLVARSCRIPT_ERROR; } - if (colvars->read_config_file(argv[2]) == COLVARS_OK) { + if (colvars->read_config_file(obj_to_str(objv[2])) == COLVARS_OK) { return COLVARS_OK; } else { result = "Error parsing configuration file"; @@ -144,11 +167,11 @@ int colvarscript::run(int argc, char const *argv[]) { /// Parse config from string if (cmd == "config") { - if (argc < 3) { + if (objc < 3) { result = "Missing arguments\n" + help_string(); return COLVARSCRIPT_ERROR; } - std::string conf = argv[2]; + std::string const conf(obj_to_str(objv[2])); if (colvars->read_config_string(conf) == COLVARS_OK) { return COLVARS_OK; } else { @@ -159,11 +182,11 @@ int colvarscript::run(int 
argc, char const *argv[]) { /// Load an input state file if (cmd == "load") { - if (argc < 3) { + if (objc < 3) { result = "Missing arguments\n" + help_string(); return COLVARSCRIPT_ERROR; } - proxy->input_prefix() = argv[2]; + proxy->input_prefix() = obj_to_str(objv[2]); if (colvars->setup_input() == COLVARS_OK) { return COLVARS_OK; } else { @@ -174,11 +197,11 @@ int colvarscript::run(int argc, char const *argv[]) { /// Save to an output state file if (cmd == "save") { - if (argc < 3) { + if (objc < 3) { result = "Missing arguments"; return COLVARSCRIPT_ERROR; } - proxy->output_prefix_str = argv[2]; + proxy->output_prefix() = obj_to_str(objv[2]); int error = 0; error |= colvars->setup_output(); error |= colvars->write_output_files(); @@ -200,7 +223,7 @@ int colvarscript::run(int argc, char const *argv[]) { } if (cmd == "frame") { - if (argc == 2) { + if (objc == 2) { long int f; int error = proxy->get_frame(f); if (error == COLVARS_OK) { @@ -210,10 +233,10 @@ int colvarscript::run(int argc, char const *argv[]) { result = "Frame number is not available"; return COLVARSCRIPT_ERROR; } - } else if (argc == 3) { + } else if (objc == 3) { // Failure of this function does not trigger an error, but // returns nonzero, to let scripts detect available frames - int error = proxy->set_frame(strtol(argv[2], NULL, 10)); + int error = proxy->set_frame(strtol(obj_to_str(objv[2]), NULL, 10)); result = cvm::to_str(error == COLVARS_OK ? 
0 : -1); return COLVARS_OK; } else { @@ -223,8 +246,8 @@ int colvarscript::run(int argc, char const *argv[]) { } if (cmd == "addenergy") { - if (argc == 3) { - colvars->total_bias_energy += strtod(argv[2], NULL); + if (objc == 3) { + colvars->total_bias_energy += strtod(obj_to_str(objv[2]), NULL); return COLVARS_OK; } else { result = "Wrong arguments to command \"addenergy\"\n" + help_string(); @@ -237,19 +260,9 @@ int colvarscript::run(int argc, char const *argv[]) { } -int colvarscript::proc_colvar(int argc, char const *argv[]) { - if (argc < 3) { - result = "Missing parameters\n" + help_string(); - return COLVARSCRIPT_ERROR; - } +int colvarscript::proc_colvar(colvar *cv, int objc, unsigned char *const objv[]) { - std::string name = argv[1]; - colvar *cv = cvm::colvar_by_name(name); - if (cv == NULL) { - result = "Colvar not found: " + name; - return COLVARSCRIPT_ERROR; - } - std::string subcmd = argv[2]; + std::string const subcmd(obj_to_str(objv[2])); if (subcmd == "value") { result = (cv->value()).to_simple_string(); @@ -278,11 +291,11 @@ int colvarscript::proc_colvar(int argc, char const *argv[]) { for (i = 0; i < cv->biases.size(); i++) { delete cv->biases[i]; } - cv->biases.resize(0); + cv->biases.clear(); // colvar destructor is tasked with the cleanup delete cv; // TODO this could be done by the destructors - colvars->write_traj_label(colvars->cv_traj_os); + colvars->write_traj_label(*(colvars->cv_traj_os)); return COLVARS_OK; } @@ -308,11 +321,11 @@ int colvarscript::proc_colvar(int argc, char const *argv[]) { } if (subcmd == "addforce") { - if (argc < 4) { + if (objc < 4) { result = "addforce: missing parameter: force value\n" + help_string(); return COLVARSCRIPT_ERROR; } - std::string f_str = argv[3]; + std::string const f_str(obj_to_str(objv[3])); std::istringstream is(f_str); is.width(cvm::cv_width); is.precision(cvm::cv_prec); @@ -328,11 +341,11 @@ int colvarscript::proc_colvar(int argc, char const *argv[]) { } if (subcmd == "cvcflags") { - if (argc 
< 4) { + if (objc < 4) { result = "cvcflags: missing parameter: vector of flags"; return COLVARSCRIPT_ERROR; } - std::string flags_str = argv[3]; + std::string const flags_str(obj_to_str(objv[3])); std::istringstream is(flags_str); std::vector flags; @@ -351,7 +364,7 @@ int colvarscript::proc_colvar(int argc, char const *argv[]) { } if ((subcmd == "get") || (subcmd == "set") || (subcmd == "state")) { - return proc_features(cv, argc, argv); + return proc_features(cv, objc, objv); } result = "Syntax error\n" + help_string(); @@ -359,20 +372,10 @@ int colvarscript::proc_colvar(int argc, char const *argv[]) { } -int colvarscript::proc_bias(int argc, char const *argv[]) { - if (argc < 3) { - result = "Missing parameters\n" + help_string(); - return COLVARSCRIPT_ERROR; - } +int colvarscript::proc_bias(colvarbias *b, int objc, unsigned char *const objv[]) { - std::string name = argv[1]; - colvarbias *b = cvm::bias_by_name(name); - if (b == NULL) { - result = "Bias not found: " + name; - return COLVARSCRIPT_ERROR; - } - - std::string subcmd = argv[2]; + std::string const key(obj_to_str(objv[0])); + std::string const subcmd(obj_to_str(objv[2])); if (subcmd == "energy") { result = cvm::to_str(b->get_energy()); @@ -422,16 +425,16 @@ int colvarscript::proc_bias(int argc, char const *argv[]) { // the bias destructor takes care of the cleanup at cvm level delete b; // TODO this could be done by the destructors - colvars->write_traj_label(colvars->cv_traj_os); + colvars->write_traj_label(*(colvars->cv_traj_os)); return COLVARS_OK; } if ((subcmd == "get") || (subcmd == "set") || (subcmd == "state")) { - return proc_features(b, argc, argv); + return proc_features(b, objc, objv); } - if (argc >= 4) { - std::string param = argv[3]; + if (objc >= 4) { + std::string const param(obj_to_str(objv[3])); if (subcmd == "count") { int index; if (!(std::istringstream(param) >> index)) { @@ -452,11 +455,11 @@ int colvarscript::proc_bias(int argc, char const *argv[]) { int 
colvarscript::proc_features(colvardeps *obj, - int argc, char const *argv[]) { + int objc, unsigned char *const objv[]) { // size was already checked before calling - std::string subcmd = argv[2]; + std::string const subcmd(obj_to_str(objv[2])); - if (argc == 3) { + if (objc == 3) { if (subcmd == "state") { // TODO make this returned as result? obj->print_state(); @@ -469,8 +472,8 @@ int colvarscript::proc_features(colvardeps *obj, } if ((subcmd == "get") || (subcmd == "set")) { - std::vector &features = obj->features(); - std::string const req_feature(argv[3]); + std::vector const &features = obj->features(); + std::string const req_feature(obj_to_str(objv[3])); colvardeps::feature *f = NULL; int fid = 0; for (fid = 0; fid < int(features.size()); fid++) { @@ -499,9 +502,9 @@ int colvarscript::proc_features(colvardeps *obj, } if (subcmd == "set") { - if (argc == 5) { + if (objc == 5) { std::string const yesno = - colvarparse::to_lower_cppstr(std::string(argv[4])); + colvarparse::to_lower_cppstr(std::string(obj_to_str(objv[4]))); if ((yesno == std::string("yes")) || (yesno == std::string("on")) || (yesno == std::string("1"))) { @@ -510,10 +513,7 @@ int colvarscript::proc_features(colvardeps *obj, } else if ((yesno == std::string("no")) || (yesno == std::string("off")) || (yesno == std::string("0"))) { - // TODO disable() function does not exist yet, - // dependencies will not be resolved - // obj->disable(fid); - obj->set_enabled(fid, false); + obj->disable(fid); return COLVARS_OK; } } @@ -533,11 +533,11 @@ std::string colvarscript::help_string() std::string buf; buf = "Usage: cv [args...]\n\ \n\ -Managing the colvars module:\n\ +Managing the Colvars module:\n\ configfile -- read configuration from a file\n\ config -- read configuration from the given string\n\ reset -- delete all internal configuration\n\ - delete -- delete this colvars module instance\n\ + delete -- delete this Colvars module instance\n\ version -- return version of colvars code\n\ \n\ Input and 
output:\n\ diff --git a/lib/colvars/colvarscript.h b/lib/colvars/colvarscript.h index 46b1ddd203..94d451809c 100644 --- a/lib/colvars/colvarscript.h +++ b/lib/colvars/colvarscript.h @@ -41,22 +41,30 @@ public: /// If an error is returned by one of the methods, it should set this to the error message std::string result; - /// Run script command with given positional arguments - int run(int argc, char const *argv[]); + /// Run script command with given positional arguments (objects) + int run(int objc, unsigned char *const objv[]); private: /// Run subcommands on colvar - int proc_colvar(int argc, char const *argv[]); + int proc_colvar(colvar *cv, int argc, unsigned char *const argv[]); /// Run subcommands on bias - int proc_bias(int argc, char const *argv[]); + int proc_bias(colvarbias *b, int argc, unsigned char *const argv[]); /// Run subcommands on base colvardeps object (colvar, bias, ...) int proc_features(colvardeps *obj, - int argc, char const *argv[]); + int argc, unsigned char *const argv[]); - /// Builds and return a short help + /// Build and return a short help std::string help_string(void); + +public: + + inline char const *obj_to_str(unsigned char *const obj) + { + return cvm::proxy->script_obj_to_str(obj); + } + }; diff --git a/lib/colvars/colvartypes.cpp b/lib/colvars/colvartypes.cpp index 5200d4d041..428fe1a4b1 100644 --- a/lib/colvars/colvartypes.cpp +++ b/lib/colvars/colvartypes.cpp @@ -19,6 +19,17 @@ bool colvarmodule::rotation::monitor_crossings = false; cvm::real colvarmodule::rotation::crossing_threshold = 1.0E-02; +/// Numerical recipes diagonalization +static int jacobi(cvm::real **a, cvm::real *d, cvm::real **v, int *nrot); + +/// Eigenvector sort +static int eigsrt(cvm::real *d, cvm::real **v); + +/// Transpose the matrix +static int transpose(cvm::real **v); + + + std::string cvm::rvector::to_simple_string() const { std::ostringstream os; @@ -286,7 +297,12 @@ void colvarmodule::rotation::diagonalize_matrix(cvm::matrix2d &S, // diagonalize 
int jac_nrot = 0; - jacobi(S.c_array(), S_eigval.c_array(), S_eigvec.c_array(), &jac_nrot); + if (jacobi(S.c_array(), S_eigval.c_array(), S_eigvec.c_array(), &jac_nrot) != + COLVARS_OK) { + cvm::error("Too many iterations in routine jacobi.\n" + "This is usually the result of an ill-defined set of atoms for " + "rotational alignment (RMSD, rotateReference, etc).\n"); + } eigsrt(S_eigval.c_array(), S_eigvec.c_array()); // jacobi saves eigenvectors by columns transpose(S_eigvec.c_array()); @@ -528,7 +544,7 @@ void colvarmodule::rotation::calc_optimal_rotation(std::vector co #define n 4 -void jacobi(cvm::real **a, cvm::real *d, cvm::real **v, int *nrot) +int jacobi(cvm::real **a, cvm::real *d, cvm::real **v, int *nrot) { int j,iq,ip,i; cvm::real tresh,theta,tau,t,sm,s,h,g,c; @@ -554,7 +570,7 @@ void jacobi(cvm::real **a, cvm::real *d, cvm::real **v, int *nrot) sm += std::fabs(a[ip][iq]); } if (sm == 0.0) { - return; + return COLVARS_OK; } if (i < 4) tresh=0.2*sm/(n*n); @@ -606,10 +622,11 @@ void jacobi(cvm::real **a, cvm::real *d, cvm::real **v, int *nrot) z[ip]=0.0; } } - cvm::error("Too many iterations in routine jacobi.\n"); + return COLVARS_ERROR; } -void eigsrt(cvm::real *d, cvm::real **v) + +int eigsrt(cvm::real *d, cvm::real **v) { int k,j,i; cvm::real p; @@ -628,9 +645,11 @@ void eigsrt(cvm::real *d, cvm::real **v) } } } + return COLVARS_OK; } -void transpose(cvm::real **v) + +int transpose(cvm::real **v) { cvm::real p; int i,j; @@ -641,6 +660,7 @@ void transpose(cvm::real **v) v[j][i]=p; } } + return COLVARS_OK; } #undef n diff --git a/lib/colvars/colvartypes.h b/lib/colvars/colvartypes.h index e0cebb83bc..fe3160eb4b 100644 --- a/lib/colvars/colvartypes.h +++ b/lib/colvars/colvartypes.h @@ -91,6 +91,11 @@ public: data.resize(n); } + inline void clear() + { + data.clear(); + } + inline T & operator [] (size_t const i) { return data[i]; } @@ -1015,16 +1020,6 @@ inline cvm::rvector operator * (cvm::rmatrix const &m, } -/// Numerical recipes diagonalization -void 
jacobi(cvm::real **a, cvm::real *d, cvm::real **v, int *nrot); - -/// Eigenvector sort -void eigsrt(cvm::real *d, cvm::real **v); - -/// Transpose the matrix -void transpose(cvm::real **v); - - /// \brief 1-dimensional vector of real numbers with four components and diff --git a/lib/colvars/colvarvalue.cpp b/lib/colvars/colvarvalue.cpp index deccc6b7e0..312d101603 100644 --- a/lib/colvars/colvarvalue.cpp +++ b/lib/colvars/colvarvalue.cpp @@ -16,6 +16,274 @@ +std::string const colvarvalue::type_desc(Type t) +{ + switch (t) { + case colvarvalue::type_scalar: + return "scalar number"; break; + case colvarvalue::type_3vector: + return "3-dimensional vector"; break; + case colvarvalue::type_unit3vector: + return "3-dimensional unit vector"; break; + case colvarvalue::type_unit3vectorderiv: + return "derivative of a 3-dimensional unit vector"; break; + case colvarvalue::type_quaternion: + return "4-dimensional unit quaternion"; break; + case colvarvalue::type_quaternionderiv: + return "4-dimensional tangent vector"; break; + case colvarvalue::type_vector: + return "n-dimensional vector"; break; + case colvarvalue::type_notset: + // fallthrough + default: + return "not set"; break; + } +} + + +std::string const colvarvalue::type_keyword(Type t) +{ + switch (t) { + case colvarvalue::type_notset: + default: + return "not_set"; break; + case colvarvalue::type_scalar: + return "scalar"; break; + case colvarvalue::type_3vector: + return "vector3"; break; + case colvarvalue::type_unit3vector: + return "unit_vector3"; break; + case colvarvalue::type_unit3vectorderiv: + return ""; break; + case colvarvalue::type_quaternion: + return "unit_quaternion"; break; + case colvarvalue::type_quaternionderiv: + return ""; break; + case colvarvalue::type_vector: + return "vector"; break; + } +} + + +size_t colvarvalue::num_df(Type t) +{ + switch (t) { + case colvarvalue::type_notset: + default: + return 0; break; + case colvarvalue::type_scalar: + return 1; break; + case 
colvarvalue::type_3vector: + return 3; break; + case colvarvalue::type_unit3vector: + case colvarvalue::type_unit3vectorderiv: + return 2; break; + case colvarvalue::type_quaternion: + case colvarvalue::type_quaternionderiv: + return 3; break; + case colvarvalue::type_vector: + // the size of a vector is unknown without its object + return 0; break; + } +} + + +size_t colvarvalue::num_dimensions(Type t) +{ + switch (t) { + case colvarvalue::type_notset: + default: + return 0; break; + case colvarvalue::type_scalar: + return 1; break; + case colvarvalue::type_3vector: + case colvarvalue::type_unit3vector: + case colvarvalue::type_unit3vectorderiv: + return 3; break; + case colvarvalue::type_quaternion: + case colvarvalue::type_quaternionderiv: + return 4; break; + case colvarvalue::type_vector: + // the size of a vector is unknown without its object + return 0; break; + } +} + + +void colvarvalue::reset() +{ + switch (value_type) { + case colvarvalue::type_scalar: + real_value = 0.0; + break; + case colvarvalue::type_3vector: + case colvarvalue::type_unit3vector: + case colvarvalue::type_unit3vectorderiv: + rvector_value.reset(); + break; + case colvarvalue::type_quaternion: + case colvarvalue::type_quaternionderiv: + quaternion_value.reset(); + break; + case colvarvalue::type_vector: + vector1d_value.reset(); + break; + case colvarvalue::type_notset: + default: + break; + } +} + + +void colvarvalue::apply_constraints() +{ + switch (value_type) { + case colvarvalue::type_scalar: + case colvarvalue::type_3vector: + case colvarvalue::type_unit3vectorderiv: + case colvarvalue::type_quaternionderiv: + break; + case colvarvalue::type_unit3vector: + rvector_value /= std::sqrt(rvector_value.norm2()); + break; + case colvarvalue::type_quaternion: + quaternion_value /= std::sqrt(quaternion_value.norm2()); + break; + case colvarvalue::type_vector: + if (elem_types.size() > 0) { + // if we have information about non-scalar types, use it + size_t i; + for (i = 0; i < 
elem_types.size(); i++) { + if (elem_sizes[i] == 1) continue; // TODO this can be optimized further + colvarvalue cvtmp(vector1d_value.slice(elem_indices[i], + elem_indices[i] + elem_sizes[i]), elem_types[i]); + cvtmp.apply_constraints(); + set_elem(i, cvtmp); + } + } + break; + case colvarvalue::type_notset: + default: + break; + } +} + + +void colvarvalue::type(Type const &vti) +{ + if (vti != value_type) { + // reset the value based on the previous type + reset(); + if ((value_type == type_vector) && (vti != type_vector)) { + vector1d_value.clear(); + } + value_type = vti; + } +} + + +void colvarvalue::type(colvarvalue const &x) +{ + if (x.type() != value_type) { + // reset the value based on the previous type + reset(); + if (value_type == type_vector) { + vector1d_value.clear(); + } + value_type = x.type(); + } + + if (x.type() == type_vector) { + vector1d_value.resize(x.vector1d_value.size()); + } +} + + +void colvarvalue::is_derivative() +{ + switch (value_type) { + case colvarvalue::type_scalar: + case colvarvalue::type_3vector: + case colvarvalue::type_unit3vectorderiv: + case colvarvalue::type_quaternionderiv: + break; + case colvarvalue::type_unit3vector: + type(colvarvalue::type_unit3vectorderiv); + break; + case colvarvalue::type_quaternion: + type(colvarvalue::type_quaternionderiv); + break; + case colvarvalue::type_vector: + // TODO + break; + case colvarvalue::type_notset: + default: + break; + } +} + + +colvarvalue::colvarvalue(colvarvalue const &x) + : value_type(x.type()) +{ + switch (x.type()) { + case type_scalar: + real_value = x.real_value; + break; + case type_3vector: + case type_unit3vector: + case type_unit3vectorderiv: + rvector_value = x.rvector_value; + break; + case type_quaternion: + case type_quaternionderiv: + quaternion_value = x.quaternion_value; + break; + case type_vector: + vector1d_value = x.vector1d_value; + elem_types = x.elem_types; + elem_indices = x.elem_indices; + elem_sizes = x.elem_sizes; + case type_notset: + 
default: + break; + } +} + + +colvarvalue::colvarvalue(cvm::vector1d const &v, Type vti) +{ + if ((vti != type_vector) && (v.size() != num_dimensions(vti))) { + cvm::error("Error: trying to initialize a variable of type \""+type_desc(vti)+ + "\" using a vector of size "+cvm::to_str(v.size())+ + ".\n"); + value_type = type_notset; + } else { + value_type = vti; + switch (vti) { + case type_scalar: + real_value = v[0]; + break; + case type_3vector: + case type_unit3vector: + case type_unit3vectorderiv: + rvector_value = cvm::rvector(v); + break; + case type_quaternion: + case type_quaternionderiv: + quaternion_value = cvm::quaternion(v); + break; + case type_vector: + vector1d_value = v; + break; + case type_notset: + default: + break; + } + } +} + + void colvarvalue::add_elem(colvarvalue const &x) { if (this->value_type != type_vector) { @@ -111,6 +379,13 @@ void colvarvalue::set_random() } +void colvarvalue::undef_op() const +{ + cvm::error("Error: Undefined operation on a colvar of type \""+ + type_desc(this->type())+"\".\n"); +} + + // binary operations between two colvarvalues colvarvalue operator + (colvarvalue const &x1, @@ -295,6 +570,50 @@ colvarvalue colvarvalue::dist2_grad(colvarvalue const &x2) const } +/// Return the midpoint between x1 and x2, optionally weighted by lambda +/// (which must be between 0.0 and 1.0) +colvarvalue const colvarvalue::interpolate(colvarvalue const &x1, + colvarvalue const &x2, + cvm::real const lambda) +{ + colvarvalue::check_types(x1, x2); + + if ((lambda < 0.0) || (lambda > 1.0)) { + cvm::error("Error: trying to interpolate between two colvarvalues with a " + "lamdba outside [0:1].\n", BUG_ERROR); + } + + colvarvalue interp = ((1.0-lambda)*x1 + lambda*x2); + cvm::real const d2 = x1.dist2(x2); + + switch (x1.type()) { + case colvarvalue::type_scalar: + case colvarvalue::type_3vector: + case colvarvalue::type_vector: + case colvarvalue::type_unit3vectorderiv: + case colvarvalue::type_quaternionderiv: + return interp; + break; 
+ case colvarvalue::type_unit3vector: + case colvarvalue::type_quaternion: + if (interp.norm()/std::sqrt(d2) < 1.0e-6) { + cvm::error("Error: interpolation between "+cvm::to_str(x1)+" and "+ + cvm::to_str(x2)+" with lambda = "+cvm::to_str(lambda)+ + " is undefined: result = "+cvm::to_str(interp)+"\n", + INPUT_ERROR); + } + interp.apply_constraints(); + return interp; + break; + case colvarvalue::type_notset: + default: + x1.undef_op(); + break; + } + return colvarvalue(colvarvalue::type_notset); +} + + std::string colvarvalue::to_simple_string() const { switch (type()) { diff --git a/lib/colvars/colvarvalue.h b/lib/colvars/colvarvalue.h index e369feefcd..41759e92b0 100644 --- a/lib/colvars/colvarvalue.h +++ b/lib/colvars/colvarvalue.h @@ -169,38 +169,14 @@ public: } /// Set the type explicitly - inline void type(Type const &vti) - { - if (vti != value_type) { - // reset the value based on the previous type - reset(); - if ((value_type == type_vector) && (vti != type_vector)) { - vector1d_value.resize(0); - } - value_type = vti; - } - } + void type(Type const &vti); /// Set the type after another \link colvarvalue \endlink - inline void type(colvarvalue const &x) - { - if (x.type() != value_type) { - // reset the value based on the previous type - reset(); - if (value_type == type_vector) { - vector1d_value.resize(0); - } - value_type = x.type(); - } - - if (x.type() == type_vector) { - vector1d_value.resize(x.vector1d_value.size()); - } - } + void type(colvarvalue const &x); /// Make the type a derivative of the original type /// (so that its constraints do not apply) - inline void is_derivative(); + void is_derivative(); /// Square norm of this colvarvalue cvm::real norm2() const; @@ -217,6 +193,12 @@ public: /// Derivative with respect to this \link colvarvalue \endlink of the square distance colvarvalue dist2_grad(colvarvalue const &x2) const; + /// Return the midpoint between x1 and x2, optionally weighted by lambda + /// (which must be between 0.0 and 1.0) + 
static colvarvalue const interpolate(colvarvalue const &x1, + colvarvalue const &x2, + cvm::real const lambda = 0.5); + /// Assignment operator (type of x is checked) colvarvalue & operator = (colvarvalue const &x); @@ -303,34 +285,16 @@ public: void set_elem(int const icv, colvarvalue const &x); /// Get a scalar number out of an element of the vector - inline cvm::real operator [] (int const i) const - { - if (vector1d_value.size() > 0) { - return vector1d_value[i]; - } else { - cvm::error("Error: trying to use as a vector a variable that is not initialized as such.\n"); - return 0.0; - } - } + cvm::real operator [] (int const i) const; /// Use an element of the vector as a scalar number - inline cvm::real & operator [] (int const i) - { - if (vector1d_value.size() > 0) { - return vector1d_value[i]; - } else { - cvm::error("Error: trying to use as a vector a variable that is not initialized as such.\n"); - real_value = 0.0; - return real_value; - } - } - + cvm::real & operator [] (int const i); /// Ensure that the two types are the same within a binary operator - int static check_types(colvarvalue const &x1, colvarvalue const &x2); + static int check_types(colvarvalue const &x1, colvarvalue const &x2); /// Ensure that the two types are the same within an assignment, or that the left side is type_notset - int static check_types_assign(Type const &vt1, Type const &vt2); + static int check_types_assign(Type const &vt1, Type const &vt2); /// Undefined operation void undef_op() const; @@ -359,14 +323,14 @@ public: /// \brief Optimized routine for the inner product of one collective /// variable with an array - void static inner_opt(colvarvalue const &x, + static void inner_opt(colvarvalue const &x, std::vector::iterator &xv, std::vector::iterator const &xv_end, std::vector::iterator &result); /// \brief Optimized routine for the inner product of one collective /// variable with an array - void static inner_opt(colvarvalue const &x, + static void inner_opt(colvarvalue 
const &x, std::list::iterator &xv, std::list::iterator const &xv_end, std::vector::iterator &result); @@ -374,14 +338,14 @@ public: /// \brief Optimized routine for the second order Legendre /// polynomial, (3cos^2(w)-1)/2, of one collective variable with an /// array - void static p2leg_opt(colvarvalue const &x, + static void p2leg_opt(colvarvalue const &x, std::vector::iterator &xv, std::vector::iterator const &xv_end, std::vector::iterator &result); /// \brief Optimized routine for the second order Legendre /// polynomial of one collective variable with an array - void static p2leg_opt(colvarvalue const &x, + static void p2leg_opt(colvarvalue const &x, std::list::iterator &xv, std::list::iterator const &xv_end, std::vector::iterator &result); @@ -389,101 +353,6 @@ public: }; - -inline std::string const colvarvalue::type_desc(Type t) -{ - switch (t) { - case colvarvalue::type_scalar: - return "scalar number"; break; - case colvarvalue::type_3vector: - return "3-dimensional vector"; break; - case colvarvalue::type_unit3vector: - return "3-dimensional unit vector"; break; - case colvarvalue::type_unit3vectorderiv: - return "derivative of a 3-dimensional unit vector"; break; - case colvarvalue::type_quaternion: - return "4-dimensional unit quaternion"; break; - case colvarvalue::type_quaternionderiv: - return "4-dimensional tangent vector"; break; - case colvarvalue::type_vector: - return "n-dimensional vector"; break; - case colvarvalue::type_notset: - // fallthrough - default: - return "not set"; break; - } -} - - -inline std::string const colvarvalue::type_keyword(Type t) -{ - switch (t) { - case colvarvalue::type_notset: - default: - return "not_set"; break; - case colvarvalue::type_scalar: - return "scalar"; break; - case colvarvalue::type_3vector: - return "vector3"; break; - case colvarvalue::type_unit3vector: - return "unit_vector3"; break; - case colvarvalue::type_unit3vectorderiv: - return ""; break; - case colvarvalue::type_quaternion: - return 
"unit_quaternion"; break; - case colvarvalue::type_quaternionderiv: - return ""; break; - case colvarvalue::type_vector: - return "vector"; break; - } -} - - -inline size_t colvarvalue::num_df(Type t) -{ - switch (t) { - case colvarvalue::type_notset: - default: - return 0; break; - case colvarvalue::type_scalar: - return 1; break; - case colvarvalue::type_3vector: - return 3; break; - case colvarvalue::type_unit3vector: - case colvarvalue::type_unit3vectorderiv: - return 2; break; - case colvarvalue::type_quaternion: - case colvarvalue::type_quaternionderiv: - return 3; break; - case colvarvalue::type_vector: - // the size of a vector is unknown without its object - return 0; break; - } -} - - -inline size_t colvarvalue::num_dimensions(Type t) -{ - switch (t) { - case colvarvalue::type_notset: - default: - return 0; break; - case colvarvalue::type_scalar: - return 1; break; - case colvarvalue::type_3vector: - case colvarvalue::type_unit3vector: - case colvarvalue::type_unit3vectorderiv: - return 3; break; - case colvarvalue::type_quaternion: - case colvarvalue::type_quaternionderiv: - return 4; break; - case colvarvalue::type_vector: - // the size of a vector is unknown without its object - return 0; break; - } -} - - inline size_t colvarvalue::size() const { switch (value_type) { @@ -505,62 +374,48 @@ inline size_t colvarvalue::size() const } -inline colvarvalue::colvarvalue(colvarvalue const &x) - : value_type(x.type()) +inline cvm::real colvarvalue::operator [] (int const i) const { - switch (x.type()) { - case type_scalar: - real_value = x.real_value; - break; - case type_3vector: - case type_unit3vector: - case type_unit3vectorderiv: - rvector_value = x.rvector_value; - break; - case type_quaternion: - case type_quaternionderiv: - quaternion_value = x.quaternion_value; - break; - case type_vector: - vector1d_value = x.vector1d_value; - elem_types = x.elem_types; - elem_indices = x.elem_indices; - elem_sizes = x.elem_sizes; - case type_notset: + switch 
(value_type) { + case colvarvalue::type_notset: default: - break; + cvm::error("Error: trying to access a colvar value " + "that is not initialized.\n", BUG_ERROR); + return 0.0; break; + case colvarvalue::type_scalar: + return real_value; break; + case colvarvalue::type_3vector: + case colvarvalue::type_unit3vector: + case colvarvalue::type_unit3vectorderiv: + return rvector_value[i]; break; + case colvarvalue::type_quaternion: + case colvarvalue::type_quaternionderiv: + return quaternion_value[i]; break; + case colvarvalue::type_vector: + return vector1d_value[i]; break; } } -inline colvarvalue::colvarvalue(cvm::vector1d const &v, Type vti) + +inline cvm::real & colvarvalue::operator [] (int const i) { - if ((vti != type_vector) && (v.size() != num_dimensions(vti))) { - cvm::error("Error: trying to initialize a variable of type \""+type_desc(vti)+ - "\" using a vector of size "+cvm::to_str(v.size())+ - ".\n"); - value_type = type_notset; - } else { - value_type = vti; - switch (vti) { - case type_scalar: - real_value = v[0]; - break; - case type_3vector: - case type_unit3vector: - case type_unit3vectorderiv: - rvector_value = cvm::rvector(v); - break; - case type_quaternion: - case type_quaternionderiv: - quaternion_value = cvm::quaternion(v); - break; - case type_vector: - vector1d_value = v; - break; - case type_notset: - default: - break; - } + switch (value_type) { + case colvarvalue::type_notset: + default: + cvm::error("Error: trying to access a colvar value " + "that is not initialized.\n", BUG_ERROR); + return real_value; break; + case colvarvalue::type_scalar: + return real_value; break; + case colvarvalue::type_3vector: + case colvarvalue::type_unit3vector: + case colvarvalue::type_unit3vectorderiv: + return rvector_value[i]; break; + case colvarvalue::type_quaternion: + case colvarvalue::type_quaternionderiv: + return quaternion_value[i]; break; + case colvarvalue::type_vector: + return vector1d_value[i]; break; } } @@ -638,13 +493,6 @@ inline int 
colvarvalue::check_types_assign(colvarvalue::Type const &vt1, } -inline void colvarvalue::undef_op() const -{ - cvm::error("Error: Undefined operation on a colvar of type \""+ - type_desc(this->type())+"\".\n"); -} - - inline colvarvalue & colvarvalue::operator = (colvarvalue const &x) { check_types_assign(this->type(), x.type()); @@ -704,6 +552,7 @@ inline void colvarvalue::operator += (colvarvalue const &x) } } + inline void colvarvalue::operator -= (colvarvalue const &x) { colvarvalue::check_types(*this, x); @@ -802,89 +651,6 @@ inline cvm::vector1d const colvarvalue::as_vector() const } -inline void colvarvalue::reset() -{ - switch (value_type) { - case colvarvalue::type_scalar: - real_value = 0.0; - break; - case colvarvalue::type_3vector: - case colvarvalue::type_unit3vector: - case colvarvalue::type_unit3vectorderiv: - rvector_value.reset(); - break; - case colvarvalue::type_quaternion: - case colvarvalue::type_quaternionderiv: - quaternion_value.reset(); - break; - case colvarvalue::type_vector: - vector1d_value.reset(); - break; - case colvarvalue::type_notset: - default: - break; - } -} - - -inline void colvarvalue::apply_constraints() -{ - switch (value_type) { - case colvarvalue::type_scalar: - case colvarvalue::type_3vector: - case colvarvalue::type_unit3vectorderiv: - case colvarvalue::type_quaternionderiv: - break; - case colvarvalue::type_unit3vector: - rvector_value /= std::sqrt(rvector_value.norm2()); - break; - case colvarvalue::type_quaternion: - quaternion_value /= std::sqrt(quaternion_value.norm2()); - break; - case colvarvalue::type_vector: - if (elem_types.size() > 0) { - // if we have information about non-scalar types, use it - size_t i; - for (i = 0; i < elem_types.size(); i++) { - if (elem_sizes[i] == 1) continue; // TODO this can be optimized further - colvarvalue cvtmp(vector1d_value.slice(elem_indices[i], - elem_indices[i] + elem_sizes[i]), elem_types[i]); - cvtmp.apply_constraints(); - set_elem(i, cvtmp); - } - } - break; - case 
colvarvalue::type_notset: - default: - break; - } -} - - -inline void colvarvalue::is_derivative() -{ - switch (value_type) { - case colvarvalue::type_scalar: - case colvarvalue::type_3vector: - case colvarvalue::type_unit3vectorderiv: - case colvarvalue::type_quaternionderiv: - break; - case colvarvalue::type_unit3vector: - type(colvarvalue::type_unit3vectorderiv); - break; - case colvarvalue::type_quaternion: - type(colvarvalue::type_quaternionderiv); - break; - case colvarvalue::type_vector: - // TODO - break; - case colvarvalue::type_notset: - default: - break; - } -} - - inline cvm::real colvarvalue::norm2() const { switch (value_type) { diff --git a/lib/gpu/.gitignore b/lib/gpu/.gitignore index 228a9f7731..9ad6046a09 100644 --- a/lib/gpu/.gitignore +++ b/lib/gpu/.gitignore @@ -1,4 +1,6 @@ -obj -obj_ocl -ocl_get_devices -nvc_get_devices +/obj +/obj_ocl +/ocl_get_devices +/nvc_get_devices +/*.cubin +/*_cubin.h diff --git a/lib/gpu/Install.py b/lib/gpu/Install.py index d396be5e1a..6ea2159de5 100644 --- a/lib/gpu/Install.py +++ b/lib/gpu/Install.py @@ -3,44 +3,57 @@ # Install.py tool to build the GPU library # used to automate the steps described in the README file in this dir -import sys,os,re,commands +from __future__ import print_function +import sys,os,subprocess # help message help = """ -Syntax: python Install.py -i isuffix -h hdir -a arch -p precision -e esuffix -m -o osuffix - specify one or more options, order does not matter - copies an existing Makefile.isuffix in lib/gpu to Makefile.auto - optionally edits these variables in Makefile.auto: - CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE - optionally uses Makefile.auto to build the GPU library -> libgpu.a - and to copy a Makefile.lammps.esuffix -> Makefile.lammps - optionally copies Makefile.auto to a new Makefile.osuffix +Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -m -o osuffix" +Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p 
precision -e esuffix -m -o osuffix - -i = use Makefile.isuffix as starting point, copy to Makefile.auto - default isuffix = linux +specify one or more options, order does not matter + +copies an existing Makefile.machine in lib/gpu to Makefile.auto +optionally edits these variables in Makefile.auto: + CUDA_HOME, CUDA_ARCH, CUDA_PRECISION, EXTRAMAKE +optionally uses Makefile.auto to build the GPU library -> libgpu.a + and to copy a Makefile.lammps.esuffix -> Makefile.lammps +optionally copies Makefile.auto to a new Makefile.osuffix + + -m = use Makefile.machine as starting point, copy to Makefile.auto + default machine = linux -h = set CUDA_HOME variable in Makefile.auto to hdir hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda -a = set CUDA_ARCH variable in Makefile.auto to arch - use arch = ?? for K40 (Tesla) - use arch = 37 for dual K80 (Tesla) - use arch = 60 for P100 (Pascal) + use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0) + or GeForce GTX 580 or similar + use arch = 30 for Tesla K10 (Kepler) + use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar + use arch = 37 for Tesla dual K80 (Kepler) + use arch = 60 for Tesla P100 (Pascal) -p = set CUDA_PRECISION variable in Makefile.auto to precision use precision = double or mixed or single -e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix - -m = make the GPU library using Makefile.auto + -b = make the GPU library using Makefile.auto first performs a "make clean" - produces libgpu.a if successful + then produces libgpu.a if successful also copies EXTRAMAKE file -> Makefile.lammps -e can set which Makefile.lammps.esuffix file is copied -o = copy final Makefile.auto to Makefile.osuffix + +Examples: + +make lib-gpu args="-b" # build GPU lib with default Makefile.linux +make lib-gpu args="-m xk7 -p single -o xk7.single" # create new Makefile.xk7.single, altered for single-precision +make lib-gpu args="-m mpi -a 35 -p single -o mpi.mixed -b" # create new 
Makefile.mpi.mixed, also build GPU lib with these settings """ # print error message or help def error(str=None): - if not str: print help - else: print "ERROR",str + if not str: print(help) + else: print("ERROR",str) sys.exit() # parse args @@ -56,7 +69,7 @@ outflag = 0 iarg = 0 while iarg < nargs: - if args[iarg] == "-i": + if args[iarg] == "-m": if iarg+2 > nargs: error() isuffix = args[iarg+1] iarg += 2 @@ -80,7 +93,7 @@ while iarg < nargs: eflag = 1 lmpsuffix = args[iarg+1] iarg += 2 - elif args[iarg] == "-m": + elif args[iarg] == "-b": makeflag = 1 iarg += 1 elif args[iarg] == "-o": @@ -95,10 +108,10 @@ if pflag: elif precision == "mixed": precstr = "-D_SINGLE_DOUBLE" elif precision == "single": precstr = "-D_SINGLE_SINGLE" else: error("Invalid precision setting") - + # create Makefile.auto # reset EXTRAMAKE, CUDA_HOME, CUDA_ARCH, CUDA_PRECISION if requested - + if not os.path.exists("Makefile.%s" % isuffix): error("lib/gpu/Makefile.%s does not exist" % isuffix) @@ -108,9 +121,9 @@ fp = open("Makefile.auto",'w') for line in lines: words = line.split() if len(words) != 3: - print >>fp,line, + fp.write(line) continue - + if hflag and words[0] == "CUDA_HOME" and words[1] == '=': line = line.replace(words[2],hdir) if aflag and words[0] == "CUDA_ARCH" and words[1] == '=': @@ -119,20 +132,20 @@ for line in lines: line = line.replace(words[2],precstr) if eflag and words[0] == "EXTRAMAKE" and words[1] == '=': line = line.replace(words[2],"Makefile.lammps.%s" % lmpsuffix) - - print >>fp,line, + fp.write(line) fp.close() # perform make # make operations copies EXTRAMAKE file to Makefile.lammps if makeflag: - print "Building libgpu.a ..." 
+ print("Building libgpu.a ...") cmd = "rm -f libgpu.a" - commands.getoutput(cmd) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) cmd = "make -f Makefile.auto clean; make -f Makefile.auto" - commands.getoutput(cmd) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + print(txt.decode('UTF-8')) if not os.path.exists("libgpu.a"): error("Build of lib/gpu/libgpu.a was NOT successful") if not os.path.exists("Makefile.lammps"): @@ -141,6 +154,6 @@ if makeflag: # copy new Makefile.auto to Makefile.osuffix if outflag: - print "Creating new Makefile.%s" % osuffix + print("Creating new Makefile.%s" % osuffix) cmd = "cp Makefile.auto Makefile.%s" % osuffix - commands.getoutput(cmd) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) diff --git a/lib/gpu/Makefile.linux b/lib/gpu/Makefile.linux index d72c0ba437..dfcc5bf7d3 100644 --- a/lib/gpu/Makefile.linux +++ b/lib/gpu/Makefile.linux @@ -37,7 +37,7 @@ CUDA_INCLUDE = -I$(CUDA_HOME)/include CUDA_LIB = -L$(CUDA_HOME)/lib64 CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) -CUDR_CPP = mpic++ -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC +CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias BIN_DIR = ./ diff --git a/lib/gpu/Makefile.mingw32-cross b/lib/gpu/Makefile.mingw32-cross deleted file mode 100644 index 6f77634755..0000000000 --- a/lib/gpu/Makefile.mingw32-cross +++ /dev/null @@ -1,17 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \ - -mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \ - -I$(CUDA_HOME)/include -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic -L../../src/STUBS -lmpi_mingw32 -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross 
- -BIN_DIR = Obj_mingw32 -OBJ_DIR = Obj_mingw32 -LIB_DIR = Obj_mingw32 -AR = i686-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mingw32-cross-mpi b/lib/gpu/Makefile.mingw32-cross-mpi deleted file mode 100644 index 94099cd90b..0000000000 --- a/lib/gpu/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,19 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = i686-w64-mingw32-g++ -O2 -march=i686 -mtune=generic -mfpmath=387 \ - -mpc64 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \ - -I../../tools/mingw-cross/mpich2-win32/include/ \ - -DMPICH_IGNORE_CXX_SEEK -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw32 -Wl,-Bdynamic,-lOpenCL,-Bstatic \ - -L../../tools/mingw-cross/mpich2-win32/lib -lmpi -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross - -BIN_DIR = Obj_mingw32-mpi -OBJ_DIR = Obj_mingw32-mpi -LIB_DIR = Obj_mingw32-mpi -AR = i686-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mingw64-cross b/lib/gpu/Makefile.mingw64-cross deleted file mode 100644 index 54f6af8c65..0000000000 --- a/lib/gpu/Makefile.mingw64-cross +++ /dev/null @@ -1,18 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \ - -msse2 -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS \ - -I$(CUDA_HOME)/include -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \ - -L../../src/STUBS -lmpi_mingw64 -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross - -BIN_DIR = Obj_mingw64 -OBJ_DIR = Obj_mingw64 -LIB_DIR = Obj_mingw64 -AR = x86_64-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mingw64-cross-mpi b/lib/gpu/Makefile.mingw64-cross-mpi deleted file mode 100644 index 2ff72d98b1..0000000000 --- a/lib/gpu/Makefile.mingw64-cross-mpi +++ /dev/null @@ 
-1,20 +0,0 @@ -CUDA_HOME = ../../tools/mingw-cross/OpenCL - -OCL_CPP = x86_64-w64-mingw32-g++ -O3 -march=core2 -mtune=core2 -mpc64 \ - -msse2 -DMPI_GERYON -DUCL_NO_EXIT -I$(CUDA_HOME)/include \ - -I../../tools/mingw-cross/mpich2-win64/include/ \ - -DMPICH_IGNORE_CXX_SEEK - -OCL_LINK = -static -Wl,--enable-stdcall-fixup -L$(CUDA_HOME)/../Obj_mingw64 -Wl,-Bdynamic,-lOpenCL,-Bstatic \ - -L../../tools/mingw-cross/mpich2-win64/lib -lmpi -OCL_PREC = -D_SINGLE_DOUBLE -OCL_TUNE = -DFERMI_OCL -EXTRAMAKE = Makefile.lammps.mingw-cross - -BIN_DIR = Obj_mingw64-mpi -OBJ_DIR = Obj_mingw64-mpi -LIB_DIR = Obj_mingw64-mpi -AR = x86_64-w64-mingw32-ar -BSH = /bin/sh - -include Opencl.makefile diff --git a/lib/gpu/Makefile.mpi b/lib/gpu/Makefile.mpi new file mode 120000 index 0000000000..8bad27d081 --- /dev/null +++ b/lib/gpu/Makefile.mpi @@ -0,0 +1 @@ +Makefile.linux \ No newline at end of file diff --git a/lib/gpu/Makefile.serial b/lib/gpu/Makefile.serial index 809e99cc94..9348dc565a 100644 --- a/lib/gpu/Makefile.serial +++ b/lib/gpu/Makefile.serial @@ -1,5 +1,5 @@ # /* ---------------------------------------------------------------------- -# Generic Makefile for CUDA using MPI STUBS library +# Generic Linux Makefile for CUDA # - Change CUDA_ARCH for your GPU # ------------------------------------------------------------------------- */ @@ -7,23 +7,38 @@ EXTRAMAKE = Makefile.lammps.standard -CUDA_HOME = $(HOME)/cuda +ifeq ($(CUDA_HOME),) +CUDA_HOME = /usr/local/cuda +endif + NVCC = nvcc # Tesla CUDA -CUDA_ARCH = -arch=sm_20 +CUDA_ARCH = -arch=sm_21 # newer CUDA #CUDA_ARCH = -arch=sm_13 # older CUDA #CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE +CUDA_ARCH = -arch=sm_35 + +# this setting should match LAMMPS Makefile +# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL + +LMP_INC = -DLAMMPS_SMALLBIG + +# precision for GPU calculations +# -D_SINGLE_SINGLE # Single precision for all calculations +# -D_DOUBLE_DOUBLE # Double precision for all calculations +# 
-D_SINGLE_DOUBLE # Accumulation of forces, etc. in double CUDA_PRECISION = -D_SINGLE_DOUBLE -CUDA_INCLUDE = -I$(CUDA_HOME)/include -CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi -CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math -CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -I../../src/STUBS -CUDR_OPTS = -O2 +CUDA_INCLUDE = -I$(CUDA_HOME)/include +CUDA_LIB = -L$(CUDA_HOME)/lib64 -L../../src/STUBS -lmpi_stubs +CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) + +CUDR_CPP = g++ -DMPI_GERYON -DUCL_NO_EXIT -fPIC -I../../src/STUBS +CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias BIN_DIR = ./ OBJ_DIR = ./ @@ -31,5 +46,7 @@ LIB_DIR = ./ AR = ar BSH = /bin/sh +CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini + include Nvidia.makefile diff --git a/lib/gpu/lal_aux_fun1.h b/lib/gpu/lal_aux_fun1.h index b40bb7f943..47a216ff6f 100644 --- a/lib/gpu/lal_aux_fun1.h +++ b/lib/gpu/lal_aux_fun1.h @@ -22,21 +22,21 @@ offset=tid & (t_per_atom-1); \ ii=fast_mul((int)BLOCK_ID_X,(int)(BLOCK_SIZE_X)/t_per_atom)+tid/t_per_atom; -#define nbor_info(nbor_mem, packed_mem, nbor_stride, t_per_atom, ii, offset, \ - i, numj, stride, nbor_end, nbor_begin) \ - i=nbor_mem[ii]; \ - nbor_begin=ii+nbor_stride; \ - numj=nbor_mem[nbor_begin]; \ - if (nbor_mem==packed_mem) { \ - nbor_begin+=nbor_stride+fast_mul(ii,t_per_atom-1); \ - stride=fast_mul(t_per_atom,nbor_stride); \ - nbor_end=nbor_begin+fast_mul(numj/t_per_atom,stride)+(numj & (t_per_atom-1)); \ +#define nbor_info(dev_nbor, dev_packed, nbor_pitch, t_per_atom, ii, offset, \ + i, numj, n_stride, nbor_end, nbor_begin) \ + i=dev_nbor[ii]; \ + nbor_begin=ii+nbor_pitch; \ + numj=dev_nbor[nbor_begin]; \ + if (dev_nbor==dev_packed) { \ + nbor_begin+=nbor_pitch+fast_mul(ii,t_per_atom-1); \ + n_stride=fast_mul(t_per_atom,nbor_pitch); \ + nbor_end=nbor_begin+fast_mul(numj/t_per_atom,n_stride)+(numj & (t_per_atom-1)); \ nbor_begin+=offset; \ } else { \ - nbor_begin+=nbor_stride; \ - nbor_begin=nbor_mem[nbor_begin]; \ + 
nbor_begin+=nbor_pitch; \ + nbor_begin=dev_nbor[nbor_begin]; \ nbor_end=nbor_begin+numj; \ - stride=t_per_atom; \ + n_stride=t_per_atom; \ nbor_begin+=offset; \ } diff --git a/lib/gpu/lal_base_three.cpp b/lib/gpu/lal_base_three.cpp index f772e36295..aa77a48c66 100644 --- a/lib/gpu/lal_base_three.cpp +++ b/lib/gpu/lal_base_three.cpp @@ -20,7 +20,7 @@ using namespace LAMMPS_AL; extern Device global_device; template -BaseThreeT::BaseThree() : _compiled(false), _max_bytes(0) { +BaseThreeT::BaseThree() : _compiled(false), _max_bytes(0) { device=&global_device; ans=new Answer(); nbor=new Neighbor(); @@ -53,8 +53,8 @@ int BaseThreeT::init_three(const int nlocal, const int nall, const int max_nbors, const int maxspecial, const double cell_size, const double gpu_split, FILE *_screen, const void *pair_program, - const char *k_two, const char *k_three_center, - const char *k_three_end) { + const char *two, const char *three_center, + const char *three_end, const char *short_nbor) { screen=_screen; int gpu_nbor=0; @@ -70,10 +70,10 @@ int BaseThreeT::init_three(const int nlocal, const int nall, _gpu_host=1; _threads_per_atom=device->threads_per_atom(); - if (_threads_per_atom>1 && gpu_nbor==0) { + if (_threads_per_atom>1 && gpu_nbor==0) { // neigh no and tpa > 1 nbor->packing(true); _nbor_data=&(nbor->dev_packed); - } else + } else // neigh yes or tpa == 1 _nbor_data=&(nbor->dev_nbor); if (_threads_per_atom*_threads_per_atom>device->warp_size()) return -10; @@ -97,7 +97,7 @@ int BaseThreeT::init_three(const int nlocal, const int nall, _block_pair=device->pair_block_size(); _block_size=device->block_ellipse(); - compile_kernels(*ucl_device,pair_program,k_two,k_three_center,k_three_end); + compile_kernels(*ucl_device,pair_program,two,three_center,three_end,short_nbor); // Initialize host-device load balancer hd_balancer.init(device,gpu_nbor,gpu_split); @@ -113,6 +113,11 @@ int BaseThreeT::init_three(const int nlocal, const int nall, _max_an_bytes+=ans2->gpu_bytes(); #endif + int 
ef_nall=nall; + if (ef_nall==0) + ef_nall=2000; + dev_short_nbor.alloc(ef_nall*(2+max_nbors),*(this->ucl_device),UCL_READ_WRITE); + return 0; } @@ -136,6 +141,7 @@ void BaseThreeT::clear_atomic() { k_three_end.clear(); k_three_end_vatom.clear(); k_pair.clear(); + k_short_nbor.clear(); delete pair_program; _compiled=false; } @@ -143,6 +149,7 @@ void BaseThreeT::clear_atomic() { time_pair.clear(); hd_balancer.clear(); + dev_short_nbor.clear(); nbor->clear(); ans->clear(); #ifdef THREE_CONCURRENT @@ -169,6 +176,8 @@ int * BaseThreeT::reset_nbors(const int nall, const int inum, const int nlist, if (!success) return NULL; + _nall = nall; + // originally the requirement that nall == nlist was enforced // to allow direct indexing neighbors of neighbors after re-arrangement // nbor->get_host3(nall,nlist,ilist,numj,firstneigh,block_size()); @@ -203,6 +212,8 @@ inline int BaseThreeT::build_nbor_list(const int inum, const int host_inum, return 0; atom->cast_copy_x(host_x,host_type); + _nall = nall; + int mn; nbor->build_nbor_list(host_x, nall, host_inum, nall, *atom, sublo, subhi, tag, nspecial, special, success, mn); @@ -247,12 +258,22 @@ void BaseThreeT::compute(const int f_ago, const int inum_full, const int nall, reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success); if (!success) return; + _max_nbors = nbor->max_nbor_loop(nlist,numj,ilist); } atom->cast_x_data(host_x,host_type); hd_balancer.start_timer(); atom->add_x_data(host_x,host_type); + // re-allocate dev_short_nbor if necessary + if (nall*(2+_max_nbors) > dev_short_nbor.cols()) { + int _nmax=static_cast(static_cast(nall)*1.10); + dev_short_nbor.resize((2+_max_nbors)*_nmax); + } + + // _ainum to be used in loop() for short neighbor list build + _ainum = nlist; + int evatom=0; if (eatom || vatom) evatom=1; @@ -300,7 +321,7 @@ int ** BaseThreeT::compute(const int ago, const int inum_full, // Build neighbor list on GPU if necessary if (ago==0) { - build_nbor_list(inum, inum_full-inum, nall, host_x, 
host_type, + _max_nbors = build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, sublo, subhi, tag, nspecial, special, success); if (!success) return NULL; @@ -313,6 +334,15 @@ int ** BaseThreeT::compute(const int ago, const int inum_full, *ilist=nbor->host_ilist.begin(); *jnum=nbor->host_acc.begin(); + // re-allocate dev_short_nbor if necessary + if (nall*(2+_max_nbors) > dev_short_nbor.cols()) { + int _nmax=static_cast(static_cast(nall)*1.10); + dev_short_nbor.resize((2+_max_nbors)*_nmax); + } + + // _ainum to be used in loop() for short neighbor list build + _ainum = nall; + int evatom=0; if (eatom || vatom) evatom=1; @@ -339,19 +369,20 @@ double BaseThreeT::host_memory_usage_atomic() const { template void BaseThreeT::compile_kernels(UCL_Device &dev, const void *pair_str, - const char *ktwo, const char *kthree_center, - const char *kthree_end) { + const char *two, const char *three_center, + const char *three_end, const char* short_nbor) { if (_compiled) return; - std::string vatom_name=std::string(kthree_end)+"_vatom"; + std::string vatom_name=std::string(three_end)+"_vatom"; pair_program=new UCL_Program(dev); pair_program->load_string(pair_str,device->compile_string().c_str()); - k_three_center.set_function(*pair_program,kthree_center); - k_three_end.set_function(*pair_program,kthree_end); + k_three_center.set_function(*pair_program,three_center); + k_three_end.set_function(*pair_program,three_end); k_three_end_vatom.set_function(*pair_program,vatom_name.c_str()); - k_pair.set_function(*pair_program,ktwo); + k_pair.set_function(*pair_program,two); + k_short_nbor.set_function(*pair_program,short_nbor); pos_tex.get_texture(*pair_program,"pos_tex"); #ifdef THREE_CONCURRENT diff --git a/lib/gpu/lal_base_three.h b/lib/gpu/lal_base_three.h index 4f27ecdf92..f5f36863c4 100644 --- a/lib/gpu/lal_base_three.h +++ b/lib/gpu/lal_base_three.h @@ -56,7 +56,8 @@ class BaseThree { const int maxspecial, const double cell_size, const double gpu_split, FILE *screen, 
const void *pair_program, const char *k_two, - const char *k_three_center, const char *k_three_end); + const char *k_three_center, const char *k_three_end, + const char *k_short_nbor=NULL); /// Estimate the overhead for GPU context changes and CPU driver void estimate_gpu_overhead(); @@ -73,18 +74,18 @@ class BaseThree { } /// Check if there is enough storage for neighbors and realloc if not - /** \param nlocal number of particles whose nbors must be stored on device - * \param host_inum number of particles whose nbors need to copied to host - * \param current maximum number of neighbors + /** \param inum number of particles whose nbors must be stored on device + * \param max_nbors maximum number of neighbors + * \param success set to false if insufficient memory * \note olist_size=total number of local particles **/ inline void resize_local(const int inum, const int max_nbors, bool &success) { nbor->resize(inum,max_nbors,success); } /// Check if there is enough storage for neighbors and realloc if not - /** \param nlocal number of particles whose nbors must be stored on device + /** \param inum number of particles whose nbors must be stored on device * \param host_inum number of particles whose nbors need to copied to host - * \param current maximum number of neighbors + * \param max_nbors current maximum number of neighbors * \note host_inum is 0 if the host is performing neighboring * \note nlocal+host_inum=total number local particles * \note olist_size=0 **/ @@ -143,14 +144,6 @@ class BaseThree { const bool vflag, const bool eatom, const bool vatom, int &host_start, const double cpu_time, bool &success); - /// Pair loop with device neighboring - int * compute(const int ago, const int inum_full, const int nall, - double **host_x, int *host_type, double *sublo, - double *subhi, tagint *tag, int **nspecial, - tagint **special, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - const double cpu_time, bool &success); - /// 
Pair loop with device neighboring int ** compute(const int ago, const int inum_full, const int nall, double **host_x, int *host_type, double *sublo, @@ -193,6 +186,9 @@ class BaseThree { /// Neighbor data Neighbor *nbor; + UCL_D_Vec dev_short_nbor; + UCL_Kernel k_short_nbor; + // ------------------------- DEVICE KERNELS ------------------------- UCL_Program *pair_program; UCL_Kernel k_pair, k_three_center, k_three_end, k_three_end_vatom; @@ -207,12 +203,13 @@ class BaseThree { int _block_pair, _block_size, _threads_per_atom, _end_command_queue; int _gpu_nbor; double _max_bytes, _max_an_bytes; + int _max_nbors, _ainum, _nall; double _gpu_overhead, _driver_overhead; UCL_D_Vec *_nbor_data; void compile_kernels(UCL_Device &dev, const void *pair_string, - const char *k_two, const char *k_three_center, - const char *k_three_end); + const char *two, const char *three_center, + const char *three_end, const char* short_nbor); virtual void loop(const bool _eflag, const bool _vflag, const int evatom) = 0; diff --git a/lib/gpu/lal_sw.cpp b/lib/gpu/lal_sw.cpp index 3492d7030e..24984e4878 100644 --- a/lib/gpu/lal_sw.cpp +++ b/lib/gpu/lal_sw.cpp @@ -55,7 +55,7 @@ int SWT::init(const int ntypes, const int nlocal, const int nall, const int max_ int success; success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split, _screen,sw,"k_sw","k_sw_three_center", - "k_sw_three_end"); + "k_sw_three_end","k_sw_short_nbor"); if (success!=0) return success; @@ -193,19 +193,30 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) { else vflag=0; - int GX=static_cast(ceil(static_cast(this->ans->inum())/ + // build the short neighbor list + int ainum=this->_ainum; + int nbor_pitch=this->nbor->nbor_pitch(); + int GX=static_cast(ceil(static_cast(ainum)/ (BX/this->_threads_per_atom))); + this->k_short_nbor.set_size(GX,BX); + this->k_short_nbor.run(&this->atom->x, &sw3, &map, &elem2param, &_nelements, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + 
&this->dev_short_nbor, &ainum, + &nbor_pitch, &this->_threads_per_atom); // this->_nbor_data == nbor->dev_packed for gpu_nbor == 0 and tpa > 1 // this->_nbor_data == nbor->dev_nbor for gpu_nbor == 1 or tpa == 1 - int ainum=this->ans->inum(); - int nbor_pitch=this->nbor->nbor_pitch(); + ainum=this->ans->inum(); + nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); this->time_pair.start(); - + this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->x, &sw1, &sw2, &sw3, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); @@ -217,6 +228,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_center.run(&this->atom->x, &sw1, &sw2, &sw3, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &evatom); @@ -231,7 +243,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &sw1, &sw2, &sw3, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -240,7 +252,7 @@ void SWT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &sw1, &sw2, &sw3, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); diff --git 
a/lib/gpu/lal_sw.cu b/lib/gpu/lal_sw.cu index 46330c59e4..a5c9f49d08 100644 --- a/lib/gpu/lal_sw.cu +++ b/lib/gpu/lal_sw.cu @@ -130,6 +130,63 @@ texture sw3_tex; #endif +__kernel void k_sw_short_nbor(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict sw3, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, + const __global int * dev_nbor, + const __global int * dev_packed, + __global int * dev_short_nbor, + const int inum, const int nbor_pitch, const int t_per_atom) { + __local int n_stride; + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + if (iiinit_three(nlocal,nall,max_nbors,0,cell_size,gpu_split, _screen,tersoff,"k_tersoff_repulsive", - "k_tersoff_three_center", "k_tersoff_three_end"); + "k_tersoff_three_center", "k_tersoff_three_end", + "k_tersoff_short_nbor"); if (success!=0) return success; @@ -157,11 +158,16 @@ int TersoffT::init(const int ntypes, const int nlocal, const int nall, const int UCL_H_Vec cutsq_view(nparams,*(this->ucl_device), UCL_WRITE_ONLY); - for (int i=0; i(host_cutsq[i]); + if (cutsqmax < host_cutsq[i]) cutsqmax = host_cutsq[i]; + } cutsq.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ucl_copy(cutsq,cutsq_view,false); + _cutshortsq = static_cast(cutsqmax); + UCL_H_Vec dview_elem2param(nelements*nelements*nelements, *(this->ucl_device), UCL_WRITE_ONLY); @@ -219,171 +225,6 @@ double TersoffT::host_memory_usage() const { #define KTHREADS this->_threads_per_atom #define JTHREADS this->_threads_per_atom -// --------------------------------------------------------------------------- -// Copy nbor list from host if necessary and then calculate forces, virials,.. 
-// --------------------------------------------------------------------------- -template -void TersoffT::compute(const int f_ago, const int inum_full, const int nall, - const int nlist, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - const double cpu_time, bool &success) { - this->acc_timers(); - if (inum_full==0) { - host_start=0; - // Make sure textures are correct if realloc by a different hybrid style - this->resize_atom(0,nall,success); - this->zero_timers(); - return; - } - - int ago=this->hd_balancer.ago_first(f_ago); - int inum=this->hd_balancer.balance(ago,inum_full,cpu_time); - this->ans->inum(inum); - #ifdef THREE_CONCURRENT - this->ans2->inum(inum); - #endif - host_start=inum; - - if (ago==0) { - this->reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success); - if (!success) - return; - _max_nbors = this->nbor->max_nbor_loop(nlist,numj,ilist); - } - - this->atom->cast_x_data(host_x,host_type); - this->hd_balancer.start_timer(); - this->atom->add_x_data(host_x,host_type); - - // re-allocate zetaij if necessary - if (nall*_max_nbors > _zetaij.cols()) { - int _nmax=static_cast(static_cast(nall)*1.10); - _zetaij.resize(_max_nbors*_nmax); - } - - int _eflag; - if (eflag) - _eflag=1; - else - _eflag=0; - - int ainum=nlist; - int nbor_pitch=this->nbor->nbor_pitch(); - int BX=this->block_pair(); - int GX=static_cast(ceil(static_cast(ainum)/ - (BX/(JTHREADS*KTHREADS)))); - - this->k_zeta.set_size(GX,BX); - this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, - &map, &elem2param, &_nelements, &_nparams, &_zetaij, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &_eflag, &ainum, &nbor_pitch, &this->_threads_per_atom); - - int evatom=0; - if (eatom || vatom) - evatom=1; - #ifdef THREE_CONCURRENT - this->ucl_device->sync(); - #endif - loop(eflag,vflag,evatom); - 
this->ans->copy_answers(eflag,vflag,eatom,vatom,ilist); - this->device->add_ans_object(this->ans); - #ifdef THREE_CONCURRENT - this->ans2->copy_answers(eflag,vflag,eatom,vatom,ilist); - this->device->add_ans_object(this->ans2); - #endif - this->hd_balancer.stop_timer(); -} - -// --------------------------------------------------------------------------- -// Reneighbor on GPU if necessary and then compute forces, virials, energies -// --------------------------------------------------------------------------- -template -int ** TersoffT::compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, const bool eflag, - const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, - const double cpu_time, bool &success) { - this->acc_timers(); - - if (inum_full==0) { - host_start=0; - // Make sure textures are correct if realloc by a different hybrid style - this->resize_atom(0,nall,success); - this->zero_timers(); - return NULL; - } - - this->hd_balancer.balance(cpu_time); - int inum=this->hd_balancer.get_gpu_count(ago,inum_full); - this->ans->inum(inum); - #ifdef THREE_CONCURRENT - this->ans2->inum(inum); - #endif - host_start=inum; - - // Build neighbor list on GPU if necessary - if (ago==0) { - _max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, - sublo, subhi, tag, nspecial, special, success); - if (!success) - return NULL; - this->hd_balancer.start_timer(); - } else { - this->atom->cast_x_data(host_x,host_type); - this->hd_balancer.start_timer(); - this->atom->add_x_data(host_x,host_type); - } - *ilist=this->nbor->host_ilist.begin(); - *jnum=this->nbor->host_acc.begin(); - - // re-allocate zetaij if necessary - if (nall*_max_nbors > _zetaij.cols()) { - int _nmax=static_cast(static_cast(nall)*1.10); - _zetaij.resize(_max_nbors*_nmax); - } - - int _eflag; - if (eflag) - _eflag=1; - else 
- _eflag=0; - - int ainum=nall; - int nbor_pitch=this->nbor->nbor_pitch(); - int BX=this->block_pair(); - int GX=static_cast(ceil(static_cast(ainum)/ - (BX/(JTHREADS*KTHREADS)))); - - this->k_zeta.set_size(GX,BX); - this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, - &map, &elem2param, &_nelements, &_nparams, &_zetaij, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &_eflag, &ainum, &nbor_pitch, &this->_threads_per_atom); - - int evatom=0; - if (eatom || vatom) - evatom=1; - #ifdef THREE_CONCURRENT - this->ucl_device->sync(); - #endif - loop(eflag,vflag,evatom); - this->ans->copy_answers(eflag,vflag,eatom,vatom); - this->device->add_ans_object(this->ans); - #ifdef THREE_CONCURRENT - this->ans2->copy_answers(eflag,vflag,eatom,vatom); - this->device->add_ans_object(this->ans2); - #endif - this->hd_balancer.stop_timer(); - - return this->nbor->host_jlist.begin()-host_start; -} - // --------------------------------------------------------------------------- // Calculate energies, forces, and torques // --------------------------------------------------------------------------- @@ -402,9 +243,40 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) { else vflag=0; - int ainum=this->ans->inum(); + // build the short neighbor list + int ainum=this->_ainum; int nbor_pitch=this->nbor->nbor_pitch(); - int GX=static_cast(ceil(static_cast(this->ans->inum())/ + int GX=static_cast(ceil(static_cast(ainum)/ + (BX/this->_threads_per_atom))); + + this->k_short_nbor.set_size(GX,BX); + this->k_short_nbor.run(&this->atom->x, &cutsq, &map, + &elem2param, &_nelements, &_nparams, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &ainum, + &nbor_pitch, &this->_threads_per_atom); + + // re-allocate zetaij if necessary + int nall = this->_nall; + if (nall*this->_max_nbors > _zetaij.cols()) { + int _nmax=static_cast(static_cast(nall)*1.10); + _zetaij.resize(this->_max_nbors*_nmax); + } + + 
nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->_ainum)/ + (BX/(JTHREADS*KTHREADS)))); + + this->k_zeta.set_size(GX,BX); + this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, + &map, &elem2param, &_nelements, &_nparams, &_zetaij, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, + &_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom); + + ainum=this->ans->inum(); + nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->ans->inum())/ (BX/this->_threads_per_atom))); this->time_pair.start(); @@ -412,6 +284,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_pair.run(&this->atom->x, &ts1, &ts2, &cutsq, &map, &elem2param, &_nelements, &_nparams, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); @@ -423,6 +296,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_center.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &evatom); @@ -437,7 +311,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -446,7 +320,7 @@ void TersoffT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, 
&map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_tersoff.cu b/lib/gpu/lal_tersoff.cu index b7d48d9e34..cdeb5679d8 100644 --- a/lib/gpu/lal_tersoff.cu +++ b/lib/gpu/lal_tersoff.cu @@ -106,7 +106,7 @@ texture ts5_tex; ans[ii]=old; \ } -#define store_zeta(z, tid, t_per_atom, offset) \ +#define acc_zeta(z, tid, t_per_atom, offset) \ if (t_per_atom>1) { \ __local acctyp red_acc[BLOCK_PAIR]; \ red_acc[tid]=z; \ @@ -155,7 +155,7 @@ texture ts5_tex; ans[ii]=old; \ } -#define store_zeta(z, tid, t_per_atom, offset) \ +#define acc_zeta(z, tid, t_per_atom, offset) \ if (t_per_atom>1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ z += shfl_xor(z, s, t_per_atom); \ @@ -164,6 +164,65 @@ texture ts5_tex; #endif +__kernel void k_tersoff_short_nbor(const __global numtyp4 *restrict x_, + const __global numtyp *restrict cutsq, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, const int nparams, + const __global int * dev_nbor, + const __global int * dev_packed, + __global int * dev_short_nbor, + const int inum, const int nbor_pitch, + const int t_per_atom) { + __local int n_stride; + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + if (ii cutsq[ijparam]) continue; +// if (rsq1 > cutsq[ijparam]) continue; // compute zeta_ij z = (acctyp)0; int nbor_k = nborj_start-offset_j+offset_k; - for ( ; nbor_k < nbor_end; nbor_k+=n_stride) { - int k=dev_packed[nbor_k]; + int k_end = nbor_end; + if (dev_packed==dev_nbor) { + int numk = dev_short_nbor[nbor_k-n_stride]; + k_end = nbor_k+fast_mul(numk,n_stride); + } + + for ( ; nbor_k < k_end; nbor_k+=n_stride) { + int k=nbor_mem[nbor_k]; k &= NEIGHMASK; if (k == j) continue; @@ -284,10 +357,12 @@ __kernel void 
k_tersoff_zeta(const __global numtyp4 *restrict x_, //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; //int idx = jj*n_stride + i*t_per_atom + offset_j; - int idx; - zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, - i, nbor_j, offset_j, idx); - store_zeta(z, tid, t_per_atom, offset_k); + //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + int idx = nbor_j; + if (dev_packed==dev_nbor) idx -= n_stride; +// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, +// i, nbor_j, offset_j, idx); + acc_zeta(z, tid, t_per_atom, offset_k); numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex); numtyp ijparam_lam2 = ts1_ijparam.y; @@ -330,6 +405,7 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_, const int nelements, const int nparams, const __global int * dev_nbor, const __global int * dev_packed, + const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, @@ -356,8 +432,8 @@ __kernel void k_tersoff_repulsive(const __global numtyp4 *restrict x_, __syncthreads(); if (ii0) - energy+=feng[1]; - if (vflag>0) { - virial[0] += delx*delx*force; - virial[1] += dely*dely*force; - virial[2] += delz*delz*force; - virial[3] += delx*dely*force; - virial[4] += delx*delz*force; - virial[5] += dely*delz*force; - } + if (eflag>0) + energy+=feng[1]; + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; } } // for nbor @@ -428,6 +511,7 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, + const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, @@ -461,20 +545,28 @@ 
__kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_, if (ii cutsq[ijparam]) continue; numtyp r1 = ucl_sqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1); @@ -497,9 +588,11 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_, //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride; //int idx = jj*n_stride + i*t_per_atom + offset_j; - int idx; - zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, - i, nbor_j, offset_j, idx); + //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + int idx = nbor_j; + if (dev_packed==dev_nbor) idx -= n_stride; +// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, +// i, nbor_j, offset_j, idx); acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex); numtyp force = zeta_ij.x*tpainv; numtyp prefactor = zeta_ij.y; @@ -520,9 +613,15 @@ __kernel void k_tersoff_three_center(const __global numtyp4 *restrict x_, virial[5] += delr1[1]*delr1[2]*mforce; } - int nbor_k=nborj_start-offset_j+offset_k; - for ( ; nbor_k cutsq[ijparam]) continue; - numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; mdelr1[1] = -delr1[1]; @@ -683,13 +790,20 @@ __kernel void k_tersoff_three_end(const __global numtyp4 *restrict x_, k_end=nbor_k+numk; nbor_k+=offset_k; } + + // recalculate numk and k_end for the use of short neighbor list + if (dev_packed==dev_nbor) { + numk = dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = nbor_k+fast_mul(numk,n_stride); + } int nbork_start = nbor_k; // look up for zeta_ji: find i in the j's neighbor list int m = tid / t_per_atom; int ijnum = -1; for ( ; nbor_k cutsq[ijparam]) continue; - numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; mdelr1[1] = -delr1[1]; @@ -909,13 +1035,20 @@ __kernel void k_tersoff_three_end_vatom(const __global numtyp4 *restrict x_, k_end=nbor_k+numk; nbor_k+=offset_k; } + + // recalculate numk and k_end for the use of short neighbor list + if (dev_packed==dev_nbor) { + numk = dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = 
nbor_k+fast_mul(numk,n_stride); + } int nbork_start = nbor_k; // look up for zeta_ji int m = tid / t_per_atom; int ijnum = -1; for ( ; nbor_k { const double* h, const double* gamma, const double* beta, const double* powern, const double* cutsq); - /// Pair loop with host neighboring - void compute(const int f_ago, const int inum_full, const int nall, - const int nlist, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, const bool eflag, - const bool vflag, const bool eatom, const bool vatom, - int &host_start, const double cpu_time, bool &success); - - /// Pair loop with device neighboring - int ** compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, tagint *tag, int **nspecial, - tagint **special, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success); - /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ void clear(); @@ -104,8 +89,7 @@ class Tersoff : public BaseThree { UCL_Kernel k_zeta; UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex; - - int _max_nbors; + numtyp _cutshortsq; private: bool _allocated; diff --git a/lib/gpu/lal_tersoff_mod.cpp b/lib/gpu/lal_tersoff_mod.cpp index 553dad3583..c37c07f1a1 100644 --- a/lib/gpu/lal_tersoff_mod.cpp +++ b/lib/gpu/lal_tersoff_mod.cpp @@ -55,7 +55,8 @@ int TersoffMT::init(const int ntypes, const int nlocal, const int nall, const in int success; success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split, _screen,tersoff_mod,"k_tersoff_mod_repulsive", - "k_tersoff_mod_three_center", "k_tersoff_mod_three_end"); + "k_tersoff_mod_three_center", "k_tersoff_mod_three_end", + "k_tersoff_mod_short_nbor"); if (success!=0) return success; @@ -157,11 +158,16 @@ int TersoffMT::init(const int ntypes, const int nlocal, const int nall, const in UCL_H_Vec 
cutsq_view(nparams,*(this->ucl_device), UCL_WRITE_ONLY); - for (int i=0; i(host_cutsq[i]); + if (cutsqmax < host_cutsq[i]) cutsqmax = host_cutsq[i]; + } cutsq.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ucl_copy(cutsq,cutsq_view,false); + _cutshortsq = static_cast(cutsqmax); + UCL_H_Vec dview_elem2param(nelements*nelements*nelements, *(this->ucl_device), UCL_WRITE_ONLY); @@ -219,171 +225,6 @@ double TersoffMT::host_memory_usage() const { #define KTHREADS this->_threads_per_atom #define JTHREADS this->_threads_per_atom -// --------------------------------------------------------------------------- -// Copy nbor list from host if necessary and then calculate forces, virials,.. -// --------------------------------------------------------------------------- -template -void TersoffMT::compute(const int f_ago, const int inum_full, const int nall, - const int nlist, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - const double cpu_time, bool &success) { - this->acc_timers(); - if (inum_full==0) { - host_start=0; - // Make sure textures are correct if realloc by a different hybrid style - this->resize_atom(0,nall,success); - this->zero_timers(); - return; - } - - int ago=this->hd_balancer.ago_first(f_ago); - int inum=this->hd_balancer.balance(ago,inum_full,cpu_time); - this->ans->inum(inum); - #ifdef THREE_CONCURRENT - this->ans2->inum(inum); - #endif - host_start=inum; - - if (ago==0) { - this->reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success); - if (!success) - return; - _max_nbors = this->nbor->max_nbor_loop(nlist,numj,ilist); - } - - this->atom->cast_x_data(host_x,host_type); - this->hd_balancer.start_timer(); - this->atom->add_x_data(host_x,host_type); - - // re-allocate zetaij if necessary - if (nall*_max_nbors > _zetaij.cols()) { - int _nmax=static_cast(static_cast(nall)*1.10); - _zetaij.resize(_max_nbors*_nmax); - } - - int 
_eflag; - if (eflag) - _eflag=1; - else - _eflag=0; - - int ainum=nlist; - int nbor_pitch=this->nbor->nbor_pitch(); - int BX=this->block_pair(); - int GX=static_cast(ceil(static_cast(ainum)/ - (BX/(JTHREADS*KTHREADS)))); - - this->k_zeta.set_size(GX,BX); - this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, - &map, &elem2param, &_nelements, &_nparams, &_zetaij, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &_eflag, &ainum, &nbor_pitch, &this->_threads_per_atom); - - int evatom=0; - if (eatom || vatom) - evatom=1; - #ifdef THREE_CONCURRENT - this->ucl_device->sync(); - #endif - loop(eflag,vflag,evatom); - this->ans->copy_answers(eflag,vflag,eatom,vatom,ilist); - this->device->add_ans_object(this->ans); - #ifdef THREE_CONCURRENT - this->ans2->copy_answers(eflag,vflag,eatom,vatom,ilist); - this->device->add_ans_object(this->ans2); - #endif - this->hd_balancer.stop_timer(); -} - -// --------------------------------------------------------------------------- -// Reneighbor on GPU if necessary and then compute forces, virials, energies -// --------------------------------------------------------------------------- -template -int ** TersoffMT::compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, const bool eflag, - const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, - const double cpu_time, bool &success) { - this->acc_timers(); - - if (inum_full==0) { - host_start=0; - // Make sure textures are correct if realloc by a different hybrid style - this->resize_atom(0,nall,success); - this->zero_timers(); - return NULL; - } - - this->hd_balancer.balance(cpu_time); - int inum=this->hd_balancer.get_gpu_count(ago,inum_full); - this->ans->inum(inum); - #ifdef THREE_CONCURRENT - this->ans2->inum(inum); - #endif - host_start=inum; - - // Build neighbor list on GPU if necessary - if 
(ago==0) { - _max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, - sublo, subhi, tag, nspecial, special, success); - if (!success) - return NULL; - this->hd_balancer.start_timer(); - } else { - this->atom->cast_x_data(host_x,host_type); - this->hd_balancer.start_timer(); - this->atom->add_x_data(host_x,host_type); - } - *ilist=this->nbor->host_ilist.begin(); - *jnum=this->nbor->host_acc.begin(); - - // re-allocate zetaij if necessary - if (nall*_max_nbors > _zetaij.cols()) { - int _nmax=static_cast(static_cast(nall)*1.10); - _zetaij.resize(_max_nbors*_nmax); - } - - int _eflag; - if (eflag) - _eflag=1; - else - _eflag=0; - - int ainum=nall; - int nbor_pitch=this->nbor->nbor_pitch(); - int BX=this->block_pair(); - int GX=static_cast(ceil(static_cast(ainum)/ - (BX/(JTHREADS*KTHREADS)))); - - this->k_zeta.set_size(GX,BX); - this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, - &map, &elem2param, &_nelements, &_nparams, &_zetaij, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &_eflag, &ainum, &nbor_pitch, &this->_threads_per_atom); - - int evatom=0; - if (eatom || vatom) - evatom=1; - #ifdef THREE_CONCURRENT - this->ucl_device->sync(); - #endif - loop(eflag,vflag,evatom); - this->ans->copy_answers(eflag,vflag,eatom,vatom); - this->device->add_ans_object(this->ans); - #ifdef THREE_CONCURRENT - this->ans2->copy_answers(eflag,vflag,eatom,vatom); - this->device->add_ans_object(this->ans2); - #endif - this->hd_balancer.stop_timer(); - - return this->nbor->host_jlist.begin()-host_start; -} - // --------------------------------------------------------------------------- // Calculate energies, forces, and torques // --------------------------------------------------------------------------- @@ -402,9 +243,40 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) { else vflag=0; - int ainum=this->ans->inum(); + // build the short neighbor list + int ainum=this->_ainum; int 
nbor_pitch=this->nbor->nbor_pitch(); - int GX=static_cast(ceil(static_cast(this->ans->inum())/ + int GX=static_cast(ceil(static_cast(ainum)/ + (BX/this->_threads_per_atom))); + + this->k_short_nbor.set_size(GX,BX); + this->k_short_nbor.run(&this->atom->x, &cutsq, &map, + &elem2param, &_nelements, &_nparams, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &ainum, + &nbor_pitch, &this->_threads_per_atom); + + // re-allocate zetaij if necessary + int nall = this->_nall; + if (nall*this->_max_nbors > _zetaij.cols()) { + int _nmax=static_cast(static_cast(nall)*1.10); + _zetaij.resize(this->_max_nbors*_nmax); + } + + nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->_ainum)/ + (BX/(JTHREADS*KTHREADS)))); + + this->k_zeta.set_size(GX,BX); + this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &cutsq, + &map, &elem2param, &_nelements, &_nparams, &_zetaij, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, + &_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom); + + ainum=this->ans->inum(); + nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->ans->inum())/ (BX/this->_threads_per_atom))); this->time_pair.start(); @@ -412,6 +284,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_pair.run(&this->atom->x, &ts1, &ts2, &cutsq, &map, &elem2param, &_nelements, &_nparams, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); @@ -423,6 +296,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_center.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, 
&this->_threads_per_atom, &evatom); @@ -437,7 +311,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -446,7 +320,7 @@ void TersoffMT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, &ts5, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_tersoff_mod.cu b/lib/gpu/lal_tersoff_mod.cu index 3a81b36941..576359b514 100644 --- a/lib/gpu/lal_tersoff_mod.cu +++ b/lib/gpu/lal_tersoff_mod.cu @@ -106,7 +106,7 @@ texture ts5_tex; ans[ii]=old; \ } -#define store_zeta(z, tid, t_per_atom, offset) \ +#define acc_zeta(z, tid, t_per_atom, offset) \ if (t_per_atom>1) { \ __local acctyp red_acc[BLOCK_PAIR]; \ red_acc[tid]=z; \ @@ -155,7 +155,7 @@ texture ts5_tex; ans[ii]=old; \ } -#define store_zeta(z, tid, t_per_atom, offset) \ +#define acc_zeta(z, tid, t_per_atom, offset) \ if (t_per_atom>1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ z += shfl_xor(z, s, t_per_atom); \ @@ -164,6 +164,65 @@ texture ts5_tex; #endif +__kernel void k_tersoff_mod_short_nbor(const __global numtyp4 *restrict x_, + const __global numtyp *restrict cutsq, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, const int nparams, + const __global int * dev_nbor, + const __global int * dev_packed, + __global int * 
dev_short_nbor, + const int inum, const int nbor_pitch, + const int t_per_atom) { + __local int n_stride; + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + if (ii cutsq[ijparam]) continue; - // compute zeta_ij - z = (numtyp)0; + z = (acctyp)0; int nbor_k = nborj_start-offset_j+offset_k; - for ( ; nbor_k < nbor_end; nbor_k+=n_stride) { - int k=dev_packed[nbor_k]; + int k_end = nbor_end; + if (dev_packed==dev_nbor) { + int numk = dev_short_nbor[nbor_k-n_stride]; + k_end = nbor_k+fast_mul(numk,n_stride); + } + + for ( ; nbor_k < k_end; nbor_k+=n_stride) { + int k=nbor_mem[nbor_k]; k &= NEIGHMASK; if (k == j) continue; @@ -287,10 +358,12 @@ __kernel void k_tersoff_mod_zeta(const __global numtyp4 *restrict x_, //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; //int idx = jj*n_stride + i*t_per_atom + offset_j; - int idx; - zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, - i, nbor_j, offset_j, idx); - store_zeta(z, tid, t_per_atom, offset_k); + //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + int idx = nbor_j; + if (dev_packed==dev_nbor) idx -= n_stride; +// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, +// i, nbor_j, offset_j, idx); + acc_zeta(z, tid, t_per_atom, offset_k); numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex); numtyp ijparam_lam2 = ts1_ijparam.y; @@ -331,6 +404,7 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_, const int nelements, const int nparams, const __global int * dev_nbor, const __global int * dev_packed, + const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, @@ -357,8 +431,8 @@ __kernel void k_tersoff_mod_repulsive(const __global numtyp4 *restrict x_, __syncthreads(); if (ii0) - energy+=feng[1]; - if (vflag>0) { - virial[0] += delx*delx*force; - virial[1] += dely*dely*force; - virial[2] += delz*delz*force; - virial[3] += 
delx*dely*force; - virial[4] += delx*delz*force; - virial[5] += dely*delz*force; - } + if (eflag>0) + energy+=feng[1]; + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; } } // for nbor @@ -430,6 +511,7 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, + const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, @@ -465,20 +547,28 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_, if (ii cutsq[ijparam]) continue; numtyp r1 = ucl_sqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1); @@ -501,9 +590,11 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_, //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride; //int idx = jj*n_stride + i*t_per_atom + offset_j; - int idx; - zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, - i, nbor_j, offset_j, idx); + //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + int idx = nbor_j; + if (dev_packed==dev_nbor) idx -= n_stride; +// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, +// i, nbor_j, offset_j, idx); acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex); numtyp force = zeta_ij.x*tpainv; numtyp prefactor = zeta_ij.y; @@ -524,9 +615,15 @@ __kernel void k_tersoff_mod_three_center(const __global numtyp4 *restrict x_, virial[5] += delr1[1]*delr1[2]*mforce; } - int nbor_k=nborj_start-offset_j+offset_k; - for ( ; nbor_k cutsq[ijparam]) continue; - numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; mdelr1[1] = -delr1[1]; @@ -693,13 +798,20 @@ __kernel void k_tersoff_mod_three_end(const __global numtyp4 *restrict x_, k_end=nbor_k+numk; nbor_k+=offset_k; } + + // 
recalculate numk and k_end for the use of short neighbor list + if (dev_packed==dev_nbor) { + numk = dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = nbor_k+fast_mul(numk,n_stride); + } int nbork_start = nbor_k; // look up for zeta_ji: find i in the j's neighbor list int m = tid / t_per_atom; int ijnum = -1; for ( ; nbor_k cutsq[ijparam]) continue; - numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; mdelr1[1] = -delr1[1]; @@ -928,13 +1052,20 @@ __kernel void k_tersoff_mod_three_end_vatom(const __global numtyp4 *restrict x_, k_end=nbor_k+numk; nbor_k+=offset_k; } + + // recalculate numk and k_end for the use of short neighbor list + if (dev_packed==dev_nbor) { + numk = dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = nbor_k+fast_mul(numk,n_stride); + } int nbork_start = nbor_k; // look up for zeta_ji int m = tid / t_per_atom; int ijnum = -1; for ( ; nbor_k { const double* h, const double* beta, const double* powern, const double* powern_del, const double* ca1, const double* cutsq); - /// Pair loop with host neighboring - void compute(const int f_ago, const int inum_full, const int nall, - const int nlist, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, const bool eflag, - const bool vflag, const bool eatom, const bool vatom, - int &host_start, const double cpu_time, bool &success); - - /// Pair loop with device neighboring - int ** compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, tagint *tag, int **nspecial, - tagint **special, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success); - /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ void clear(); @@ -104,8 +89,7 @@ class TersoffMod : public BaseThree { UCL_Kernel k_zeta; UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex; - - int _max_nbors; + numtyp 
_cutshortsq; private: bool _allocated; diff --git a/lib/gpu/lal_tersoff_zbl.cpp b/lib/gpu/lal_tersoff_zbl.cpp index 9cce8a802d..341f663030 100644 --- a/lib/gpu/lal_tersoff_zbl.cpp +++ b/lib/gpu/lal_tersoff_zbl.cpp @@ -62,7 +62,8 @@ int TersoffZT::init(const int ntypes, const int nlocal, const int nall, int success; success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split, _screen,tersoff_zbl,"k_tersoff_zbl_repulsive", - "k_tersoff_zbl_three_center", "k_tersoff_zbl_three_end"); + "k_tersoff_zbl_three_center", "k_tersoff_zbl_three_end", + "k_tersoff_zbl_short_nbor"); if (success!=0) return success; @@ -177,11 +178,16 @@ int TersoffZT::init(const int ntypes, const int nlocal, const int nall, UCL_H_Vec cutsq_view(nparams,*(this->ucl_device), UCL_WRITE_ONLY); - for (int i=0; i(host_cutsq[i]); + if (cutsqmax < host_cutsq[i]) cutsqmax = host_cutsq[i]; + } cutsq.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); ucl_copy(cutsq,cutsq_view,false); + _cutshortsq = static_cast(cutsqmax); + UCL_H_Vec dview_elem2param(nelements*nelements*nelements, *(this->ucl_device), UCL_WRITE_ONLY); @@ -244,171 +250,6 @@ double TersoffZT::host_memory_usage() const { #define KTHREADS this->_threads_per_atom #define JTHREADS this->_threads_per_atom -// --------------------------------------------------------------------------- -// Copy nbor list from host if necessary and then calculate forces, virials,.. 
-// --------------------------------------------------------------------------- -template -void TersoffZT::compute(const int f_ago, const int inum_full, const int nall, - const int nlist, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, - const bool eflag, const bool vflag, const bool eatom, - const bool vatom, int &host_start, - const double cpu_time, bool &success) { - this->acc_timers(); - if (inum_full==0) { - host_start=0; - // Make sure textures are correct if realloc by a different hybrid style - this->resize_atom(0,nall,success); - this->zero_timers(); - return; - } - - int ago=this->hd_balancer.ago_first(f_ago); - int inum=this->hd_balancer.balance(ago,inum_full,cpu_time); - this->ans->inum(inum); - #ifdef THREE_CONCURRENT - this->ans2->inum(inum); - #endif - host_start=inum; - - if (ago==0) { - this->reset_nbors(nall, inum, nlist, ilist, numj, firstneigh, success); - if (!success) - return; - _max_nbors = this->nbor->max_nbor_loop(nlist,numj,ilist); - } - - this->atom->cast_x_data(host_x,host_type); - this->hd_balancer.start_timer(); - this->atom->add_x_data(host_x,host_type); - - // re-allocate zetaij if necessary - if (nall*_max_nbors > _zetaij.cols()) { - int _nmax=static_cast(static_cast(nall)*1.10); - _zetaij.resize(_max_nbors*_nmax); - } - - int _eflag; - if (eflag) - _eflag=1; - else - _eflag=0; - - int ainum=nlist; - int nbor_pitch=this->nbor->nbor_pitch(); - int BX=this->block_pair(); - int GX=static_cast(ceil(static_cast(ainum)/ - (BX/(JTHREADS*KTHREADS)))); - - this->k_zeta.set_size(GX,BX); - this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &ts6, &cutsq, - &map, &elem2param, &_nelements, &_nparams, &_zetaij, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &_eflag, &ainum, &nbor_pitch, &this->_threads_per_atom); - - int evatom=0; - if (eatom || vatom) - evatom=1; - #ifdef THREE_CONCURRENT - this->ucl_device->sync(); - #endif - loop(eflag,vflag,evatom); - 
this->ans->copy_answers(eflag,vflag,eatom,vatom,ilist); - this->device->add_ans_object(this->ans); - #ifdef THREE_CONCURRENT - this->ans2->copy_answers(eflag,vflag,eatom,vatom,ilist); - this->device->add_ans_object(this->ans2); - #endif - this->hd_balancer.stop_timer(); -} - -// --------------------------------------------------------------------------- -// Reneighbor on GPU if necessary and then compute forces, virials, energies -// --------------------------------------------------------------------------- -template -int ** TersoffZT::compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, - double *sublo, double *subhi, tagint *tag, - int **nspecial, tagint **special, const bool eflag, - const bool vflag, const bool eatom, - const bool vatom, int &host_start, - int **ilist, int **jnum, - const double cpu_time, bool &success) { - this->acc_timers(); - - if (inum_full==0) { - host_start=0; - // Make sure textures are correct if realloc by a different hybrid style - this->resize_atom(0,nall,success); - this->zero_timers(); - return NULL; - } - - this->hd_balancer.balance(cpu_time); - int inum=this->hd_balancer.get_gpu_count(ago,inum_full); - this->ans->inum(inum); - #ifdef THREE_CONCURRENT - this->ans2->inum(inum); - #endif - host_start=inum; - - // Build neighbor list on GPU if necessary - if (ago==0) { - _max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type, - sublo, subhi, tag, nspecial, special, success); - if (!success) - return NULL; - this->hd_balancer.start_timer(); - } else { - this->atom->cast_x_data(host_x,host_type); - this->hd_balancer.start_timer(); - this->atom->add_x_data(host_x,host_type); - } - *ilist=this->nbor->host_ilist.begin(); - *jnum=this->nbor->host_acc.begin(); - - // re-allocate zetaij if necessary - if (nall*_max_nbors > _zetaij.cols()) { - int _nmax=static_cast(static_cast(nall)*1.10); - _zetaij.resize(_max_nbors*_nmax); - } - - int _eflag; - if (eflag) - _eflag=1; - 
else - _eflag=0; - - int ainum=nall; - int nbor_pitch=this->nbor->nbor_pitch(); - int BX=this->block_pair(); - int GX=static_cast(ceil(static_cast(ainum)/ - (BX/(JTHREADS*KTHREADS)))); - - this->k_zeta.set_size(GX,BX); - this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &ts6, &cutsq, - &map, &elem2param, &_nelements, &_nparams, &_zetaij, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &_eflag, &ainum, &nbor_pitch, &this->_threads_per_atom); - - int evatom=0; - if (eatom || vatom) - evatom=1; - #ifdef THREE_CONCURRENT - this->ucl_device->sync(); - #endif - loop(eflag,vflag,evatom); - this->ans->copy_answers(eflag,vflag,eatom,vatom); - this->device->add_ans_object(this->ans); - #ifdef THREE_CONCURRENT - this->ans2->copy_answers(eflag,vflag,eatom,vatom); - this->device->add_ans_object(this->ans2); - #endif - this->hd_balancer.stop_timer(); - - return this->nbor->host_jlist.begin()-host_start; -} - // --------------------------------------------------------------------------- // Calculate energies, forces, and torques // --------------------------------------------------------------------------- @@ -427,9 +268,40 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) { else vflag=0; - int ainum=this->ans->inum(); + // build the short neighbor list + int ainum=this->_ainum; int nbor_pitch=this->nbor->nbor_pitch(); - int GX=static_cast(ceil(static_cast(this->ans->inum())/ + int GX=static_cast(ceil(static_cast(ainum)/ + (BX/this->_threads_per_atom))); + + this->k_short_nbor.set_size(GX,BX); + this->k_short_nbor.run(&this->atom->x, &cutsq, &map, + &elem2param, &_nelements, &_nparams, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &ainum, + &nbor_pitch, &this->_threads_per_atom); + + // re-allocate zetaij if necessary + int nall = this->_nall; + if (nall*this->_max_nbors > _zetaij.cols()) { + int _nmax=static_cast(static_cast(nall)*1.10); + _zetaij.resize(this->_max_nbors*_nmax); + } + + 
nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->_ainum)/ + (BX/(JTHREADS*KTHREADS)))); + + this->k_zeta.set_size(GX,BX); + this->k_zeta.run(&this->atom->x, &ts1, &ts2, &ts3, &ts4, &ts5, &ts6, &cutsq, + &map, &elem2param, &_nelements, &_nparams, &_zetaij, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, + &_eflag, &this->_ainum, &nbor_pitch, &this->_threads_per_atom); + + ainum=this->ans->inum(); + nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->ans->inum())/ (BX/this->_threads_per_atom))); this->time_pair.start(); @@ -438,6 +310,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) { &_global_e, &_global_a_0, &_global_epsilon_0, &cutsq, &map, &elem2param, &_nelements, &_nparams, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom); @@ -449,6 +322,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_center.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &evatom); @@ -463,7 +337,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end_vatom.run(&this->atom->x, &ts1, &ts2, &ts4, &cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); @@ -472,7 +346,7 @@ void TersoffZT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_end.run(&this->atom->x, &ts1, &ts2, &ts4, 
&cutsq, &map, &elem2param, &_nelements, &_nparams, &_zetaij, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_tersoff_zbl.cu b/lib/gpu/lal_tersoff_zbl.cu index 9509b9802c..e8bb017f59 100644 --- a/lib/gpu/lal_tersoff_zbl.cu +++ b/lib/gpu/lal_tersoff_zbl.cu @@ -109,7 +109,7 @@ texture ts6_tex; ans[ii]=old; \ } -#define store_zeta(z, tid, t_per_atom, offset) \ +#define acc_zeta(z, tid, t_per_atom, offset) \ if (t_per_atom>1) { \ __local acctyp red_acc[BLOCK_PAIR]; \ red_acc[tid]=z; \ @@ -158,7 +158,7 @@ texture ts6_tex; ans[ii]=old; \ } -#define store_zeta(z, tid, t_per_atom, offset) \ +#define acc_zeta(z, tid, t_per_atom, offset) \ if (t_per_atom>1) { \ for (unsigned int s=t_per_atom/2; s>0; s>>=1) { \ z += shfl_xor(z, s, t_per_atom); \ @@ -167,6 +167,65 @@ texture ts6_tex; #endif +__kernel void k_tersoff_zbl_short_nbor(const __global numtyp4 *restrict x_, + const __global numtyp *restrict cutsq, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, const int nparams, + const __global int * dev_nbor, + const __global int * dev_packed, + __global int * dev_short_nbor, + const int inum, const int nbor_pitch, + const int t_per_atom) { + __local int n_stride; + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + if (ii cutsq[ijparam]) continue; - // compute zeta_ij z = (acctyp)0; int nbor_k = nborj_start-offset_j+offset_k; - for ( ; nbor_k < nbor_end; nbor_k+=n_stride) { - int k=dev_packed[nbor_k]; + int k_end = nbor_end; + if (dev_packed==dev_nbor) { + int numk = dev_short_nbor[nbor_k-n_stride]; + k_end = nbor_k+fast_mul(numk,n_stride); + } + + for ( ; nbor_k < k_end; nbor_k+=n_stride) { + int k=nbor_mem[nbor_k]; k &= NEIGHMASK; if (k == j) continue; @@ -290,10 +361,12 @@ __kernel void 
k_tersoff_zbl_zeta(const __global numtyp4 *restrict x_, //int jj = (nbor_j-offset_j-2*nbor_pitch)/n_stride; //int idx = jj*n_stride + i*t_per_atom + offset_j; - int idx; - zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, - i, nbor_j, offset_j, idx); - store_zeta(z, tid, t_per_atom, offset_k); + //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + int idx = nbor_j; + if (dev_packed==dev_nbor) idx -= n_stride; +// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, +// i, nbor_j, offset_j, idx); + acc_zeta(z, tid, t_per_atom, offset_k); numtyp4 ts1_ijparam = ts1[ijparam]; //fetch4(ts1_ijparam,ijparam,ts1_tex); numtyp ijparam_lam2 = ts1_ijparam.y; @@ -342,6 +415,7 @@ __kernel void k_tersoff_zbl_repulsive(const __global numtyp4 *restrict x_, const int nelements, const int nparams, const __global int * dev_nbor, const __global int * dev_packed, + const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, @@ -370,8 +444,8 @@ __kernel void k_tersoff_zbl_repulsive(const __global numtyp4 *restrict x_, __syncthreads(); if (ii0) - energy+=feng[1]; - if (vflag>0) { - virial[0] += delx*delx*force; - virial[1] += dely*dely*force; - virial[2] += delz*delz*force; - virial[3] += delx*dely*force; - virial[4] += delx*delz*force; - virial[5] += dely*delz*force; - } + if (eflag>0) + energy+=feng[1]; + if (vflag>0) { + virial[0] += delx*delx*force; + virial[1] += dely*dely*force; + virial[2] += delz*delz*force; + virial[3] += delx*dely*force; + virial[4] += delx*delz*force; + virial[5] += dely*delz*force; } } // for nbor @@ -448,6 +529,7 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_, const __global acctyp4 *restrict zetaij, const __global int * dev_nbor, const __global int * dev_packed, + const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, @@ 
-481,20 +563,28 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_, if (ii cutsq[ijparam]) continue; numtyp r1 = ucl_sqrt(rsq1); numtyp r1inv = ucl_rsqrt(rsq1); @@ -517,9 +606,11 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_, //int jj = (nbor_j-offset_j-2*nbor_pitch) / n_stride; //int idx = jj*n_stride + i*t_per_atom + offset_j; - int idx; - zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, - i, nbor_j, offset_j, idx); + //idx to zetaij is shifted by n_stride relative to nbor_j in dev_short_nbor + int idx = nbor_j; + if (dev_packed==dev_nbor) idx -= n_stride; +// zeta_idx(dev_nbor,dev_packed, nbor_pitch, n_stride, t_per_atom, +// i, nbor_j, offset_j, idx); acctyp4 zeta_ij = zetaij[idx]; // fetch(zeta_ij,idx,zeta_tex); numtyp force = zeta_ij.x*tpainv; numtyp prefactor = zeta_ij.y; @@ -540,9 +631,15 @@ __kernel void k_tersoff_zbl_three_center(const __global numtyp4 *restrict x_, virial[5] += delr1[1]*delr1[2]*mforce; } - int nbor_k=nborj_start-offset_j+offset_k; - for ( ; nbor_k cutsq[ijparam]) continue; - numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; mdelr1[1] = -delr1[1]; @@ -703,13 +808,20 @@ __kernel void k_tersoff_zbl_three_end(const __global numtyp4 *restrict x_, k_end=nbor_k+numk; nbor_k+=offset_k; } + + // recalculate numk and k_end for the use of short neighbor list + if (dev_packed==dev_nbor) { + numk = dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = nbor_k+fast_mul(numk,n_stride); + } int nbork_start = nbor_k; // look up for zeta_ji: find i in the j's neighbor list int m = tid / t_per_atom; int ijnum = -1; for ( ; nbor_k cutsq[ijparam]) continue; - numtyp mdelr1[3]; mdelr1[0] = -delr1[0]; mdelr1[1] = -delr1[1]; @@ -929,13 +1053,20 @@ __kernel void k_tersoff_zbl_three_end_vatom(const __global numtyp4 *restrict x_, k_end=nbor_k+numk; nbor_k+=offset_k; } + + // recalculate numk and k_end for the use of short neighbor list + if (dev_packed==dev_nbor) { + numk = 
dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = nbor_k+fast_mul(numk,n_stride); + } int nbork_start = nbor_k; // look up for zeta_ji int m = tid / t_per_atom; int ijnum = -1; for ( ; nbor_k { const double* ZBLcut, const double* ZBLexpscale, const double global_e, const double global_a_0, const double global_epsilon_0, const double* cutsq); - /// Pair loop with host neighboring - void compute(const int f_ago, const int inum_full, const int nall, - const int nlist, double **host_x, int *host_type, - int *ilist, int *numj, int **firstneigh, const bool eflag, - const bool vflag, const bool eatom, const bool vatom, - int &host_start, const double cpu_time, bool &success); - - /// Pair loop with device neighboring - int ** compute(const int ago, const int inum_full, - const int nall, double **host_x, int *host_type, double *sublo, - double *subhi, tagint *tag, int **nspecial, - tagint **special, const bool eflag, const bool vflag, - const bool eatom, const bool vatom, int &host_start, - int **ilist, int **numj, const double cpu_time, bool &success); - /// Clear all host and device data /** \note This is called at the beginning of the init() routine **/ void clear(); @@ -109,8 +94,8 @@ class TersoffZBL : public BaseThree { UCL_Kernel k_zeta; UCL_Texture ts1_tex, ts2_tex, ts3_tex, ts4_tex, ts5_tex, ts6_tex; - int _max_nbors; numtyp _global_e,_global_a_0,_global_epsilon_0; + numtyp _cutshortsq; private: bool _allocated; diff --git a/lib/gpu/lal_vashishta.cpp b/lib/gpu/lal_vashishta.cpp index 96537e65d3..d03ac992bd 100644 --- a/lib/gpu/lal_vashishta.cpp +++ b/lib/gpu/lal_vashishta.cpp @@ -59,7 +59,7 @@ int VashishtaT::init(const int ntypes, const int nlocal, const int nall, const i int success; success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split, _screen,vashishta,"k_vashishta","k_vashishta_three_center", - "k_vashishta_three_end"); + "k_vashishta_three_end","k_vashishta_short_nbor"); if (success!=0) return success; @@ -128,15 +128,18 @@ int 
VashishtaT::init(const int ntypes, const int nlocal, const int nall, const i param4.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY); + double r0sqmax = 0; for (int i=0; i(r0sq); dview[i].y=static_cast(gamma[i]); dview[i].z=static_cast(cutsq[i]); dview[i].w=static_cast(r0[i]); } + _cutshortsq = static_cast(r0sqmax); + ucl_copy(param4,dview,false); param4_tex.get_texture(*(this->pair_program),"param4_tex"); param4_tex.bind_float(param4,4); @@ -223,15 +226,28 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) { else vflag=0; - int GX=static_cast(ceil(static_cast(this->ans->inum())/ + // build the short neighbor list + int ainum=this->_ainum; + int nbor_pitch=this->nbor->nbor_pitch(); + int GX=static_cast(ceil(static_cast(ainum)/ (BX/this->_threads_per_atom))); + this->k_short_nbor.set_size(GX,BX); + this->k_short_nbor.run(&this->atom->x, ¶m4, &map, + &elem2param, &_nelements, &_nparams, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &ainum, + &nbor_pitch, &this->_threads_per_atom); + // this->_nbor_data == nbor->dev_packed for gpu_nbor == 0 and tpa > 1 // this->_nbor_data == nbor->dev_nbor for gpu_nbor == 1 or tpa == 1 - int ainum=this->ans->inum(); - int nbor_pitch=this->nbor->nbor_pitch(); + ainum=this->ans->inum(); + nbor_pitch=this->nbor->nbor_pitch(); + GX=static_cast(ceil(static_cast(this->ans->inum())/ + (BX/this->_threads_per_atom))); this->time_pair.start(); + // note that k_pair does not run with the short neighbor list this->k_pair.set_size(GX,BX); this->k_pair.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5, &map, &elem2param, &_nelements, @@ -248,6 +264,7 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) { this->k_three_center.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, 
&this->_threads_per_atom, &evatom); Answer *end_ans; @@ -257,21 +274,19 @@ void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) { end_ans=this->ans; #endif if (evatom!=0) { - this->k_three_end_vatom.set_size(GX,BX); this->k_three_end_vatom.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } else { - this->k_three_end.set_size(GX,BX); this->k_three_end.run(&this->atom->x, ¶m1, ¶m2, ¶m3, ¶m4, ¶m5, &map, &elem2param, &_nelements, &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->nbor->dev_acc, + &this->nbor->dev_acc, &this->dev_short_nbor, &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum, &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor); } diff --git a/lib/gpu/lal_vashishta.cu b/lib/gpu/lal_vashishta.cu index caa3c03613..fa7f413aa5 100644 --- a/lib/gpu/lal_vashishta.cu +++ b/lib/gpu/lal_vashishta.cu @@ -136,6 +136,64 @@ texture param5_tex; #endif +__kernel void k_vashishta_short_nbor(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict param4, + const __global int *restrict map, + const __global int *restrict elem2param, + const int nelements, const int nparams, + const __global int * dev_nbor, + const __global int * dev_packed, + __global int * dev_short_nbor, + const int inum, const int nbor_pitch, + const int t_per_atom) { + __local int n_stride; + int tid, ii, offset; + atom_info(t_per_atom,ii,tid,offset); + + if (ii0) energy += (param3_bigh*reta+vc2-vc3-param3_bigw*r6inv-r*param3_dvrc+param3_c0); @@ -255,31 +313,31 @@ __kernel void k_vashishta(const __global numtyp4 *restrict x_, numtyp r1 = ucl_sqrt(rsq1); \ numtyp rinvsq1 = ucl_recip(rsq1); \ numtyp rainv1 = ucl_recip(r1 - param_r0_ij); \ - numtyp gsrainv1 = param_gamma_ij * rainv1; \ 
+ numtyp gsrainv1 = param_gamma_ij * rainv1; \ numtyp gsrainvsq1 = gsrainv1*rainv1/r1; \ numtyp expgsrainv1 = ucl_exp(gsrainv1); \ \ numtyp r2 = ucl_sqrt(rsq2); \ numtyp rinvsq2 = ucl_recip(rsq2); \ numtyp rainv2 = ucl_recip(r2 - param_r0_ik); \ - numtyp gsrainv2 = param_gamma_ik * rainv2; \ + numtyp gsrainv2 = param_gamma_ik * rainv2; \ numtyp gsrainvsq2 = gsrainv2*rainv2/r2; \ numtyp expgsrainv2 = ucl_exp(gsrainv2); \ \ numtyp rinv12 = ucl_recip(r1*r2); \ numtyp cs = (delr1x*delr2x + delr1y*delr2y + delr1z*delr2z) * rinv12; \ - numtyp delcs = cs - param_costheta_ijk; \ + numtyp delcs = cs - param_costheta_ijk; \ numtyp delcssq = delcs*delcs; \ - numtyp pcsinv = param_bigc_ijk*delcssq+1.0; \ + numtyp pcsinv = param_bigc_ijk*delcssq+1.0; \ numtyp pcsinvsq = pcsinv*pcsinv; \ numtyp pcs = delcssq/pcsinv; \ \ numtyp facexp = expgsrainv1*expgsrainv2; \ \ - numtyp facrad = param_bigb_ijk * facexp*pcs; \ + numtyp facrad = param_bigb_ijk * facexp*pcs; \ numtyp frad1 = facrad*gsrainvsq1; \ numtyp frad2 = facrad*gsrainvsq2; \ - numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq; \ + numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq; \ numtyp facang12 = rinv12*facang; \ numtyp csfacang = cs*facang; \ numtyp csfac1 = rinvsq1*csfacang; \ @@ -311,28 +369,28 @@ __kernel void k_vashishta(const __global numtyp4 *restrict x_, numtyp r1 = ucl_sqrt(rsq1); \ numtyp rinvsq1 = ucl_recip(rsq1); \ numtyp rainv1 = ucl_recip(r1 - param_r0_ij); \ - numtyp gsrainv1 = param_gamma_ij * rainv1; \ + numtyp gsrainv1 = param_gamma_ij * rainv1; \ numtyp gsrainvsq1 = gsrainv1*rainv1/r1; \ numtyp expgsrainv1 = ucl_exp(gsrainv1); \ \ numtyp r2 = ucl_sqrt(rsq2); \ numtyp rainv2 = ucl_recip(r2 - param_r0_ik); \ - numtyp gsrainv2 = param_gamma_ik * rainv2; \ + numtyp gsrainv2 = param_gamma_ik * rainv2; \ numtyp expgsrainv2 = ucl_exp(gsrainv2); \ \ numtyp rinv12 = ucl_recip(r1*r2); \ numtyp cs = (delr1x*delr2x + delr1y*delr2y + delr1z*delr2z) * rinv12; \ - numtyp delcs = cs - param_costheta_ijk; 
\ + numtyp delcs = cs - param_costheta_ijk; \ numtyp delcssq = delcs*delcs; \ - numtyp pcsinv = param_bigc_ijk*delcssq+1.0; \ + numtyp pcsinv = param_bigc_ijk*delcssq+1.0; \ numtyp pcsinvsq = pcsinv*pcsinv; \ numtyp pcs = delcssq/pcsinv; \ \ numtyp facexp = expgsrainv1*expgsrainv2; \ \ - numtyp facrad = param_bigb_ijk * facexp*pcs; \ + numtyp facrad = param_bigb_ijk * facexp*pcs; \ numtyp frad1 = facrad*gsrainvsq1; \ - numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq; \ + numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq; \ numtyp facang12 = rinv12*facang; \ numtyp csfacang = cs*facang; \ numtyp csfac1 = rinvsq1*csfacang; \ @@ -353,6 +411,7 @@ __kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_, const int nelements, const __global int * dev_nbor, const __global int * dev_packed, + const __global int * dev_short_nbor, __global acctyp4 *restrict ans, __global acctyp *restrict engv, const int eflag, const int vflag, @@ -377,7 +436,7 @@ __kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_, if (ii param_r0sq_ij) continue; + if (rsq1 > param_r0sq_ij) continue; // still keep this for neigh no and tpa > 1 param_gamma_ij=param4_ijparam.y; param_r0_ij=param4_ijparam.w; - int nbor_k=nbor_j-offset_j+offset_k; - if (nbor_k<=nbor_j) - nbor_k+=n_stride; + int nbor_k,k_end; + if (dev_packed==dev_nbor) { + nbor_k=nborj_start-offset_j+offset_k; + int numk = dev_short_nbor[nbor_k-n_stride]; + k_end = nbor_k+fast_mul(numk,n_stride); + } else { + nbor_k = nbor_j-offset_j+offset_k; + if (nbor_k<=nbor_j) nbor_k += n_stride; + k_end = nbor_end; + } - for ( ; nbor_k param_r0sq_ij) continue; + if (rsq1 > param_r0sq_ij) continue; // still keep this for neigh no and tpa > 1 param_gamma_ij=param4_ijparam.y; param_r0_ij = param4_ijparam.w; @@ -551,8 +637,15 @@ __kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_, nbor_k+=offset_k; } + // recalculate numk and k_end for the use of short neighbor list + if 
(dev_packed==dev_nbor) { + numk = dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = nbor_k+fast_mul(numk,n_stride); + } + for ( ; nbor_k param_r0sq_ij) continue; + if (rsq1 > param_r0sq_ij) continue; // still keep this for neigh no and tpa > 1 param_gamma_ij=param4_ijparam.y; param_r0_ij=param4_ijparam.w; @@ -690,8 +792,15 @@ __kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_, nbor_k+=offset_k; } + // recalculate numk and k_end for the use of short neighbor list + if (dev_packed==dev_nbor) { + numk = dev_short_nbor[nbor_k]; + nbor_k += n_stride; + k_end = nbor_k+fast_mul(numk,n_stride); + } + for ( ; nbor_k { UCL_D_Vec elem2param; UCL_D_Vec map; int _nparams,_nelements; + numtyp _cutshortsq; UCL_Texture param1_tex, param2_tex, param3_tex, param4_tex, param5_tex; diff --git a/lib/h5md/Install.py b/lib/h5md/Install.py deleted file mode 100644 index 18b426f928..0000000000 --- a/lib/h5md/Install.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file - -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix - does not alter existing Makefile.machine -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None -extraflag = 0 - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - elif args[iarg] == "-e": - if iarg+2 > nargs: error() - extraflag = 1 - 
suffix = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested - -if not os.path.exists("Makefile.%s" % machine): - error("lib/%s/Makefile.%s does not exist" % (lib,machine)) - -lines = open("Makefile.%s" % machine,'r').readlines() -fp = open("Makefile.auto",'w') - -for line in lines: - words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - -fp.close() - -# make the library via Makefile.auto - -print "Building lib%s.a ..." % lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/h5md/Install.py b/lib/h5md/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/h5md/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/h5md/Makefile.h5cc b/lib/h5md/Makefile.h5cc index bd3e8a9784..9feed2d74e 100644 --- a/lib/h5md/Makefile.h5cc +++ b/lib/h5md/Makefile.h5cc @@ -9,12 +9,14 @@ HDF5_PATH=/usr INC=-I include AR=ar ARFLAGS=rc -LIB=libch5md.a +# need to build two libraries to not break compatibility and to support Install.py +LIB=libh5md.a libch5md.a all: lib Makefile.lammps build: mkdir -p build + build/ch5md.o: src/ch5md.c | build $(CC) $(INC) $(CFLAGS) -c $< -o $@ @@ -23,8 +25,11 @@ Makefile.lammps: .PHONY: all lib clean -$(LIB): build/ch5md.o - $(AR) $(ARFLAGS) $(LIB) build/ch5md.o +libch5md.a : build/ch5md.o + $(AR) $(ARFLAGS) $@ build/ch5md.o + +libh5md.a : build/ch5md.o + $(AR) $(ARFLAGS) $@ build/ch5md.o lib: $(LIB) diff 
--git a/lib/h5md/Makefile.mpi b/lib/h5md/Makefile.mpi new file mode 120000 index 0000000000..df682a9547 --- /dev/null +++ b/lib/h5md/Makefile.mpi @@ -0,0 +1 @@ +Makefile.h5cc \ No newline at end of file diff --git a/lib/h5md/Makefile.serial b/lib/h5md/Makefile.serial new file mode 120000 index 0000000000..df682a9547 --- /dev/null +++ b/lib/h5md/Makefile.serial @@ -0,0 +1 @@ +Makefile.h5cc \ No newline at end of file diff --git a/lib/h5md/include/ch5md.h b/lib/h5md/include/ch5md.h index 351e337ed4..8fefc9565d 100644 --- a/lib/h5md/include/ch5md.h +++ b/lib/h5md/include/ch5md.h @@ -9,13 +9,13 @@ #ifndef CH5MD_H #define CH5MD_H +#include "hdf5.h" +#include + #ifdef __cplusplus extern "C" { #endif -#include "hdf5.h" -#include - #define CH5MD_RANK_ERROR -10 typedef struct h5md_element_struct { diff --git a/lib/kim/.gitignore b/lib/kim/.gitignore new file mode 100644 index 0000000000..c1f57fe64c --- /dev/null +++ b/lib/kim/.gitignore @@ -0,0 +1,3 @@ +/Makefile.KIM_DIR +/Makefile.KIM_Config +/installed-kim-api-* diff --git a/lib/kim/Install.py b/lib/kim/Install.py new file mode 100644 index 0000000000..aa244ee6ea --- /dev/null +++ b/lib/kim/Install.py @@ -0,0 +1,317 @@ +#!/usr/bin/env python + +# install.py tool to download, compile, and setup the kim-api library +# used to automate the steps described in the README file in this dir + +from __future__ import print_function +import sys,os,re,subprocess + +# help message + +help = """ +Syntax from src dir: make lib-kim args="-b -v version -a kim-name" + or: make lib-kim args="-b -a everything" + or: make lib-kim args="-n -a kim-name" + or: make lib-kim args="-p /usr/local/open-kim -a kim-name" +Syntax from lib dir: python Install.py -b -v version -a kim-name + or: python Install.py -b -a everything + or: python Install.py -n -a kim-name + or: python Install.py -p /usr/local/open-kim -a kim-name + +specify one or more options, order does not matter + + -v = version of KIM API library to use + default = kim-api-v1.8.2 
(current as of June 2017) + -b = download and build base KIM API library with example Models + this will delete any previous installation in the current folder + -n = do NOT download and build base KIM API library. + Use an existing installation + -p = specify location of KIM API installation (implies -n) + -a = add single KIM model or model driver with kim-name + to existing KIM API lib (see example below). + If kim-name = everything, then rebuild KIM API library with + *all* available OpenKIM Models (make take a long time). + -vv = be more verbose about what is happening while the script runs + +Examples: + +make lib-kim args="-b" # install KIM API lib with only example models +make lib-kim args="-a Glue_Ercolessi_Adams_Al__MO_324507536345_001" # Ditto plus one model +make lib-kim args="-b -a everything" # install KIM API lib with all models +make lib-kim args="-n -a EAM_Dynamo_Ackland_W__MO_141627196590_002" # only add one model or model driver + +See the list of KIM model drivers here: +https://openkim.org/kim-items/model-drivers/alphabetical + +See the list of all KIM models here: +https://openkim.org/kim-items/models/by-model-drivers + +See the list of example KIM models included by default here: +https://openkim.org/kim-api +in the "What is in the KIM API source package?" 
section +""" + +def error(str=None): + if not str: print(help) + else: print("ERROR",str) + sys.exit() + +# expand to full path name +# process leading '~' or relative path + +def fullpath(path): + return os.path.abspath(os.path.expanduser(path)) + +def which(program): + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + path = path.strip('"') + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + +def geturl(url,fname): + success = False + + if which('curl') != None: + cmd = 'curl -L -o "%s" %s' % (fname,url) + try: + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = True + except subprocess.CalledProcessError as e: + print("Calling curl failed with: %s" % e.output.decode('UTF-8')) + + if not success and which('wget') != None: + cmd = 'wget -O "%s" %s' % (fname,url) + try: + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = True + except subprocess.CalledProcessError as e: + print("Calling wget failed with: %s" % e.output.decode('UTF-8')) + + if not success: + error("Failed to download source code with 'curl' or 'wget'") + return + +# parse args + +args = sys.argv[1:] +nargs = len(args) +if nargs == 0: error() + +thisdir = os.environ['PWD'] +version = "kim-api-v1.8.2" + +buildflag = False +everythingflag = False +addflag = False +verboseflag = False +pathflag = False + +iarg = 0 +while iarg < len(args): + if args[iarg] == "-v": + if iarg+2 > len(args): error() + version = args[iarg+1] + iarg += 2 + elif args[iarg] == "-b": + buildflag = True + iarg += 1 + elif args[iarg] == "-n": + buildflag = False + iarg += 1 + elif args[iarg] == "-p": + if iarg+2 > len(args): error() + kimdir = fullpath(args[iarg+1]) + pathflag = True + buildflag = False + iarg += 2 + elif args[iarg] == "-a": + 
addflag = True + if iarg+2 > len(args): error() + addmodelname = args[iarg+1] + if addmodelname == "everything": + buildflag = True + everythingflag = True + addflag = False + iarg += 2 + elif args[iarg] == "-vv": + verboseflag = True + iarg += 1 + else: error() + +thisdir = os.path.abspath(thisdir) +url = "https://s3.openkim.org/kim-api/%s.tgz" % version + +# set KIM API directory + +if pathflag: + if not os.path.isdir(kimdir): + print("\nkim-api is not installed at %s" % kimdir) + error() + + # configure LAMMPS to use existing kim-api installation + with open("%s/Makefile.KIM_DIR" % thisdir, 'w') as mkfile: + mkfile.write("KIM_INSTALL_DIR=%s\n\n" % kimdir) + mkfile.write(".DUMMY: print_dir\n\n") + mkfile.write("print_dir:\n") + mkfile.write(" @printf $(KIM_INSTALL_DIR)\n") + + with open("%s/Makefile.KIM_Config" % thisdir, 'w') as cfgfile: + cfgfile.write("include %s/lib/kim-api/Makefile.KIM_Config" % kimdir) + + print("Created %s/Makefile.KIM_DIR\n using %s" % (thisdir,kimdir)) +else: + kimdir = os.path.join(os.path.abspath(thisdir), "installed-" + version) + +# download KIM tarball, unpack, build KIM +if buildflag: + + # check to see if an installed kim-api already exists and wipe it out. 
+ + if os.path.isdir(kimdir): + print("kim-api is already installed at %s.\nRemoving it for re-install" % kimdir) + cmd = 'rm -rf "%s"' % kimdir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + + # configure LAMMPS to use kim-api to be installed + + with open("%s/Makefile.KIM_DIR" % thisdir, 'w') as mkfile: + mkfile.write("KIM_INSTALL_DIR=%s\n\n" % kimdir) + mkfile.write(".DUMMY: print_dir\n\n") + mkfile.write("print_dir:\n") + mkfile.write(" @printf $(KIM_INSTALL_DIR)\n") + + with open("%s/Makefile.KIM_Config" % thisdir, 'w') as cfgfile: + cfgfile.write("include %s/lib/kim-api/Makefile.KIM_Config" % kimdir) + + print("Created %s/Makefile.KIM_DIR\n using %s" % (thisdir,kimdir)) + + # download entire kim-api tarball + + print("Downloading kim-api tarball ...") + geturl(url,"%s/%s.tgz" % (thisdir,version)) + print("Unpacking kim-api tarball ...") + cmd = 'cd "%s"; rm -rf "%s"; tar -xzvf %s.tgz' % (thisdir,version,version) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + + # configure kim-api + + print("Configuring kim-api ...") + cmd = 'cd "%s/%s"; ./configure --prefix="%s"' % (thisdir,version,kimdir) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + + # build kim-api + + print("Configuring example Models") + cmd = 'cd "%s/%s"; make add-examples' % (thisdir,version) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print (txt.decode("UTF-8")) + + if everythingflag: + print("Configuring all OpenKIM models, this will take a while ...") + cmd = 'cd "%s/%s"; make add-OpenKIM' % (thisdir,version) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print(txt.decode("UTF-8")) + + print("Building kim-api ...") + cmd = 'cd "%s/%s"; make' % (thisdir,version) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print(txt.decode("UTF-8")) + + # install kim-api + + print("Installing kim-api ...") + cmd = 
'cd "%s/%s"; make install' % (thisdir,version) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print(txt.decode("UTF-8")) + + cmd = 'cd "%s/%s"; make install-set-default-to-v1' %(thisdir,version) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + if verboseflag: print(txt.decode("UTF-8")) + + # remove source files + + print("Removing kim-api source and build files ...") + cmd = 'cd "%s"; rm -rf %s; rm -rf %s.tgz' % (thisdir,version,version) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + +# add a single model (and possibly its driver) to existing KIM installation + +if addflag: + + if not os.path.isdir(kimdir): + print("\nkim-api is not installed") + error() + + # download single model + + print("Downloading tarball for %s..." % addmodelname) + url = "https://openkim.org/download/%s.tgz" % addmodelname + geturl(url,"%s/%s.tgz" % (thisdir,addmodelname)) + + print("Unpacking item tarball ...") + cmd = 'cd "%s"; tar -xzvf %s.tgz' % (thisdir,addmodelname) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + + print("Building item ...") + cmd = 'cd "%s/%s"; make; make install' %(thisdir,addmodelname) + try: + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + except subprocess.CalledProcessError as e: + + # Error: but first, check to see if it needs a driver + firstRunOutput = e.output.decode("UTF-8") + + cmd = 'cd "%s/%s"; make kim-item-type' % (thisdir,addmodelname) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + txt = txt.decode("UTF-8") + if txt == "ParameterizedModel": + + # Get and install driver + + cmd = 'cd "%s/%s"; make model-driver-name' % (thisdir,addmodelname) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + adddrivername = txt.decode("UTF-8").strip() + print("First installing model driver: %s..." 
% adddrivername) + cmd = 'cd "%s"; python Install.py -n -a %s' % (thisdir,adddrivername) + try: + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + except subprocess.CalledProcessError as e: + print(e.output) + sys.exit() + + if verboseflag: print(txt.decode("UTF-8")) + + # now install the model that needed the driver + + print("Now installing model : %s" % addmodelname) + cmd = 'cd "%s"; python Install.py -n -a %s' % (thisdir,addmodelname) + try: + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + except subprocess.CalledProcessError as e: + print(e.output) + sys.exit() + print(txt.decode("UTF-8")) + sys.exit() + else: + print(firstRunOutput) + print("Error, unable to build and install OpenKIM item: %s" \ + % addmodelname) + sys.exit() + + # success the first time + + if verboseflag: print(txt.decode("UTF-8")) + print("Removing kim item source and build files ...") + cmd = 'cd "%s"; rm -rf %s; rm -rf %s.tgz' %(thisdir,addmodelname,addmodelname) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) diff --git a/lib/kim/Makefile.lammps b/lib/kim/Makefile.lammps index 427c62b5f3..b66d7005a4 100644 --- a/lib/kim/Makefile.lammps +++ b/lib/kim/Makefile.lammps @@ -16,7 +16,13 @@ # Settings that the LAMMPS build will import when this package is installed -KIM_CONFIG_HELPER = kim-api-build-config +include ../../lib/kim/Makefile.KIM_DIR + +ifeq ($(wildcard $(KIM_INSTALL_DIR)/bin/kim-api-build-config),) + KIM_CONFIG_HELPER = kim-api-build-config +else + KIM_CONFIG_HELPER = $(KIM_INSTALL_DIR)/bin/kim-api-build-config +endif ifeq ($(shell $(KIM_CONFIG_HELPER) --version 2> /dev/null),) $(error $(KIM_CONFIG_HELPER) utility is not available. Something is wrong with your KIM API package setup) endif diff --git a/lib/kim/README b/lib/kim/README index fcaf09bd17..7a4230dc25 100644 --- a/lib/kim/README +++ b/lib/kim/README @@ -8,72 +8,69 @@ James Sethna (Cornell U). 
Ryan Elliott is the main developer for the KIM API and he also maintains the code that implements the pair_style kim command. -To download, build, and install the KIM API on your system, follow -these steps. We are working on scripts that will automate this -process. +You can type "make lib-kim" from the src directory to see help on +how to download and build this library via make commands, or you can +do the same thing by typing "python Install.py" from within this +directory, or you can do it manually by following the instructions +below. -The KIM API is available for download from "this -site"_https://openkim.org, namely https://openkim.org. The tarball -you download is "kim-api-vX.Y.Z.tgz", which can be unpacked in this -directory or whereever you wish: +----------------- -tar xvfz kim*tgz +Instructions: -Note that if you unpack and build KIM in this directory, when you -download a new LAMMPS tarball, the files you have added here will be -lost. So you likely want to build it somewhere else. +1. Configure lammps for use with the kim-api library installed in this directory -The kim-api-vX.Y.Z/docs/ directory has further documentation for the -KIM API. In order to compile and install the KIM API follow the -instructions found in the file kim-api-vX.Y.Z/INSTALL. (Don't forget -to download and compile any Model Drivers and Models that you want to -use.) +$ printf "KIM_INSTALL_DIR=${PWD}\n" > ./Makefile.KIM_DIR +$ printf "include ${PWD}/lib/kim-api/Makefile.KIM_Config\n" > ./Makefile.KIM_Config -Once you have successfully compiled and installed the KIM API, you -need to make sure the utility kim-api-build-config is in your PATH so -that the LAMMPS build system can properly work with the KIM API. - -The following are example commands that perform these steps: +2. 
Download and unpack the kim-api # replace X.Y.Z as appropriate here and below $ wget http://s3.openkim.org/kim-api/kim-api-vX.Y.Z.tgz $ tar zxvf kim-api-vX.Y.Z.tgz -# get OpenKIM models, setup and compile +# configure the kim-api $ cd kim-api-vX.Y.Z -$ cp Makefile.KIM_Config.example Makefile.KIM_Config +$ ./configure --prefix=${PWD}/../ -# edit this file as appropriate following the instructions given in -# INSTALL. Here, we'll assume you set the 'prefix' variable as -# follows in order to install the KIM API to your home directory: -# prefix = $(HOME)/local -$ vi Makefile.KIM_Config +# setup the desired kim item +$ make add-Pair_Johnson_Fe__MO_857282754307_002 + +3. Build and install the kim-api and model -$ make add-EAM_Dynamo_Angelo_Moody_NiAlH__MO_418978237058_001 $ make $ make install # replace X with the KIM API major version number $ make install-set-default-to-vX +$ cd ../ -# In order to permanently add the kim-api-build-config utility to your -# PATH variable, perform the following: -# -# For the bash shell: -$ printf "export PATH=${PATH}:${HOME}/local/bin\n" >> ${HOME}/.bashrc -$ source ${HOME}/.bashrc -# -# For the csh shell: -% printf "setenv PATH ${PATH}:${HOME}/local/bin\b" >> ${HOME}/.cshrc -% source ${HOME}/.cshrc +4. Remove source and build files + +$ rm -rf kim-api-vX.Y.Z +$ rm -rf kim-api-vX.Y.Z.tgz + +5. To add additional items do the following (replace the kim item name with your + desired value) + +$ wget https://openkim.org/download/EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001.tgz +$ tar zxvf EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001.tgz +$ cd EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001 +$ make +$ make install +$ cd .. 
+$ rm -rf EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001 +$ rm -rf EAM_Johnson_NearestNeighbor_Cu__MO_887933271505_001.tgz + +----------------- When these steps are complete you can build LAMMPS with the KIM package installed: -$ cd lammps/src +$ cd lammps/src $ make yes-kim $ make g++ (or whatever target you wish) -Note that the Makefile.lammps file in this directory is required -to allow the LAMMPS build to find the necessary KIM files. You -should not normally need to edit this file. +Note that the Makefile.lammps and Makefile.KIM_DIR files in this directory +are required to allow the LAMMPS build to find the necessary KIM files. +You should not normally need to edit these files. diff --git a/lib/kokkos/CHANGELOG.md b/lib/kokkos/CHANGELOG.md index acb54ff22f..3fe9e46111 100644 --- a/lib/kokkos/CHANGELOG.md +++ b/lib/kokkos/CHANGELOG.md @@ -1,5 +1,46 @@ # Change Log +## [2.03.13](https://github.com/kokkos/kokkos/tree/2.03.13) (2017-07-27) +[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.05...2.03.13) + +**Implemented enhancements:** + +- Disallow enabling both OpenMP and Threads in the same executable [\#406](https://github.com/kokkos/kokkos/issues/406) +- Make Kokkos::OpenMP respect OMP environment even if hwloc is available [\#630](https://github.com/kokkos/kokkos/issues/630) +- Improve Atomics Performance on KNL/Broadwell where PREFETCHW/RFO is Available [\#898](https://github.com/kokkos/kokkos/issues/898) +- Kokkos::resize should test whether dimensions have changed before resizing [\#904](https://github.com/kokkos/kokkos/issues/904) +- Develop performance-regression/acceptance tests [\#737](https://github.com/kokkos/kokkos/issues/737) +- Make the deep\_copy Profiling hook a start/end system [\#890](https://github.com/kokkos/kokkos/issues/890) +- Add deep\_copy Profiling hook [\#843](https://github.com/kokkos/kokkos/issues/843) +- Append tag name to parallel construct name for Profiling [\#842](https://github.com/kokkos/kokkos/issues/842)
+- Add view label to `View bounds error` message for CUDA backend [\#870](https://github.com/kokkos/kokkos/issues/870) +- Disable printing the loaded profiling library [\#824](https://github.com/kokkos/kokkos/issues/824) +- "Declared but never referenced" warnings [\#853](https://github.com/kokkos/kokkos/issues/853) +- Warnings about lock\_address\_cuda\_space [\#852](https://github.com/kokkos/kokkos/issues/852) +- WorkGraph execution policy [\#771](https://github.com/kokkos/kokkos/issues/771) +- Simplify makefiles by guarding compilation with appropriate KOKKOS\_ENABLE\_\#\#\# macros [\#716](https://github.com/kokkos/kokkos/issues/716) +- Cmake build: wrong include install directory [\#668](https://github.com/kokkos/kokkos/issues/668) +- Derived View type and allocation [\#566](https://github.com/kokkos/kokkos/issues/566) +- Fix Compiler warnings when compiling core unit tests for Cuda [\#214](https://github.com/kokkos/kokkos/issues/214) + +**Fixed bugs:** + +- Out-of-bounds read in Kokkos\_Layout.hpp [\#975](https://github.com/kokkos/kokkos/issues/975) +- CudaClang: Fix failing test with Clang 4.0 [\#941](https://github.com/kokkos/kokkos/issues/941) +- Respawn when memory pool allocation fails \(not available memory\) [\#940](https://github.com/kokkos/kokkos/issues/940) +- Memory pool aborts on zero allocation request, returns NULL for \< minimum [\#939](https://github.com/kokkos/kokkos/issues/939) +- Error with TaskScheduler query of underlying memory pool [\#917](https://github.com/kokkos/kokkos/issues/917) +- Profiling::\*Callee static variables declared in header [\#863](https://github.com/kokkos/kokkos/issues/863) +- calling \*Space::name\(\) causes compile error [\#862](https://github.com/kokkos/kokkos/issues/862) +- bug in Profiling::deallocateData [\#860](https://github.com/kokkos/kokkos/issues/860) +- task\_depend test failing, CUDA 8.0 + Pascal + RDC [\#829](https://github.com/kokkos/kokkos/issues/829) +- \[develop branch\] Standalone cmake issues 
[\#826](https://github.com/kokkos/kokkos/issues/826) +- Kokkos CUDA failes to compile with OMPI\_CXX and MPICH\_CXX wrappers [\#776](https://github.com/kokkos/kokkos/issues/776) +- Task Team reduction on Pascal [\#767](https://github.com/kokkos/kokkos/issues/767) +- CUDA stack overflow with TaskDAG test [\#758](https://github.com/kokkos/kokkos/issues/758) +- TeamVector test on Cuda [\#670](https://github.com/kokkos/kokkos/issues/670) +- Clang 4.0 Cuda Build broken again [\#560](https://github.com/kokkos/kokkos/issues/560) + ## [2.03.05](https://github.com/kokkos/kokkos/tree/2.03.05) (2017-05-27) [Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.00...2.03.05) diff --git a/lib/kokkos/Makefile.kokkos b/lib/kokkos/Makefile.kokkos index 24cd772e00..d2967cf9a3 100644 --- a/lib/kokkos/Makefile.kokkos +++ b/lib/kokkos/Makefile.kokkos @@ -33,6 +33,7 @@ KOKKOS_INTERNAL_USE_LIBRT := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "lib KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "experimental_memkind" | wc -l)) # Check for advanced settings. 
+KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "compiler_warnings" | wc -l)) KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l)) KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l)) KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l)) @@ -78,14 +79,14 @@ KOKKOS_INTERNAL_COMPILER_PGI := $(strip $(shell $(CXX) --version 2 KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)) KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)) KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(CXX) --version 2>&1 | grep nvcc | wc -l)) -KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l)) -KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l)) ifneq ($(OMPI_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)) + KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(OMPI_CXX) --version 2>&1 | grep nvcc | wc -l)) endif ifneq ($(MPICH_CXX),) - KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)) + KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell $(MPICH_CXX) --version 2>&1 | grep nvcc | wc -l)) endif +KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l)) +KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l)) ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2) KOKKOS_INTERNAL_COMPILER_CLANG = 1 @@ -111,6 +112,36 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) endif endif +# Set compiler warnings flags. 
+ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + # TODO check if PGI accepts GNU style warnings + KOKKOS_INTERNAL_COMPILER_WARNINGS = + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1) + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1) + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + # TODO check if cray accepts GNU style warnings + KOKKOS_INTERNAL_COMPILER_WARNINGS = + else + #gcc + KOKKOS_INTERNAL_COMPILER_WARNINGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wignored-qualifiers -Wempty-body -Wclobbered -Wuninitialized + endif + endif + endif + endif + endif +else + KOKKOS_INTERNAL_COMPILER_WARNINGS = +endif + # Set OpenMP flags. ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) KOKKOS_INTERNAL_OPENMP_FLAG := -mp @@ -162,6 +193,7 @@ endif # Intel based. KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC | wc -l)) +KOKKOS_INTERNAL_USE_ARCH_WSM := $(strip $(shell echo $(KOKKOS_ARCH) | grep WSM | wc -l)) KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l)) KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l)) KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l)) @@ -229,13 +261,14 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l)) # Any AVX? 
+KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc )) KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) # Decide what ISA level we are able to support. -KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) +KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc )) KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc )) @@ -243,7 +276,7 @@ KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc )) # Incompatible flags? 
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc )) +KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_SSE42)+$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc )) KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc)) ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1) @@ -257,12 +290,10 @@ endif KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -# No warnings: KOKKOS_CXXFLAGS = -# INTEL and CLANG warnings: -#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -# GCC warnings: -#KOKKOS_CXXFLAGS = -Wall -Wshadow -pedantic -Wsign-compare -Wtype-limits -Wuninitialized -Wignored-qualifiers -Wempty-body -Wclobbered +ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1) + KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS) +endif KOKKOS_LIBS = -lkokkos -ldl KOKKOS_LDFLAGS = -L$(shell pwd) @@ -486,6 +517,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) endif endif +ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1) + tmp := $(shell echo "\#define KOKKOS_ARCH_SSE42 1" >> KokkosCore_config.tmp ) + + ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1) + KOKKOS_CXXFLAGS += -xSSE4.2 + KOKKOS_LDFLAGS += -xSSE4.2 + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) + + else + ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) + KOKKOS_CXXFLAGS += -tp=nehalem + KOKKOS_LDFLAGS += -tp=nehalem + else + # Assume that this is a really a GNU compiler. 
+ KOKKOS_CXXFLAGS += -msse4.2 + KOKKOS_LDFLAGS += -msse4.2 + endif + endif + endif +endif + ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp ) @@ -689,7 +742,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1) endif endif -KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h) +KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1) ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h) KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l)) else diff --git a/lib/kokkos/Makefile.targets b/lib/kokkos/Makefile.targets index 3cb52a04cd..a9341a907c 100644 --- a/lib/kokkos/Makefile.targets +++ b/lib/kokkos/Makefile.targets @@ -20,8 +20,10 @@ Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Ta $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp Kokkos_HostThreadTeam.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HostThreadTeam.cpp -Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp - $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp +Kokkos_Spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Spinwait.cpp +Kokkos_Rendezvous.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Rendezvous.cpp Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c 
$(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp @@ -36,6 +38,8 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp +Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp endif ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1) diff --git a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp index 1e7ee68549..c2c118ce1a 100644 --- a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp +++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp @@ -61,14 +61,19 @@ protected: { std::cout << std::setprecision(5) << std::scientific; - unsigned threads_count = omp_get_max_threads(); + int threads_count = 0; + #pragma omp parallel + { + #pragma omp atomic + ++threads_count; + } - if ( Kokkos::hwloc::available() ) { - threads_count = Kokkos::hwloc::get_available_numa_count() * - Kokkos::hwloc::get_available_cores_per_numa(); + if (threads_count > 3) { + threads_count /= 2; } Kokkos::OpenMP::initialize( threads_count ); + Kokkos::OpenMP::print_configuration( std::cout ); } static void TearDownTestCase() diff --git a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp index 9cf02f74b4..2771f1793d 100644 --- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp +++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp @@ -1,12 +1,12 @@ //@HEADER // ************************************************************************ 
-// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -35,7 +35,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER @@ -283,12 +283,12 @@ struct test_random_scalar { RandomGenerator& pool, unsigned int num_draws) { - using std::cerr; + using std::cout; using std::endl; using Kokkos::parallel_reduce; { - cerr << " -- Testing randomness properties" << endl; + cout << " -- Testing randomness properties" << endl; RandomProperties result; typedef test_random_functor functor_type; @@ -307,7 +307,7 @@ struct test_random_scalar { ( 1.5*tolerance > variance_eps)) ? 1:0; pass_covar = ((-2.0*tolerance < covariance_eps) && ( 2.0*tolerance > covariance_eps)) ? 1:0; - cerr << "Pass: " << pass_mean + cout << "Pass: " << pass_mean << " " << pass_var << " " << mean_eps << " " << variance_eps @@ -315,7 +315,7 @@ struct test_random_scalar { << " || " << tolerance << endl; } { - cerr << " -- Testing 1-D histogram" << endl; + cout << " -- Testing 1-D histogram" << endl; RandomProperties result; typedef test_histogram1d_functor functor_type; @@ -335,7 +335,7 @@ struct test_random_scalar { pass_hist1d_covar = ((-0.06 < covariance_eps) && ( 0.06 > covariance_eps)) ? 
1:0; - cerr << "Density 1D: " << mean_eps + cout << "Density 1D: " << mean_eps << " " << variance_eps << " " << (result.covariance/HIST_DIM1D/HIST_DIM1D) << " || " << tolerance @@ -348,7 +348,7 @@ struct test_random_scalar { << endl; } { - cerr << " -- Testing 3-D histogram" << endl; + cout << " -- Testing 3-D histogram" << endl; RandomProperties result; typedef test_histogram3d_functor functor_type; @@ -368,7 +368,7 @@ struct test_random_scalar { pass_hist3d_covar = ((-tolerance < covariance_eps) && ( tolerance > covariance_eps)) ? 1:0; - cerr << "Density 3D: " << mean_eps + cout << "Density 3D: " << mean_eps << " " << variance_eps << " " << result.covariance/HIST_DIM1D/HIST_DIM1D << " || " << tolerance @@ -381,18 +381,18 @@ struct test_random_scalar { template void test_random(unsigned int num_draws) { - using std::cerr; + using std::cout; using std::endl; typename test_random_functor::type_1d density_1d("D1d"); typename test_random_functor::type_3d density_3d("D3d"); uint64_t ticks = std::chrono::high_resolution_clock::now().time_since_epoch().count(); - cerr << "Test Seed:" << ticks << endl; + cout << "Test Seed:" << ticks << endl; RandomGenerator pool(ticks); - cerr << "Test Scalar=int" << endl; + cout << "Test Scalar=int" << endl; test_random_scalar test_int(density_1d,density_3d,pool,num_draws); ASSERT_EQ( test_int.pass_mean,1); ASSERT_EQ( test_int.pass_var,1); @@ -406,7 +406,7 @@ void test_random(unsigned int num_draws) deep_copy(density_1d,0); deep_copy(density_3d,0); - cerr << "Test Scalar=unsigned int" << endl; + cout << "Test Scalar=unsigned int" << endl; test_random_scalar test_uint(density_1d,density_3d,pool,num_draws); ASSERT_EQ( test_uint.pass_mean,1); ASSERT_EQ( test_uint.pass_var,1); @@ -420,7 +420,7 @@ void test_random(unsigned int num_draws) deep_copy(density_1d,0); deep_copy(density_3d,0); - cerr << "Test Scalar=int64_t" << endl; + cout << "Test Scalar=int64_t" << endl; test_random_scalar test_int64(density_1d,density_3d,pool,num_draws); 
ASSERT_EQ( test_int64.pass_mean,1); ASSERT_EQ( test_int64.pass_var,1); @@ -434,7 +434,7 @@ void test_random(unsigned int num_draws) deep_copy(density_1d,0); deep_copy(density_3d,0); - cerr << "Test Scalar=uint64_t" << endl; + cout << "Test Scalar=uint64_t" << endl; test_random_scalar test_uint64(density_1d,density_3d,pool,num_draws); ASSERT_EQ( test_uint64.pass_mean,1); ASSERT_EQ( test_uint64.pass_var,1); @@ -448,7 +448,7 @@ void test_random(unsigned int num_draws) deep_copy(density_1d,0); deep_copy(density_3d,0); - cerr << "Test Scalar=float" << endl; + cout << "Test Scalar=float" << endl; test_random_scalar test_float(density_1d,density_3d,pool,num_draws); ASSERT_EQ( test_float.pass_mean,1); ASSERT_EQ( test_float.pass_var,1); @@ -462,7 +462,7 @@ void test_random(unsigned int num_draws) deep_copy(density_1d,0); deep_copy(density_3d,0); - cerr << "Test Scalar=double" << endl; + cout << "Test Scalar=double" << endl; test_random_scalar test_double(density_1d,density_3d,pool,num_draws); ASSERT_EQ( test_double.pass_mean,1); ASSERT_EQ( test_double.pass_var,1); diff --git a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp index f952ab3db5..9e75b580bc 100644 --- a/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp +++ b/lib/kokkos/algorithms/unit_tests/UnitTestMain.cpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ diff --git a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp index f545247212..8db5ce0eb5 100644 --- a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp +++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp @@ -44,12 +44,13 @@ #include #include #include +#include int main(int argc, char* argv[]) { Kokkos::initialize(); - - if(argc<10) { + + if(argc<10) { printf("Arguments: N K R D U F T S\n"); printf(" P: Precision (1==float, 2==double)\n"); printf(" N,K: dimensions of the 2D array to allocate\n"); @@ -68,7 +69,7 @@ int main(int argc, char* argv[]) { Kokkos::finalize(); return 0; } - + int P = atoi(argv[1]); int N = atoi(argv[2]); @@ -80,7 +81,7 @@ int main(int argc, char* argv[]) { int T = atoi(argv[8]); int S = atoi(argv[9]); - if(U>8) {printf("U must be 1-8\n"); return 0;} + if(U>8) {printf("U must be 1-8\n"); return 0;} if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); return 0;} if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); return 0;} diff --git a/lib/kokkos/benchmarks/gather/main.cpp b/lib/kokkos/benchmarks/gather/main.cpp index 161c6f2091..88eb0493c1 100644 --- a/lib/kokkos/benchmarks/gather/main.cpp +++ b/lib/kokkos/benchmarks/gather/main.cpp @@ -44,11 +44,11 @@ #include #include #include +#include int main(int argc, char* argv[]) { Kokkos::initialize(argc,argv); - if(argc<8) { printf("Arguments: S N K D\n"); printf(" S: Scalar Type Size (1==float, 2==double, 4=complex)\n"); diff --git a/lib/kokkos/benchmarks/policy_performance/Makefile b/lib/kokkos/benchmarks/policy_performance/Makefile new file mode 100644 index 0000000000..13aef3209c --- /dev/null +++ b/lib/kokkos/benchmarks/policy_performance/Makefile @@ -0,0 +1,44 @@ +KOKKOS_PATH = ../.. 
+SRC = $(wildcard *.cpp) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper +CXXFLAGS = -O3 -g +LINK = ${CXX} +LINKFLAGS = +EXE = policy_performance.cuda +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +KOKKOS_CUDA_OPTIONS+=enable_lambda +else +CXX = g++ +CXXFLAGS = -O3 -g -Wall -Werror +LINK = ${CXX} +LINKFLAGS = +EXE = policy_performance.host +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +DEPFLAGS = -M + +OBJ = $(SRC:.cpp=.o) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +build: $(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +# Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) main.cpp policy_perf_test.hpp + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< diff --git a/lib/kokkos/benchmarks/policy_performance/main.cpp b/lib/kokkos/benchmarks/policy_performance/main.cpp new file mode 100644 index 0000000000..b0ed9bb512 --- /dev/null +++ b/lib/kokkos/benchmarks/policy_performance/main.cpp @@ -0,0 +1,170 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include "policy_perf_test.hpp" + +int main(int argc, char* argv[] ) { + Kokkos::initialize(argc,argv); + + if(argc<10) { + printf(" Ten arguments are needed to run this program:\n"); + printf(" (1)team_range, (2)thread_range, (3)vector_range, (4)outer_repeat, (5)thread_repeat, (6)vector_repeat, (7)team_size, (8)vector_size, (9)schedule, (10)test_type\n"); + printf(" team_range: number of teams (league_size)\n"); + printf(" thread_range: range for nested TeamThreadRange parallel_*\n"); + printf(" vector_range: range for nested ThreadVectorRange parallel_*\n"); + printf(" outer_repeat: number of repeats for outer parallel_* call\n"); + printf(" thread_repeat: number of repeats for TeamThreadRange parallel_* call\n"); + printf(" vector_repeat: number of repeats for ThreadVectorRange parallel_* call\n"); + printf(" team_size: number of team members (team_size)\n"); + printf(" vector_size: desired vectorization (if possible)\n"); + printf(" schedule: 1 == Static 2 == Dynamic\n"); + printf(" test_type: 3-digit code XYZ for testing (nested) parallel_*\n"); + printf(" code key: XYZ X in {1,2,3,4,5}, Y in {0,1,2}, Z in {0,1,2}\n"); + printf(" TeamPolicy:\n"); + printf(" X: 0 = none (never used, makes no sense); 1 = parallel_for; 2 = parallel_reduce\n"); + printf(" Y: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n"); + printf(" Z: 0 = none; 1 = parallel_for; 2 = parallel_reduce\n"); + printf(" RangePolicy:\n"); + printf(" X: 3 = parallel_for; 4 = parallel_reduce; 5 = parallel_scan\n"); + printf(" Y: 0 = none\n"); + printf(" Z: 0 = none\n"); + printf(" Example Input:\n"); + printf(" 100000 32 32 100 100 100 8 1 1 100\n"); + Kokkos::finalize(); + return 0; + } + + int team_range = atoi(argv[1]); + int thread_range = atoi(argv[2]); + int vector_range = atoi(argv[3]); + + int outer_repeat = atoi(argv[4]); + int thread_repeat = 
atoi(argv[5]); + int vector_repeat = atoi(argv[6]); + + int team_size = atoi(argv[7]); + int vector_size = atoi(argv[8]); + int schedule = atoi(argv[9]); + int test_type = atoi(argv[10]); + + int disable_verbose_output = 0; + if ( argc > 11 ) { + disable_verbose_output = atoi(argv[11]); + } + + if ( schedule != 1 && schedule != 2 ) { + printf("schedule: %d\n", schedule); + printf("Options for schedule are: 1 == Static 2 == Dynamic\n"); + Kokkos::finalize(); + return -1; + } + + if ( test_type != 100 && test_type != 110 && test_type != 111 && test_type != 112 && test_type != 120 && test_type != 121 && test_type != 122 + && test_type != 200 && test_type != 210 && test_type != 211 && test_type != 212 && test_type != 220 && test_type != 221 && test_type != 222 + && test_type != 300 && test_type != 400 && test_type != 500 + ) + { + printf("Incorrect test_type option\n"); + Kokkos::finalize(); + return -2; + } + + double result = 0.0; + + Kokkos::parallel_reduce( "parallel_reduce warmup", Kokkos::TeamPolicy<>(10,1), + KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type team, double& lval) { + lval += 1; + }, result); + + typedef Kokkos::View view_type_1d; + typedef Kokkos::View view_type_2d; + typedef Kokkos::View view_type_3d; + + // Allocate view without initializing + // Call a 'warmup' test with 1 repeat - this will initialize the corresponding view appropriately for test and should obey first-touch etc + // Second call to test is the one we actually care about and time + view_type_1d v_1( Kokkos::ViewAllocateWithoutInitializing("v_1"), team_range*team_size); + view_type_2d v_2( Kokkos::ViewAllocateWithoutInitializing("v_2"), team_range*team_size, thread_range); + view_type_3d v_3( Kokkos::ViewAllocateWithoutInitializing("v_3"), team_range*team_size, thread_range, vector_range); + + double result_computed = 0.0; + double result_expect = 0.0; + double time = 0.0; + + if(schedule==1) { + if ( test_type != 500 ) { + // warmup - no repeat of loops + 
test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + } + else { + // parallel_scan: initialize 1d view for parallel_scan + test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + } + } + if(schedule==2) { + if ( test_type != 500 ) { + // warmup - no repeat of loops + test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + } + else { + // parallel_scan: initialize 1d view for parallel_scan + test_policy,int>(team_range,thread_range,vector_range,1,1,1,team_size,vector_size,100,v_1,v_2,v_3,result_computed,result_expect,time); + test_policy,int>(team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,test_type,v_1,v_2,v_3,result_computed,result_expect,time); + } + } + + if ( disable_verbose_output == 0 ) { + printf("%7i %4i %2i %9i %4i %4i %4i %2i %1i %3i %e %e %lf\n",team_range,thread_range,vector_range,outer_repeat,thread_repeat,vector_repeat,team_size,vector_size,schedule,test_type,result_computed,result_expect,time); + } + else { + printf("%lf\n",time); + } + + Kokkos::finalize(); + + return 0; +} diff --git a/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp new file 
mode 100644 index 0000000000..8c79f3b88d --- /dev/null +++ b/lib/kokkos/benchmarks/policy_performance/policy_perf_test.hpp @@ -0,0 +1,354 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +template < class ViewType > +struct ParallelScanFunctor { + using value_type = double; + ViewType v; + + ParallelScanFunctor( const ViewType & v_ ) + : v(v_) + {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int idx, value_type& val, const bool& final ) const + { + // inclusive scan + val += v(idx); + if ( final ) { + v(idx) = val; + } + } +}; + +template +void test_policy(int team_range, int thread_range, int vector_range, + int outer_repeat, int thread_repeat, int inner_repeat, + int team_size, int vector_size, int test_type, + ViewType1 &v1, ViewType2 &v2, ViewType3 &v3, + double &result, double &result_expect, double &time) { + + typedef Kokkos::TeamPolicy t_policy; + typedef typename t_policy::member_type t_team; + Kokkos::Timer timer; + + for(int orep = 0; orep(v1) +#if 0 + // This does not compile with pre Cuda 8.0 - see Github Issue #913 for explanation + KOKKOS_LAMBDA (const int idx, double& val, const bool& final) { + // inclusive scan + val += v1(idx); + if ( final ) { + v1(idx) = val; + } + } +#endif + ); + // result = v1( team_size*team_range - 1 ); // won't work with Cuda - need to copy result back to host to print + // result_expect = 0.5*(team_size*team_range)*(team_size*team_range-1); + } + + } // end outer for loop + + time = timer.seconds(); +} //end test_policy diff --git a/lib/kokkos/benchmarks/policy_performance/script_basic_testing.sh b/lib/kokkos/benchmarks/policy_performance/script_basic_testing.sh new file mode 100755 index 0000000000..e621fffbd4 --- /dev/null +++ b/lib/kokkos/benchmarks/policy_performance/script_basic_testing.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +# Script to check policy_perf_test code works with each possible combo of options + +echo "Performance test results for parallel_reduce code computing sum of sequence [0,N) with various (nested) policies" + 
+EXECUTABLE=policy_performance + +TEAMRANGE=1000 +THREADRANGE=4 +VECTORRANGE=32 +TEAMSIZE=4 +VECTORSIZE=1 +OREPEAT=1 +MREPEAT=1 +IREPEAT=1 +SCHEDULE=1 + +SUFFIX=host +if [ -e $EXECUTABLE.$SUFFIX ] +then +SCHEDULE=1 +echo "Host tests Static schedule" +for CODE in {100,110,111,112,120,121,122,200,210,211,212,220,221,222,300,400,500} +do + OMP_PROC_BIND=true ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE +done + +SCHEDULE=2 +echo "Host tests Dynamic schedule" +for CODE in {100,110,111,112,120,121,122,200,210,211,212,220,221,222,300,400,500} +do + OMP_PROC_BIND=true ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE +done +fi + +SUFFIX=cuda +if [ -e $EXECUTABLE.$SUFFIX ] +then +SCHEDULE=1 +echo "Cuda tests Static schedule" +for CODE in {100,110,111,112,120,121,122,200,210,211,212,220,221,222,300,400,500} +do + ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE +done + +SCHEDULE=2 +echo "Cuda tests Dynamic schedule" +for CODE in {100,110,111,112,120,121,122,200,210,211,212,220,221,222,300,400,500} +do + ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE +done +fi diff --git a/lib/kokkos/benchmarks/policy_performance/script_sample_usage.sh b/lib/kokkos/benchmarks/policy_performance/script_sample_usage.sh new file mode 100755 index 0000000000..f4bfb87f8f --- /dev/null +++ b/lib/kokkos/benchmarks/policy_performance/script_sample_usage.sh @@ -0,0 +1,126 @@ +#!/bin/bash + +# Sample script for benchmarking policy performance + +# Suggested enviroment variables to export prior to executing script: +# KNL: +# OMP_NUM_THREADS=256 KMP_AFFINITY=compact +# Power: +# OMP_NUM_THREADS=64 OMP_PROC_BIND=true + +# Constants and Variables: +# Vary: TEAMSIZE, and THREADRANGE +# for TEAMSIZE in 
{1,2,4,5,8}; do +# for THREADRANGE in {32,41,1000}; do +# Fixed: TEAMRANGE, VECTORRANGE, VECTORSIZE +# System specific: Adjust REPEAT values to architecture tests are run on + +# Tests +# Static SCHEDULE = 1 +# Tier 1: parallel_for + RangePolicy 300 +# Tier 2: parallel_reduce, parallel_scan + RangePolicy 400 500 +# Tier 3: 'outer' parallel_for with TeamPolicy (nested parallelism) 1XY +# Tier 4: 'outer' parallel_reduce with TeamPolicy (nested parallelism) 2XY +# Dynamic SCHEDULE = 2 +# Tier 5: parallel_for + RangePolicy 300 +# Tier 6: parallel_reduce, parallel_scan + RangePolicy 400 500 +# Tier 7: 'outer' parallel_for with TeamPolicy (nested parallelism) 1XY +# Tier 8: 'outer' parallel_reduce with TeamPolicy (nested parallelism) 2XY + +# Results grouped by: +# 0) SCHEDULE 1) CODE (test) 2) TEAMRANGE 3) TEAMSIZE 4) THREADRANGE + +EXECUTABLE=policy_performance + +# Default defined values +TEAMRANGE=1000 +THREADRANGE=1 +VECTORRANGE=32 +TEAMSIZE=1 +VECTORSIZE=1 +OREPEAT=1 +MREPEAT=1 +IREPEAT=1 +SCHEDULE=1 + +# Host tests +SUFFIX=host +if [ -e $EXECUTABLE.$SUFFIX ]; then +echo "Host" + +for SCHEDULE in {1,2}; do + +# Tier 1 and 2, 5 and 6 +for CODE in {300,400,500}; do + for TEAMSIZE in {1,2,4,5,8}; do + OMP_PROC_BIND=true ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE + done +done + +# Tier 3, 7 +for CODE in {100,110,111,112,120,121,122}; do + for TEAMSIZE in {1,2,4,5,8}; do + for THREADRANGE in {32,41,1000}; do + OMP_PROC_BIND=true ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE + done + done +done + +# Tier 4, 8 +for CODE in {200,210,211,212,220,221,222}; do + for TEAMSIZE in {1,2,4,5,8}; do + for THREADRANGE in {32,41,1000}; do + OMP_PROC_BIND=true ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE + done + done +done + +done # end SCHEDULE + 
+fi # end host + + +# Cuda tests +SUFFIX=cuda +# TEAMRANGE=10000, TEAMSIZE=8 too large +# TEAMRANGE=10000, TEAMSIZE=8, THREADRANGE=1000 too large +if [ -e $EXECUTABLE.$SUFFIX ]; then +echo "Cuda" + +for SCHEDULE in {1,2}; do + +# Reset defaults +TEAMRANGE=1000 +THREADRANGE=1 +VECTORRANGE=32 +TEAMSIZE=1 +VECTORSIZE=1 + +# Tier 1 and 2, 5 and 6 +for CODE in {300,400,500}; do + for TEAMSIZE in {1,2,4,5,8}; do + ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE + done +done + +# Tier 3, 7 +for CODE in {100,110,111,112,120,121,122}; do + for TEAMSIZE in {1,2,4,5,8}; do + for THREADRANGE in {32,41,1000}; do + ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE + done + done +done + +# Tier 4, 8 +for CODE in {200,210,211,212,220,221,222}; do + for TEAMSIZE in {1,2,4,5,8}; do + for THREADRANGE in {32,41,1000}; do + ./$EXECUTABLE.$SUFFIX $TEAMRANGE $THREADRANGE $VECTORRANGE $OREPEAT $MREPEAT $IREPEAT $TEAMSIZE $VECTORSIZE $SCHEDULE $CODE + done + done +done + +done # end SCHEDULE + +fi #end cuda diff --git a/lib/kokkos/bin/hpcbind b/lib/kokkos/bin/hpcbind new file mode 100755 index 0000000000..ca34648780 --- /dev/null +++ b/lib/kokkos/bin/hpcbind @@ -0,0 +1,454 @@ +#!/usr/bin/env bash + +################################################################################ +# Check if hwloc commands exist +################################################################################ +declare -i HPCBIND_HAS_HWLOC=1 +type hwloc-bind >/dev/null 2>&1 +HPCBIND_HAS_HWLOC=$((HPCBIND_HAS_HWLOC & ! $?)) + +type hwloc-distrib >/dev/null 2>&1 +HPCBIND_HAS_HWLOC=$((HPCBIND_HAS_HWLOC & ! $?)) + +type hwloc-ls >/dev/null 2>&1 +HPCBIND_HAS_HWLOC=$((HPCBIND_HAS_HWLOC & ! $?)) + +type hwloc-calc >/dev/null 2>&1 +HPCBIND_HAS_HWLOC=$((HPCBIND_HAS_HWLOC & ! $?)) + +type hwloc-ps >/dev/null 2>&1 +HPCBIND_HAS_HWLOC=$((HPCBIND_HAS_HWLOC & ! 
$?)) + +if [[ ${HPCBIND_HAS_HWLOC} -eq 0 ]]; then + echo "hwloc not found, no process binding will occur" +fi + +# Get parent cpuset +HPCBIND_HWLOC_PARENT_CPUSET="" +if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then + MY_PID="$BASHPID" + HPCBIND_HWLOC_PARENT_CPUSET=$(hwloc-ps --cpuset | grep "${MY_PID}" | cut -f 2) +fi + +################################################################################ +# Check if nvidia-smi exist +################################################################################ +declare -i HPCBIND_HAS_NVIDIA=0 +type nvidia-smi >/dev/null 2>&1 +HPCBIND_HAS_NVIDIA=$((!$?)) + + +################################################################################ +# Get visible gpu +################################################################################ +declare -i NUM_GPUS=0 +HPCBIND_VISIBLE_GPUS="" +if [[ ${HPCBIND_HAS_NVIDIA} -eq 1 ]]; then + NUM_GPUS=$(nvidia-smi -L | wc -l); + GPU_LIST="$( seq 0 $((NUM_GPUS-1)) )" + HPCBIND_VISIBLE_GPUS=${CUDA_VISIBLE_DEVICES:-${GPU_LIST}} +fi + +declare -i HPCBIND_ENABLE_GPU_MAPPING=$((NUM_GPUS > 0)) + + +################################################################################ +# Get queue id +# supports sbatch, bsub, aprun +################################################################################ +HPCBIND_QUEUE_NAME="" +declare -i HPCBIND_QUEUE_INDEX=0 +declare -i HPCBIND_QUEUE_GPU_MAPPING=0 + +if [[ ! -z "${SLURM_LOCAL_ID}" ]]; then + HPCBIND_QUEUE_GPU_MAPPING=1 + HPCBIND_QUEUE_NAME="sbatch" + HPCBIND_QUEUE_INDEX=${SLURM_LOCAL_ID} +elif [[ ! -z "${LBS_JOBINDEX}" ]]; then + HPCBIND_QUEUE_GPU_MAPPING=1 + HPCBIND_QUEUE_NAME="bsub" + HPCBIND_QUEUE_INDEX=${LBS_JOBINDEX} +elif [[ ! 
-z "${ALPS_APP_PE}" ]]; then + HPCBIND_QUEUE_GPU_MAPPING=1 + HPCBIND_QUEUE_NAME="aprun" + HPCBIND_QUEUE_INDEX=${ALPS_APP_PE} +fi + + +################################################################################ +# Show help +################################################################################ +function show_help { + local cmd=$(basename "$0") + echo "Usage: ${cmd} -- command ..." + echo " Set the process mask, OMP environment variables and CUDA environment" + echo " variables to sane values if possible. Uses hwloc and nvidia-smi if" + echo " available. Will preserve the current process binding, so it is safe" + echo " to use with a queuing system or mpiexec." + echo "" + echo "Options:" + echo " --no-hwloc-bind Disable binding" + echo " --proc-bind= Set the initial process mask for the script" + echo " LOC can be any valid location argument for" + echo " hwloc-calc Default: all" + echo " --distribute=N Distribute the current cpuset into N partitions" + echo " --distribute-partition=I" + echo " Use the i'th partition (zero based)" + echo " --visible-gpus= Comma separated list of gpu ids" + echo " Default: CUDA_VISIBLE_DEVICES or all gpus in" + echo " sequential order" + echo " --gpu-ignore-queue Ignore queue job id when choosing visible GPU" + echo " --no-gpu-mapping Do not set CUDA_VISIBLE_DEVICES" + echo " --openmp=M.m Set env variables for the given OpenMP version" + echo " Default: 4.0" + echo " --openmp-percent=N Integer percentage of cpuset to use for OpenMP" + echo " threads Default: 100" + echo " --openmp-places= Op=threads|cores|sockets. 
Default: threads" + echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES" + echo " --force-openmp-num-threads=N" + echo " Override logic for selecting OMP_NUM_THREADS" + echo " --force-openmp-proc-bind=" + echo " Override logic for selecting OMP_PROC_BIND" + echo " --no-openmp-nested Set OMP_NESTED to false" + echo " --show-bindings Show the bindings" + echo " --lstopo Show bindings in lstopo without executing a command" + echo " -v|--verbose Show options and relevant environment variables" + echo " -h|--help Show this message" + echo "" + echo "Sample Usage:" + echo " Split the current process cpuset into 4 and use the 3rd partition" + echo " ${cmd} --distribute=4 --distribute-partition=2 -v -- command ..." + echo " Bing the process to all even cores" + echo " ${cmd} --proc-bind=core:even -v -- command ..." + echo " Bind to the first 64 cores and split the current process cpuset into 4" + echo " ${cmd} --proc-bind=core:0-63 --distribute=4 --distribute-partition=0 -- command ..." + echo " skip GPU 0 when mapping visible devices" + echo " ${cmd} --distribute=4 --distribute-partition=0 --visible-gpus=1,2 -v -- command ..." 
+ echo " Display the current bindings" + echo " ${cmd} --proc-bind=numa:0 --show-bindings -- command" + echo " Display the current bindings using lstopo" + echo " ${cmd} --proc-bind=numa:0.core:odd --lstopo" + echo "" +} + + +################################################################################ +# Parse command line arguments +################################################################################ +# Show help if no command line arguments given +if [[ "$#" -eq 0 ]]; then + show_help + exit 0 +fi + +declare -a UNKNOWN_ARGS=() +declare -i HPCBIND_ENABLE_HWLOC_BIND=${HPCBIND_HAS_HWLOC} +declare -i HPCBIND_DISTRIBUTE=1 +declare -i HPCBIND_PARTITION=0 +HPCBIND_PROC_BIND="all" +HPCBIND_OPENMP_VERSION=4.0 +declare -i HPCBIND_OPENMP_PERCENT=100 +HPCBIND_OPENMP_PLACES=${OMP_PLACES:-threads} +declare -i HPCBIND_OPENMP_PROC_BIND=1 +declare -i HPCBIND_OPENMP_FORCE_NUM_THREADS=-1 +HPCBIND_OPENMP_FORCE_PROC_BIND="" +HPCBIND_OPENMP_NESTED=${OMP_NESTED:-true} +declare -i HPCBIND_VERBOSE=0 + +declare -i HPCBIND_SHOW_BINDINGS=0 +declare -i HPCBIND_LSTOPO=0 + +for i in $@; do + case $i in + # number of partitions to create + --no-hwloc-bind) + HPCBIND_ENABLE_HWLOC_BIND=0 + shift + ;; + --proc-bind=*) + HPCBIND_PROC_BIND="${i#*=}" + shift + ;; + --distribute=*) + HPCBIND_DISTRIBUTE="${i#*=}" + shift + ;; + # which partition to use + --distribute-partition=*) + HPCBIND_PARTITION="${i#*=}" + shift + ;; + --visible-gpus=*) + HPCBIND_VISIBLE_GPUS=$(echo "${i#*=}" | tr ',' ' ') + shift + ;; + --gpu-ignore-queue) + HPCBIND_QUEUE_GPU_MAPPING=0 + shift + ;; + --no-gpu-mapping) + HPCBIND_ENABLE_GPU_MAPPING=0 + shift + ;; + --openmp=*) + HPCBIND_OPENMP_VERSION="${i#*=}" + shift + ;; + --openmp-percent=*) + HPCBIND_OPENMP_PERCENT="${i#*=}" + shift + ;; + --openmp-places=*) + HPCBIND_OPENMP_PLACES="${i#*=}" + shift + ;; + --no-openmp-proc-bind) + HPCBIND_OPENMP_PROC_BIND=0 + shift + ;; + --force-openmp-proc-bind=*) + HPCBIND_OPENMP_FORCE_PROC_BIND="${i#*=}" + shift + ;; + 
--force-openmp-num-threads=*) + HPCBIND_OPENMP_FORCE_NUM_THREADS="${i#*=}" + shift + ;; + --no-openmp-nested) + HPCBIND_OPENMP_NESTED="false" + shift + ;; + --show-bindings) + HPCBIND_VERBOSE=1 + HPCBIND_SHOW_BINDINGS=1 + shift + ;; + --lstopo) + HPCBIND_VERBOSE=1 + HPCBIND_SHOW_BINDINGS=0 + HPCBIND_LSTOPO=1 + shift + ;; + -v|--verbose) + HPCBIND_VERBOSE=1 + shift + ;; + -h|--help) + show_help + exit 0 + ;; + # ignore remaining arguments + --) + shift + break + ;; + # unknown option + *) + UNKNOWN_ARGS+=("$i") + shift + ;; + esac +done + + +################################################################################ +# Check unknown arguments +################################################################################ +if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then + echo "Uknown options: ${UNKNOWN_ARGS[*]}" + exit 1 +fi + + +################################################################################ +# Check that visible gpus are valid +################################################################################ +HPCBIND_VISIBLE_GPUS=(${HPCBIND_VISIBLE_GPUS}) +if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then + for ((i=0; i < ${#HPCBIND_VISIBLE_GPUS[*]}; i++)); do + if [[ ${HPCBIND_VISIBLE_GPUS[$i]} -ge ${NUM_GPUS} || + ${HPCBIND_VISIBLE_GPUS[$i]} -lt 0 ]]; then + echo "Invaild GPU ID ${HPCBIND_VISIBLE_GPUS[$i]}, setting to 0" + HPCBIND_VISIBLE_GPUS[$i]=0; + fi + done + NUM_GPUS=${#HPCBIND_VISIBLE_GPUS[@]} +fi + + +################################################################################ +# Check OpenMP percent +################################################################################ +if [[ ${HPCBIND_OPENMP_PERCENT} -lt 1 ]]; then + echo "OpenMP percent < 1, setting to 1" + HPCBIND_OPENMP_PERCENT=1 +elif [[ ${HPCBIND_OPENMP_PERCENT} -gt 100 ]]; then + echo "OpenMP percent > 100, setting to 100" + HPCBIND_OPENMP_PERCENT=100 +fi + +################################################################################ +# Check distribute 
+################################################################################ +if [[ ${HPCBIND_DISTRIBUTE} -le 0 ]]; then + echo "Invalid input for distribute, changing distribute to 1" + HPCBIND_DISTRIBUTE=1 +fi + +if [[ ${HPCBIND_PARTITION} -ge ${HPCBIND_DISTRIBUTE} ]]; then + echo "Invalid input for distribute-partition, changing to 0" + HPCBIND_PARTITION=0 +fi + + +################################################################################ +# Find cpuset and num threads +################################################################################ +HPCBIND_HWLOC_CPUSET="" +declare -i HPCBIND_NUM_PUS=0 + +if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then + if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then + BINDING=$(hwloc-calc ${HPCBIND_PROC_BIND}) + else + BINDING=$(hwloc-calc --restrict ${HPCBIND_HWLOC_PARENT_CPUSET} ${HPCBIND_PROC_BIND}) + fi + + CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at core ${HPCBIND_DISTRIBUTE})) + HPCBIND_HWLOC_CPUSET=${CPUSETS[${HPCBIND_PARTITION}]} + HPCBIND_NUM_PUS=$(hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu | wc -l) +else + HPCBIND_NUM_PUS=$(cat /proc/cpuinfo | grep -c processor) +fi + +declare -i HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_NUM_PUS * HPCBIND_OPENMP_PERCENT)) +HPCBIND_OPENMP_NUM_THREADS=$((HPCBIND_OPENMP_NUM_THREADS / 100)) + + +if [[ ${HPCBIND_OPENMP_NUM_THREADS} -lt 1 ]]; then + HPCBIND_OPENMP_NUM_THREADS=1 +elif [[ ${HPCBIND_OPENMP_NUM_THREADS} -gt ${HPCBIND_NUM_PUS} ]]; then + HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_NUM_PUS} +fi + +if [[ ${HPCBIND_OPENMP_FORCE_NUM_THREADS} -gt 0 ]]; then + HPCBIND_OPENMP_NUM_THREADS=${HPCBIND_OPENMP_FORCE_NUM_THREADS} +fi + +################################################################################ +# Set OpenMP environment variables +################################################################################ + +# set OMP_NUM_THREADS +export OMP_NUM_THREADS=${HPCBIND_OPENMP_NUM_THREADS} + +# set OMP_PROC_BIND and OMP_PLACES +if [[ 
${HPCBIND_OPENMP_PROC_BIND} -eq 1 ]]; then + if [[ "${HPCBIND_OPENMP_FORCE_PROC_BIND}" == "" ]]; then + #default proc bind logic + if [[ "${HPCBIND_OPENMP_VERSION}" == "4.0" || "${HPCBIND_OPENMP_VERSION}" > "4.0" ]]; then + export OMP_PLACES="${HPCBIND_OPENMP_PLACES}" + export OMP_PROC_BIND="spread" + else + export OMP_PROC_BIND="true" + unset OMP_PLACES + fi + else + #force proc bind + export OMP_PLACES="${HPCBIND_OPENMP_PLACES}" + export OMP_PROC_BIND="${HPCBIND_OPENMP_FORCE_PROC_BIND}" + fi +else + # no openmp proc bind + unset OMP_PLACES + unset OMP_PROC_BIND +fi + +# set OMP_NESTED +export OMP_NESTED=${HPCBIND_OPENMP_NESTED} + + +################################################################################ +# Set CUDA environment variables +################################################################################ + +if [[ ${HPCBIND_ENABLE_GPU_MAPPING} -eq 1 ]]; then + if [[ ${HPCBIND_QUEUE_GPU_MAPPING} -eq 0 ]]; then + declare -i GPU_ID=$((HPCBIND_PARTITION % NUM_GPUS)) + export CUDA_VISIBLE_DEVICES=${HPCBIND_VISIBLE_GPUS[${GPU_ID}]} + else + declare -i MY_TASK_ID=$((HPCBIND_QUEUE_INDEX * HPCBIND_DISTRIBUTE + HPCBIND_PARTITION)) + declare -i GPU_ID=$((MY_TASK_ID % NUM_GPUS)) + export CUDA_VISIBLE_DEVICES=${HPCBIND_VISIBLE_GPUS[${GPU_ID}]} + fi +fi + +################################################################################ +# Set hpcbind environment variables +################################################################################ +export HPCBIND_HAS_HWLOC=${HPCBIND_HAS_HWLOC} +export HPCBIND_HAS_NVIDIA=${HPCBIND_HAS_NVIDIA} +export HPCBIND_NUM_PUS=${HPCBIND_NUM_PUS} +export HPCBIND_HWLOC_CPUSET=${HPCBIND_HWLOC_CPUSET} +export HPCBIND_HWLOC_DISTRIBUTE=${HPCBIND_DISTRIBUTE} +export HPCBIND_HWLOC_DISTRIBUTE_PARTITION=${HPCBIND_PARTITION} +if [[ "${HPCBIND_HWLOC_PARENT_CPUSET}" == "" ]]; then + export HPCBIND_HWLOC_PARENT_CPUSET="all" +else + export HPCBIND_HWLOC_PARENT_CPUSET=${HPCBIND_HWLOC_PARENT_CPUSET} +fi +export 
HPCBIND_HWLOC_PROC_BIND=${HPCBIND_PROC_BIND} +export HPCBIND_NVIDIA_ENABLE_GPU_MAPPING=${HPCBIND_ENABLE_GPU_MAPPING} +export HPCBIND_NVIDIA_VISIBLE_GPUS=$(echo "${HPCBIND_VISIBLE_GPUS[*]}" | tr ' ' ',') +export HPCBIND_OPENMP_VERSION=${HPCBIND_OPENMP_VERSION} +if [[ "${HPCBIND_QUEUE_NAME}" != "" ]]; then + export HPCBIND_QUEUE_INDEX=${HPCBIND_QUEUE_INDEX} + export HPCBIND_QUEUE_NAME=${HPCBIND_QUEUE_NAME} + export HPCBIND_QUEUE_GPU_MAPPING=${HPCBIND_QUEUE_GPU_MAPPING} +fi + + +################################################################################ +# Print verbose +################################################################################ + +if [[ ${HPCBIND_VERBOSE} -eq 1 ]]; then + MY_ENV=$(env | sort) + echo "[HPCBIND]" + echo "${MY_ENV}" | grep -E "^HPCBIND_" + echo "[CUDA]" + echo "${MY_ENV}" | grep -E "^CUDA_" + echo "[OPENMP]" + echo "${MY_ENV}" | grep -E "^OMP_" +fi + +if [[ ${HPCBIND_HAS_HWLOC} -eq 1 && ${HPCBIND_SHOW_BINDINGS} -eq 1 ]]; then + echo "[BINDINGS]" + hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu +elif [[ ${HPCBIND_SHOW_BINDINGS} -eq 1 ]]; then + echo "Unable to show bindings, hwloc not available." +fi + +################################################################################ +# Run command +################################################################################ + +if [[ ${HPCBIND_LSTOPO} -eq 0 ]]; then + if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 ]]; then + hwloc-bind ${HPCBIND_HWLOC_CPUSET} -- $@ + else + eval $@ + fi +else + if [[ ${HPCBIND_HAS_HWLOC} -eq 1 ]]; then + if [[ ${HPCBIND_ENABLE_HWLOC_BIND} -eq 1 && ! -z ${DISPLAY} ]]; then + echo "[BINDINGS]" + hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} --only pu + hwloc-bind ${HPCBIND_HWLOC_CPUSET} -- lstopo --pid 0 + else + hwloc-ls --restrict ${HPCBIND_HWLOC_CPUSET} + fi + else + echo "Unable to show bindings, hwloc not available." 
+ fi +fi diff --git a/lib/kokkos/bin/kokkos-bind b/lib/kokkos/bin/kokkos-bind new file mode 100755 index 0000000000..b6fe07a1bd --- /dev/null +++ b/lib/kokkos/bin/kokkos-bind @@ -0,0 +1,221 @@ +#!/usr/bin/env bash + +# check if hwloc commands exist +declare -i HAS_HWLOC=0 +type hwloc-bind >/dev/null 2>&1 +HAS_HWLOC="${HAS_HWLOC} + $?" + +type hwloc-distrib >/dev/null 2>&1 +HAS_HWLOC="${HAS_HWLOC} + $?" + +type hwloc-ls >/dev/null 2>&1 +HAS_HWLOC="${HAS_HWLOC} + $?" + +type hwloc-calc >/dev/null 2>&1 +HAS_HWLOC="${HAS_HWLOC} + $?" + +type hwloc-ps >/dev/null 2>&1 +HAS_HWLOC="${HAS_HWLOC} + $?" + + +#parse args +declare -a UNKNOWN_ARGS=() +declare -i DISTRIBUTE=1 +declare -i INDEX=0 +PROC_BIND="all" +CURRENT_CPUSET="" +OPENMP_VERSION=4.0 +OPENMP_PROC_BIND=True +OPENMP_NESTED=True +VERBOSE=False + +#get the current process cpuset +if [[ ${HAS_HWLOC} -eq 0 ]]; then + MY_PID="$BASHPID" + CURRENT_CPUSET=$(hwloc-ps --cpuset | grep "${MY_PID}" | cut -f 2) + echo "$CURRENT_CPUSET" +fi + +function show_help { + local cmd=$(basename "$0") + echo "Usage: ${cmd} -- command ..." + echo " Uses hwloc to divide the node into the given number of groups," + echo " set the appropriate OMP_NUM_THREADS and execute the command on the" + echo " selected group." + echo "" + echo " NOTE: This command assumes it has exclusive use of the node" + echo "" + echo "Options:" + echo " --proc-bind= Set the initial process mask for the script. " + echo " LOC can be any valid location argumnet for" + echo " hwloc-calc. 
Defaults to the entire machine" + echo " --distribute=N Distribute the current proc-bind into N groups" + echo " --index=I Use the i'th group (zero based)" + echo " --openmp=M.m Set env variables for the given OpenMP version" + echo " (default 4.0)" + echo " --no-openmp-proc-bind Set OMP_PROC_BIND to false and unset OMP_PLACES" + echo " --no-openmp-nested Set OMP_NESTED to false" + echo " -v|--verbose" + echo " -h|--help" + echo "" + echo "Sample Usage:" + echo " ${cmd} --distribute=4 --index=2 -v -- command ..." + echo "" +} + +if [[ "$#" -eq 0 ]]; then + show_help + exit 0 +fi + + +for i in $@; do + case $i in + # number of partitions to create + --proc-bind=*) + PROC_BIND="${i#*=}" + shift + ;; + --distribute=*) + DISTRIBUTE="${i#*=}" + shift + ;; + # which group to use + --index=*) + INDEX="${i#*=}" + shift + ;; + --openmp=*) + OPENMP_VERSION="${i#*=}" + shift + ;; + --no-openmp-proc-bind) + OPENMP_PROC_BIND=False + shift + ;; + --no-openmp-nested) + OPENMP_NESTED=False + shift + ;; + -v|--verbose) + VERBOSE=True + shift + ;; + -h|--help) + show_help + exit 0 + ;; + # ignore remaining arguments + --) + shift + break + ;; + # unknown option + *) + UNKNOWN_ARGS+=("$i") + shift + ;; + esac +done + +if [[ ${#UNKNOWN_ARGS[*]} > 0 ]]; then + echo "Uknown options: ${UNKNOWN_ARGS[*]}" + exit 1 +fi + +if [[ ${DISTRIBUTE} -le 0 ]]; then + echo "Invalid input for distribute, changing distribute to 1" + DISTRIBUTE=1 +fi + +if [[ ${INDEX} -ge ${DISTRIBUTE} ]]; then + echo "Invalid input for index, changing index to 0" + INDEX=0 +fi + +if [[ ${HAS_HWLOC} -ne 0 ]]; then + echo "hwloc not found, no process binding will occur" + DISTRIBUTE=1 + INDEX=0 +fi + +if [[ ${HAS_HWLOC} -eq 0 ]]; then + + if [[ "${CURRENT_CPUSET}" == "" ]]; then + BINDING=$(hwloc-calc ${PROC_BIND}) + else + BINDING=$(hwloc-calc --restrict ${CURRENT_CPUSET} ${PROC_BIND}) + fi + + CPUSETS=($(hwloc-distrib --restrict ${BINDING} --at core ${DISTRIBUTE})) + CPUSET=${CPUSETS[${INDEX}]} + NUM_THREADS=$(hwloc-ls 
--restrict ${CPUSET} --only pu | wc -l) + + if [[ "${VERBOSE}" == "True" ]]; then + echo "hwloc: true" + echo " proc_bind: ${PROC_BIND}" + echo " distribute: ${DISTRIBUTE}" + echo " index: ${INDEX}" + echo " parent_cpuset: ${CURRENT_CPUSET}" + echo " cpuset: ${CPUSET}" + echo "omp_num_threads: ${NUM_THREADS}" + echo "omp_proc_bind: ${OPENMP_PROC_BIND}" + echo "omp_nested: ${OPENMP_NESTED}" + echo "OpenMP: ${OPENMP_VERSION}" + fi + + # set OMP env + if [[ "${OPENMP_PROC_BIND}" == "True" ]]; then + if [[ "${OPENMP_VERSION}" == "4.0" || "${OPENMP_VERSION}" > "4.0" ]]; then + export OMP_PLACES="threads" + export OMP_PROC_BIND="spread" + else + export OMP_PROC_BIND="true" + unset OMP_PLACES + fi + else + unset OMP_PLACES + unset OMP_PROC_BIND + fi + if [[ "${OPENMP_NESTED}" == "True" ]]; then + export OMP_NESTED="true" + else + export OMP_NESTED="false" + fi + export OMP_NUM_THREADS="${NUM_THREADS}" + + hwloc-bind ${CPUSET} -- $@ +else + NUM_THREADS=$(cat /proc/cpuinfo | grep -c processor) + + if [[ "${VERBOSE}" == "True" ]]; then + echo "hwloc: false" + echo "omp_num_threads: ${NUM_THREADS}" + echo "omp_proc_bind: ${OPENMP_PROC_BIND}" + echo "omp_nested: ${OPENMP_NESTED}" + echo "OpenMP: ${OPENMP_VERSION}" + fi + + # set OMP env + if [[ "${OPENMP_PROC_BIND}" == "True" ]]; then + if [[ "${OPENMP_VERSION}" == "4.0" || "${OPENMP_VERSION}" > "4.0" ]]; then + export OMP_PLACES="threads" + export OMP_PROC_BIND="spread" + else + export OMP_PROC_BIND="true" + unset OMP_PLACES + fi + else + unset OMP_PLACES + unset OMP_PROC_BIND + fi + if [[ "${OPENMP_NESTED}" == "True" ]]; then + export OMP_NESTED="true" + else + export OMP_NESTED="false" + fi + export OMP_NUM_THREADS="${NUM_THREADS}" + + eval $@ +fi + diff --git a/lib/kokkos/bin/runtest b/lib/kokkos/bin/runtest new file mode 100755 index 0000000000..92411fe5ba --- /dev/null +++ b/lib/kokkos/bin/runtest @@ -0,0 +1,165 @@ +#!/usr/bin/env bash + +function get_path() { + cd "$(dirname "$0")" + cd .. 
+ echo "$(pwd -P)" +} + +KOKKOS_PATH="$(get_path "$0")" + +function show_help() { + local cmd=$(basename "$0") + echo "Usage: ${cmd} " + echo " Build and run the tests" + echo "" + echo "Options:" + echo " -j=N|--make-j=N Build the tests in parallel" + echo " -c|--clean Clean build and regenerate make files" + echo " --clean-on-pass Clean build when runtest passes" + echo " --output-prefix=
  Prefix of log files  Default: runtest"
+  echo "  --build-only           Only build the tests"
+  echo "  -v|--verbose           Tee STDOUT and STDERR to screen and files"
+  echo "  -h|--help              Show this message"
+  echo ""
+  ${KOKKOS_PATH}/generate_makefile.bash --help
+  return 0
+}
+
+
+declare -a GENERATE_ARGS=()   # unrecognized arguments, forwarded to generate_makefile.bash
+declare -i VERBOSE=0          # 1: also tee stdout to the screen
+declare -i CLEAN=0            # 1: delete previous build products first
+declare -i CLEAN_ON_PASS=0    # 1: run "make clean" after the tests pass
+declare -i BUILD_ONLY=0       # 1: build the tests but do not run them
+OUTPUT="runtest"              # prefix of the .out and .err log files
+# Default build parallelism: HPCBIND_NUM_PUS when set in the environment, otherwise 1.
+declare -i MAKE_J=${HPCBIND_NUM_PUS:-1}
+# "$@" must be quoted so an argument containing whitespace or glob characters stays one word.
+for i in "$@"; do
+  case "$i" in
+    -j=*|--make-j=*)
+      MAKE_J=${i#*=}  # keep everything after the first '='
+      shift
+      ;;
+    -c|--clean)
+      CLEAN=1
+      shift
+      ;;
+    --clean-on-pass)
+      CLEAN_ON_PASS=1
+      shift
+      ;;
+    --output-prefix=*)
+      OUTPUT=${i#*=}
+      shift
+      ;;
+    --build-only)
+      BUILD_ONLY=1
+      shift
+      ;;
+    -v|--verbose)
+      VERBOSE=1
+      shift
+      ;;
+    -h|--help)
+      show_help
+      exit 0
+      ;;
+    *)
+      GENERATE_ARGS+=("$i")  # not a runtest option: pass it through untouched
+      shift
+      ;;
+  esac
+done
+# Refuse to run in the root repository path: the clean step deletes core, containers, ... from the current directory.
+if [[ "$(pwd -P)" == "${KOKKOS_PATH}" ]]; then  # right-hand side quoted: literal compare, not a glob match
+  echo "Cannot call $0 from root repository path ${KOKKOS_PATH}"
+  exit 1
+fi
+
+# Some makefile dependencies are incorrect, so clean needs to force
+# a new call to generate_makefiles.bash
+if [[ ${CLEAN} -eq 1 ]]; then
+  START=${SECONDS}
+  echo "Cleaning"
+  /bin/rm -rf algorithms containers core example install Makefile >/dev/null 2>&1
+  END=${SECONDS}
+  echo "    $((END-START)) seconds"
+  if [[ ${VERBOSE} -eq 1 ]]; then
+    echo ""
+    echo ""
+  fi
+fi
+
+declare -i START=${SECONDS}
+echo "Generating Makefile"
+echo "    ${KOKKOS_PATH}/generate_makefile.bash --kokkos-path=${KOKKOS_PATH} ${GENERATE_ARGS[@]}"
+
+if [[ ${VERBOSE} -eq 0 ]]; then
+  "${KOKKOS_PATH}"/generate_makefile.bash --kokkos-path="${KOKKOS_PATH}" "${GENERATE_ARGS[@]}" > ${OUTPUT}.out 2> >(tee ${OUTPUT}.err >&2)
+else
+  "${KOKKOS_PATH}"/generate_makefile.bash --kokkos-path="${KOKKOS_PATH}" "${GENERATE_ARGS[@]}" > >(tee ${OUTPUT}.out) 2> >(tee ${OUTPUT}.err >&2)
+fi
+declare -i RESULT=$?
+declare -i END=${SECONDS}
+if [[ ${RESULT} -eq 0 ]]; then
+  echo "    PASS:  $((END-START)) seconds"
+  if [[ ${VERBOSE} -eq 1 ]]; then
+    echo ""
+    echo ""
+  fi
+else
+  cat ${OUTPUT}.out | grep "FAIL"
+  cat ${OUTPUT}.err | grep "FAIL"
+  echo "    FAIL:  $((END-START)) seconds"
+  exit 1
+fi
+# Build the tests with MAKE_J parallel jobs, appending to the same logs; --keep-going collects as many errors as possible.
+START=${SECONDS}
+echo "Building"
+if [[ ${VERBOSE} -eq 0 ]]; then
+  make --keep-going -j "${MAKE_J}" build-test >> "${OUTPUT}.out" 2> >(tee -a "${OUTPUT}.err" >&2)
+else
+  make --keep-going -j "${MAKE_J}" build-test > >(tee -a "${OUTPUT}.out") 2> >(tee -a "${OUTPUT}.err" >&2)
+fi
+RESULT=$?  # exit status of the make call in the branch that ran
+END=${SECONDS}
+if [[ ${RESULT} -eq 0 ]]; then
+  echo "    PASS:  $((END-START)) seconds"
+  if [[ ${VERBOSE} -eq 1 ]]; then
+    echo ""
+    echo ""
+  fi
+else
+  grep -E "[[:space:]]error:[[:space:]]" "${OUTPUT}.out"  # show only the compiler error lines
+  grep -E "[[:space:]]error:[[:space:]]" "${OUTPUT}.err"
+  echo "    FAIL:  $((END-START)) seconds"
+  exit 1
+fi
+# Run the tests unless --build-only was given, appending to the same logs.
+if [[ ${BUILD_ONLY} -eq 0 ]]; then
+  START=${SECONDS}
+  echo "Testing"
+  if [[ ${VERBOSE} -eq 0 ]]; then
+    make --keep-going test >> "${OUTPUT}.out" 2> >(tee -a "${OUTPUT}.err" >&2)
+  else
+    make --keep-going test > >(tee -a "${OUTPUT}.out") 2> >(tee -a "${OUTPUT}.err" >&2)
+  fi
+  RESULT=$?  # exit status of the make call in the branch that ran
+  END=${SECONDS}
+  if [[ ${RESULT} -eq 0 ]]; then
+    echo "    PASS:  $((END-START)) seconds"
+    if [[ ${CLEAN_ON_PASS} -eq 1 ]]; then
+      make clean
+    fi
+  else
+    grep "FAIL" "${OUTPUT}.out"  # surface the failing lines from both logs
+    grep "FAIL" "${OUTPUT}.err"
+    echo "    FAIL:  $((END-START)) seconds"
+    exit 1
+  fi
+fi
+
+exit ${RESULT}  # status of the last step that ran; every failure path above has already exited with 1
+
diff --git a/lib/kokkos/cmake/kokkos.cmake b/lib/kokkos/cmake/kokkos.cmake
index 235b7eaba4..396822c7fa 100644
--- a/lib/kokkos/cmake/kokkos.cmake
+++ b/lib/kokkos/cmake/kokkos.cmake
@@ -999,8 +999,12 @@ SET (Kokkos_INCLUDE_DIRS
     ${Kokkos_SOURCE_DIR}/containers/src
     ${Kokkos_SOURCE_DIR}/algorithms/src
     ${Kokkos_BINARY_DIR}  # to find KokkosCore_config.h
+    ${KOKKOS_INCLUDE_DIRS}
 )
 
+# pass include dirs back to parent scope
+SET(Kokkos_INCLUDE_DIRS_RET ${Kokkos_INCLUDE_DIRS} PARENT_SCOPE)
+
 INCLUDE_DIRECTORIES(${Kokkos_INCLUDE_DIRS})
 
 IF(KOKKOS_SEPARATE_LIBS)
diff --git a/lib/kokkos/config/master_history.txt b/lib/kokkos/config/master_history.txt
index cc6f4c97d7..0447db4b2b 100644
--- a/lib/kokkos/config/master_history.txt
+++ b/lib/kokkos/config/master_history.txt
@@ -7,3 +7,4 @@ tag:  2.02.07    date: 12:16:2016    master: 4b4cc4ba    develop: 382c0966
 tag:  2.02.15    date: 02:10:2017    master: 8c64cd93    develop: 28dea8b6
 tag:  2.03.00    date: 04:25:2017    master: 120d9ce7    develop: 015ba641
 tag:  2.03.05    date: 05:27:2017    master: 36b92f43    develop: 79073186
+tag:  2.03.13    date: 07:27:2017    master: da314444    develop: 29ccb58a
diff --git a/lib/kokkos/config/query_cuda_arch.cpp b/lib/kokkos/config/query_cuda_arch.cpp
new file mode 100644
index 0000000000..383f04e34e
--- /dev/null
+++ b/lib/kokkos/config/query_cuda_arch.cpp
@@ -0,0 +1,29 @@
+// Query CUDA device 0 and print its architecture family name followed by
+// the compute capability digits on one line of stdout, e.g. "Pascal60".
+// On failure, or for a major compute capability other than 3, 5 or 6, a
+// message is written to stderr and the exit status is non-zero.
+#include <cstdio>
+#include <cuda_runtime_api.h>
+int main()
+{
+  cudaDeviceProp prop;
+  const cudaError_t err_code = cudaGetDeviceProperties(&prop, 0);
+  if (cudaSuccess != err_code) {
+    fprintf(stderr,"cudaGetDeviceProperties failed: %s\n", cudaGetErrorString(err_code));
+    return -1;
+  }
+  // The family name is printed without a newline; the digits below finish the line.
+  switch (prop.major) {
+    case 3:
+      printf("Kepler"); break;
+    case 5:
+      printf("Maxwell"); break;
+    case 6:
+      printf("Pascal"); break;
+    default:
+      fprintf(stderr, "Unsupported Device %d%d\n", (int)prop.major, (int)prop.minor);
+      return -1;
+  }
+  printf("%d%d\n", (int)prop.major, (int)prop.minor);
+  return 0;
+}
diff --git a/lib/kokkos/config/test_all_sandia b/lib/kokkos/config/test_all_sandia
index 8e1246bf8b..005cd20721 100755
--- a/lib/kokkos/config/test_all_sandia
+++ b/lib/kokkos/config/test_all_sandia
@@ -160,9 +160,14 @@ if [ "$MACHINE" = "sems" ]; then
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
                "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
                "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
@@ -280,13 +285,13 @@ elif [ "$MACHINE" = "apollo" ]; then
                "gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
                "intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
                "clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
-               "clang/head $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
+               "clang/4.0.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
                "cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
     )
   else
     # Format: (compiler module-list build-list exe-name warning-flag)
     COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
-               "clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
+               "clang/4.0.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
                "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
                "gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
                "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
@@ -584,7 +589,7 @@ single_build_and_test() {
   else
     run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
     local -i build_start_time=$(date +%s)
-    run_cmd make build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
+    run_cmd make -j 32 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
     local -i build_end_time=$(date +%s)
     comment="build_time=$(($build_end_time-$build_start_time))"
 
diff --git a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel
index 23968e8c0f..6527df2eb9 100755
--- a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel
+++ b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_pthread_intel
@@ -28,14 +28,14 @@ export JENKINS_DO_PTHREAD=ON
 export JENKINS_DO_SERIAL=OFF
 export JENKINS_DO_COMPLEX=OFF
 
-export ARCH_CXX_FLAG="-xCORE-AVX2 -mkl"
-export ARCH_C_FLAG="-xCORE-AVX2 -mkl"
+export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX2 -mkl"
+export JENKINS_ARCH_C_FLAG="-xCORE-AVX2 -mkl"
 export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a"
 export LAPACK_LIBRARIES=${BLAS_LIBRARIES}
 
 export JENKINS_DO_TESTS=ON
 export JENKINS_DO_EXAMPLES=ON
-export JENKINS_DO_SHARED=OFF
+export JENKINS_DO_SHARED=ON
 
 export QUEUE=haswell
 
diff --git a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel
index 964de3a002..1a306bc2b2 100755
--- a/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel
+++ b/lib/kokkos/config/trilinos-integration/shepard_jenkins_run_script_serial_intel
@@ -28,14 +28,14 @@ export JENKINS_DO_PTHREAD=OFF
 export JENKINS_DO_SERIAL=ON
 export JENKINS_DO_COMPLEX=ON
 
-export ARCH_CXX_FLAG="-xCORE-AVX2 -mkl"
-export ARCH_C_FLAG="-xCORE-AVX2 -mkl"
+export JENKINS_ARCH_CXX_FLAG="-xCORE-AVX2 -mkl"
+export JENKINS_ARCH_C_FLAG="-xCORE-AVX2 -mkl"
 export BLAS_LIBRARIES="-mkl;${MKLROOT}/lib/intel64/libmkl_intel_lp64.a;${MKLROOT}/lib/intel64/libmkl_intel_thread.a;${MKLROOT}/lib/intel64/libmkl_core.a"
 export LAPACK_LIBRARIES=${BLAS_LIBRARIES}
 
 export JENKINS_DO_TESTS=ON
 export JENKINS_DO_EXAMPLES=ON
-export JENKINS_DO_SHARED=OFF
+export JENKINS_DO_SHARED=ON
 
 export QUEUE=haswell
 
diff --git a/lib/kokkos/containers/performance_tests/Makefile b/lib/kokkos/containers/performance_tests/Makefile
index edaaf1ee51..ec69363a17 100644
--- a/lib/kokkos/containers/performance_tests/Makefile
+++ b/lib/kokkos/containers/performance_tests/Makefile
@@ -60,7 +60,6 @@ test-threads: KokkosContainers_PerformanceTest_Threads
 test-openmp: KokkosContainers_PerformanceTest_OpenMP
 	./KokkosContainers_PerformanceTest_OpenMP
 
-
 build_all: $(TARGETS)
 
 test: $(TEST_TARGETS)
diff --git a/lib/kokkos/containers/performance_tests/TestMain.cpp b/lib/kokkos/containers/performance_tests/TestMain.cpp
index f952ab3db5..1224af7cdb 100644
--- a/lib/kokkos/containers/performance_tests/TestMain.cpp
+++ b/lib/kokkos/containers/performance_tests/TestMain.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,12 +36,15 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
 
 #include 
+#include 
+
+#include 
 
 int main(int argc, char *argv[]) {
   ::testing::InitGoogleTest(&argc,argv);
diff --git a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp
index b674ec4a74..6631184624 100644
--- a/lib/kokkos/containers/performance_tests/TestOpenMP.cpp
+++ b/lib/kokkos/containers/performance_tests/TestOpenMP.cpp
@@ -69,30 +69,13 @@ protected:
   {
     std::cout << std::setprecision(5) << std::scientific;
 
-    unsigned num_threads = 4;
-
-    if (Kokkos::hwloc::available()) {
-      num_threads = Kokkos::hwloc::get_available_numa_count()
-                    * Kokkos::hwloc::get_available_cores_per_numa()
-                    * Kokkos::hwloc::get_available_threads_per_core()
-                    ;
-
-    }
-
-    std::cout << "OpenMP: " << num_threads << std::endl;
-
-    Kokkos::OpenMP::initialize( num_threads );
-
-    std::cout << "available threads: " << omp_get_max_threads() << std::endl;
+    Kokkos::OpenMP::initialize();
+    Kokkos::OpenMP::print_configuration( std::cout );
   }
 
   static void TearDownTestCase()
   {
     Kokkos::OpenMP::finalize();
-
-    omp_set_num_threads(1);
-
-    ASSERT_EQ( 1 , omp_get_max_threads() );
   }
 };
 
diff --git a/lib/kokkos/containers/src/Kokkos_DualView.hpp b/lib/kokkos/containers/src/Kokkos_DualView.hpp
index 937eab0d88..35cc8ec753 100644
--- a/lib/kokkos/containers/src/Kokkos_DualView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DualView.hpp
@@ -564,7 +564,7 @@ namespace Impl {
 template< class D, class A1, class A2, class A3, class ... Args >
 struct DualViewSubview {
 
-  typedef typename Kokkos::Experimental::Impl::ViewMapping
+  typedef typename Kokkos::Impl::ViewMapping
     < void
     , Kokkos::ViewTraits< D, A1, A2, A3 >
     , Args ...
diff --git a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
index 8e464506f9..d22d6b865d 100644
--- a/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DynRankView.hpp
@@ -46,19 +46,6 @@
 ///
 /// This header file declares and defines Kokkos::Experimental::DynRankView and its
 /// related nonmember functions.
-/*
- *   Changes from View
- *   1. The rank of the DynRankView is returned by the method rank()
- *   2. Max rank of a DynRankView is 7
- *   3. subview name is subdynrankview
- *   4. Every subdynrankview is returned with LayoutStride
- *
- *   NEW: Redesigned DynRankView
- *   5. subview function name now available
- *   6. Copy and Copy-Assign View to DynRankView
- *   7. deep_copy between Views and DynRankViews
- *   8. rank( view ); returns the rank of View or DynRankView
- */
 
 #ifndef KOKKOS_DYNRANKVIEW_HPP
 #define KOKKOS_DYNRANKVIEW_HPP
@@ -117,6 +104,14 @@ struct DynRankDimTraits {
                       , layout.dimension[7] );
   }
 
+  // Extra overload to match that for specialized types: the constructor properties are ignored and the rank comes from the layout alone
+  template <typename Layout, typename ... P>
+  KOKKOS_INLINE_FUNCTION
+  static size_t computeRank( const Kokkos::Impl::ViewCtorProp<P...>& prop, const Layout& layout )
+  {
+    return computeRank(layout);
+  }
+
   // Create the layout for the rank-7 view.
   // Non-strided Layout
   template 
@@ -158,8 +153,17 @@ struct DynRankDimTraits {
                  );
   }
 
+  // Extra overload to match that for specialized types: the constructor properties are ignored and the layout-only overload does the work
+  template <typename Traits, typename ... P>
+  KOKKOS_INLINE_FUNCTION
+  static typename std::enable_if< (std::is_same<typename Traits::array_layout , Kokkos::LayoutRight>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutLeft>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutStride>::value) , typename Traits::array_layout >::type createLayout( const ViewCtorProp<P...>& prop, const typename Traits::array_layout& layout )
+  {
+    return createLayout( layout );
+  }
+
   // Create a view from the given dimension arguments.
   // This is only necessary because the shmem constructor doesn't take a layout.
+  //   NDE shmem Views are not compatible with the added view_alloc value_type / fad_dim deduction functionality
   template 
   static ViewType createView( const ViewArg& arg
                             , const size_t N0
@@ -186,7 +190,8 @@ struct DynRankDimTraits {
   // Non-strided Layout
   template 
   KOKKOS_INLINE_FUNCTION
-  static typename std::enable_if< (std::is_same::value || std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank )
+  static typename std::enable_if< (std::is_same::value || std::is_same::value) && std::is_integral::value , Layout >::type
+  reconstructLayout( const Layout& layout , iType dynrank )
   {
     return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0)
                  , dynrank > 1 ? layout.dimension[1] : ~size_t(0)
@@ -202,7 +207,8 @@ struct DynRankDimTraits {
   // LayoutStride
   template 
   KOKKOS_INLINE_FUNCTION
-  static typename std::enable_if< (std::is_same::value) && std::is_integral::value , Layout >::type reconstructLayout( const Layout& layout , iType dynrank )
+  static typename std::enable_if< (std::is_same::value) && std::is_integral::value , Layout >::type
+  reconstructLayout( const Layout& layout , iType dynrank )
   {
     return Layout( dynrank > 0 ? layout.dimension[0] : ~size_t(0)
                  , dynrank > 0 ? layout.stride[0] : (0)
@@ -311,6 +317,11 @@ void dyn_rank_view_verify_operator_bounds
 /** \brief  Assign compatible default mappings */
 struct ViewToDynRankViewTag {};
 
+} // namespace Impl
+} // namespace Experimental
+
+namespace Impl {
+
 template< class DstTraits , class SrcTraits >
 class ViewMapping< DstTraits , SrcTraits ,
   typename std::enable_if<(
@@ -337,7 +348,7 @@ class ViewMapping< DstTraits , SrcTraits ,
         )
       )
     )
-  ) , ViewToDynRankViewTag >::type >
+  ) , Kokkos::Experimental::Impl::ViewToDynRankViewTag >::type >
 {
 private:
 
@@ -376,7 +387,7 @@ public:
 
       typedef typename DstType::offset_type  dst_offset_type ;
       dst.m_map.m_offset = dst_offset_type(std::integral_constant() , src.layout() ); //Check this for integer input1 for padding, etc
-      dst.m_map.m_handle = Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_handle , src.m_track );
+      dst.m_map.m_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_map.m_handle , src.m_track );
       dst.m_track.assign( src.m_track , DstTraits::is_managed );
       dst.m_rank = src.Rank ;
     }
@@ -384,22 +395,20 @@ public:
 
 } //end Impl
 
+namespace Experimental {
+
 /* \class DynRankView
  * \brief Container that creates a Kokkos view with rank determined at runtime.
- *   Essentially this is a rank 7 view that wraps the access operators
- *   to yield the functionality of a view
+ *   Essentially this is a rank 7 view
  *
  *   Changes from View
  *   1. The rank of the DynRankView is returned by the method rank()
  *   2. Max rank of a DynRankView is 7
- *   3. subview name is subdynrankview
- *   4. Every subdynrankview is returned with LayoutStride
- *
- *   NEW: Redesigned DynRankView
- *   5. subview function name now available
- *   6. Copy and Copy-Assign View to DynRankView
- *   7. deep_copy between Views and DynRankViews
- *   8. rank( view ); returns the rank of View or DynRankView
+ *   3. A subview is obtained with 'subview(...)' or 'subdynrankview(...)' (kept for backward compatibility)
+ *   4. Every subview is returned with LayoutStride
+ *   5. Copy and Copy-Assign View to DynRankView
+ *   6. deep_copy between Views and DynRankViews
+ *   7. rank( view ); returns the rank of View or DynRankView
  *
  */
 
@@ -427,7 +436,7 @@ public:
 
 
 private:
-  typedef Kokkos::Experimental::Impl::ViewMapping< traits , void > map_type ;
+  typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
   typedef Kokkos::Experimental::Impl::SharedAllocationTracker      track_type ;
 
   track_type  m_track ;
@@ -556,7 +565,7 @@ public:
   // Allow specializations to query their specialized map
 
   KOKKOS_INLINE_FUNCTION
-  const Kokkos::Experimental::Impl::ViewMapping< traits , void > &
+  const Kokkos::Impl::ViewMapping< traits , void > &
   implementation_map() const { return m_map ; }
 
   //----------------------------------------
@@ -803,7 +812,7 @@ public:
     , m_rank(rhs.m_rank)
     {
       typedef typename DynRankView ::traits SrcTraits ;
-      typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
     }
@@ -813,7 +822,7 @@ public:
   DynRankView & operator = (const DynRankView & rhs )
     {
       typedef typename DynRankView ::traits SrcTraits ;
-      typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , void > Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
       Mapping::assign( m_map , rhs.m_map , rhs.m_track );
       m_track.assign( rhs.m_track , traits::is_managed );
@@ -831,7 +840,7 @@ public:
     , m_rank( rhs.Rank )
     {
       typedef typename View::traits  SrcTraits ;
-      typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag >  Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag >  Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
       Mapping::assign( *this , rhs );
     }
@@ -841,7 +850,7 @@ public:
   DynRankView & operator = ( const View & rhs )
     {
       typedef typename View::traits  SrcTraits ;
-      typedef Kokkos::Experimental::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag >  Mapping ;
+      typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag >  Mapping ;
       static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" );
       Mapping::assign( *this , rhs );
       return *this ;
@@ -870,7 +879,7 @@ public:
       )
       : m_track()
       , m_map()
-      , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) )
+      , m_rank( Impl::DynRankDimTraits::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) )
     {
       // Append layout and spaces if not input
       typedef Impl::ViewCtorProp< P ... > alloc_prop_input ;
@@ -923,7 +932,7 @@ public:
 //------------------------------------------------------------
 
       Kokkos::Experimental::Impl::SharedAllocationRecord<> *
-        record = m_map.allocate_shared( prop , Impl::DynRankDimTraits::createLayout(arg_layout) );
+        record = m_map.allocate_shared( prop , Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) );
 
 //------------------------------------------------------------
 #if defined( KOKKOS_ENABLE_CUDA )
@@ -947,8 +956,8 @@ public:
                                >::type const & arg_layout
       )
       : m_track() // No memory tracking
-      , m_map( arg_prop , Impl::DynRankDimTraits::createLayout(arg_layout) )
-      , m_rank( Impl::DynRankDimTraits::computeRank(arg_layout) )
+      , m_map( arg_prop , Impl::DynRankDimTraits::template createLayout(arg_prop, arg_layout) )
+      , m_rank( Impl::DynRankDimTraits::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) )
     {
       static_assert(
         std::is_same< pointer_type
@@ -1034,6 +1043,7 @@ public:
     {}
 
   // For backward compatibility
+  // NDE: This ctor does not take a ViewCtorProp argument - it should not use the alternative createLayout call
   explicit inline
   DynRankView( const ViewAllocateWithoutInitializing & arg_prop
       , const typename traits::array_layout & arg_layout
@@ -1179,6 +1189,11 @@ namespace Impl {
 
 struct DynRankSubviewTag {};
 
+} // namespace Impl
+} // namespace Experimental
+
+namespace Impl {
+
 template< class SrcTraits , class ... Args >
 struct ViewMapping
   < typename std::enable_if<(
@@ -1192,7 +1207,7 @@ struct ViewMapping
         std::is_same< typename SrcTraits::array_layout
                     , Kokkos::LayoutStride >::value
       )
-    ), DynRankSubviewTag >::type
+    ), Kokkos::Experimental::Impl::DynRankSubviewTag >::type
   , SrcTraits
   , Args ... >
 {
@@ -1264,7 +1279,7 @@ public:
   };
 
 
-  typedef DynRankView< value_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits >  ret_type;
+  typedef Kokkos::Experimental::DynRankView< value_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits >  ret_type;
 
   template < typename T , class ... P >
   KOKKOS_INLINE_FUNCTION
@@ -1336,9 +1351,10 @@ public:
 
 } // end Impl
 
+namespace Experimental {
 
 template< class V , class ... Args >
-using Subdynrankview = typename Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , V , Args... >::ret_type ;
+using Subdynrankview = typename Kokkos::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , V , Args... >::ret_type ;
 
 template< class D , class ... P , class ...Args >
 KOKKOS_INLINE_FUNCTION
@@ -1348,7 +1364,7 @@ subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args.
     if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args
       { Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); }
 
-    typedef Kokkos::Experimental::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
+    typedef Kokkos::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
 
     return metafcn::subview( src.rank() , src , args... );
   }
diff --git a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
index da96db2d6b..e9059d64c4 100644
--- a/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
+++ b/lib/kokkos/containers/src/Kokkos_DynamicView.hpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,7 +36,7 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
@@ -57,7 +57,7 @@ namespace Experimental {
  */
 template< typename DataType , typename ... P >
 class DynamicView : public Kokkos::ViewTraits< DataType , P ... >
-{ 
+{
 public:
 
   typedef Kokkos::ViewTraits< DataType , P ... >  traits ;
@@ -68,7 +68,7 @@ private:
 
   typedef Kokkos::Experimental::Impl::SharedAllocationTracker   track_type ;
 
-  static_assert( traits::rank == 1 && traits::rank_dynamic == 1 
+  static_assert( traits::rank == 1 && traits::rank_dynamic == 1
                , "DynamicView must be rank-one" );
 
   static_assert( std::is_trivial< typename traits::value_type >::value &&
@@ -216,14 +216,14 @@ public:
         // Verify that allocation of the requested chunk in in progress.
 
         // The allocated chunk counter is m_chunks[ m_chunk_max ]
-        const uintptr_t n = 
+        const uintptr_t n =
           *reinterpret_cast( m_chunks + m_chunk_max );
 
         if ( n <= ic ) {
           Kokkos::abort("Kokkos::DynamicView array bounds error");
         }
 
-        // Allocation of this chunk is in progress 
+        // Allocation of this chunk is in progress
         // so wait for allocation to complete.
         while ( 0 == *ch );
       }
@@ -267,7 +267,7 @@ public:
         const uintptr_t jc_try = jc ;
 
         // Jump iteration to the chunk counter.
-        
+
         jc = atomic_compare_exchange( pc , jc_try , jc_try + 1 );
 
         if ( jc_try == jc ) {
@@ -316,7 +316,7 @@ public:
       }
       else {
         while ( NC + 1 <= *pc ) {
-          --*pc ;        
+          --*pc ;
           m_pool.deallocate( m_chunks[*pc]
                            , sizeof(value_type) << m_chunk_shift );
           m_chunks[*pc] = 0 ;
@@ -331,7 +331,7 @@ public:
     typename traits::value_type ** m_chunks ;
     uintptr_t                    * m_pc ;
     uintptr_t                      m_nc ;
-    unsigned                       m_chunk_shift ;  
+    unsigned                       m_chunk_shift ;
 
     KOKKOS_INLINE_FUNCTION
     void operator()( int ) const
@@ -348,7 +348,7 @@ public:
         }
         else {
           while ( m_nc + 1 <= *m_pc ) {
-            --*m_pc ;        
+            --*m_pc ;
             m_pool.deallocate( m_chunks[*m_pc]
                              , sizeof(value_type) << m_chunk_shift );
             m_chunks[*m_pc] = 0 ;
@@ -482,7 +482,7 @@ public:
   };
 
 
-  /**\brief  Allocation constructor 
+  /**\brief  Allocation constructor
    *
    *  Memory is allocated in chunks from the memory pool.
    *  The chunk size conforms to the memory pool's chunk size.
@@ -557,7 +557,7 @@ void deep_copy( const View & dst
 
   if ( DstExecCanAccessSrc ) {
     // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
-    Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src );
+    Kokkos::Impl::ViewRemap< dst_type , src_type >( dst , src );
   }
   else {
     Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
@@ -581,7 +581,7 @@ void deep_copy( const DynamicView & dst
 
   if ( DstExecCanAccessSrc ) {
     // Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
-    Kokkos::Experimental::Impl::ViewRemap< dst_type , src_type >( dst , src );
+    Kokkos::Impl::ViewRemap< dst_type , src_type >( dst , src );
   }
   else {
     Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
diff --git a/lib/kokkos/containers/unit_tests/TestCuda.cpp b/lib/kokkos/containers/unit_tests/TestCuda.cpp
index 5a78a5de9e..651a4e7eb8 100644
--- a/lib/kokkos/containers/unit_tests/TestCuda.cpp
+++ b/lib/kokkos/containers/unit_tests/TestCuda.cpp
@@ -69,6 +69,8 @@
 #include 
 #include 
 
+#include 
+
 //----------------------------------------------------------------------------
 
 
@@ -94,6 +96,10 @@ TEST_F( cuda , dyn_view_api) {
   TestDynViewAPI< double , Kokkos::Cuda >();
 }
 
+TEST_F( cuda, viewctorprop_embedded_dim ) {
+  TestViewCtorProp_EmbeddedDim< Kokkos::Cuda >::test_vcpt( 2, 3 );
+}
+
 TEST_F( cuda , staticcrsgraph )
 {
   TestStaticCrsGraph::run_test_graph< Kokkos::Cuda >();
diff --git a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp
index 2448bd077b..5365d91361 100644
--- a/lib/kokkos/containers/unit_tests/TestOpenMP.cpp
+++ b/lib/kokkos/containers/unit_tests/TestOpenMP.cpp
@@ -66,6 +66,8 @@
 #include 
 #include 
 
+#include 
+
 #include 
 
 namespace Test {
@@ -76,14 +78,7 @@ protected:
   {
     std::cout << std::setprecision(5) << std::scientific;
 
-    unsigned threads_count = 4 ;
-
-    if ( Kokkos::hwloc::available() ) {
-      threads_count = Kokkos::hwloc::get_available_numa_count() *
-                      Kokkos::hwloc::get_available_cores_per_numa();
-    }
-
-    Kokkos::OpenMP::initialize( threads_count );
+    Kokkos::OpenMP::initialize();
   }
 
   static void TearDownTestCase()
@@ -96,6 +91,10 @@ TEST_F( openmp, dyn_view_api) {
   TestDynViewAPI< double , Kokkos::OpenMP >();
 }
 
+TEST_F( openmp, viewctorprop_embedded_dim ) {
+  TestViewCtorProp_EmbeddedDim< Kokkos::OpenMP >::test_vcpt( 2, 3 );
+}
+
 TEST_F( openmp, bitset )
 {
   test_bitset();
diff --git a/lib/kokkos/containers/unit_tests/TestSerial.cpp b/lib/kokkos/containers/unit_tests/TestSerial.cpp
index 06c4d9f6ed..1b9b5a2da3 100644
--- a/lib/kokkos/containers/unit_tests/TestSerial.cpp
+++ b/lib/kokkos/containers/unit_tests/TestSerial.cpp
@@ -67,6 +67,8 @@
 #include 
 #include 
 
+#include 
+
 namespace Test {
 
 class serial : public ::testing::Test {
@@ -85,6 +87,10 @@ TEST_F( serial, dyn_view_api) {
   TestDynViewAPI< double , Kokkos::Serial >();
 }
 
+TEST_F( serial, viewctorprop_embedded_dim ) {
+  TestViewCtorProp_EmbeddedDim< Kokkos::Serial >::test_vcpt( 2, 3 );
+}
+
 TEST_F( serial , staticcrsgraph )
 {
   TestStaticCrsGraph::run_test_graph< Kokkos::Serial >();
diff --git a/lib/kokkos/containers/unit_tests/TestThreads.cpp b/lib/kokkos/containers/unit_tests/TestThreads.cpp
index 938ec88e90..aca0b57d65 100644
--- a/lib/kokkos/containers/unit_tests/TestThreads.cpp
+++ b/lib/kokkos/containers/unit_tests/TestThreads.cpp
@@ -70,6 +70,8 @@
 #include 
 #include 
 
+#include 
+
 namespace Test {
 
 class threads : public ::testing::Test {
@@ -103,6 +105,10 @@ TEST_F( threads , dyn_view_api) {
   TestDynViewAPI< double , Kokkos::Threads >();
 }
 
+TEST_F( threads, viewctorprop_embedded_dim ) {
+  TestViewCtorProp_EmbeddedDim< Kokkos::Threads >::test_vcpt( 2, 3 );
+}
+
 TEST_F( threads , staticcrsgraph )
 {
   TestStaticCrsGraph::run_test_graph< Kokkos::Threads >();
diff --git a/lib/kokkos/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp b/lib/kokkos/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp
new file mode 100644
index 0000000000..1efd1ddc51
--- /dev/null
+++ b/lib/kokkos/containers/unit_tests/TestViewCtorPropEmbeddedDim.hpp
@@ -0,0 +1,213 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include 
+
+#include 
+
+#include 
+#include 
+
+#include 
+#include 
+
+namespace Test {
+
+namespace {
+
+template 
+struct TestViewCtorProp_EmbeddedDim {
+
+  using ViewIntType     = typename Kokkos::View< int**, ExecSpace >;
+  using ViewDoubleType     = typename Kokkos::View< double*, ExecSpace >;
+
+  using DynRankViewIntType     = typename Kokkos::DynRankView< int, ExecSpace >;
+  using DynRankViewDoubleType     = typename Kokkos::DynRankView< double, ExecSpace >;
+
+  // Cuda 7.0 has issues with using a lambda in parallel_for to initialize the view - use this functor instead
+  template < class ViewType >
+  struct Functor {
+
+    ViewType v;
+
+    Functor( const ViewType & v_ ) : v(v_) {}
+
+    KOKKOS_INLINE_FUNCTION
+    void operator()( const int i ) const {
+      v(i) = i;
+    }
+
+  };
+
+
+  static void test_vcpt( const int N0, const int N1 )
+  {
+
+    // Create two views to test
+    {
+      using VIT = typename TestViewCtorProp_EmbeddedDim::ViewIntType ;
+      using VDT = typename TestViewCtorProp_EmbeddedDim::ViewDoubleType ;
+
+      VIT vi1("vi1", N0, N1);
+      VDT vd1("vd1", N0);
+
+      // TEST: Test for common type between two views, one with type double, other with type int
+      // Deduce common value_type and construct a view with that type
+      {
+        // Two views
+        auto view_alloc_arg = Kokkos::common_view_alloc_prop(vi1, vd1);
+        typedef typename decltype( view_alloc_arg )::value_type                    CommonViewValueType;
+        typedef typename Kokkos::View< CommonViewValueType*, ExecSpace >  CVT;
+        typedef typename CVT::HostMirror                                           HostCVT;
+
+        // Construct View using the common type; for case of specialization, an 'embedded_dim' would be stored by view_alloc_arg
+        CVT cv1( Kokkos::view_alloc( "cv1", view_alloc_arg ), N0*N1 );
+
+        Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace >(0, N0*N1), 
+          Functor(cv1)
+        );
+
+        HostCVT hcv1 = Kokkos::create_mirror_view( cv1 );
+        Kokkos::deep_copy( hcv1, cv1 );
+
+        ASSERT_EQ( (std::is_same< CommonViewValueType, double >::value) , true ) ;
+      #if 0
+      // debug output
+      for ( int i = 0; i < N0*N1; ++i ) {
+        printf(" Output check: hcv1(%d) = %lf\n ", i, hcv1(i) );
+      }
+
+      printf( " Common value type view: %s \n", typeid( CVT() ).name() );
+      printf( " Common value type: %s \n", typeid( CommonViewValueType() ).name() );
+      if ( std::is_same< CommonViewValueType, double >::value == true ) {
+        printf("Proper common value_type\n");
+      }
+      else {
+        printf("WRONG common value_type\n");
+      }
+      // end debug output
+      #endif
+      }
+
+      {
+        // Single view
+        auto view_alloc_arg = Kokkos::common_view_alloc_prop(vi1);
+        typedef typename decltype( view_alloc_arg )::value_type                    CommonViewValueType;
+        typedef typename Kokkos::View< CommonViewValueType*, ExecSpace >  CVT;
+        typedef typename CVT::HostMirror                                           HostCVT;
+
+        // Construct View using the common type; for case of specialization, an 'embedded_dim' would be stored by view_alloc_arg
+        CVT cv1( Kokkos::view_alloc( "cv1", view_alloc_arg ), N0*N1 );
+
+        Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace >(0, N0*N1), 
+          Functor(cv1)
+        );
+
+        HostCVT hcv1 = Kokkos::create_mirror_view( cv1 );
+        Kokkos::deep_copy( hcv1, cv1 );
+
+        ASSERT_EQ( (std::is_same< CommonViewValueType, int>::value) , true ) ;
+      }
+
+    }
+
+    // Create two dynamic rank views to test
+    {
+      using VIT = typename TestViewCtorProp_EmbeddedDim::DynRankViewIntType ;
+      using VDT = typename TestViewCtorProp_EmbeddedDim::DynRankViewDoubleType ;
+
+      VIT vi1("vi1", N0, N1);
+      VDT vd1("vd1", N0);
+
+      // TEST: Test for common type between two views, one with type double, other with type int
+      // Deduce common value_type and construct a view with that type
+      {
+        // Two views
+        auto view_alloc_arg = Kokkos::common_view_alloc_prop( vi1, vd1 );
+        typedef typename decltype( view_alloc_arg )::value_type                    CommonViewValueType;
+        typedef typename Kokkos::View< CommonViewValueType*, ExecSpace >  CVT;
+        typedef typename CVT::HostMirror                                           HostCVT;
+
+        // Construct View using the common type; for case of specialization, an 'embedded_dim' would be stored by view_alloc_arg
+        CVT cv1( Kokkos::view_alloc( "cv1", view_alloc_arg ), N0*N1 );
+
+
+        Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace >(0, N0*N1), 
+          Functor(cv1)
+        );
+
+        HostCVT hcv1 = Kokkos::create_mirror_view( cv1 );
+        Kokkos::deep_copy( hcv1, cv1 );
+
+        ASSERT_EQ( (std::is_same< CommonViewValueType, double >::value) , true ) ;
+      }
+
+      {
+        // Single view
+        auto view_alloc_arg = Kokkos::common_view_alloc_prop( vi1 );
+        typedef typename decltype( view_alloc_arg )::value_type                    CommonViewValueType;
+        typedef typename Kokkos::View< CommonViewValueType*, ExecSpace >  CVT;
+        typedef typename CVT::HostMirror                                           HostCVT;
+
+        // Construct View using the common type; for case of specialization, an 'embedded_dim' would be stored by view_alloc_arg
+        CVT cv1( Kokkos::view_alloc( "cv1", view_alloc_arg ), N0*N1 );
+
+        Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace >(0, N0*N1), 
+          Functor(cv1)
+        );
+
+        HostCVT hcv1 = Kokkos::create_mirror_view( cv1 );
+        Kokkos::deep_copy( hcv1, cv1 );
+
+        ASSERT_EQ( (std::is_same< CommonViewValueType, int>::value) , true ) ;
+      }
+    }
+
+
+  } // end test_vcpt
+
+}; // end struct
+
+} // namespace
+
+} // namespace Test
diff --git a/lib/kokkos/containers/unit_tests/UnitTestMain.cpp b/lib/kokkos/containers/unit_tests/UnitTestMain.cpp
index f952ab3db5..2b73535c83 100644
--- a/lib/kokkos/containers/unit_tests/UnitTestMain.cpp
+++ b/lib/kokkos/containers/unit_tests/UnitTestMain.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,12 +36,14 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
 
 #include 
+#include 
+#include 
 
 int main(int argc, char *argv[]) {
   ::testing::InitGoogleTest(&argc,argv);
diff --git a/lib/kokkos/core/perf_test/Makefile b/lib/kokkos/core/perf_test/Makefile
index f59e7bbe1c..bb9353f583 100644
--- a/lib/kokkos/core/perf_test/Makefile
+++ b/lib/kokkos/core/perf_test/Makefile
@@ -79,7 +79,6 @@ test-mempool: KokkosCore_PerformanceTest_Mempool
 test-taskdag: KokkosCore_PerformanceTest_TaskDAG
 	./KokkosCore_PerformanceTest_TaskDAG
 
-
 build_all: $(TARGETS)
 
 test: $(TEST_TARGETS)
diff --git a/lib/kokkos/core/perf_test/PerfTestMain.cpp b/lib/kokkos/core/perf_test/PerfTestMain.cpp
index d80cfab8b5..832f650b9a 100644
--- a/lib/kokkos/core/perf_test/PerfTestMain.cpp
+++ b/lib/kokkos/core/perf_test/PerfTestMain.cpp
@@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@@ -36,12 +36,14 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */
 
 #include 
+#include 
+
 #include 
 
 namespace Test {
diff --git a/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp
new file mode 100644
index 0000000000..46321378d9
--- /dev/null
+++ b/lib/kokkos/core/src/Cuda/KokkosExp_Cuda_IterateTile_Refactor.hpp
@@ -0,0 +1,2715 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_EXP_ITERATE_TILE_REFACTOR_HPP
+#define KOKKOS_CUDA_EXP_ITERATE_TILE_REFACTOR_HPP
+
+#include 
+#if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
+
+#include 
+#include 
+#include 
+
+#include 
+
+// #include
+// Including the file above leads to the following type of error:
+// /home/ndellin/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp(84): error: incomplete type is not allowed
+// use existing Kokkos functionality, e.g. max blocks, once resolved
+
+#if defined(KOKKOS_ENABLE_PROFILING)
+#include 
+#include 
+#endif
+
+namespace Kokkos { namespace Experimental { namespace Impl {
+
+namespace Refactor {
+
+// ------------------------------------------------------------------ //
+// ParallelFor iteration pattern
+template< int N , typename RP , typename Functor , typename Tag >
+struct DeviceIterateTile;
+
+//Rank 2
+// Specializations for void tag type
+template< typename RP , typename Functor >
+struct DeviceIterateTile<2,RP,Functor,void >
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Calls m_func(i0, i1) for every index pair handled by the calling thread.
+  // In each dimension d the thread walks tiles blockIdx, blockIdx + gridDim, ...
+  // while the tile number is below m_rp.m_tile_end[d]. Its index inside a tile
+  // is tile*m_rp.m_tile[d] + threadIdx (no lower-bound offset is applied here);
+  // indices not below m_rp.m_upper[d], or thread ids not below the tile
+  // extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    const index_type thr_x = (index_type)threadIdx.x;
+    const index_type thr_y = (index_type)threadIdx.y;
+
+    if (RP::inner_direction == RP::Left) {
+      // Dimension 1 is the outer loop, dimension 0 the inner loop.
+      for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+        if ( !( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) ) { continue; }
+
+        for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+          const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+          if ( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) {
+            m_func(i0 , i1);
+          }
+        }
+      }
+      return;
+    }
+
+    // Any other inner_direction: dimension 0 is the outer loop, dimension 1 the inner loop.
+    for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+      const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+      if ( !( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) ) { continue; }
+
+      for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+        if ( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) {
+          m_func(i0 , i1);
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag >
+struct DeviceIterateTile<2,RP,Functor,Tag>
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Calls m_func(Tag(), i0, i1) for every index pair handled by the calling
+  // thread. In each dimension d the thread walks tiles blockIdx,
+  // blockIdx + gridDim, ... while the tile number is below m_rp.m_tile_end[d].
+  // Its index inside a tile is tile*m_rp.m_tile[d] + threadIdx (no lower-bound
+  // offset is applied here); indices not below m_rp.m_upper[d], or thread ids
+  // not below the tile extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    const index_type thr_x = (index_type)threadIdx.x;
+    const index_type thr_y = (index_type)threadIdx.y;
+
+    if (RP::inner_direction == RP::Left) {
+      // Dimension 1 is the outer loop, dimension 0 the inner loop.
+      for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+        if ( !( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) ) { continue; }
+
+        for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+          const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+          if ( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) {
+            m_func(Tag(), i0 , i1);
+          }
+        }
+      }
+      return;
+    }
+
+    // Any other inner_direction: dimension 0 is the outer loop, dimension 1 the inner loop.
+    for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+      const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+      if ( !( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) ) { continue; }
+
+      for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+        if ( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) {
+          m_func(Tag(), i0 , i1);
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+
+//Rank 3
+// Specializations for void tag type
+template< typename RP , typename Functor >
+struct DeviceIterateTile<3,RP,Functor,void >
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Calls m_func(i0, i1, i2) for every index triple handled by the calling
+  // thread. Dimensions 0, 1, 2 use the x, y, z components of blockIdx, gridDim
+  // and threadIdx respectively: tiles blockIdx, blockIdx + gridDim, ... are
+  // visited while below m_rp.m_tile_end[d], the index inside a tile is
+  // tile*m_rp.m_tile[d] + threadIdx (no lower-bound offset is applied here),
+  // and indices not below m_rp.m_upper[d], or thread ids not below the tile
+  // extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    const index_type thr_x = (index_type)threadIdx.x;
+    const index_type thr_y = (index_type)threadIdx.y;
+    const index_type thr_z = (index_type)threadIdx.z;
+
+    if (RP::inner_direction == RP::Left) {
+      // Loop nest from outermost to innermost: dimension 2, 1, 0.
+      for ( index_type tile2 = (index_type)blockIdx.z; tile2 < m_rp.m_tile_end[2]; tile2 += gridDim.z ) {
+        const index_type i2 = tile2*m_rp.m_tile[2] + thr_z;
+        if ( !( i2 < m_rp.m_upper[2] && thr_z < m_rp.m_tile[2] ) ) { continue; }
+
+        for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+          const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+          if ( !( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) ) { continue; }
+
+          for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+            const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+            if ( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) {
+              m_func(i0 , i1 , i2);
+            }
+          }
+        }
+      }
+      return;
+    }
+
+    // Any other inner_direction: loop nest from outermost to innermost is dimension 0, 1, 2.
+    for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+      const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+      if ( !( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) ) { continue; }
+
+      for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+        if ( !( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) ) { continue; }
+
+        for ( index_type tile2 = (index_type)blockIdx.z; tile2 < m_rp.m_tile_end[2]; tile2 += gridDim.z ) {
+          const index_type i2 = tile2*m_rp.m_tile[2] + thr_z;
+          if ( i2 < m_rp.m_upper[2] && thr_z < m_rp.m_tile[2] ) {
+            m_func(i0 , i1 , i2);
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag >
+struct DeviceIterateTile<3,RP,Functor,Tag>
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Calls m_func(Tag(), i0, i1, i2) for every index triple handled by the
+  // calling thread. Dimensions 0, 1, 2 use the x, y, z components of blockIdx,
+  // gridDim and threadIdx respectively: tiles blockIdx, blockIdx + gridDim, ...
+  // are visited while below m_rp.m_tile_end[d], the index inside a tile is
+  // tile*m_rp.m_tile[d] + threadIdx (no lower-bound offset is applied here),
+  // and indices not below m_rp.m_upper[d], or thread ids not below the tile
+  // extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    const index_type thr_x = (index_type)threadIdx.x;
+    const index_type thr_y = (index_type)threadIdx.y;
+    const index_type thr_z = (index_type)threadIdx.z;
+
+    if (RP::inner_direction == RP::Left) {
+      // Loop nest from outermost to innermost: dimension 2, 1, 0.
+      for ( index_type tile2 = (index_type)blockIdx.z; tile2 < m_rp.m_tile_end[2]; tile2 += gridDim.z ) {
+        const index_type i2 = tile2*m_rp.m_tile[2] + thr_z;
+        if ( !( i2 < m_rp.m_upper[2] && thr_z < m_rp.m_tile[2] ) ) { continue; }
+
+        for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+          const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+          if ( !( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) ) { continue; }
+
+          for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+            const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+            if ( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) {
+              m_func(Tag(), i0 , i1 , i2);
+            }
+          }
+        }
+      }
+      return;
+    }
+
+    // Any other inner_direction: loop nest from outermost to innermost is dimension 0, 1, 2.
+    for ( index_type tile0 = (index_type)blockIdx.x; tile0 < m_rp.m_tile_end[0]; tile0 += gridDim.x ) {
+      const index_type i0 = tile0*m_rp.m_tile[0] + thr_x;
+      if ( !( i0 < m_rp.m_upper[0] && thr_x < m_rp.m_tile[0] ) ) { continue; }
+
+      for ( index_type tile1 = (index_type)blockIdx.y; tile1 < m_rp.m_tile_end[1]; tile1 += gridDim.y ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + thr_y;
+        if ( !( i1 < m_rp.m_upper[1] && thr_y < m_rp.m_tile[1] ) ) { continue; }
+
+        for ( index_type tile2 = (index_type)blockIdx.z; tile2 < m_rp.m_tile_end[2]; tile2 += gridDim.z ) {
+          const index_type i2 = tile2*m_rp.m_tile[2] + thr_z;
+          if ( i2 < m_rp.m_upper[2] && thr_z < m_rp.m_tile[2] ) {
+            m_func(Tag(), i0 , i1 , i2);
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+
+//Rank 4
+// Specializations for void tag type
+template< typename RP , typename Functor >
+struct DeviceIterateTile<4,RP,Functor,void >
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Cap used when splitting blockIdx.x between dimensions 0 and 1. Hard-coded;
+  // the note next to the includes of this file says Kokkos' own limit should
+  // be used once the header problem described there is resolved.
+  static constexpr index_type max_blocks = 65535;
+
+  // Calls m_func(i0, i1, i2, i3) for every index tuple handled by the calling
+  // thread. Dimensions 0 and 1 share the x components: blockIdx.x is split
+  // into a starting tile per dimension and threadIdx.x into a thread id per
+  // dimension. Dimensions 2 and 3 use the y and z components directly. Indices
+  // are tile*m_rp.m_tile[d] + thread id (no lower-bound offset is applied
+  // here); indices not below m_rp.m_upper[d], or thread ids not below the tile
+  // extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    const index_type ntile_0 = m_rp.m_tile_end[0];
+    const index_type ntile_1 = m_rp.m_tile_end[1];
+    const bool over_cap_01 = ( ntile_0*ntile_1 > max_blocks );
+
+    const index_type blk_x = (index_type)blockIdx.x;
+    const index_type thr_x = (index_type)threadIdx.x;
+    const index_type thr_y = (index_type)threadIdx.y;
+    const index_type thr_z = (index_type)threadIdx.z;
+
+    if (RP::inner_direction == RP::Left) {
+      // Dimension 0 gets the capped tile count; dimension 1 gets what is left of the cap.
+      const index_type nblk_0 = ( ntile_0 > max_blocks ? max_blocks : ntile_0 );
+      index_type nblk_1 = ( ntile_1 > max_blocks ? max_blocks : ntile_1 );
+      if ( over_cap_01 ) { nblk_1 = index_type( max_blocks / nblk_0 ); }
+
+      const index_type first_0 = blk_x % nblk_0;
+      const index_type first_1 = blk_x / nblk_0;
+      const index_type lane_0 = thr_x % m_rp.m_tile[0];
+      const index_type lane_1 = thr_x / m_rp.m_tile[0];
+
+      // Loop nest from outermost to innermost: dimension 3, 2, 1, 0.
+      for ( index_type tile3 = (index_type)blockIdx.z; tile3 < m_rp.m_tile_end[3]; tile3 += gridDim.z ) {
+        const index_type i3 = tile3*m_rp.m_tile[3] + thr_z;
+        if ( !( i3 < m_rp.m_upper[3] && thr_z < m_rp.m_tile[3] ) ) { continue; }
+
+        for ( index_type tile2 = (index_type)blockIdx.y; tile2 < m_rp.m_tile_end[2]; tile2 += gridDim.y ) {
+          const index_type i2 = tile2*m_rp.m_tile[2] + thr_y;
+          if ( !( i2 < m_rp.m_upper[2] && thr_y < m_rp.m_tile[2] ) ) { continue; }
+
+          for ( index_type tile1 = first_1; tile1 < m_rp.m_tile_end[1]; tile1 += nblk_1 ) {
+            const index_type i1 = tile1*m_rp.m_tile[1] + lane_1;
+            if ( !( i1 < m_rp.m_upper[1] && lane_1 < m_rp.m_tile[1] ) ) { continue; }
+
+            for ( index_type tile0 = first_0; tile0 < m_rp.m_tile_end[0]; tile0 += nblk_0 ) {
+              const index_type i0 = tile0*m_rp.m_tile[0] + lane_0;
+              if ( i0 < m_rp.m_upper[0] && lane_0 < m_rp.m_tile[0] ) {
+                m_func(i0 , i1 , i2 , i3);
+              }
+            }
+          }
+        }
+      }
+      return;
+    }
+
+    // Any other inner_direction: dimension 1 gets the capped tile count,
+    // dimension 0 gets what is left of the cap.
+    const index_type nblk_1 = ( ntile_1 > max_blocks ? max_blocks : ntile_1 );
+    index_type nblk_0 = ( ntile_0 > max_blocks ? max_blocks : ntile_0 );
+    if ( over_cap_01 ) { nblk_0 = index_type( max_blocks / nblk_1 ); }
+
+    const index_type first_0 = blk_x / nblk_1;
+    const index_type first_1 = blk_x % nblk_1;
+    const index_type lane_0 = thr_x / m_rp.m_tile[1];
+    const index_type lane_1 = thr_x % m_rp.m_tile[1];
+
+    // Loop nest from outermost to innermost: dimension 0, 1, 2, 3.
+    for ( index_type tile0 = first_0; tile0 < m_rp.m_tile_end[0]; tile0 += nblk_0 ) {
+      const index_type i0 = tile0*m_rp.m_tile[0] + lane_0;
+      if ( !( i0 < m_rp.m_upper[0] && lane_0 < m_rp.m_tile[0] ) ) { continue; }
+
+      for ( index_type tile1 = first_1; tile1 < m_rp.m_tile_end[1]; tile1 += nblk_1 ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + lane_1;
+        if ( !( i1 < m_rp.m_upper[1] && lane_1 < m_rp.m_tile[1] ) ) { continue; }
+
+        for ( index_type tile2 = (index_type)blockIdx.y; tile2 < m_rp.m_tile_end[2]; tile2 += gridDim.y ) {
+          const index_type i2 = tile2*m_rp.m_tile[2] + thr_y;
+          if ( !( i2 < m_rp.m_upper[2] && thr_y < m_rp.m_tile[2] ) ) { continue; }
+
+          for ( index_type tile3 = (index_type)blockIdx.z; tile3 < m_rp.m_tile_end[3]; tile3 += gridDim.z ) {
+            const index_type i3 = tile3*m_rp.m_tile[3] + thr_z;
+            if ( i3 < m_rp.m_upper[3] && thr_z < m_rp.m_tile[3] ) {
+              m_func(i0 , i1 , i2 , i3);
+            }
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag >
+struct DeviceIterateTile<4,RP,Functor,Tag>
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Cap used when splitting blockIdx.x between dimensions 0 and 1. Hard-coded;
+  // the note next to the includes of this file says Kokkos' own limit should
+  // be used once the header problem described there is resolved.
+  static constexpr index_type max_blocks = 65535;
+
+  // Calls m_func(Tag(), i0, i1, i2, i3) for every index tuple handled by the
+  // calling thread. Dimensions 0 and 1 share the x components: blockIdx.x is
+  // split into (tile_id0, tile_id1) using numbl0/numbl1 and threadIdx.x into
+  // (thr_id0, thr_id1) using a tile extent. Dimensions 2 and 3 use the y and z
+  // components directly. Indices are tile*m_rp.m_tile[d] + thread id (no
+  // lower-bound offset is applied here); indices not below m_rp.m_upper[d], or
+  // thread ids not below the tile extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    if (RP::inner_direction == RP::Left) {
+      // Dimension 0 gets the capped tile count; dimension 1 gets what is left of the cap.
+      const index_type temp0  =  m_rp.m_tile_end[0];
+      const index_type temp1  =  m_rp.m_tile_end[1];
+      const index_type numbl0 = ( temp0 <= max_blocks ? temp0 : max_blocks ) ;
+      const index_type numbl1 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl0 ) :
+          (  temp1 <= max_blocks ? temp1 : max_blocks ) );
+
+      const index_type tile_id0 = (index_type)blockIdx.x % numbl0;
+      const index_type tile_id1 = (index_type)blockIdx.x / numbl0;
+      const index_type thr_id0 = (index_type)threadIdx.x % m_rp.m_tile[0];
+      const index_type thr_id1 = (index_type)threadIdx.x / m_rp.m_tile[0];
+
+      // Loop nest from outermost to innermost: dimension 3, 2, 1, 0.
+      for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) {
+        const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z;
+        if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) {
+
+          for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) {
+            const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y;
+            if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) {
+
+              for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
+                const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+                if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
+
+                  for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
+                    const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+                    if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
+                      m_func(Tag(), offset_0 , offset_1 , offset_2 , offset_3);
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    else {
+      // Dimension 1 gets the capped tile count; dimension 0 gets what is left of the cap.
+      const index_type temp0  =  m_rp.m_tile_end[0];
+      const index_type temp1  =  m_rp.m_tile_end[1];
+      const index_type numbl1 = ( temp1 <= max_blocks ? temp1 : max_blocks ) ;
+      const index_type numbl0 = ( temp0*temp1 > max_blocks ? index_type( max_blocks / numbl1 ) :
+          ( temp0 <= max_blocks ? temp0 : max_blocks ) );
+
+      const index_type tile_id0 = (index_type)blockIdx.x / numbl1;
+      const index_type tile_id1 = (index_type)blockIdx.x % numbl1;
+      const index_type thr_id0 = (index_type)threadIdx.x / m_rp.m_tile[1];
+      const index_type thr_id1 = (index_type)threadIdx.x % m_rp.m_tile[1];
+
+      // Loop nest from outermost to innermost: dimension 0, 1, 2, 3.
+      for ( index_type i = tile_id0; i < m_rp.m_tile_end[0]; i += numbl0 ) {
+        const index_type offset_0 = i*m_rp.m_tile[0] + thr_id0;
+        if ( offset_0 < m_rp.m_upper[0] && thr_id0 < m_rp.m_tile[0] ) {
+
+          for ( index_type j = tile_id1; j < m_rp.m_tile_end[1]; j += numbl1 ) {
+            // The offset must follow the loop variable j. Using the starting
+            // tile (tile_id1) here would revisit the same dimension-1 tile on
+            // every pass and never reach the later ones.
+            const index_type offset_1 = j*m_rp.m_tile[1] + thr_id1;
+            if ( offset_1 < m_rp.m_upper[1] && thr_id1 < m_rp.m_tile[1] ) {
+
+              for ( index_type tile_id2 = (index_type)blockIdx.y; tile_id2 < m_rp.m_tile_end[2]; tile_id2 += gridDim.y ) {
+                const index_type offset_2 = tile_id2*m_rp.m_tile[2] + (index_type)threadIdx.y;
+                if ( offset_2 < m_rp.m_upper[2] && (index_type)threadIdx.y < m_rp.m_tile[2] ) {
+
+                  for ( index_type tile_id3 = (index_type)blockIdx.z; tile_id3 < m_rp.m_tile_end[3]; tile_id3 += gridDim.z ) {
+                    const index_type offset_3 = tile_id3*m_rp.m_tile[3] + (index_type)threadIdx.z;
+                    if ( offset_3 < m_rp.m_upper[3] && (index_type)threadIdx.z < m_rp.m_tile[3] ) {
+                      m_func(Tag() , offset_0 , offset_1 , offset_2 , offset_3);
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+
+//Rank 5
+// Specializations for void tag type
+template< typename RP , typename Functor >
+struct DeviceIterateTile<5,RP,Functor,void >
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Cap used when splitting blockIdx.x / blockIdx.y between a pair of
+  // dimensions. Hard-coded; the note next to the includes of this file says
+  // Kokkos' own limit should be used once the header problem described there
+  // is resolved.
+  static constexpr index_type max_blocks = 65535;
+
+  // Calls m_func(i0, i1, i2, i3, i4) for every index tuple handled by the
+  // calling thread. Dimensions 0 and 1 share the x components and dimensions
+  // 2 and 3 share the y components: the block index is split into a starting
+  // tile per dimension and the thread index into a thread id per dimension.
+  // Dimension 4 uses the z components directly. Indices are
+  // tile*m_rp.m_tile[d] + thread id (no lower-bound offset is applied here);
+  // indices not below m_rp.m_upper[d], or thread ids not below the tile
+  // extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    const index_type ntile_0 = m_rp.m_tile_end[0];
+    const index_type ntile_1 = m_rp.m_tile_end[1];
+    const index_type ntile_2 = m_rp.m_tile_end[2];
+    const index_type ntile_3 = m_rp.m_tile_end[3];
+    const bool over_cap_01 = ( ntile_0*ntile_1 > max_blocks );
+    const bool over_cap_23 = ( ntile_2*ntile_3 > max_blocks );
+
+    const index_type blk_x = (index_type)blockIdx.x;
+    const index_type blk_y = (index_type)blockIdx.y;
+    const index_type thr_x = (index_type)threadIdx.x;
+    const index_type thr_y = (index_type)threadIdx.y;
+    const index_type thr_z = (index_type)threadIdx.z;
+
+    if (RP::inner_direction == RP::Left) {
+      // x components: dimension 0 gets the capped count, dimension 1 what is left of the cap.
+      const index_type nblk_0 = ( ntile_0 > max_blocks ? max_blocks : ntile_0 );
+      index_type nblk_1 = ( ntile_1 > max_blocks ? max_blocks : ntile_1 );
+      if ( over_cap_01 ) { nblk_1 = index_type( max_blocks / nblk_0 ); }
+
+      const index_type first_0 = blk_x % nblk_0;
+      const index_type first_1 = blk_x / nblk_0;
+      const index_type lane_0 = thr_x % m_rp.m_tile[0];
+      const index_type lane_1 = thr_x / m_rp.m_tile[0];
+
+      // y components: dimension 2 gets the capped count, dimension 3 what is left of the cap.
+      const index_type nblk_2 = ( ntile_2 > max_blocks ? max_blocks : ntile_2 );
+      index_type nblk_3 = ( ntile_3 > max_blocks ? max_blocks : ntile_3 );
+      if ( over_cap_23 ) { nblk_3 = index_type( max_blocks / nblk_2 ); }
+
+      const index_type first_2 = blk_y % nblk_2;
+      const index_type first_3 = blk_y / nblk_2;
+      const index_type lane_2 = thr_y % m_rp.m_tile[2];
+      const index_type lane_3 = thr_y / m_rp.m_tile[2];
+
+      // Loop nest from outermost to innermost: dimension 4, 3, 2, 1, 0.
+      for ( index_type tile4 = (index_type)blockIdx.z; tile4 < m_rp.m_tile_end[4]; tile4 += gridDim.z ) {
+        const index_type i4 = tile4*m_rp.m_tile[4] + thr_z;
+        if ( !( i4 < m_rp.m_upper[4] && thr_z < m_rp.m_tile[4] ) ) { continue; }
+
+        for ( index_type tile3 = first_3; tile3 < m_rp.m_tile_end[3]; tile3 += nblk_3 ) {
+          const index_type i3 = tile3*m_rp.m_tile[3] + lane_3;
+          if ( !( i3 < m_rp.m_upper[3] && lane_3 < m_rp.m_tile[3] ) ) { continue; }
+
+          for ( index_type tile2 = first_2; tile2 < m_rp.m_tile_end[2]; tile2 += nblk_2 ) {
+            const index_type i2 = tile2*m_rp.m_tile[2] + lane_2;
+            if ( !( i2 < m_rp.m_upper[2] && lane_2 < m_rp.m_tile[2] ) ) { continue; }
+
+            for ( index_type tile1 = first_1; tile1 < m_rp.m_tile_end[1]; tile1 += nblk_1 ) {
+              const index_type i1 = tile1*m_rp.m_tile[1] + lane_1;
+              if ( !( i1 < m_rp.m_upper[1] && lane_1 < m_rp.m_tile[1] ) ) { continue; }
+
+              for ( index_type tile0 = first_0; tile0 < m_rp.m_tile_end[0]; tile0 += nblk_0 ) {
+                const index_type i0 = tile0*m_rp.m_tile[0] + lane_0;
+                if ( i0 < m_rp.m_upper[0] && lane_0 < m_rp.m_tile[0] ) {
+                  m_func(i0 , i1 , i2 , i3, i4);
+                }
+              }
+            }
+          }
+        }
+      }
+      return;
+    }
+
+    // Any other inner_direction.
+    // x components: dimension 1 gets the capped count, dimension 0 what is left of the cap.
+    const index_type nblk_1 = ( ntile_1 > max_blocks ? max_blocks : ntile_1 );
+    index_type nblk_0 = ( ntile_0 > max_blocks ? max_blocks : ntile_0 );
+    if ( over_cap_01 ) { nblk_0 = index_type( max_blocks / nblk_1 ); }
+
+    const index_type first_0 = blk_x / nblk_1;
+    const index_type first_1 = blk_x % nblk_1;
+    const index_type lane_0 = thr_x / m_rp.m_tile[1];
+    const index_type lane_1 = thr_x % m_rp.m_tile[1];
+
+    // y components: dimension 3 gets the capped count, dimension 2 what is left of the cap.
+    const index_type nblk_3 = ( ntile_3 > max_blocks ? max_blocks : ntile_3 );
+    index_type nblk_2 = ( ntile_2 > max_blocks ? max_blocks : ntile_2 );
+    if ( over_cap_23 ) { nblk_2 = index_type( max_blocks / nblk_3 ); }
+
+    const index_type first_2 = blk_y / nblk_3;
+    const index_type first_3 = blk_y % nblk_3;
+    const index_type lane_2 = thr_y / m_rp.m_tile[3];
+    const index_type lane_3 = thr_y % m_rp.m_tile[3];
+
+    // Loop nest from outermost to innermost: dimension 0, 1, 2, 3, 4.
+    for ( index_type tile0 = first_0; tile0 < m_rp.m_tile_end[0]; tile0 += nblk_0 ) {
+      const index_type i0 = tile0*m_rp.m_tile[0] + lane_0;
+      if ( !( i0 < m_rp.m_upper[0] && lane_0 < m_rp.m_tile[0] ) ) { continue; }
+
+      for ( index_type tile1 = first_1; tile1 < m_rp.m_tile_end[1]; tile1 += nblk_1 ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + lane_1;
+        if ( !( i1 < m_rp.m_upper[1] && lane_1 < m_rp.m_tile[1] ) ) { continue; }
+
+        for ( index_type tile2 = first_2; tile2 < m_rp.m_tile_end[2]; tile2 += nblk_2 ) {
+          const index_type i2 = tile2*m_rp.m_tile[2] + lane_2;
+          if ( !( i2 < m_rp.m_upper[2] && lane_2 < m_rp.m_tile[2] ) ) { continue; }
+
+          for ( index_type tile3 = first_3; tile3 < m_rp.m_tile_end[3]; tile3 += nblk_3 ) {
+            const index_type i3 = tile3*m_rp.m_tile[3] + lane_3;
+            if ( !( i3 < m_rp.m_upper[3] && lane_3 < m_rp.m_tile[3] ) ) { continue; }
+
+            for ( index_type tile4 = (index_type)blockIdx.z; tile4 < m_rp.m_tile_end[4]; tile4 += gridDim.z ) {
+              const index_type i4 = tile4*m_rp.m_tile[4] + thr_z;
+              if ( i4 < m_rp.m_upper[4] && thr_z < m_rp.m_tile[4] ) {
+                m_func(i0 , i1 , i2 , i3 , i4);
+              }
+            }
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag >
+struct DeviceIterateTile<5,RP,Functor,Tag>
+{
+  using index_type = typename RP::index_type;
+
+  // Keeps references to the range policy and the functor (no copies).
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Cap used when splitting blockIdx.x / blockIdx.y between a pair of
+  // dimensions. Hard-coded; the note next to the includes of this file says
+  // Kokkos' own limit should be used once the header problem described there
+  // is resolved.
+  static constexpr index_type max_blocks = 65535;
+
+  // Calls m_func(Tag(), i0, i1, i2, i3, i4) for every index tuple handled by
+  // the calling thread. Dimensions 0 and 1 share the x components and
+  // dimensions 2 and 3 share the y components: the block index is split into
+  // a starting tile per dimension and the thread index into a thread id per
+  // dimension. Dimension 4 uses the z components directly. Indices are
+  // tile*m_rp.m_tile[d] + thread id (no lower-bound offset is applied here);
+  // indices not below m_rp.m_upper[d], or thread ids not below the tile
+  // extent, are skipped.
+  inline __device__
+  void exec_range() const
+  {
+    const index_type ntile_0 = m_rp.m_tile_end[0];
+    const index_type ntile_1 = m_rp.m_tile_end[1];
+    const index_type ntile_2 = m_rp.m_tile_end[2];
+    const index_type ntile_3 = m_rp.m_tile_end[3];
+    const bool over_cap_01 = ( ntile_0*ntile_1 > max_blocks );
+    const bool over_cap_23 = ( ntile_2*ntile_3 > max_blocks );
+
+    const index_type blk_x = (index_type)blockIdx.x;
+    const index_type blk_y = (index_type)blockIdx.y;
+    const index_type thr_x = (index_type)threadIdx.x;
+    const index_type thr_y = (index_type)threadIdx.y;
+    const index_type thr_z = (index_type)threadIdx.z;
+
+    if (RP::inner_direction == RP::Left) {
+      // x components: dimension 0 gets the capped count, dimension 1 what is left of the cap.
+      const index_type nblk_0 = ( ntile_0 > max_blocks ? max_blocks : ntile_0 );
+      index_type nblk_1 = ( ntile_1 > max_blocks ? max_blocks : ntile_1 );
+      if ( over_cap_01 ) { nblk_1 = index_type( max_blocks / nblk_0 ); }
+
+      const index_type first_0 = blk_x % nblk_0;
+      const index_type first_1 = blk_x / nblk_0;
+      const index_type lane_0 = thr_x % m_rp.m_tile[0];
+      const index_type lane_1 = thr_x / m_rp.m_tile[0];
+
+      // y components: dimension 2 gets the capped count, dimension 3 what is left of the cap.
+      const index_type nblk_2 = ( ntile_2 > max_blocks ? max_blocks : ntile_2 );
+      index_type nblk_3 = ( ntile_3 > max_blocks ? max_blocks : ntile_3 );
+      if ( over_cap_23 ) { nblk_3 = index_type( max_blocks / nblk_2 ); }
+
+      const index_type first_2 = blk_y % nblk_2;
+      const index_type first_3 = blk_y / nblk_2;
+      const index_type lane_2 = thr_y % m_rp.m_tile[2];
+      const index_type lane_3 = thr_y / m_rp.m_tile[2];
+
+      // Loop nest from outermost to innermost: dimension 4, 3, 2, 1, 0.
+      for ( index_type tile4 = (index_type)blockIdx.z; tile4 < m_rp.m_tile_end[4]; tile4 += gridDim.z ) {
+        const index_type i4 = tile4*m_rp.m_tile[4] + thr_z;
+        if ( !( i4 < m_rp.m_upper[4] && thr_z < m_rp.m_tile[4] ) ) { continue; }
+
+        for ( index_type tile3 = first_3; tile3 < m_rp.m_tile_end[3]; tile3 += nblk_3 ) {
+          const index_type i3 = tile3*m_rp.m_tile[3] + lane_3;
+          if ( !( i3 < m_rp.m_upper[3] && lane_3 < m_rp.m_tile[3] ) ) { continue; }
+
+          for ( index_type tile2 = first_2; tile2 < m_rp.m_tile_end[2]; tile2 += nblk_2 ) {
+            const index_type i2 = tile2*m_rp.m_tile[2] + lane_2;
+            if ( !( i2 < m_rp.m_upper[2] && lane_2 < m_rp.m_tile[2] ) ) { continue; }
+
+            for ( index_type tile1 = first_1; tile1 < m_rp.m_tile_end[1]; tile1 += nblk_1 ) {
+              const index_type i1 = tile1*m_rp.m_tile[1] + lane_1;
+              if ( !( i1 < m_rp.m_upper[1] && lane_1 < m_rp.m_tile[1] ) ) { continue; }
+
+              for ( index_type tile0 = first_0; tile0 < m_rp.m_tile_end[0]; tile0 += nblk_0 ) {
+                const index_type i0 = tile0*m_rp.m_tile[0] + lane_0;
+                if ( i0 < m_rp.m_upper[0] && lane_0 < m_rp.m_tile[0] ) {
+                  m_func(Tag() , i0 , i1 , i2 , i3, i4);
+                }
+              }
+            }
+          }
+        }
+      }
+      return;
+    }
+
+    // Any other inner_direction.
+    // x components: dimension 1 gets the capped count, dimension 0 what is left of the cap.
+    const index_type nblk_1 = ( ntile_1 > max_blocks ? max_blocks : ntile_1 );
+    index_type nblk_0 = ( ntile_0 > max_blocks ? max_blocks : ntile_0 );
+    if ( over_cap_01 ) { nblk_0 = index_type( max_blocks / nblk_1 ); }
+
+    const index_type first_0 = blk_x / nblk_1;
+    const index_type first_1 = blk_x % nblk_1;
+    const index_type lane_0 = thr_x / m_rp.m_tile[1];
+    const index_type lane_1 = thr_x % m_rp.m_tile[1];
+
+    // y components: dimension 3 gets the capped count, dimension 2 what is left of the cap.
+    const index_type nblk_3 = ( ntile_3 > max_blocks ? max_blocks : ntile_3 );
+    index_type nblk_2 = ( ntile_2 > max_blocks ? max_blocks : ntile_2 );
+    if ( over_cap_23 ) { nblk_2 = index_type( max_blocks / nblk_3 ); }
+
+    const index_type first_2 = blk_y / nblk_3;
+    const index_type first_3 = blk_y % nblk_3;
+    const index_type lane_2 = thr_y / m_rp.m_tile[3];
+    const index_type lane_3 = thr_y % m_rp.m_tile[3];
+
+    // Loop nest from outermost to innermost: dimension 0, 1, 2, 3, 4.
+    for ( index_type tile0 = first_0; tile0 < m_rp.m_tile_end[0]; tile0 += nblk_0 ) {
+      const index_type i0 = tile0*m_rp.m_tile[0] + lane_0;
+      if ( !( i0 < m_rp.m_upper[0] && lane_0 < m_rp.m_tile[0] ) ) { continue; }
+
+      for ( index_type tile1 = first_1; tile1 < m_rp.m_tile_end[1]; tile1 += nblk_1 ) {
+        const index_type i1 = tile1*m_rp.m_tile[1] + lane_1;
+        if ( !( i1 < m_rp.m_upper[1] && lane_1 < m_rp.m_tile[1] ) ) { continue; }
+
+        for ( index_type tile2 = first_2; tile2 < m_rp.m_tile_end[2]; tile2 += nblk_2 ) {
+          const index_type i2 = tile2*m_rp.m_tile[2] + lane_2;
+          if ( !( i2 < m_rp.m_upper[2] && lane_2 < m_rp.m_tile[2] ) ) { continue; }
+
+          for ( index_type tile3 = first_3; tile3 < m_rp.m_tile_end[3]; tile3 += nblk_3 ) {
+            const index_type i3 = tile3*m_rp.m_tile[3] + lane_3;
+            if ( !( i3 < m_rp.m_upper[3] && lane_3 < m_rp.m_tile[3] ) ) { continue; }
+
+            for ( index_type tile4 = (index_type)blockIdx.z; tile4 < m_rp.m_tile_end[4]; tile4 += gridDim.z ) {
+              const index_type i4 = tile4*m_rp.m_tile[4] + thr_z;
+              if ( i4 < m_rp.m_upper[4] && thr_z < m_rp.m_tile[4] ) {
+                m_func(Tag() , i0 , i1 , i2 , i3 , i4);
+              }
+            }
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+
+//Rank 6
+// Specializations for void tag type
+template< typename RP , typename Functor >
+struct DeviceIterateTile<6,RP,Functor,void >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy and the functor.
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Cap on the block count assigned to a dimension. Hard-coded for now;
+  // TODO: consider Kokkos::Impl::CudaTraits::UpperBoundGridCount instead.
+  static constexpr index_type max_blocks = 65535;
+
+  // Calls the functor for every 6-d index owned by the calling thread.
+  // Dimensions are packed two per axis: (0,1) on x, (2,3) on y, (4,5) on z.
+  // Tile ids are decoded from blockIdx, in-tile ids from threadIdx; tiles
+  // beyond the per-dimension block count are reached by striding.
+  inline __device__
+  void exec_range() const
+  {
+    // Left: the lower dimension of each pair is the fast one on its axis
+    if (RP::inner_direction == RP::Left) {
+      // axis x -> dimensions 0 (fast) and 1 (slow)
+      index_type fast_tiles = m_rp.m_tile_end[0];
+      index_type slow_tiles = m_rp.m_tile_end[1];
+      const index_type nblk0 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk1 = ( fast_tiles*slow_tiles > max_blocks
+                               ? index_type( max_blocks / nblk0 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile0 = (index_type)blockIdx.x % nblk0;
+      const index_type tile1 = (index_type)blockIdx.x / nblk0;
+      const index_type lane0 = (index_type)threadIdx.x % m_rp.m_tile[0];
+      const index_type lane1 = (index_type)threadIdx.x / m_rp.m_tile[0];
+
+      // axis y -> dimensions 2 (fast) and 3 (slow)
+      fast_tiles = m_rp.m_tile_end[2];
+      slow_tiles = m_rp.m_tile_end[3];
+      const index_type nblk2 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk3 = ( fast_tiles*slow_tiles > max_blocks
+                               ? index_type( max_blocks / nblk2 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile2 = (index_type)blockIdx.y % nblk2;
+      const index_type tile3 = (index_type)blockIdx.y / nblk2;
+      const index_type lane2 = (index_type)threadIdx.y % m_rp.m_tile[2];
+      const index_type lane3 = (index_type)threadIdx.y / m_rp.m_tile[2];
+
+      // axis z -> dimensions 4 (fast) and 5 (slow)
+      fast_tiles = m_rp.m_tile_end[4];
+      slow_tiles = m_rp.m_tile_end[5];
+      const index_type nblk4 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk5 = ( fast_tiles*slow_tiles > max_blocks
+                               ? index_type( max_blocks / nblk4 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile4 = (index_type)blockIdx.z % nblk4;
+      const index_type tile5 = (index_type)blockIdx.z / nblk4;
+      const index_type lane4 = (index_type)threadIdx.z % m_rp.m_tile[4];
+      const index_type lane5 = (index_type)threadIdx.z / m_rp.m_tile[4];
+
+      // Outermost loop is dimension 5, innermost is dimension 0.
+      // Each guard skips indices past the upper bound or outside the tile.
+      for ( index_type t5 = tile5; t5 < m_rp.m_tile_end[5]; t5 += nblk5 ) {
+        const index_type idx5 = t5*m_rp.m_tile[5] + lane5;
+        if ( !( idx5 < m_rp.m_upper[5] && lane5 < m_rp.m_tile[5] ) ) { continue; }
+
+        for ( index_type t4 = tile4; t4 < m_rp.m_tile_end[4]; t4 += nblk4 ) {
+          const index_type idx4 = t4*m_rp.m_tile[4] + lane4;
+          if ( !( idx4 < m_rp.m_upper[4] && lane4 < m_rp.m_tile[4] ) ) { continue; }
+
+          for ( index_type t3 = tile3; t3 < m_rp.m_tile_end[3]; t3 += nblk3 ) {
+            const index_type idx3 = t3*m_rp.m_tile[3] + lane3;
+            if ( !( idx3 < m_rp.m_upper[3] && lane3 < m_rp.m_tile[3] ) ) { continue; }
+
+            for ( index_type t2 = tile2; t2 < m_rp.m_tile_end[2]; t2 += nblk2 ) {
+              const index_type idx2 = t2*m_rp.m_tile[2] + lane2;
+              if ( !( idx2 < m_rp.m_upper[2] && lane2 < m_rp.m_tile[2] ) ) { continue; }
+
+              for ( index_type t1 = tile1; t1 < m_rp.m_tile_end[1]; t1 += nblk1 ) {
+                const index_type idx1 = t1*m_rp.m_tile[1] + lane1;
+                if ( !( idx1 < m_rp.m_upper[1] && lane1 < m_rp.m_tile[1] ) ) { continue; }
+
+                for ( index_type t0 = tile0; t0 < m_rp.m_tile_end[0]; t0 += nblk0 ) {
+                  const index_type idx0 = t0*m_rp.m_tile[0] + lane0;
+                  if ( !( idx0 < m_rp.m_upper[0] && lane0 < m_rp.m_tile[0] ) ) { continue; }
+
+                  m_func(idx0 , idx1 , idx2 , idx3, idx4, idx5);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    // Otherwise: the higher dimension of each pair is the fast one on its axis
+    else {
+      // axis x -> dimensions 0 (slow) and 1 (fast)
+      index_type slow_tiles = m_rp.m_tile_end[0];
+      index_type fast_tiles = m_rp.m_tile_end[1];
+      const index_type nblk1 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk0 = ( slow_tiles*fast_tiles > max_blocks
+                               ? index_type( max_blocks / nblk1 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile0 = (index_type)blockIdx.x / nblk1;
+      const index_type tile1 = (index_type)blockIdx.x % nblk1;
+      const index_type lane0 = (index_type)threadIdx.x / m_rp.m_tile[1];
+      const index_type lane1 = (index_type)threadIdx.x % m_rp.m_tile[1];
+
+      // axis y -> dimensions 2 (slow) and 3 (fast)
+      slow_tiles = m_rp.m_tile_end[2];
+      fast_tiles = m_rp.m_tile_end[3];
+      const index_type nblk3 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk2 = ( slow_tiles*fast_tiles > max_blocks
+                               ? index_type( max_blocks / nblk3 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile2 = (index_type)blockIdx.y / nblk3;
+      const index_type tile3 = (index_type)blockIdx.y % nblk3;
+      const index_type lane2 = (index_type)threadIdx.y / m_rp.m_tile[3];
+      const index_type lane3 = (index_type)threadIdx.y % m_rp.m_tile[3];
+
+      // axis z -> dimensions 4 (slow) and 5 (fast)
+      slow_tiles = m_rp.m_tile_end[4];
+      fast_tiles = m_rp.m_tile_end[5];
+      const index_type nblk5 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk4 = ( slow_tiles*fast_tiles > max_blocks
+                               ? index_type( max_blocks / nblk5 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile4 = (index_type)blockIdx.z / nblk5;
+      const index_type tile5 = (index_type)blockIdx.z % nblk5;
+      const index_type lane4 = (index_type)threadIdx.z / m_rp.m_tile[5];
+      const index_type lane5 = (index_type)threadIdx.z % m_rp.m_tile[5];
+
+      // Outermost loop is dimension 0, innermost is dimension 5.
+      for ( index_type t0 = tile0; t0 < m_rp.m_tile_end[0]; t0 += nblk0 ) {
+        const index_type idx0 = t0*m_rp.m_tile[0] + lane0;
+        if ( !( idx0 < m_rp.m_upper[0] && lane0 < m_rp.m_tile[0] ) ) { continue; }
+
+        for ( index_type t1 = tile1; t1 < m_rp.m_tile_end[1]; t1 += nblk1 ) {
+          const index_type idx1 = t1*m_rp.m_tile[1] + lane1;
+          if ( !( idx1 < m_rp.m_upper[1] && lane1 < m_rp.m_tile[1] ) ) { continue; }
+
+          for ( index_type t2 = tile2; t2 < m_rp.m_tile_end[2]; t2 += nblk2 ) {
+            const index_type idx2 = t2*m_rp.m_tile[2] + lane2;
+            if ( !( idx2 < m_rp.m_upper[2] && lane2 < m_rp.m_tile[2] ) ) { continue; }
+
+            for ( index_type t3 = tile3; t3 < m_rp.m_tile_end[3]; t3 += nblk3 ) {
+              const index_type idx3 = t3*m_rp.m_tile[3] + lane3;
+              if ( !( idx3 < m_rp.m_upper[3] && lane3 < m_rp.m_tile[3] ) ) { continue; }
+
+              for ( index_type t4 = tile4; t4 < m_rp.m_tile_end[4]; t4 += nblk4 ) {
+                const index_type idx4 = t4*m_rp.m_tile[4] + lane4;
+                if ( !( idx4 < m_rp.m_upper[4] && lane4 < m_rp.m_tile[4] ) ) { continue; }
+
+                for ( index_type t5 = tile5; t5 < m_rp.m_tile_end[5]; t5 += nblk5 ) {
+                  const index_type idx5 = t5*m_rp.m_tile[5] + lane5;
+                  if ( !( idx5 < m_rp.m_upper[5] && lane5 < m_rp.m_tile[5] ) ) { continue; }
+
+                  m_func(idx0 , idx1 , idx2 , idx3 , idx4 , idx5);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag >
+struct DeviceIterateTile<6,RP,Functor,Tag>
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy and the functor.
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ )
+  : m_rp(rp_)
+  , m_func(f_)
+  {}
+
+  // Cap on the block count assigned to a dimension. Hard-coded for now;
+  // TODO: consider Kokkos::Impl::CudaTraits::UpperBoundGridCount instead.
+  static constexpr index_type max_blocks = 65535;
+
+  // Calls the functor, with a default-constructed Tag as first argument, for
+  // every 6-d index owned by the calling thread. Dimensions are packed two
+  // per axis: (0,1) on x, (2,3) on y, (4,5) on z. Tile ids are decoded from
+  // blockIdx, in-tile ids from threadIdx; tiles beyond the per-dimension
+  // block count are reached by striding.
+  inline __device__
+  void exec_range() const
+  {
+    // Left: the lower dimension of each pair is the fast one on its axis
+    if (RP::inner_direction == RP::Left) {
+      // axis x -> dimensions 0 (fast) and 1 (slow)
+      index_type fast_tiles = m_rp.m_tile_end[0];
+      index_type slow_tiles = m_rp.m_tile_end[1];
+      const index_type nblk0 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk1 = ( fast_tiles*slow_tiles > max_blocks
+                               ? index_type( max_blocks / nblk0 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile0 = (index_type)blockIdx.x % nblk0;
+      const index_type tile1 = (index_type)blockIdx.x / nblk0;
+      const index_type lane0 = (index_type)threadIdx.x % m_rp.m_tile[0];
+      const index_type lane1 = (index_type)threadIdx.x / m_rp.m_tile[0];
+
+      // axis y -> dimensions 2 (fast) and 3 (slow)
+      fast_tiles = m_rp.m_tile_end[2];
+      slow_tiles = m_rp.m_tile_end[3];
+      const index_type nblk2 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk3 = ( fast_tiles*slow_tiles > max_blocks
+                               ? index_type( max_blocks / nblk2 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile2 = (index_type)blockIdx.y % nblk2;
+      const index_type tile3 = (index_type)blockIdx.y / nblk2;
+      const index_type lane2 = (index_type)threadIdx.y % m_rp.m_tile[2];
+      const index_type lane3 = (index_type)threadIdx.y / m_rp.m_tile[2];
+
+      // axis z -> dimensions 4 (fast) and 5 (slow)
+      fast_tiles = m_rp.m_tile_end[4];
+      slow_tiles = m_rp.m_tile_end[5];
+      const index_type nblk4 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk5 = ( fast_tiles*slow_tiles > max_blocks
+                               ? index_type( max_blocks / nblk4 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile4 = (index_type)blockIdx.z % nblk4;
+      const index_type tile5 = (index_type)blockIdx.z / nblk4;
+      const index_type lane4 = (index_type)threadIdx.z % m_rp.m_tile[4];
+      const index_type lane5 = (index_type)threadIdx.z / m_rp.m_tile[4];
+
+      // Outermost loop is dimension 5, innermost is dimension 0.
+      // Each guard skips indices past the upper bound or outside the tile.
+      for ( index_type t5 = tile5; t5 < m_rp.m_tile_end[5]; t5 += nblk5 ) {
+        const index_type idx5 = t5*m_rp.m_tile[5] + lane5;
+        if ( !( idx5 < m_rp.m_upper[5] && lane5 < m_rp.m_tile[5] ) ) { continue; }
+
+        for ( index_type t4 = tile4; t4 < m_rp.m_tile_end[4]; t4 += nblk4 ) {
+          const index_type idx4 = t4*m_rp.m_tile[4] + lane4;
+          if ( !( idx4 < m_rp.m_upper[4] && lane4 < m_rp.m_tile[4] ) ) { continue; }
+
+          for ( index_type t3 = tile3; t3 < m_rp.m_tile_end[3]; t3 += nblk3 ) {
+            const index_type idx3 = t3*m_rp.m_tile[3] + lane3;
+            if ( !( idx3 < m_rp.m_upper[3] && lane3 < m_rp.m_tile[3] ) ) { continue; }
+
+            for ( index_type t2 = tile2; t2 < m_rp.m_tile_end[2]; t2 += nblk2 ) {
+              const index_type idx2 = t2*m_rp.m_tile[2] + lane2;
+              if ( !( idx2 < m_rp.m_upper[2] && lane2 < m_rp.m_tile[2] ) ) { continue; }
+
+              for ( index_type t1 = tile1; t1 < m_rp.m_tile_end[1]; t1 += nblk1 ) {
+                const index_type idx1 = t1*m_rp.m_tile[1] + lane1;
+                if ( !( idx1 < m_rp.m_upper[1] && lane1 < m_rp.m_tile[1] ) ) { continue; }
+
+                for ( index_type t0 = tile0; t0 < m_rp.m_tile_end[0]; t0 += nblk0 ) {
+                  const index_type idx0 = t0*m_rp.m_tile[0] + lane0;
+                  if ( !( idx0 < m_rp.m_upper[0] && lane0 < m_rp.m_tile[0] ) ) { continue; }
+
+                  m_func(Tag() , idx0 , idx1 , idx2 , idx3, idx4, idx5);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    // Otherwise: the higher dimension of each pair is the fast one on its axis
+    else {
+      // axis x -> dimensions 0 (slow) and 1 (fast)
+      index_type slow_tiles = m_rp.m_tile_end[0];
+      index_type fast_tiles = m_rp.m_tile_end[1];
+      const index_type nblk1 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk0 = ( slow_tiles*fast_tiles > max_blocks
+                               ? index_type( max_blocks / nblk1 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile0 = (index_type)blockIdx.x / nblk1;
+      const index_type tile1 = (index_type)blockIdx.x % nblk1;
+      const index_type lane0 = (index_type)threadIdx.x / m_rp.m_tile[1];
+      const index_type lane1 = (index_type)threadIdx.x % m_rp.m_tile[1];
+
+      // axis y -> dimensions 2 (slow) and 3 (fast)
+      slow_tiles = m_rp.m_tile_end[2];
+      fast_tiles = m_rp.m_tile_end[3];
+      const index_type nblk3 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk2 = ( slow_tiles*fast_tiles > max_blocks
+                               ? index_type( max_blocks / nblk3 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile2 = (index_type)blockIdx.y / nblk3;
+      const index_type tile3 = (index_type)blockIdx.y % nblk3;
+      const index_type lane2 = (index_type)threadIdx.y / m_rp.m_tile[3];
+      const index_type lane3 = (index_type)threadIdx.y % m_rp.m_tile[3];
+
+      // axis z -> dimensions 4 (slow) and 5 (fast)
+      slow_tiles = m_rp.m_tile_end[4];
+      fast_tiles = m_rp.m_tile_end[5];
+      const index_type nblk5 = ( fast_tiles <= max_blocks ? fast_tiles : max_blocks );
+      const index_type nblk4 = ( slow_tiles*fast_tiles > max_blocks
+                               ? index_type( max_blocks / nblk5 )
+                               : ( slow_tiles <= max_blocks ? slow_tiles : max_blocks ) );
+      const index_type tile4 = (index_type)blockIdx.z / nblk5;
+      const index_type tile5 = (index_type)blockIdx.z % nblk5;
+      const index_type lane4 = (index_type)threadIdx.z / m_rp.m_tile[5];
+      const index_type lane5 = (index_type)threadIdx.z % m_rp.m_tile[5];
+
+      // Outermost loop is dimension 0, innermost is dimension 5.
+      for ( index_type t0 = tile0; t0 < m_rp.m_tile_end[0]; t0 += nblk0 ) {
+        const index_type idx0 = t0*m_rp.m_tile[0] + lane0;
+        if ( !( idx0 < m_rp.m_upper[0] && lane0 < m_rp.m_tile[0] ) ) { continue; }
+
+        for ( index_type t1 = tile1; t1 < m_rp.m_tile_end[1]; t1 += nblk1 ) {
+          const index_type idx1 = t1*m_rp.m_tile[1] + lane1;
+          if ( !( idx1 < m_rp.m_upper[1] && lane1 < m_rp.m_tile[1] ) ) { continue; }
+
+          for ( index_type t2 = tile2; t2 < m_rp.m_tile_end[2]; t2 += nblk2 ) {
+            const index_type idx2 = t2*m_rp.m_tile[2] + lane2;
+            if ( !( idx2 < m_rp.m_upper[2] && lane2 < m_rp.m_tile[2] ) ) { continue; }
+
+            for ( index_type t3 = tile3; t3 < m_rp.m_tile_end[3]; t3 += nblk3 ) {
+              const index_type idx3 = t3*m_rp.m_tile[3] + lane3;
+              if ( !( idx3 < m_rp.m_upper[3] && lane3 < m_rp.m_tile[3] ) ) { continue; }
+
+              for ( index_type t4 = tile4; t4 < m_rp.m_tile_end[4]; t4 += nblk4 ) {
+                const index_type idx4 = t4*m_rp.m_tile[4] + lane4;
+                if ( !( idx4 < m_rp.m_upper[4] && lane4 < m_rp.m_tile[4] ) ) { continue; }
+
+                for ( index_type t5 = tile5; t5 < m_rp.m_tile_end[5]; t5 += nblk5 ) {
+                  const index_type idx5 = t5*m_rp.m_tile[5] + lane5;
+                  if ( !( idx5 < m_rp.m_upper[5] && lane5 < m_rp.m_tile[5] ) ) { continue; }
+
+                  m_func(Tag() , idx0 , idx1 , idx2 , idx3 , idx4 , idx5);
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+};
+
+} // Refactor
+
+// ----------------------------------------------------------------------------------
+
+namespace Reduce {
+
+// True exactly when T is the type void (cv-qualified void does not match).
+template < class T >
+using is_void = std::is_same< T , void >;
+
+// Primary template: T is not an array form; value_type is T itself.
+template < class T >
+struct is_array_type : std::integral_constant< bool , false >
+{
+  typedef T value_type;
+};
+
+// Pointer form T*: reported as an array whose element type is T.
+template < class T >
+struct is_array_type< T* > : std::integral_constant< bool , true >
+{
+  typedef T value_type;
+};
+
+// Unbounded array form T[]: reported as an array whose element type is T.
+template < class T >
+struct is_array_type< T[] > : std::integral_constant< bool , true >
+{
+  typedef T value_type;
+};
+
+// ------------------------------------------------------------------ //
+// Primary declaration only; every usable case is a partial specialization.
+//   N         - rank handled by the specialization
+//   RP        - range policy type
+//   Functor   - callable invoked per index
+//   Tag       - void, or a tag type passed as the functor's first argument
+//   ValueType - type of the value forwarded to the functor
+//   Enable    - std::enable_if hook used by the specializations
+template< int N ,
+          typename RP ,
+          typename Functor ,
+          typename Tag ,
+          typename ValueType ,
+          typename Enable = void >
+struct DeviceIterateTile;
+
+// ParallelReduce iteration pattern
+// Scalar reductions
+
+// num_blocks = min( num_tiles, max_num_blocks ); //i.e. determined by number of tiles and reduction algorithm constraints
+// extract n-dim tile offsets (i.e. tile's global starting multi-index) from the tileid = blockid using tile dimensions
+// local indices within a tile extracted from (index_type)threadIdx.x using tile dims, constrained by blocksize
+// combine tile and local id info for multi-dim global ids
+
+// Pattern:
+// Each block+thread is responsible for a tile+local_id combo (additional when striding by num_blocks)
+// 1. create offset arrays
+// 2. loop over number of tiles, striding by griddim (equal to num tiles, or max num blocks)
+// 3. temps set for tile_idx and thrd_idx, which will be modified
+// 4. if LL vs LR:
+//      determine tile starting point offsets (multidim)
+//      determine local index offsets (multidim)
+//      concatenate tile offset + local offset for global multi-dim index
+//    if offset within range bounds AND local offset within tile bounds, call functor
+
+// ValueType = T
+//Rank 2
+// Specializations for void tag type
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<2,RP,Functor,void,ValueType, typename std::enable_if< !is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy, the functor, and the value that is
+  // forwarded to the functor as its last argument.
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // For each tile handled by this block (starting at blockIdx.x, stepping by
+  // gridDim.x) decode the tile id and threadIdx.y into a global 2-d index and
+  // call m_func( i0, i1, m_v ) when that index lies inside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global index, one entry per dimension
+      index_type m_local_offset[RP::rank]; // index within the tile, one entry per dimension
+
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp because tile_idx is consumed while decoding
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Dimensions are decoded fastest-first: 0..rank-1 for Left (LL),
+        // rank-1..0 otherwise (LR). Every dimension must be decoded before
+        // the functor is called, otherwise m_offset would be read uninitialized.
+        for (int d=0; d<int(RP::rank); ++d) {
+          const int i = (RP::inner_direction == RP::Left) ? d : int(RP::rank)-1-d;
+
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local indices identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false;
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_v ); }
+      }
+    }
+
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<2,RP,Functor,Tag, ValueType, typename std::enable_if< !is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy, the functor, and the value that is
+  // forwarded to the functor as its last argument.
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // For each tile handled by this block (starting at blockIdx.x, stepping by
+  // gridDim.x) decode the tile id and threadIdx.y into a global 2-d index and
+  // call m_func( Tag(), i0, i1, m_v ) when that index lies inside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global index, one entry per dimension
+      index_type m_local_offset[RP::rank]; // index within the tile, one entry per dimension
+
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp because tile_idx is consumed while decoding
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Dimensions are decoded fastest-first: 0..rank-1 for Left (LL),
+        // rank-1..0 otherwise (LR). Every dimension must be decoded before
+        // the functor is called, otherwise m_offset would be read uninitialized.
+        for (int d=0; d<int(RP::rank); ++d) {
+          const int i = (RP::inner_direction == RP::Left) ? d : int(RP::rank)-1-d;
+
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local indices identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false;
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+//Rank 3
+// Specializations for void tag type
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<3,RP,Functor,void,ValueType , typename std::enable_if< !is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy, the functor, and the value that is
+  // forwarded to the functor as its last argument.
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // For each tile handled by this block (starting at blockIdx.x, stepping by
+  // gridDim.x) decode the tile id and threadIdx.y into a global 3-d index and
+  // call m_func( i0, i1, i2, m_v ) when that index lies inside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global index, one entry per dimension
+      index_type m_local_offset[RP::rank]; // index within the tile, one entry per dimension
+
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp because tile_idx is consumed while decoding
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Dimensions are decoded fastest-first: 0..rank-1 for Left (LL),
+        // rank-1..0 otherwise (LR). Every dimension must be decoded before
+        // the functor is called, otherwise m_offset would be read uninitialized.
+        for (int d=0; d<int(RP::rank); ++d) {
+          const int i = (RP::inner_direction == RP::Left) ? d : int(RP::rank)-1-d;
+
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local indices identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false;
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<3,RP,Functor,Tag, ValueType, typename std::enable_if< !is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy, the functor, and the value that is
+  // forwarded to the functor as its last argument.
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // For each tile handled by this block (starting at blockIdx.x, stepping by
+  // gridDim.x) decode the tile id and threadIdx.y into a global 3-d index and
+  // call m_func( Tag(), i0, i1, i2, m_v ) when that index lies inside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global index, one entry per dimension
+      index_type m_local_offset[RP::rank]; // index within the tile, one entry per dimension
+
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp because tile_idx is consumed while decoding
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Dimensions are decoded fastest-first: 0..rank-1 for Left (LL),
+        // rank-1..0 otherwise (LR). Every dimension must be decoded before
+        // the functor is called, otherwise m_offset would be read uninitialized.
+        for (int d=0; d<int(RP::rank); ++d) {
+          const int i = (RP::inner_direction == RP::Left) ? d : int(RP::rank)-1-d;
+
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local indices identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false;
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+//Rank 4
+// Specializations for void tag type
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<4,RP,Functor,void,ValueType , typename std::enable_if< !is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy, the functor, and the value that is
+  // forwarded to the functor as its last argument.
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range in this specialization; kept as a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // For each tile handled by this block (starting at blockIdx.x, stepping by
+  // gridDim.x) decode the tile id and threadIdx.y into a global 4-d index and
+  // call m_func( i0, i1, i2, i3, m_v ) when that index lies inside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global index, one entry per dimension
+      index_type m_local_offset[RP::rank]; // index within the tile, one entry per dimension
+
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp because tile_idx is consumed while decoding
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Dimensions are decoded fastest-first: 0..rank-1 for Left (LL),
+        // rank-1..0 otherwise (LR). Every dimension must be decoded before
+        // the functor is called, otherwise m_offset would be read uninitialized.
+        for (int d=0; d<int(RP::rank); ++d) {
+          const int i = (RP::inner_direction == RP::Left) ? d : int(RP::rank)-1-d;
+
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local indices identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false;
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+// Specializations for tag type
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<4,RP,Functor,Tag,ValueType, typename std::enable_if< !is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy, the functor, and the value that is
+  // forwarded to the functor as its last argument.
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range in this specialization; kept as a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // For each tile handled by this block (starting at blockIdx.x, stepping by
+  // gridDim.x) decode the tile id and threadIdx.y into a global 4-d index and
+  // call m_func( Tag(), i0, i1, i2, i3, m_v ) when that index lies inside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global index, one entry per dimension
+      index_type m_local_offset[RP::rank]; // index within the tile, one entry per dimension
+
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp because tile_idx is consumed while decoding
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Dimensions are decoded fastest-first: 0..rank-1 for Left (LL),
+        // rank-1..0 otherwise (LR). Every dimension must be decoded before
+        // the functor is called, otherwise m_offset would be read uninitialized.
+        for (int d=0; d<int(RP::rank); ++d) {
+          const int i = (RP::inner_direction == RP::Left) ? d : int(RP::rank)-1-d;
+
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local indices identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false;
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+//Rank 5
+// Specializations for void tag type
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<5,RP,Functor,void,ValueType , typename std::enable_if< !is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // Holds references to the range policy, the functor, and the value that is
+  // forwarded to the functor as its last argument.
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range in this specialization; kept as a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // For each tile handled by this block (starting at blockIdx.x, stepping by
+  // gridDim.x) decode the tile id and threadIdx.y into a global 5-d index and
+  // call m_func( i0, i1, i2, i3, i4, m_v ) when that index lies inside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global index, one entry per dimension
+      index_type m_local_offset[RP::rank]; // index within the tile, one entry per dimension
+
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp because tile_idx is consumed while decoding
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Dimensions are decoded fastest-first: 0..rank-1 for Left (LL),
+        // rank-1..0 otherwise (LR). Every dimension must be decoded before
+        // the functor is called, otherwise m_offset would be read uninitialized.
+        for (int d=0; d<int(RP::rank); ++d) {
+          const int i = (RP::inner_direction == RP::Left) ? d : int(RP::rank)-1-d;
+
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local indices identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false;
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+// Specialization for non-void tag type: the functor is called with Tag() as
+// its first argument.
+//
+// Rank-5 tile iterator of namespace Reduce for a non-array ValueType.
+// Every (blockIdx.x, threadIdx.y) pair is decoded into one 5-dimensional
+// index per visited tile; m_func is called with that index and m_v whenever
+// the index is below m_rp.m_upper in every dimension.
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<5,RP,Functor,Tag,ValueType, typename std::enable_if< !is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : value handed to every functor call, held by reference
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+//Rank 6
+// Specialization for void tag type: the functor is called without a tag.
+//
+// Rank-6 tile iterator of namespace Reduce for a non-array ValueType.
+// Every (blockIdx.x, threadIdx.y) pair is decoded into one 6-dimensional
+// index per visited tile; m_func is called with that index and m_v whenever
+// the index is below m_rp.m_upper in every dimension.
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<6,RP,Functor,void,ValueType , typename std::enable_if< !is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : value handed to every functor call, held by reference
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_offset[5], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+// Specialization for non-void tag type: the functor is called with Tag() as
+// its first argument.
+//
+// Rank-6 tile iterator of namespace Reduce for a non-array ValueType.
+// Every (blockIdx.x, threadIdx.y) pair is decoded into one 6-dimensional
+// index per visited tile; m_func is called with that index and m_v whenever
+// the index is below m_rp.m_upper in every dimension.
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<6,RP,Functor,Tag,ValueType, typename std::enable_if< !is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : value handed to every functor call, held by reference
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , ValueType & v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_offset[5], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  ValueType & m_v;
+};
+
+
+// ValueType = T[], T*
+//Rank 2
+// Specialization for void tag type: the functor is called without a tag.
+//
+// Rank-2 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 2-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<2,RP,Functor,void,ValueType, typename std::enable_if< is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+// Specialization for non-void tag type: the functor is called with Tag() as
+// its first argument.
+//
+// Rank-2 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 2-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<2,RP,Functor,Tag, ValueType, typename std::enable_if< is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_v ); }
+      } //end for loop over num_tiles - product of tiles in each direction
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+//Rank 3
+// Specialization for void tag type: the functor is called without a tag.
+//
+// Rank-3 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 3-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<3,RP,Functor,void,ValueType , typename std::enable_if< is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+// Specialization for non-void tag type: the functor is called with Tag() as
+// its first argument.
+//
+// Rank-3 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 3-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<3,RP,Functor,Tag, ValueType, typename std::enable_if< is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+//Rank 4
+// Specialization for void tag type: the functor is called without a tag.
+//
+// Rank-4 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 4-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<4,RP,Functor,void,ValueType , typename std::enable_if< is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+// Specialization for non-void tag type: the functor is called with Tag() as
+// its first argument.
+//
+// Rank-4 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 4-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<4,RP,Functor,Tag,ValueType, typename std::enable_if< is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  inline __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+//Rank 5
+// Specialization for void tag type: the functor is called without a tag.
+//
+// Rank-5 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 5-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<5,RP,Functor,void,ValueType , typename std::enable_if< is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+// Specialization for non-void tag type: the functor is called with Tag() as
+// its first argument.
+//
+// Rank-5 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 5-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<5,RP,Functor,Tag,ValueType, typename std::enable_if< is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+//Rank 6
+// Specialization for void tag type: the functor is called without a tag.
+//
+// Rank-6 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 6-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename ValueType >
+struct DeviceIterateTile<6,RP,Functor,void,ValueType , typename std::enable_if< is_array_type<ValueType>::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_offset[5], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+
+// Specialization for non-void tag type: the functor is called with Tag() as
+// its first argument.
+//
+// Rank-6 tile iterator of namespace Reduce for an array ValueType
+// (T[] or T*). Every (blockIdx.x, threadIdx.y) pair is decoded into one
+// 6-dimensional index per visited tile; m_func is called with that index and
+// the value_type pointer m_v whenever the index is below m_rp.m_upper in
+// every dimension.
+template< typename RP , typename Functor , typename Tag, typename ValueType >
+struct DeviceIterateTile<6,RP,Functor,Tag,ValueType, typename std::enable_if< is_array_type<ValueType>::value && !is_void< Tag >::value >::type >
+{
+  using index_type = typename RP::index_type;
+  using value_type = typename is_array_type< ValueType >::value_type;
+
+  // rp_ : policy providing m_lower, m_upper, m_tile, m_tile_end,
+  //       m_num_tiles and m_prod_tile_dims
+  // f_  : functor, held by reference
+  // v_  : pointer handed to every functor call
+  __device__
+  DeviceIterateTile( const RP & rp_ , const Functor & f_ , value_type* v_)
+  : m_rp(rp_)
+  , m_func(f_)
+  , m_v(v_)
+  {}
+
+  // Not referenced by exec_range(); kept because it is a public member.
+  static constexpr index_type max_blocks = 65535;
+
+  // Walks the tiles assigned to this block and invokes the functor once for
+  // this thread's point in each tile, skipping points outside the range.
+  inline __device__
+  void exec_range() const
+  {
+    if ( (index_type)blockIdx.x < m_rp.m_num_tiles && (index_type)threadIdx.y < m_rp.m_prod_tile_dims ) {
+      index_type m_offset[RP::rank]; // global multi-index: tile start + local offset
+      index_type m_local_offset[RP::rank]; // this thread's multi-index inside the tile
+
+      // This block handles tiles blockIdx.x, blockIdx.x + gridDim.x, ...
+      for ( index_type tileidx = (index_type)blockIdx.x; tileidx < m_rp.m_num_tiles; tileidx += gridDim.x ) {
+        index_type tile_idx = tileidx; // temp: consumed digit by digit below
+        index_type thrd_idx = (index_type)threadIdx.y;
+        bool in_bounds = true;
+
+        // Decode tile_idx (which tile) and thrd_idx (which point inside the
+        // tile) as mixed-radix numbers. For RP::Left (LL) the fastest-varying
+        // digit is dimension 0, otherwise (LR) it is dimension rank-1.
+        for (int k=0; k<static_cast<int>(RP::rank); ++k) {
+          const int i = (RP::inner_direction == RP::Left) ? k : static_cast<int>(RP::rank) - 1 - k;
+
+          // global start of the tile along dimension i
+          m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ;
+          tile_idx /= m_rp.m_tile_end[i];
+
+          // tile-local index identified with (index_type)threadIdx.y
+          m_local_offset[i] = (thrd_idx % m_rp.m_tile[i]);
+          thrd_idx /= m_rp.m_tile[i];
+
+          m_offset[i] += m_local_offset[i];
+          if ( !(m_offset[i] < m_rp.m_upper[i] && m_local_offset[i] < m_rp.m_tile[i]) ) {
+            in_bounds = false; // point lies past the upper bound in dimension i
+          }
+        }
+        if ( in_bounds )
+        { m_func( Tag(), m_offset[0], m_offset[1], m_offset[2], m_offset[3], m_offset[4], m_offset[5], m_v ); }
+      }
+    }
+  } //end exec_range
+
+private:
+  const RP & m_rp;
+  const Functor & m_func;
+  value_type* m_v;
+};
+
+} // Reduce
+
+// ----------------------------------------------------------------------------------
+
+} } } //end namespace Kokkos::Experimental::Impl
+
+#endif
+#endif
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
index 13abcfd93c..cae8ecd489 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaExec.hpp
@@ -53,6 +53,7 @@
 #include 
 #include 
 #include 
+#include 
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
@@ -125,53 +126,12 @@ unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits:
 
 #endif
 
-
-namespace Kokkos {
-namespace Impl {
-  struct CudaLockArraysStruct {
-    int* atomic;
-    int* scratch;
-    int* threadid;
-    int n;
-  };
-}
-}
-__device__ __constant__
-#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-extern
-#endif
-Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
-
-#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
-#define CUDA_SPACE_ATOMIC_XOR_MASK 0x15A39
-
 namespace Kokkos {
 namespace Impl {
   void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink = false);
 }
 }
 
-namespace Kokkos {
-namespace Impl {
-__device__ inline
-bool lock_address_cuda_space(void* ptr) {
-  size_t offset = size_t(ptr);
-  offset = offset >> 2;
-  offset = offset & CUDA_SPACE_ATOMIC_MASK;
-  return (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[offset],0,1));
-}
-
-__device__ inline
-void unlock_address_cuda_space(void* ptr) {
-  size_t offset = size_t(ptr);
-  offset = offset >> 2;
-  offset = offset & CUDA_SPACE_ATOMIC_MASK;
-  atomicExch( &kokkos_impl_cuda_lock_arrays.atomic[ offset ], 0);
-}
-
-}
-}
-
 template< typename T >
 inline
 __device__
@@ -192,7 +152,7 @@ namespace Impl {
 // For 2.0 capability: 48 KB L1 and 16 KB shared
 //----------------------------------------------------------------------------
 
-template< class DriverType >
+template< class DriverType>
 __global__
 static void cuda_parallel_launch_constant_memory()
 {
@@ -202,19 +162,39 @@ static void cuda_parallel_launch_constant_memory()
   driver();
 }
 
-template< class DriverType >
+template< class DriverType, unsigned int maxTperB, unsigned int minBperSM >
+__global__
+__launch_bounds__(maxTperB, minBperSM)
+static void cuda_parallel_launch_constant_memory()
+{
+  const DriverType & driver =
+    *((const DriverType *) kokkos_impl_cuda_constant_memory_buffer );
+
+  driver();
+}
+
+template< class DriverType>
 __global__
 static void cuda_parallel_launch_local_memory( const DriverType driver )
 {
   driver();
 }
 
-template < class DriverType ,
-           bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) >
+template< class DriverType, unsigned int maxTperB, unsigned int minBperSM >
+__global__
+__launch_bounds__(maxTperB, minBperSM)
+static void cuda_parallel_launch_local_memory( const DriverType driver )
+{
+  driver();
+}
+
+template < class DriverType
+         , class LaunchBounds = Kokkos::LaunchBounds<>
+         , bool Large = ( CudaTraits::ConstantMemoryUseThreshold < sizeof(DriverType) ) >
 struct CudaParallelLaunch ;
 
-template < class DriverType >
-struct CudaParallelLaunch< DriverType , true > {
+template < class DriverType, class LaunchBounds >
+struct CudaParallelLaunch< DriverType, LaunchBounds, true > {
 
   inline
   CudaParallelLaunch( const DriverType & driver
@@ -238,26 +218,19 @@ struct CudaParallelLaunch< DriverType , true > {
       }
       #ifndef KOKKOS_ARCH_KEPLER //On Kepler the L1 has no benefit since it doesn't cache reads
       else if ( shmem ) {
-        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferShared ) );
+        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType, LaunchBounds::maxTperB, LaunchBounds::minBperSM > , cudaFuncCachePreferShared ) );
       } else {
-        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType > , cudaFuncCachePreferL1 ) );
+        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_constant_memory< DriverType, LaunchBounds::maxTperB, LaunchBounds::minBperSM > , cudaFuncCachePreferL1 ) );
       }
       #endif
 
       // Copy functor to constant memory on the device
       cudaMemcpyToSymbol( kokkos_impl_cuda_constant_memory_buffer , & driver , sizeof(DriverType) );
 
-      #ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-      Kokkos::Impl::CudaLockArraysStruct locks;
-      locks.atomic = atomic_lock_array_cuda_space_ptr(false);
-      locks.scratch = scratch_lock_array_cuda_space_ptr(false);
-      locks.threadid = threadid_lock_array_cuda_space_ptr(false);
-      locks.n = Kokkos::Cuda::concurrency();
-      cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) );
-      #endif
+      KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
 
       // Invoke the driver function on the device
-      cuda_parallel_launch_constant_memory< DriverType ><<< grid , block , shmem , stream >>>();
+      cuda_parallel_launch_constant_memory< DriverType, LaunchBounds::maxTperB, LaunchBounds::minBperSM ><<< grid , block , shmem , stream >>>();
 
 #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
       CUDA_SAFE_CALL( cudaGetLastError() );
@@ -267,8 +240,8 @@ struct CudaParallelLaunch< DriverType , true > {
   }
 };
 
-template < class DriverType >
-struct CudaParallelLaunch< DriverType , false > {
+template < class DriverType, class LaunchBounds >
+struct CudaParallelLaunch< DriverType, LaunchBounds, false > {
 
   inline
   CudaParallelLaunch( const DriverType & driver
@@ -284,22 +257,15 @@ struct CudaParallelLaunch< DriverType , false > {
       }
       #ifndef KOKKOS_ARCH_KEPLER //On Kepler the L1 has no benefit since it doesn't cache reads
       else if ( shmem ) {
-        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferShared ) );
+        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType, LaunchBounds::maxTperB, LaunchBounds::minBperSM > , cudaFuncCachePreferShared ) );
       } else {
-        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType > , cudaFuncCachePreferL1 ) );
+        CUDA_SAFE_CALL( cudaFuncSetCacheConfig( cuda_parallel_launch_local_memory< DriverType, LaunchBounds::maxTperB, LaunchBounds::minBperSM > , cudaFuncCachePreferL1 ) );
       }
       #endif
 
-      #ifndef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-      Kokkos::Impl::CudaLockArraysStruct locks;
-      locks.atomic = atomic_lock_array_cuda_space_ptr(false);
-      locks.scratch = scratch_lock_array_cuda_space_ptr(false);
-      locks.threadid = threadid_lock_array_cuda_space_ptr(false);
-      locks.n = Kokkos::Cuda::concurrency();
-      cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) );
-      #endif
+      KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
 
-      cuda_parallel_launch_local_memory< DriverType ><<< grid , block , shmem , stream >>>( driver );
+      cuda_parallel_launch_local_memory< DriverType, LaunchBounds::maxTperB, LaunchBounds::minBperSM ><<< grid , block , shmem , stream >>>( driver );
 
 #if defined( KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK )
       CUDA_SAFE_CALL( cudaGetLastError() );
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
index 406b4f1e22..b699f0d6ba 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_CudaSpace.cpp
@@ -230,18 +230,6 @@ void CudaHostPinnedSpace::deallocate( void * const arg_alloc_ptr , const size_t
   } catch(...) {}
 }
 
-constexpr const char* CudaSpace::name() {
-  return m_name;
-}
-
-constexpr const char* CudaUVMSpace::name() {
-  return m_name;
-}
-
-constexpr const char* CudaHostPinnedSpace::name() {
-  return m_name;
-}
-
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
@@ -655,11 +643,12 @@ reallocate_tracked( void * const arg_alloc_ptr
 SharedAllocationRecord< Kokkos::CudaSpace , void > *
 SharedAllocationRecord< Kokkos::CudaSpace , void >::get_record( void * alloc_ptr )
 {
-  using Header     = SharedAllocationHeader ;
   using RecordBase = SharedAllocationRecord< void , void > ;
   using RecordCuda = SharedAllocationRecord< Kokkos::CudaSpace , void > ;
 
 #if 0
+  using Header     = SharedAllocationHeader ;
+
   // Copy the header from the allocation
   Header head ;
 
@@ -812,83 +801,6 @@ print_records( std::ostream & s , const Kokkos::CudaHostPinnedSpace & space , bo
   SharedAllocationRecord< void , void >::print_host_accessible_records( s , "CudaHostPinned" , & s_root_record , detail );
 }
 
-} // namespace Impl
-} // namespace Kokkos
-
-/*--------------------------------------------------------------------------*/
-/*--------------------------------------------------------------------------*/
-
-namespace Kokkos {
-namespace {
-  __global__ void init_lock_array_kernel_atomic() {
-    unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
-
-    if(i>>();
-    init_lock_array_kernel_scratch_threadid<<<(Kokkos::Cuda::concurrency()+255)/256,256>>>(Kokkos::Cuda::concurrency());
-  }
-}
-
 void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink) {
   static void* ptr = NULL;
   static std::int64_t current_size = 0;
@@ -908,8 +820,8 @@ void* cuda_resize_scratch_space(std::int64_t bytes, bool force_shrink) {
   return ptr;
 }
 
-}
-}
+} // namespace Impl
+} // namespace Kokkos
 #else
 void KOKKOS_CORE_SRC_CUDA_CUDASPACE_PREVENT_LINK_ERROR() {}
 #endif // KOKKOS_ENABLE_CUDA
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
index daf55cbd97..80e8f9bd8a 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Impl.cpp
@@ -51,6 +51,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -69,9 +70,6 @@
 __device__ __constant__
 unsigned long kokkos_impl_cuda_constant_memory_buffer[ Kokkos::Impl::CudaTraits::ConstantMemoryUsage / sizeof(unsigned long) ] ;
 
-__device__ __constant__
-Kokkos::Impl::CudaLockArraysStruct kokkos_impl_cuda_lock_arrays ;
-
 #endif
 
 /*--------------------------------------------------------------------------*/
@@ -103,6 +101,7 @@ int cuda_kernel_arch()
   return arch ;
 }
 
+#ifdef KOKKOS_ENABLE_CUDA_UVM
 bool cuda_launch_blocking()
 {
   const char * env = getenv("CUDA_LAUNCH_BLOCKING");
@@ -111,16 +110,13 @@ bool cuda_launch_blocking()
 
   return atoi(env);
 }
+#endif
 
 }
 
 void cuda_device_synchronize()
 {
-//  static const bool launch_blocking = cuda_launch_blocking();
-
-//  if (!launch_blocking) {
-    CUDA_SAFE_CALL( cudaDeviceSynchronize() );
-//  }
+  CUDA_SAFE_CALL( cudaDeviceSynchronize() );
 }
 
 void cuda_internal_error_throw( cudaError e , const char * name, const char * file, const int line )
@@ -240,6 +236,7 @@ public:
   unsigned    m_maxWarpCount ;
   unsigned    m_maxBlock ;
   unsigned    m_maxSharedWords ;
+  uint32_t    m_maxConcurrency ;
   size_type   m_scratchSpaceCount ;
   size_type   m_scratchFlagsCount ;
   size_type   m_scratchUnifiedCount ;
@@ -248,6 +245,7 @@ public:
   size_type * m_scratchSpace ;
   size_type * m_scratchFlags ;
   size_type * m_scratchUnified ;
+  uint32_t  * m_scratchConcurrentBitset ;
   cudaStream_t * m_stream ;
 
   static int was_initialized;
@@ -274,6 +272,7 @@ public:
     , m_maxWarpCount( 0 )
     , m_maxBlock( 0 )
     , m_maxSharedWords( 0 )
+    , m_maxConcurrency( 0 )
     , m_scratchSpaceCount( 0 )
     , m_scratchFlagsCount( 0 )
     , m_scratchUnifiedCount( 0 )
@@ -282,6 +281,7 @@ public:
     , m_scratchSpace( 0 )
     , m_scratchFlags( 0 )
     , m_scratchUnified( 0 )
+    , m_scratchConcurrentBitset( 0 )
     , m_stream( 0 )
     {}
 
@@ -327,7 +327,8 @@ CudaInternal::~CudaInternal()
   if ( m_stream ||
        m_scratchSpace ||
        m_scratchFlags ||
-       m_scratchUnified ) {
+       m_scratchUnified ||
+       m_scratchConcurrentBitset ) {
     std::cerr << "Kokkos::Cuda ERROR: Failed to call Kokkos::Cuda::finalize()"
               << std::endl ;
     std::cerr.flush();
@@ -339,6 +340,7 @@ CudaInternal::~CudaInternal()
   m_maxWarpCount            = 0 ;
   m_maxBlock                = 0 ;
   m_maxSharedWords          = 0 ;
+  m_maxConcurrency          = 0 ;
   m_scratchSpaceCount       = 0 ;
   m_scratchFlagsCount       = 0 ;
   m_scratchUnifiedCount     = 0 ;
@@ -347,6 +349,7 @@ CudaInternal::~CudaInternal()
   m_scratchSpace            = 0 ;
   m_scratchFlags            = 0 ;
   m_scratchUnified          = 0 ;
+  m_scratchConcurrentBitset = 0 ;
   m_stream                  = 0 ;
 }
 
@@ -485,6 +488,33 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
       (void) scratch_space( reduce_block_count * 16 * sizeof(size_type) );
     }
     //----------------------------------
+    // Concurrent bitset for obtaining unique tokens from within
+    // an executing kernel.
+    {
+      const unsigned max_threads_per_sm = 2048 ; // up to capability 7.0
+
+      m_maxConcurrency =
+        max_threads_per_sm * cudaProp.multiProcessorCount ;
+
+      const int32_t buffer_bound =
+         Kokkos::Impl::concurrent_bitset::buffer_bound( m_maxConcurrency );
+
+      // Allocate and initialize uint32_t[ buffer_bound ]
+
+      typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::CudaSpace , void > Record ;
+
+      Record * const r = Record::allocate( Kokkos::CudaSpace()
+                                         , "InternalScratchBitset"
+                                         , sizeof(uint32_t) * buffer_bound );
+
+      Record::increment( r );
+
+      m_scratchConcurrentBitset = reinterpret_cast( r->data() );
+
+      CUDA_SAFE_CALL( cudaMemset( m_scratchConcurrentBitset , 0 , sizeof(uint32_t) * buffer_bound ) );
+
+    }
+    //----------------------------------
 
     if ( stream_count ) {
       m_stream = (cudaStream_t*) ::malloc( stream_count * sizeof(cudaStream_t) );
@@ -543,16 +573,7 @@ void CudaInternal::initialize( int cuda_device_id , int stream_count )
   cudaThreadSetCacheConfig(cudaFuncCachePreferShared);
 
   // Init the array for used for arbitrarily sized atomics
-  Impl::init_lock_arrays_cuda_space();
-
-  #ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-  Kokkos::Impl::CudaLockArraysStruct locks;
-  locks.atomic = atomic_lock_array_cuda_space_ptr(false);
-  locks.scratch = scratch_lock_array_cuda_space_ptr(false);
-  locks.threadid = threadid_lock_array_cuda_space_ptr(false);
-  locks.n = Kokkos::Cuda::concurrency();
-  cudaMemcpyToSymbol( kokkos_impl_cuda_lock_arrays , & locks , sizeof(CudaLockArraysStruct) );
-  #endif
+  Impl::initialize_host_cuda_lock_arrays();
 }
 
 //----------------------------------------------------------------------------
@@ -635,9 +656,7 @@ void CudaInternal::finalize()
   was_finalized = 1;
   if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) {
 
-    atomic_lock_array_cuda_space_ptr(true);
-    scratch_lock_array_cuda_space_ptr(true);
-    threadid_lock_array_cuda_space_ptr(true);
+    Impl::finalize_host_cuda_lock_arrays();
 
     if ( m_stream ) {
       for ( size_type i = 1 ; i < m_streamCount ; ++i ) {
@@ -653,6 +672,7 @@ void CudaInternal::finalize()
     RecordCuda::decrement( RecordCuda::get_record( m_scratchFlags ) );
     RecordCuda::decrement( RecordCuda::get_record( m_scratchSpace ) );
     RecordHost::decrement( RecordHost::get_record( m_scratchUnified ) );
+    RecordCuda::decrement( RecordCuda::get_record( m_scratchConcurrentBitset ) );
 
     m_cudaDev             = -1 ;
     m_multiProcCount      = 0 ;
@@ -666,6 +686,7 @@ void CudaInternal::finalize()
     m_scratchSpace        = 0 ;
     m_scratchFlags        = 0 ;
     m_scratchUnified      = 0 ;
+    m_scratchConcurrentBitset = 0 ;
     m_stream              = 0 ;
   }
 }
@@ -713,9 +734,8 @@ namespace Kokkos {
 Cuda::size_type Cuda::detect_device_count()
 { return Impl::CudaInternalDevices::singleton().m_cudaDevCount ; }
 
-int Cuda::concurrency() {
-  return 131072;
-}
+int Cuda::concurrency()
+{ return Impl::CudaInternal::singleton().m_maxConcurrency ; }
 
 int Cuda::is_initialized()
 { return Impl::CudaInternal::singleton().is_initialized(); }
@@ -798,7 +818,22 @@ void Cuda::fence()
 const char* Cuda::name() { return "Cuda"; }
 
 } // namespace Kokkos
+
+namespace Kokkos {
+namespace Experimental {
+
+UniqueToken< Kokkos::Cuda , Kokkos::Experimental::UniqueTokenScope::Global >::
+UniqueToken( Kokkos::Cuda const & )
+  : m_buffer( Kokkos::Impl::CudaInternal::singleton().m_scratchConcurrentBitset )
+  , m_count(  Kokkos::Impl::CudaInternal::singleton().m_maxConcurrency )
+  {}
+
+} // namespace Experimental
+} // namespace Kokkos
+
 #else
+
 void KOKKOS_CORE_SRC_CUDA_IMPL_PREVENT_LINK_ERROR() {}
+
 #endif // KOKKOS_ENABLE_CUDA
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
new file mode 100644
index 0000000000..237022ad23
--- /dev/null
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.cpp
@@ -0,0 +1,119 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include 
+
+#ifdef KOKKOS_ENABLE_CUDA
+
+#include 
+#include 
+#include 
+
+#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
+namespace Kokkos {
+namespace Impl {
+__device__ __constant__
+CudaLockArrays g_device_cuda_lock_arrays = { nullptr, nullptr, 0 };
+}
+}
+#endif
+
+namespace Kokkos {
+
+namespace {
+
+__global__ void init_lock_array_kernel_atomic() {
+  unsigned i = blockIdx.x*blockDim.x + threadIdx.x;
+  if(i>>();
+  init_lock_array_kernel_threadid<<<(Kokkos::Cuda::concurrency()+255)/256,256>>>(Kokkos::Cuda::concurrency());
+  CUDA_SAFE_CALL(cudaDeviceSynchronize());
+}
+
+void finalize_host_cuda_lock_arrays() {
+  if (g_host_cuda_lock_arrays.atomic == nullptr) return;
+  cudaFree(g_host_cuda_lock_arrays.atomic);
+  g_host_cuda_lock_arrays.atomic = nullptr;
+  cudaFree(g_host_cuda_lock_arrays.scratch);
+  g_host_cuda_lock_arrays.scratch = nullptr;
+  g_host_cuda_lock_arrays.n = 0;
+#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
+  KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
+#endif
+}
+
+} // namespace Impl
+
+} // namespace Kokkos
+
+#else
+
+void KOKKOS_CORE_SRC_CUDA_CUDA_LOCKS_PREVENT_LINK_ERROR() {}
+
+#endif
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
new file mode 100644
index 0000000000..d01f06fb4f
--- /dev/null
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Locks.hpp
@@ -0,0 +1,166 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_LOCKS_HPP
+#define KOKKOS_CUDA_LOCKS_HPP
+
+#include 
+
+#ifdef KOKKOS_ENABLE_CUDA
+
+#include 
+
+#include 
+
+namespace Kokkos {
+namespace Impl {
+
+struct CudaLockArrays {
+  std::int32_t* atomic;
+  std::int32_t* scratch;
+  std::int32_t n;
+};
+
+/// \brief This global variable in Host space is the central definition
+///        of these arrays.
+extern Kokkos::Impl::CudaLockArrays g_host_cuda_lock_arrays ;
+
+/// \brief After this call, the g_host_cuda_lock_arrays variable has
+///        valid, initialized arrays.
+///
+/// This call is idempotent.
+void initialize_host_cuda_lock_arrays();
+
+/// \brief After this call, the g_host_cuda_lock_arrays variable has
+///        all null pointers, and all array memory has been freed.
+///
+/// This call is idempotent.
+void finalize_host_cuda_lock_arrays();
+
+} // namespace Impl
+} // namespace Kokkos
+
+#if defined( __CUDACC__ )
+
+namespace Kokkos {
+namespace Impl {
+
+/// \brief This global variable in CUDA space is what kernels use
+///        to get access to the lock arrays.
+///
+/// When relocatable device code is enabled, there can be one single
+/// instance of this global variable for the entire executable,
+/// whose definition will be in Kokkos_Cuda_Locks.cpp (and whose declaration
+/// here must then be extern).
+/// This one instance will be initialized by initialize_host_cuda_lock_arrays
+/// and need not be modified afterwards.
+///
+/// When relocatable device code is disabled, an instance of this variable
+/// will be created in every translation unit that sees this header file
+/// (we make this clear by marking it static, meaning no other translation
+///  unit can link to it).
+/// Since the Kokkos_Cuda_Locks.cpp translation unit cannot initialize the
+/// instances in other translation units, we must update this CUDA global
+/// variable based on the Host global variable prior to running any kernels
+/// that will use it.
+/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro.
+__device__ __constant__
+#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
+extern
+#endif
+Kokkos::Impl::CudaLockArrays g_device_cuda_lock_arrays ;
+
+#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
+
+/// \brief Acquire a lock for the address
+///
+/// This function tries to acquire the lock for the hash value derived
+/// from the provided ptr. If the lock is successfully acquired the
+/// function returns true. Otherwise it returns false.
+__device__ inline
+bool lock_address_cuda_space(void* ptr) {
+  size_t offset = size_t(ptr);
+  offset = offset >> 2;
+  offset = offset & CUDA_SPACE_ATOMIC_MASK;
+  return (0 == atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset],0,1));
+}
+
+/// \brief Release lock for the address
+///
+/// This function releases the lock for the hash value derived
+/// from the provided ptr. This function should only be called
+/// after previously successfully acquiring a lock with
+/// lock_address_cuda_space.
+__device__ inline
+void unlock_address_cuda_space(void* ptr) {
+  size_t offset = size_t(ptr);
+  offset = offset >> 2;
+  offset = offset & CUDA_SPACE_ATOMIC_MASK;
+  atomicExch( &Kokkos::Impl::g_device_cuda_lock_arrays.atomic[ offset ], 0);
+}
+
+} // namespace Impl
+} // namespace Kokkos
+
+/* Dan Ibanez: it is critical that this code be a macro, so that it will
+   capture the right address for Kokkos::Impl::g_device_cuda_lock_arrays!
+   putting this in an inline function will NOT do the right thing! */
+#define KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \
+{ \
+  CUDA_SAFE_CALL(cudaMemcpyToSymbol( \
+        Kokkos::Impl::g_device_cuda_lock_arrays , \
+        & Kokkos::Impl::g_host_cuda_lock_arrays , \
+        sizeof(Kokkos::Impl::CudaLockArrays) ) ); \
+}
+
+#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
+#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
+#else
+#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()
+#endif
+
+#endif /* defined( __CUDACC__ ) */
+
+#endif /* defined( KOKKOS_ENABLE_CUDA ) */
+
+#endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP */
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
index 0c8c700e8f..e2eab19e45 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Parallel.hpp
@@ -58,6 +58,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #if defined(KOKKOS_ENABLE_PROFILING)
@@ -65,6 +66,8 @@
 #include 
 #endif
 
+#include 
+
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
@@ -318,6 +321,7 @@ private:
   typedef Kokkos::RangePolicy< Traits ... > Policy;
   typedef typename Policy::member_type  Member ;
   typedef typename Policy::work_tag     WorkTag ;
+  typedef typename Policy::launch_bounds LaunchBounds ;
 
   const FunctorType  m_functor ;
   const Policy       m_policy ;
@@ -363,7 +367,7 @@ public:
       const dim3 block(  1 , CudaTraits::WarpSize * cuda_internal_maximum_warp_count(), 1);
       const dim3 grid( std::min( ( nwork + block.y - 1 ) / block.y , cuda_internal_maximum_grid_count() ) , 1 , 1);
 
-      CudaParallelLaunch< ParallelFor >( *this , grid , block , 0 );
+      CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 );
     }
 
   ParallelFor( const FunctorType  & arg_functor ,
@@ -373,6 +377,115 @@ public:
     { }
 };
 
+
+// MDRangePolicy impl
+template< class FunctorType , class ... Traits >
+class ParallelFor< FunctorType
+                 , Kokkos::Experimental::MDRangePolicy< Traits ... >
+                 , Kokkos::Cuda
+                 >
+{
+private:
+  typedef Kokkos::Experimental::MDRangePolicy< Traits ...  > Policy ;
+  using RP = Policy;
+  typedef typename Policy::array_index_type array_index_type;
+  typedef typename Policy::index_type index_type;
+  typedef typename Policy::launch_bounds LaunchBounds;
+
+
+  const FunctorType m_functor ;
+  const Policy      m_rp ;
+
+public:
+
+  inline
+  __device__
+  void operator()(void) const
+    {
+      Kokkos::Experimental::Impl::Refactor::DeviceIterateTile(m_rp,m_functor).exec_range();
+    }
+
+
+  inline
+  void execute() const
+  {
+    const array_index_type maxblocks = static_cast(Kokkos::Impl::CudaTraits::UpperBoundGridCount);
+    if ( RP::rank == 2 )
+    {
+      const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , 1);
+      const dim3 grid(
+            std::min( ( m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1 ) / block.x , maxblocks )
+          , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks )
+          , 1
+          );
+      CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 );
+    }
+    else if ( RP::rank == 3 )
+    {
+      const dim3 block( m_rp.m_tile[0] , m_rp.m_tile[1] , m_rp.m_tile[2] );
+      const dim3 grid(
+          std::min( ( m_rp.m_upper[0] - m_rp.m_lower[0] + block.x - 1 ) / block.x , maxblocks )
+        , std::min( ( m_rp.m_upper[1] - m_rp.m_lower[1] + block.y - 1 ) / block.y , maxblocks )
+        , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.z - 1 ) / block.z , maxblocks )
+        );
+      CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 );
+    }
+    else if ( RP::rank == 4 )
+    {
+      // id0,id1 encoded within threadIdx.x; id2 to threadIdx.y; id3 to threadIdx.z
+      const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2] , m_rp.m_tile[3] );
+      const dim3 grid(
+          std::min( static_cast( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] )
+                  , static_cast(maxblocks) )
+        , std::min( ( m_rp.m_upper[2] - m_rp.m_lower[2] + block.y - 1 ) / block.y , maxblocks )
+        , std::min( ( m_rp.m_upper[3] - m_rp.m_lower[3] + block.z - 1 ) / block.z , maxblocks )
+        );
+      CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 );
+    }
+    else if ( RP::rank == 5 )
+    {
+      // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4 to threadIdx.z
+      const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2]*m_rp.m_tile[3] , m_rp.m_tile[4] );
+      const dim3 grid(
+          std::min( static_cast( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] )
+                  , static_cast(maxblocks) )
+        , std::min( static_cast( m_rp.m_tile_end[2] * m_rp.m_tile_end[3] )
+                  , static_cast(maxblocks) )
+        , std::min( ( m_rp.m_upper[4] - m_rp.m_lower[4] + block.z - 1 ) / block.z , maxblocks )
+        );
+      CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 );
+    }
+    else if ( RP::rank == 6 )
+    {
+      // id0,id1 encoded within threadIdx.x; id2,id3 to threadIdx.y; id4,id5 to threadIdx.z
+      const dim3 block( m_rp.m_tile[0]*m_rp.m_tile[1] , m_rp.m_tile[2]*m_rp.m_tile[3] , m_rp.m_tile[4]*m_rp.m_tile[5] );
+      const dim3 grid(
+          std::min( static_cast( m_rp.m_tile_end[0] * m_rp.m_tile_end[1] )
+                  , static_cast(maxblocks) )
+        ,  std::min( static_cast( m_rp.m_tile_end[2] * m_rp.m_tile_end[3] )
+                  , static_cast(maxblocks) )
+        , std::min( static_cast( m_rp.m_tile_end[4] * m_rp.m_tile_end[5] )
+                  , static_cast(maxblocks) )
+        );
+      CudaParallelLaunch< ParallelFor, LaunchBounds >( *this , grid , block , 0 );
+    }
+    else
+    {
+      printf("Kokkos::MDRange Error: Exceeded rank bounds with Cuda\n");
+      Kokkos::abort("Aborting");
+    }
+
+  } //end execute
+
+//  inline
+  ParallelFor( const FunctorType & arg_functor
+             , Policy arg_policy )
+    : m_functor( arg_functor )
+    , m_rp(  arg_policy )
+    {}
+};
+
+
 template< class FunctorType , class ... Properties >
 class ParallelFor< FunctorType
                  , Kokkos::TeamPolicy< Properties ... >
@@ -384,6 +497,7 @@ private:
   typedef TeamPolicyInternal< Kokkos::Cuda , Properties ... >   Policy ;
   typedef typename Policy::member_type  Member ;
   typedef typename Policy::work_tag     WorkTag ;
+  typedef typename Policy::launch_bounds  LaunchBounds ;
 
 public:
 
@@ -430,15 +544,15 @@ public:
     if ( m_scratch_size[1]>0 ) {
       __shared__ int base_thread_id;
       if (threadIdx.x==0 && threadIdx.y==0 ) {
-        threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % kokkos_impl_cuda_lock_arrays.n;
+        threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % Kokkos::Impl::g_device_cuda_lock_arrays.n;
         threadid = ((threadid + blockDim.x * blockDim.y-1)/(blockDim.x * blockDim.y)) * blockDim.x * blockDim.y;
-        if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y;
+        if(threadid > Kokkos::Impl::g_device_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y;
         int done = 0;
         while (!done) {
-          done = (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[threadid],0,1));
+          done = (0 == atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid],0,1));
           if(!done) {
             threadid += blockDim.x * blockDim.y;
-            if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid = 0;
+            if(threadid > Kokkos::Impl::g_device_cuda_lock_arrays.n) threadid = 0;
           }
         }
         base_thread_id = threadid;
@@ -448,7 +562,8 @@ public:
     }
 
 
-    for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) {
+    const int int_league_size = (int)m_league_size;
+    for ( int league_rank = blockIdx.x ; league_rank < int_league_size ; league_rank += gridDim.x ) {
 
       this-> template exec_team< WorkTag >(
         typename Policy::member_type( kokkos_impl_cuda_shared_memory()
@@ -462,7 +577,7 @@ public:
     if ( m_scratch_size[1]>0 ) {
       __syncthreads();
       if (threadIdx.x==0 && threadIdx.y==0 )
-        kokkos_impl_cuda_lock_arrays.atomic[threadid]=0;
+        Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid]=0;
     }
   }
 
@@ -473,7 +588,7 @@ public:
       const dim3 grid( int(m_league_size) , 1 , 1 );
       const dim3 block( int(m_vector_size) , int(m_team_size) , 1 );
 
-      CudaParallelLaunch< ParallelFor >( *this, grid, block, shmem_size_total ); // copy to device and execute
+      CudaParallelLaunch< ParallelFor, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute
 
     }
 
@@ -529,6 +644,7 @@ private:
   typedef typename Policy::WorkRange    WorkRange ;
   typedef typename Policy::work_tag     WorkTag ;
   typedef typename Policy::member_type  Member ;
+  typedef typename Policy::launch_bounds LaunchBounds ;
 
   typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional;
   typedef typename ReducerConditional::type ReducerTypeFwd;
@@ -563,6 +679,7 @@ private:
   typedef int DummySHMEMReductionType;
 
 public:
+  // Make the exec_range calls call to Reduce::DeviceIterateTile
   template< class TagType >
   __device__ inline
   typename std::enable_if< std::is_same< TagType , void >::value >::type
@@ -686,7 +803,7 @@ public:
 
       const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( m_functor , block.y );
 
-      CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem ); // copy to device and execute
+      CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute
 
       Cuda::fence();
 
@@ -737,6 +854,232 @@ public:
   { }
 };
 
+
+// ParallelReduce specialization for Kokkos::Experimental::MDRangePolicy on the Kokkos::Cuda space
+template< class FunctorType , class ReducerType, class ... Traits >
+class ParallelReduce< FunctorType
+                    , Kokkos::Experimental::MDRangePolicy< Traits ... >
+                    , ReducerType
+                    , Kokkos::Cuda
+                    >
+{
+private:
+
+  typedef Kokkos::Experimental::MDRangePolicy< Traits ... > Policy ;
+  typedef typename Policy::array_index_type                 array_index_type;
+  typedef typename Policy::index_type                       index_type;
+
+  typedef typename Policy::work_tag     WorkTag ;
+  typedef typename Policy::member_type  Member ;
+  typedef typename Policy::launch_bounds LaunchBounds;
+
+  typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; // NOTE(review): std::is_same shows no template arguments here — confirm against upstream Kokkos
+  typedef typename ReducerConditional::type ReducerTypeFwd;
+
+  typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ;
+  typedef Kokkos::Impl::FunctorValueInit<   ReducerTypeFwd, WorkTag > ValueInit ;
+  typedef Kokkos::Impl::FunctorValueJoin<   ReducerTypeFwd, WorkTag > ValueJoin ;
+
+public:
+
+  typedef typename ValueTraits::pointer_type    pointer_type ;
+  typedef typename ValueTraits::value_type      value_type ;
+  typedef typename ValueTraits::reference_type  reference_type ;
+  typedef FunctorType                           functor_type ;
+  typedef Cuda::size_type                       size_type ;
+
+  // Algorithmic constraints: blockSize is a power of two AND blockDim.x == blockDim.z == 1 (execute() launches with block( 1 , block_size , 1 ))
+
+  const FunctorType   m_functor ;
+  const Policy        m_policy ; // supplies m_num_tiles and m_prod_tile_dims, read by execute() and run()
+  const ReducerType   m_reducer ;
+  const pointer_type  m_result_ptr ;
+  size_type *         m_scratch_space ;
+  size_type *         m_scratch_flags ;
+  size_type *         m_unified_space ;
+
+  typedef typename Kokkos::Experimental::Impl::Reduce::DeviceIterateTile DeviceIteratePattern; // NOTE(review): DeviceIterateTile shows no template arguments here — confirm against upstream Kokkos
+
+  // Whether to use the shfl-based reduction: only for statically sized value types larger than 2*sizeof(double) (i.e. more than 128 bits).
+  enum { UseShflReduction = ((sizeof(value_type)>2*sizeof(double)) && ValueTraits::StaticValueSize) };
+  // Tag types that pick the run() overload: the double tag selects the shfl path, the int tag the shared-memory path.
+private:
+  typedef double DummyShflReductionType;
+  typedef int DummySHMEMReductionType;
+  // exec_range: accumulate into 'update' by running DeviceIterateTile::exec_range() over m_policy with m_functor.
+public:
+  inline
+  __device__
+  void
+  exec_range( reference_type update ) const
+  {
+    Kokkos::Experimental::Impl::Reduce::DeviceIterateTile(m_policy, m_functor, update).exec_range();
+  }
+  // Device-side call operator: forwards to one of the two run() overloads below.
+  inline
+  __device__
+  void operator() (void) const {
+    run(Kokkos::Impl::if_c::select(1,1.0) ); // NOTE(review): if_c shows no template arguments here; presumably keyed on UseShflReduction to pick the int or double tag — confirm
+  }
+  // Shared-memory path: each threadIdx.y accumulates into its own shared slot, then cuda_single_inter_block_reduce_scan combines them.
+  __device__ inline
+  void run(const DummySHMEMReductionType& ) const
+  {
+    const integral_nonzero_constant< size_type , ValueTraits::StaticValueSize / sizeof(size_type) >
+      word_count( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) / sizeof(size_type) );
+
+    {
+      reference_type value =
+        ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , kokkos_impl_cuda_shared_memory() + threadIdx.y * word_count.value );
+
+      // Number of blocks is bounded so that the reduction can be limited to two passes.
+      // Each thread block is given an approximately equal amount of work to perform.
+      // Accumulate the values for this block.
+      // The accumulation ordering does not match the final pass, but is arithmetically equivalent.
+
+      this-> exec_range( value );
+    }
+
+    // Reduce with final value at blockDim.y - 1 location.
+    // Problem: non power-of-two blockDim
+    if ( cuda_single_inter_block_reduce_scan(
+           ReducerConditional::select(m_functor , m_reducer) , blockIdx.x , gridDim.x ,
+           kokkos_impl_cuda_shared_memory() , m_scratch_space , m_scratch_flags ) ) {
+
+      // This is the final block with the final result at the final threads' location
+      size_type * const shared = kokkos_impl_cuda_shared_memory() + ( blockDim.y - 1 ) * word_count.value ;
+      size_type * const global = m_unified_space ? m_unified_space : m_scratch_space ;
+
+      if ( threadIdx.y == 0 ) {
+        Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , shared );
+      }
+
+      if ( CudaTraits::WarpSize < word_count.value ) { __syncthreads(); }
+
+      for ( unsigned i = threadIdx.y ; i < word_count.value ; i += blockDim.y ) { global[i] = shared[i]; }
+    }
+  }
+  // Shfl path: each thread accumulates into a local value_type, then Impl::cuda_inter_block_reduction combines them.
+  __device__ inline
+   void run(const DummyShflReductionType&) const
+   {
+
+     value_type value;
+     ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value);
+     // Number of blocks is bounded so that the reduction can be limited to two passes.
+     // Each thread block is given an approximately equal amount of work to perform.
+     // Accumulate the values for this block.
+     // The accumulation ordering does not match the final pass, but is arithmetically equivalent.
+
+     const Member work_part =
+       ( ( m_policy.m_num_tiles + ( gridDim.x - 1 ) ) / gridDim.x ); //portion of tiles handled by each block
+
+     this-> exec_range( value );
+
+     pointer_type const result = (pointer_type) (m_unified_space ? m_unified_space : m_scratch_space) ;
+
+     int max_active_thread = work_part < blockDim.y ? work_part:blockDim.y;
+     max_active_thread = (max_active_thread == 0)?blockDim.y:max_active_thread;
+
+     value_type init;
+     ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &init);
+     if(Impl::cuda_inter_block_reduction
+         (value,init,ValueJoin(ReducerConditional::select(m_functor , m_reducer)),m_scratch_space,result,m_scratch_flags,max_active_thread)) {
+       const unsigned id = threadIdx.y*blockDim.x + threadIdx.x;
+       if(id==0) {
+         Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , (void*) &value );
+         *result = value;
+       }
+     }
+   }
+
+  // Block size constrained by shared memory: start at CudaTraits::WarpSize * 8 and halve until the reduce/scan scratch fits.
+  static inline
+  unsigned local_block_size( const FunctorType & f )
+    {
+      unsigned n = CudaTraits::WarpSize * 8 ;
+      while ( n && CudaTraits::SharedMemoryCapacity < cuda_single_inter_block_reduce_scan_shmem( f , n ) ) { n >>= 1 ; }
+      return n ;
+    }
+  // Launches the reduction and copies the result to m_result_ptr; with zero tiles only the init value is written.
+  inline
+  void execute()
+    {
+      const int nwork = m_policy.m_num_tiles;
+      if ( nwork ) {
+        int block_size = m_policy.m_prod_tile_dims;
+        // CONSTRAINT: Algorithm requires block_size >= product of tile dimensions
+        // Round the tile-dimension product up to a power of two
+        int exponent_pow_two = std::ceil( std::log2(block_size) );
+        block_size = std::pow(2, exponent_pow_two);
+        int suggested_blocksize = local_block_size( m_functor );
+
+        block_size = (block_size > suggested_blocksize) ? block_size : suggested_blocksize ; // keep the larger of the two. Note: block_size must be less than or equal to 512
+
+
+        m_scratch_space = cuda_internal_scratch_space( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) * block_size /* block_size == max block_count */ );
+        m_scratch_flags = cuda_internal_scratch_flags( sizeof(size_type) );
+        m_unified_space = cuda_internal_scratch_unified( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) );
+
+        // REQUIRED ( 1 , N , 1 )
+        const dim3 block( 1 , block_size , 1 );
+        // Required grid.x <= block.y
+        const dim3 grid( std::min( int(block.y) , int( nwork ) ) , 1 , 1 );
+
+      const int shmem = UseShflReduction?0:cuda_single_inter_block_reduce_scan_shmem( m_functor , block.y );
+
+      CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute
+
+      Cuda::fence();
+
+      if ( m_result_ptr ) {
+        if ( m_unified_space ) {
+          const int count = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer)  );
+          for ( int i = 0 ; i < count ; ++i ) { m_result_ptr[i] = pointer_type(m_unified_space)[i] ; }
+        }
+        else {
+          const int size = ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer)  );
+          DeepCopy( m_result_ptr , m_scratch_space , size );
+        }
+      }
+    }
+    else {
+      if (m_result_ptr) {
+        ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , m_result_ptr );
+      }
+    }
+  }
+  // Construct from a functor and a result view (enabled only when HostViewType is a Kokkos view); m_reducer is InvalidType().
+  template< class HostViewType >
+  ParallelReduce( const FunctorType  & arg_functor
+                , const Policy       & arg_policy
+                , const HostViewType & arg_result
+                , typename std::enable_if<
+                   Kokkos::is_view< HostViewType >::value
+                ,void*>::type = NULL)
+  : m_functor( arg_functor )
+  , m_policy(  arg_policy )
+  , m_reducer( InvalidType() )
+  , m_result_ptr( arg_result.ptr_on_device() )
+  , m_scratch_space( 0 )
+  , m_scratch_flags( 0 )
+  , m_unified_space( 0 )
+  {}
+  // Construct from a functor and a reducer; the result pointer is taken from reducer.view().
+  ParallelReduce( const FunctorType  & arg_functor
+                , const Policy       & arg_policy
+                , const ReducerType & reducer)
+  : m_functor( arg_functor )
+  , m_policy(  arg_policy )
+  , m_reducer( reducer )
+  , m_result_ptr( reducer.view().ptr_on_device() )
+  , m_scratch_space( 0 )
+  , m_scratch_flags( 0 )
+  , m_unified_space( 0 )
+  {}
+};
+
+
 //----------------------------------------------------------------------------
 
 #if 1
@@ -753,6 +1096,7 @@ private:
   typedef TeamPolicyInternal< Kokkos::Cuda, Properties ... >  Policy ;
   typedef typename Policy::member_type  Member ;
   typedef typename Policy::work_tag     WorkTag ;
+  typedef typename Policy::launch_bounds     LaunchBounds ;
 
   typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional;
   typedef typename ReducerConditional::type ReducerTypeFwd;
@@ -819,15 +1163,15 @@ public:
     if ( m_scratch_size[1]>0 ) {
       __shared__ int base_thread_id;
       if (threadIdx.x==0 && threadIdx.y==0 ) {
-        threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % kokkos_impl_cuda_lock_arrays.n;
+        threadid = ((blockIdx.x*blockDim.z + threadIdx.z) * blockDim.x * blockDim.y) % Kokkos::Impl::g_device_cuda_lock_arrays.n;
         threadid = ((threadid + blockDim.x * blockDim.y-1)/(blockDim.x * blockDim.y)) * blockDim.x * blockDim.y;
-        if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y;
+        if(threadid > Kokkos::Impl::g_device_cuda_lock_arrays.n) threadid-=blockDim.x * blockDim.y;
         int done = 0;
         while (!done) {
-          done = (0 == atomicCAS(&kokkos_impl_cuda_lock_arrays.atomic[threadid],0,1));
+          done = (0 == atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid],0,1));
           if(!done) {
             threadid += blockDim.x * blockDim.y;
-            if(threadid > kokkos_impl_cuda_lock_arrays.n) threadid = 0;
+            if(threadid > Kokkos::Impl::g_device_cuda_lock_arrays.n) threadid = 0;
           }
         }
         base_thread_id = threadid;
@@ -840,7 +1184,7 @@ public:
     if ( m_scratch_size[1]>0 ) {
       __syncthreads();
       if (threadIdx.x==0 && threadIdx.y==0 )
-        kokkos_impl_cuda_lock_arrays.atomic[threadid]=0;
+        Kokkos::Impl::g_device_cuda_lock_arrays.scratch[threadid]=0;
     }
   }
 
@@ -854,7 +1198,8 @@ public:
       ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , kokkos_impl_cuda_shared_memory() + threadIdx.y * word_count.value );
 
     // Iterate this block through the league
-    for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) {
+    const int int_league_size = (int)m_league_size;
+    for ( int league_rank = blockIdx.x ; league_rank < int_league_size ; league_rank += gridDim.x ) {
       this-> template exec_team< WorkTag >
         ( Member( kokkos_impl_cuda_shared_memory() + m_team_begin
                                         , m_shmem_begin
@@ -894,7 +1239,8 @@ public:
     ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , &value);
 
     // Iterate this block through the league
-    for ( int league_rank = blockIdx.x ; league_rank < m_league_size ; league_rank += gridDim.x ) {
+    const int int_league_size = (int)m_league_size;
+    for ( int league_rank = blockIdx.x ; league_rank < int_league_size ; league_rank += gridDim.x ) {
       this-> template exec_team< WorkTag >
         ( Member( kokkos_impl_cuda_shared_memory() + m_team_begin
                                         , m_shmem_begin
@@ -936,7 +1282,7 @@ public:
         const dim3 grid( block_count , 1 , 1 );
         const int shmem_size_total = m_team_begin + m_shmem_begin + m_shmem_size ;
 
-        CudaParallelLaunch< ParallelReduce >( *this, grid, block, shmem_size_total ); // copy to device and execute
+        CudaParallelLaunch< ParallelReduce, LaunchBounds >( *this, grid, block, shmem_size_total ); // copy to device and execute
 
         Cuda::fence();
 
@@ -975,12 +1321,6 @@ public:
   , m_shmem_begin( 0 )
   , m_shmem_size( 0 )
   , m_scratch_ptr{NULL,NULL}
-  , m_league_size( arg_policy.league_size() )
-  , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
-      Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
-                                                               arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
-                                                               arg_policy.vector_length() )
-  , m_vector_size( arg_policy.vector_length() )
   , m_scratch_size{
     arg_policy.scratch_size(0,( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
         Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
@@ -991,6 +1331,12 @@ public:
                                                                  arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
                                                                  arg_policy.vector_length() )
         )}
+  , m_league_size( arg_policy.league_size() )
+  , m_team_size( 0 <= arg_policy.team_size() ? arg_policy.team_size() :
+      Kokkos::Impl::cuda_get_opt_block_size< ParallelReduce >( arg_functor , arg_policy.vector_length(),
+                                                               arg_policy.team_scratch_size(0),arg_policy.thread_scratch_size(0) ) /
+                                                               arg_policy.vector_length() )
+  , m_vector_size( arg_policy.vector_length() )
   {
     // Return Init value if the number of worksets is zero
     if( arg_policy.league_size() == 0) {
@@ -1150,6 +1496,7 @@ private:
   typedef typename reducer_type<>::pointer_type    pointer_type ;
   typedef typename reducer_type<>::reference_type  reference_type ;
   typedef typename reducer_type<>::value_type      value_type ;
+  typedef typename Policy::launch_bounds           LaunchBounds ;
 
   typedef Kokkos::Impl::FunctorAnalysis
     < Kokkos::Impl::FunctorPatternInterface::REDUCE
@@ -1273,7 +1620,7 @@ public:
         const int  shmem = m_shmem_team_begin + m_shmem_team_size ;
 
         // copy to device and execute
-        CudaParallelLaunch( *this, grid, block, shmem );
+        CudaParallelLaunch( *this, grid, block, shmem );
 
         Cuda::fence();
 
@@ -1373,7 +1720,7 @@ public:
 
     if ( CudaTraits::WarpSize < team_threads ) {
       // Need inter-warp team reduction (collectives) shared memory
-      // Speculate an upper bound for the value size 
+      // Speculate an upper bound for the value size
 
       m_shmem_team_begin =
         align_scratch( CudaTraits::warp_count(team_threads) * sizeof(double) );
@@ -1426,7 +1773,7 @@ public:
 
     // Reduce space has claim flag followed by vaue buffer
     const int global_reduce_value_size =
-      max_concurrent_block * 
+      max_concurrent_block *
       ( aligned_flag_size + align_scratch( value_size ) );
 
     // Scratch space has claim flag followed by scratch buffer
@@ -1469,6 +1816,7 @@ private:
   typedef typename Policy::member_type  Member ;
   typedef typename Policy::work_tag     WorkTag ;
   typedef typename Policy::WorkRange    WorkRange ;
+  typedef typename Policy::launch_bounds  LaunchBounds ;
 
   typedef Kokkos::Impl::FunctorValueTraits< FunctorType, WorkTag > ValueTraits ;
   typedef Kokkos::Impl::FunctorValueInit<   FunctorType, WorkTag > ValueInit ;
@@ -1655,10 +2003,10 @@ public:
         const int shmem = ValueTraits::value_size( m_functor ) * ( block_size + 2 );
 
         m_final = false ;
-        CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
+        CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute
 
         m_final = true ;
-        CudaParallelLaunch< ParallelScan >( *this, grid, block, shmem ); // copy to device and execute
+        CudaParallelLaunch< ParallelScan, LaunchBounds >( *this, grid, block, shmem ); // copy to device and execute
       }
     }
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
index 432c7895cc..709cbbd534 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_ReduceScan.hpp
@@ -151,7 +151,7 @@ template< class ValueType , class JoinOp>
 __device__
 inline void cuda_intra_warp_reduction( ValueType& result,
                                        const JoinOp& join,
-                                       const int max_active_thread = blockDim.y) {
+                                       const uint32_t max_active_thread = blockDim.y) {
 
   unsigned int shift = 1;
 
@@ -268,29 +268,33 @@ bool cuda_inter_block_reduction( typename FunctorValueTraits< FunctorType , ArgT
         if( id + 1 < int(gridDim.x) )
           join(value, tmp);
       }
+      int active = __ballot(1);
       if (int(blockDim.x*blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2,32);
         if( id + 2 < int(gridDim.x) )
           join(value, tmp);
       }
+      active += __ballot(1);
       if (int(blockDim.x*blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4,32);
         if( id + 4 < int(gridDim.x) )
           join(value, tmp);
       }
+      active += __ballot(1);
       if (int(blockDim.x*blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8,32);
         if( id + 8 < int(gridDim.x) )
           join(value, tmp);
       }
+      active += __ballot(1);
       if (int(blockDim.x*blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16,32);
         if( id + 16 < int(gridDim.x) )
           join(value, tmp);
       }
+      active += __ballot(1);
     }
   }
-
   //The last block has in its thread=0 the global reduction value through "value"
   return last_block;
 #else
@@ -302,7 +306,7 @@ template< class ReducerType >
 __device__ inline
 typename std::enable_if< Kokkos::is_reducer::value >::type
 cuda_intra_warp_reduction( const ReducerType& reducer,
-                           const int max_active_thread = blockDim.y) {
+                           const uint32_t max_active_thread = blockDim.y) {
 
   typedef typename ReducerType::value_type ValueType;
 
@@ -428,26 +432,31 @@ cuda_inter_block_reduction( const ReducerType& reducer,
         if( id + 1 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
+      int active = __ballot(1);
       if (int(blockDim.x*blockDim.y) > 2) {
         value_type tmp = Kokkos::shfl_down(value, 2,32);
         if( id + 2 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
+      active += __ballot(1);
       if (int(blockDim.x*blockDim.y) > 4) {
         value_type tmp = Kokkos::shfl_down(value, 4,32);
         if( id + 4 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
+      active += __ballot(1);
       if (int(blockDim.x*blockDim.y) > 8) {
         value_type tmp = Kokkos::shfl_down(value, 8,32);
         if( id + 8 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
+      active += __ballot(1);
       if (int(blockDim.x*blockDim.y) > 16) {
         value_type tmp = Kokkos::shfl_down(value, 16,32);
         if( id + 16 < int(gridDim.x) )
           reducer.join(value, tmp);
       }
+      active += __ballot(1);
     }
   }
 
@@ -594,7 +603,7 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType     & functor ,
   typedef FunctorValueOps<    FunctorType , ArgTag >  ValueOps ;
 
   typedef typename ValueTraits::pointer_type    pointer_type ;
-  typedef typename ValueTraits::reference_type  reference_type ;
+  //typedef typename ValueTraits::reference_type  reference_type ;
 
   // '__ffs' = position of the least significant bit set to 1.
   // 'blockDim.y' is guaranteed to be a power of two so this
@@ -637,7 +646,7 @@ bool cuda_single_inter_block_reduce_scan( const FunctorType     & functor ,
 
     {
       void * const shared_ptr = shared_data + word_count.value * threadIdx.y ;
-      reference_type shared_value = ValueInit::init( functor , shared_ptr );
+      /* reference_type shared_value = */ ValueInit::init( functor , shared_ptr );
 
       for ( size_type i = b ; i < e ; ++i ) {
         ValueJoin::join( functor , shared_ptr , global_data + word_count.value * i );
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
index 3c6f0a5dda..5f08800c40 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.cpp
@@ -58,25 +58,56 @@ template class TaskQueue< Kokkos::Cuda > ;
 
 //----------------------------------------------------------------------------
 
+#if defined( KOKKOS_DEBUG )
+
+__device__
+void verify_warp_convergence( const char * const where )
+{
+  // Debug aid: collect a ballot over the warp and report when it is not all-ones.
+  const unsigned lane_mask = __ballot(1);
+
+  // Every ballot bit set: nothing to report.
+  if ( lane_mask == ~0u ) return ;
+
+  printf(" verify_warp_convergence( %s ) (%d,%d,%d) (%d,%d,%d) failed %x\n"
+        , where
+        , blockIdx.x
+        , blockIdx.y
+        , blockIdx.z
+        , threadIdx.x
+        , threadIdx.y
+        , threadIdx.z
+        , lane_mask );
+}
+
+#endif // #if defined( KOKKOS_DEBUG )
+
+//----------------------------------------------------------------------------
+
 __device__
 void TaskQueueSpecialization< Kokkos::Cuda >::driver
-  ( TaskQueueSpecialization< Kokkos::Cuda >::queue_type * const queue )
+  ( TaskQueueSpecialization< Kokkos::Cuda >::queue_type * const queue 
+  , int32_t shmem_per_warp )
 {
   using Member = TaskExec< Kokkos::Cuda > ;
   using Queue  = TaskQueue< Kokkos::Cuda > ;
-  using task_root_type = TaskBase< Kokkos::Cuda , void , void > ;
+  using task_root_type = TaskBase< void , void , void > ;
+
+  extern __shared__ int32_t shmem_all[];
 
   task_root_type * const end = (task_root_type *) task_root_type::EndTag ;
 
-  Member single_exec( 1 );
-  Member team_exec( blockDim.y );
+  int32_t * const warp_shmem =
+    shmem_all + ( threadIdx.z * shmem_per_warp ) / sizeof(int32_t);
+
+  task_root_type * const task_shmem = (task_root_type *) warp_shmem ;
 
   const int warp_lane = threadIdx.x + threadIdx.y * blockDim.x ;
 
-  union {
-    task_root_type * ptr ;
-    int              raw[2] ;
-  } task ;
+  Member single_exec( warp_shmem , 1 );
+  Member team_exec( warp_shmem , blockDim.y );
+
+  task_root_type * task_ptr ;
 
   // Loop until all queues are empty and no tasks in flight
 
@@ -87,41 +118,86 @@ void TaskQueueSpecialization< Kokkos::Cuda >::driver
 
     if ( 0 == warp_lane ) {
 
-      task.ptr = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ;
+      task_ptr = 0 < *((volatile int *) & queue->m_ready_count) ? end : 0 ;
 
       // Loop by priority and then type
-      for ( int i = 0 ; i < Queue::NumQueue && end == task.ptr ; ++i ) {
-        for ( int j = 0 ; j < 2 && end == task.ptr ; ++j ) {
-          task.ptr = Queue::pop_ready_task( & queue->m_ready[i][j] );
+      for ( int i = 0 ; i < Queue::NumQueue && end == task_ptr ; ++i ) {
+        for ( int j = 0 ; j < 2 && end == task_ptr ; ++j ) {
+          task_ptr = Queue::pop_ready_task( & queue->m_ready[i][j] );
         }
       }
 
 #if 0
 printf("TaskQueue::driver(%d,%d) task(%lx)\n",threadIdx.z,blockIdx.x
-      , uintptr_t(task.ptr));
+      , uintptr_t(task_ptr));
 #endif
 
     }
 
     // shuffle broadcast
 
-    task.raw[0] = __shfl( task.raw[0] , 0 );
-    task.raw[1] = __shfl( task.raw[1] , 0 );
+    ((int*) & task_ptr )[0] = __shfl( ((int*) & task_ptr )[0] , 0 );
+    ((int*) & task_ptr )[1] = __shfl( ((int*) & task_ptr )[1] , 0 );
 
-    if ( 0 == task.ptr ) break ; // 0 == queue->m_ready_count
+#if defined( KOKKOS_DEBUG )
+    verify_warp_convergence("task_ptr");
+#endif
 
-    if ( end != task.ptr ) {
-      if ( task_root_type::TaskTeam == task.ptr->m_task_type ) {
+    if ( 0 == task_ptr ) break ; // 0 == queue->m_ready_count
+
+    if ( end != task_ptr ) {
+
+      // Whole warp copy task's closure to/from shared memory.
+      // Use all threads of warp for coalesced read/write.
+
+      int32_t const b = sizeof(task_root_type) / sizeof(int32_t);
+      int32_t const e = *((int32_t volatile *)( & task_ptr->m_alloc_size )) / sizeof(int32_t);
+
+      int32_t volatile * const task_mem = (int32_t volatile *) task_ptr ;
+
+      // copy global to shared memory:
+
+      for ( int32_t i = warp_lane ; i < e ; i += CudaTraits::WarpSize ) {
+        warp_shmem[i] = task_mem[i] ;
+      }
+
+      Kokkos::memory_fence();
+
+      // Copy done - use memory fence so that memory writes are visible.
+      // For reliable warp convergence on Pascal and Volta an explicit
+      // warp level synchronization will also be required.
+
+      if ( task_root_type::TaskTeam == task_shmem->m_task_type ) {
         // Thread Team Task
-        (*task.ptr->m_apply)( task.ptr , & team_exec );
+        (*task_shmem->m_apply)( task_shmem , & team_exec );
       }
       else if ( 0 == threadIdx.y ) {
         // Single Thread Task
-        (*task.ptr->m_apply)( task.ptr , & single_exec );
+        (*task_shmem->m_apply)( task_shmem , & single_exec );
       }
 
+      // copy shared to global memory:
+
+      for ( int32_t i = b + warp_lane ; i < e ; i += CudaTraits::WarpSize ) {
+        task_mem[i] = warp_shmem[i] ;
+      }
+
+      Kokkos::memory_fence();
+
+#if defined( KOKKOS_DEBUG )
+    verify_warp_convergence("apply");
+#endif
+
+      // If respawn requested copy respawn data back to main memory
+
       if ( 0 == warp_lane ) {
-        queue->complete( task.ptr );
+
+        if ( ((task_root_type *) task_root_type::LockTag) != task_shmem->m_next ) {
+          ( (volatile task_root_type *) task_ptr )->m_next = task_shmem->m_next ;
+          ( (volatile task_root_type *) task_ptr )->m_priority = task_shmem->m_priority ;
+        }
+
+        queue->complete( task_ptr );
       }
     }
   } while(1);
@@ -130,18 +206,20 @@ printf("TaskQueue::driver(%d,%d) task(%lx)\n",threadIdx.z,blockIdx.x
 namespace {
 
 __global__
-void cuda_task_queue_execute( TaskQueue< Kokkos::Cuda > * queue )
-{ TaskQueueSpecialization< Kokkos::Cuda >::driver( queue ); }
+void cuda_task_queue_execute( TaskQueue< Kokkos::Cuda > * queue 
+                            , int32_t shmem_size )
+{ TaskQueueSpecialization< Kokkos::Cuda >::driver( queue , shmem_size ); }
 
 }
 
 void TaskQueueSpecialization< Kokkos::Cuda >::execute
   ( TaskQueue< Kokkos::Cuda > * const queue )
 {
+  const int shared_per_warp = 2048 ;
   const int warps_per_block = 4 ;
   const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 );
   const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block );
-  const int shared = 0 ;
+  const int shared_total = shared_per_warp * warps_per_block ;
   const cudaStream_t stream = 0 ;
 
   CUDA_SAFE_CALL( cudaDeviceSynchronize() );
@@ -159,7 +237,7 @@ printf("cuda_task_queue_execute before\n");
   //
   // CUDA_SAFE_CALL( cudaDeviceSetLimit( cudaLimitStackSize , stack_size ) );
 
-  cuda_task_queue_execute<<< grid , block , shared , stream >>>( queue );
+  cuda_task_queue_execute<<< grid , block , shared_total , stream >>>( queue , shared_per_warp );
 
   CUDA_SAFE_CALL( cudaGetLastError() );
 
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
index 5d08219ea5..4a52985d29 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Task.hpp
@@ -57,7 +57,7 @@ namespace {
 template< typename TaskType >
 __global__
 void set_cuda_task_base_apply_function_pointer
-  ( TaskBase::function_type * ptr )
+  ( TaskBase::function_type * ptr )
 { *ptr = TaskType::apply ; }
 
 }
@@ -78,7 +78,7 @@ public:
   void iff_single_thread_recursive_execute( queue_type * const ) {}
 
   __device__
-  static void driver( queue_type * const );
+  static void driver( queue_type * const , int32_t );
 
   static
   void execute( queue_type * const );
@@ -106,7 +106,14 @@ public:
 
 extern template class TaskQueue< Kokkos::Cuda > ;
 
+}} /* namespace Kokkos::Impl */
+
 //----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
+
 /**\brief  Impl::TaskExec is the TaskScheduler::member_type
  *         passed to tasks running in a Cuda space.
  *
@@ -134,11 +141,13 @@ private:
   friend class Kokkos::Impl::TaskQueue< Kokkos::Cuda > ;
   friend class Kokkos::Impl::TaskQueueSpecialization< Kokkos::Cuda > ;
 
+  int32_t * m_team_shmem ;
   const int m_team_size ;
 
   __device__
-  TaskExec( int arg_team_size = blockDim.y )
-    : m_team_size( arg_team_size ) {}
+  TaskExec( int32_t * arg_team_shmem , int arg_team_size = blockDim.y )
+    : m_team_shmem( arg_team_shmem )
+    , m_team_size( arg_team_size ) {}
 
 public:
 
@@ -154,7 +163,13 @@ public:
 
 };
 
+}} /* namespace Kokkos::Impl */
+
 //----------------------------------------------------------------------------
+//----------------------------------------------------------------------------
+
+namespace Kokkos {
+namespace Impl {
 
 template
 struct TeamThreadRangeBoundariesStruct >
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
index 084daa098b..3f3d85ecd1 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_Team.hpp
@@ -106,7 +106,7 @@ private:
   typedef Kokkos::Cuda                           execution_space ;
   typedef execution_space::scratch_memory_space  scratch_memory_space ;
 
-  void                * m_team_reduce ;
+  mutable void        * m_team_reduce ;
   scratch_memory_space  m_team_shared ;
   int                   m_team_reduce_size ;
   int                   m_league_rank ;
@@ -166,7 +166,7 @@ public:
       if ( 1 == blockDim.z ) { // team == block
         __syncthreads();
         // Wait for shared data write until all threads arrive here
-        if ( threadIdx.x == 0 && threadIdx.y == thread_id ) {
+        if ( threadIdx.x == 0u && threadIdx.y == (uint32_t)thread_id ) {
           *((ValueType*) m_team_reduce) = val ;
         }
         __syncthreads(); // Wait for shared data read until root thread writes
@@ -210,7 +210,7 @@ public:
       const int wx =
         ( threadIdx.x + blockDim.x * threadIdx.y ) & CudaTraits::WarpIndexMask ;
 
-      for ( int i = CudaTraits::WarpSize ; blockDim.x <= ( i >>= 1 ) ; ) {
+      for ( int i = CudaTraits::WarpSize ; (int)blockDim.x <= ( i >>= 1 ) ; ) {
 
         cuda_shfl_down( reducer.reference() , tmp , i , CudaTraits::WarpSize );
 
@@ -354,7 +354,7 @@ public:
 
       for ( int i = blockDim.x ; ( i >>= 1 ) ; ) {
         cuda_shfl_down( reducer.reference() , tmp , i , blockDim.x );
-        if ( threadIdx.x < i ) { reducer.join( tmp , reducer.reference() ); }
+        if ( (int)threadIdx.x < i ) { reducer.join( tmp , reducer.reference() ); }
       }
 
       // Broadcast from root lane to all other lanes.
@@ -410,7 +410,7 @@ public:
 
         value_type tmp( reducer.reference() );
 
-        for ( int i = CudaTraits::WarpSize ; blockDim.x <= ( i >>= 1 ) ; ) {
+        for ( int i = CudaTraits::WarpSize ; (int)blockDim.x <= ( i >>= 1 ) ; ) {
 
           cuda_shfl_down( reducer.reference(), tmp, i, CudaTraits::WarpSize );
 
@@ -479,7 +479,7 @@ public:
 
           __threadfence(); // Wait until global write is visible.
 
-          last_block = gridDim.x ==
+          last_block = (int)gridDim.x ==
                        1 + Kokkos::atomic_fetch_add(global_scratch_flags,1);
 
           // If last block then reset count
@@ -509,7 +509,7 @@ public:
         reducer.copy( ((pointer_type)shmem) + offset
                     , ((pointer_type)global_scratch_space) + offset );
 
-        for ( int i = nentry + tid ; i < gridDim.x ; i += nentry ) {
+        for ( int i = nentry + tid ; i < (int)gridDim.x ; i += nentry ) {
           reducer.join( ((pointer_type)shmem) + offset
                       , ((pointer_type)global_scratch_space)
                         + i * reducer.length() );
@@ -576,6 +576,14 @@ public:
     , m_league_size( arg_league_size )
     {}
 
+public:
+  // Declare to avoid unused private member warnings which are triggered
+  // when SFINAE excludes the member function which uses these variables.
+  // Making another class a friend also suppresses these warnings.
+  bool impl_avoid_sfinae_warning() const noexcept
+  {
+    return m_team_reduce_size > 0 && m_team_reduce != nullptr;
+  }
 };
 
 } // namspace Impl
@@ -913,10 +921,10 @@ void parallel_scan
     //  [t] += [t-4] if t >= 4
     //  ...
 
-    for ( int j = 1 ; j < blockDim.x ; j <<= 1 ) {
+    for ( int j = 1 ; j < (int)blockDim.x ; j <<= 1 ) {
       value_type tmp = 0 ;
       Impl::cuda_shfl_up( tmp , sval , j , blockDim.x );
-      if ( j <= threadIdx.x ) { sval += tmp ; }
+      if ( j <= (int)threadIdx.x ) { sval += tmp ; }
     }
 
     // Include accumulation and remove value for exclusive scan:
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
new file mode 100644
index 0000000000..e11ae4798f
--- /dev/null
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_UniqueToken.hpp
@@ -0,0 +1,133 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_UNIQUE_TOKEN_HPP
+#define KOKKOS_CUDA_UNIQUE_TOKEN_HPP
+
+#include 
+#ifdef KOKKOS_ENABLE_CUDA
+
+#include 
+#include 
+#include 
+#include 
+
+namespace Kokkos { namespace Experimental {
+
+// both global and instance Unique Tokens are implemented in the same way
+template<>
+class UniqueToken< Cuda, UniqueTokenScope::Global >
+{
+private:
+
+  uint32_t volatile * m_buffer ;
+  uint32_t            m_count ;
+
+public:
+
+  using execution_space = Cuda;
+
+  explicit
+  UniqueToken( execution_space const& );
+
+  KOKKOS_INLINE_FUNCTION
+  UniqueToken() : m_buffer(0), m_count(0) {}
+
+  KOKKOS_INLINE_FUNCTION
+  UniqueToken( const UniqueToken & ) = default;
+
+  KOKKOS_INLINE_FUNCTION
+  UniqueToken( UniqueToken && )      = default;
+
+  KOKKOS_INLINE_FUNCTION
+  UniqueToken & operator=( const UniqueToken & ) = default ;
+
+  KOKKOS_INLINE_FUNCTION
+  UniqueToken & operator=( UniqueToken && ) = default ;
+
+  /// \brief upper bound for acquired values, i.e. 0 <= value < size()
+  KOKKOS_INLINE_FUNCTION
+  int32_t size() const noexcept { return m_count ; }
+
+  /// \brief acquire value such that 0 <= value < size()
+  KOKKOS_INLINE_FUNCTION
+  int32_t acquire() const
+  {
+    const Kokkos::pair result =
+      Kokkos::Impl::concurrent_bitset::
+        acquire_bounded( m_buffer
+                       , m_count
+                       , Kokkos::Impl::clock_tic() % m_count
+                       );
+
+   if ( result.first < 0 ) {
+     Kokkos::abort("UniqueToken failure to release tokens, no tokens available" );
+   }
+
+    return result.first;
+  }
+
+  /// \brief release an acquired value
+  KOKKOS_INLINE_FUNCTION
+  void release( int32_t i ) const noexcept
+  {
+    Kokkos::Impl::concurrent_bitset::release( m_buffer, i );
+  }
+};
+
+template<>
+class UniqueToken< Cuda, UniqueTokenScope::Instance >
+  : public UniqueToken< Cuda, UniqueTokenScope::Global >
+{
+public:
+
+  explicit
+  UniqueToken( execution_space const& arg )
+    : UniqueToken< Cuda, UniqueTokenScope::Global >( arg ) {}
+};
+
+}} // namespace Kokkos::Experimental
+
+#endif // KOKKOS_ENABLE_CUDA
+#endif // KOKKOS_CUDA_UNIQUE_TOKEN_HPP
+
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
index f5e2d87fb6..d641622bb6 100644
--- a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_View.hpp
@@ -221,7 +221,6 @@ struct CudaLDGFetch {
 //----------------------------------------------------------------------------
 
 namespace Kokkos {
-namespace Experimental {
 namespace Impl {
 
 /** \brief  Replace Default ViewDataHandle with Cuda texture fetch specialization
@@ -294,9 +293,8 @@ public:
     }
 };
 
-}
-}
-}
+} // namespace Impl
+} // namespace Kokkos
 
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
diff --git a/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp
new file mode 100644
index 0000000000..99778c64b1
--- /dev/null
+++ b/lib/kokkos/core/src/Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp
@@ -0,0 +1,119 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CUDA_WORKGRAPHPOLICY_HPP
+#define KOKKOS_CUDA_WORKGRAPHPOLICY_HPP
+
+namespace Kokkos {
+namespace Impl {
+
+template< class FunctorType , class ... Traits >
+class ParallelFor< FunctorType ,
+                   Kokkos::Experimental::WorkGraphPolicy< Traits ... > ,
+                   Kokkos::Cuda
+                 >
+  : public Kokkos::Impl::Experimental::
+           WorkGraphExec< FunctorType,
+                          Kokkos::Cuda,
+                          Traits ...
+                        >
+{
+public:
+
+  typedef Kokkos::Experimental::WorkGraphPolicy< Traits ... >   Policy ;
+  typedef Kokkos::Impl::Experimental::
+          WorkGraphExec Base ;
+  typedef ParallelFor        Self ;
+
+private:
+
+  template< class TagType >
+  __device__
+  typename std::enable_if< std::is_same< TagType , void >::value >::type
+  exec_one(const typename Policy::member_type& i) const {
+    Base::m_functor( i );
+  }
+
+  template< class TagType >
+  __device__
+  typename std::enable_if< ! std::is_same< TagType , void >::value >::type
+  exec_one(const typename Policy::member_type& i) const {
+    const TagType t{} ;
+    Base::m_functor( t , i );
+  }
+
+public:
+
+  __device__
+  inline
+  void operator()() const {
+    for (std::int32_t i; (-1 != (i = Base::before_work())); ) {
+      exec_one< typename Policy::work_tag >( i );
+      Base::after_work(i);
+    }
+  }
+
+  inline
+  void execute()
+  {
+    const int warps_per_block = 4 ;
+    const dim3 grid( Kokkos::Impl::cuda_internal_multiprocessor_count() , 1 , 1 );
+    const dim3 block( 1 , Kokkos::Impl::CudaTraits::WarpSize , warps_per_block );
+    const int shared = 0 ;
+    const cudaStream_t stream = 0 ;
+
+    Kokkos::Impl::CudaParallelLaunch(*this, grid, block, shared, stream);
+  }
+
+  inline
+  ParallelFor( const FunctorType & arg_functor
+             , const Policy      & arg_policy )
+    : Base( arg_functor, arg_policy )
+  {
+  }
+};
+
+} // namespace Impl
+} // namespace Kokkos
+
+#endif /* #define KOKKOS_CUDA_WORKGRAPHPOLICY_HPP */
diff --git a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
index 4f68d9c2c0..6ef7443a14 100644
--- a/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
+++ b/lib/kokkos/core/src/KokkosExp_MDRangePolicy.hpp
@@ -52,6 +52,7 @@
 
 #if defined( __CUDACC__ ) && defined( KOKKOS_ENABLE_CUDA )
 #include
+#include 
 #endif
 
 namespace Kokkos { namespace Experimental {
@@ -120,28 +121,17 @@ struct MDRangePolicy
                                        , typename traits::index_type
                                        > ;
 
+  typedef MDRangePolicy execution_policy; // needed for is_execution_policy interrogation
+
   static_assert( !std::is_same::value
                , "Kokkos Error: MD iteration pattern not defined" );
 
   using iteration_pattern   = typename traits::iteration_pattern;
   using work_tag            = typename traits::work_tag;
+  using launch_bounds       = typename traits::launch_bounds;
+  using member_type = typename range_policy::member_type;
 
-  static constexpr int rank = iteration_pattern::rank;
-
-  static constexpr int outer_direction = static_cast (
-      (iteration_pattern::outer_direction != Iterate::Default)
-    ? iteration_pattern::outer_direction
-    : default_outer_direction< typename traits::execution_space>::value );
-
-  static constexpr int inner_direction = static_cast (
-      iteration_pattern::inner_direction != Iterate::Default
-    ? iteration_pattern::inner_direction
-    : default_inner_direction< typename traits::execution_space>::value ) ;
-
-
-  // Ugly ugly workaround intel 14 not handling scoped enum correctly
-  static constexpr int Right = static_cast( Iterate::Right );
-  static constexpr int Left  = static_cast( Iterate::Left );
+  enum { rank = static_cast(iteration_pattern::rank) };
 
   using index_type  = typename traits::index_type;
   using array_index_type = long;
@@ -155,11 +145,50 @@ struct MDRangePolicy
   // This would require the user to either pass a matching index_type parameter
   // as template parameter to the MDRangePolicy or static_cast the individual values
 
+  point_type m_lower;
+  point_type m_upper;
+  tile_type  m_tile;
+  point_type m_tile_end;
+  index_type m_num_tiles;
+  index_type m_prod_tile_dims;
+
+/*
+  // NDE enum impl definition alternative - replace static constexpr int ? 
+  enum { outer_direction = static_cast (
+      (iteration_pattern::outer_direction != Iterate::Default)
+    ? iteration_pattern::outer_direction
+    : default_outer_direction< typename traits::execution_space>::value ) };
+
+  enum { inner_direction = static_cast (
+      iteration_pattern::inner_direction != Iterate::Default
+    ? iteration_pattern::inner_direction
+    : default_inner_direction< typename traits::execution_space>::value ) };
+
+  enum { Right = static_cast( Iterate::Right ) };
+  enum { Left  = static_cast( Iterate::Left ) };
+*/
+  //static constexpr int rank = iteration_pattern::rank;
+
+  static constexpr int outer_direction = static_cast (
+      (iteration_pattern::outer_direction != Iterate::Default)
+    ? iteration_pattern::outer_direction
+    : default_outer_direction< typename traits::execution_space>::value );
+
+  static constexpr int inner_direction = static_cast (
+      iteration_pattern::inner_direction != Iterate::Default
+    ? iteration_pattern::inner_direction
+    : default_inner_direction< typename traits::execution_space>::value ) ;
+
+  // Ugly ugly workaround intel 14 not handling scoped enum correctly
+  static constexpr int Right = static_cast( Iterate::Right );
+  static constexpr int Left  = static_cast( Iterate::Left );
+
   MDRangePolicy( point_type const& lower, point_type const& upper, tile_type const& tile = tile_type{} )
     : m_lower(lower)
     , m_upper(upper)
     , m_tile(tile)
     , m_num_tiles(1)
+    , m_prod_tile_dims(1)
   {
     // Host
     if ( true
@@ -172,8 +201,8 @@ struct MDRangePolicy
       for (int i=0; i 0)) )
+          if (  ((int)inner_direction == (int)Right && (i < rank-1))
+              || ((int)inner_direction == (int)Left && (i > 0)) )
           {
             m_tile[i] = 2;
           }
@@ -183,6 +212,7 @@ struct MDRangePolicy
         }
         m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]);
         m_num_tiles *= m_tile_end[i];
+        m_prod_tile_dims *= m_tile[i];
       }
     }
     #if defined(KOKKOS_ENABLE_CUDA)
@@ -190,14 +220,18 @@ struct MDRangePolicy
     {
       index_type span;
       for (int i=0; i 0)) )
+          if (  ((int)inner_direction == (int)Right && (i < rank-1))
+              || ((int)inner_direction == (int)Left && (i > 0)) )
           {
-            m_tile[i] = 2;
+            if ( m_prod_tile_dims < 512 ) {
+              m_tile[i] = 2;
+            } else {
+              m_tile[i] = 1;
+            }
           }
           else {
             m_tile[i] = 16;
@@ -205,12 +239,9 @@ struct MDRangePolicy
         }
         m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]);
         m_num_tiles *= m_tile_end[i];
+        m_prod_tile_dims *= m_tile[i];
       }
-      index_type total_tile_size_check = 1;
-      for (int i=0; i= 1024 ) { // improve this check - 1024,1024,64 max per dim (Kepler), but product num_threads < 1024; more restrictions pending register limit
+      if ( m_prod_tile_dims > 512 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
         printf(" Tile dimensions exceed Cuda limits\n");
         Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
         //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
@@ -223,19 +254,7 @@ struct MDRangePolicy
   template < typename LT , typename UT , typename TT = array_index_type >
   MDRangePolicy( std::initializer_list const& lower, std::initializer_list const& upper, std::initializer_list const& tile = {} )
   {
-#if 0
-    // This should work, less duplicated code but not yet extensively tested
-    point_type lower_tmp, upper_tmp;
-    tile_type tile_tmp;
-    for ( auto i = 0; i < rank; ++i ) {
-      lower_tmp[i] = static_cast(lower.begin()[i]);
-      upper_tmp[i] = static_cast(upper.begin()[i]);
-      tile_tmp[i]  = static_cast(tile.begin()[i]);
-    }
 
-    MDRangePolicy( lower_tmp, upper_tmp, tile_tmp );
-
-#else
     if(static_cast(m_lower.size()) != rank || static_cast(m_upper.size()) != rank)
       Kokkos::abort("MDRangePolicy: Constructor initializer lists have wrong size");
 
@@ -249,7 +268,7 @@ struct MDRangePolicy
     }
 
     m_num_tiles = 1;
-
+    m_prod_tile_dims = 1;
 
     // Host
     if ( true
@@ -262,8 +281,8 @@ struct MDRangePolicy
       for (int i=0; i 0)) )
+          if (  ((int)inner_direction == (int)Right && (i < rank-1))
+              || ((int)inner_direction == (int)Left && (i > 0)) )
           {
             m_tile[i] = 2;
           }
@@ -273,6 +292,7 @@ struct MDRangePolicy
         }
         m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]);
         m_num_tiles *= m_tile_end[i];
+        m_prod_tile_dims *= m_tile[i];
       }
     }
     #if defined(KOKKOS_ENABLE_CUDA)
@@ -284,10 +304,14 @@ struct MDRangePolicy
         if ( m_tile[i] <= 0 ) {
           // TODO: determine what is a good default tile size for cuda
           // may be rank dependent
-          if (  (inner_direction == Right && (i < rank-1))
-              || (inner_direction == Left && (i > 0)) )
+          if (  ((int)inner_direction == (int)Right && (i < rank-1))
+              || ((int)inner_direction == (int)Left && (i > 0)) )
           {
-            m_tile[i] = 2;
+            if ( m_prod_tile_dims < 512 ) {
+              m_tile[i] = 2;
+            } else {
+              m_tile[i] = 1;
+            }
           }
           else {
             m_tile[i] = 16;
@@ -295,32 +319,22 @@ struct MDRangePolicy
         }
         m_tile_end[i] = static_cast((span + m_tile[i] - 1) / m_tile[i]);
         m_num_tiles *= m_tile_end[i];
+        m_prod_tile_dims *= m_tile[i];
       }
-      index_type total_tile_size_check = 1;
-      for (int i=0; i= 1024 ) { // improve this check - 1024,1024,64 max per dim (Kepler), but product num_threads < 1024; more restrictions pending register limit
+      if ( m_prod_tile_dims > 512 ) { // Match Cuda restriction for ParallelReduce; 1024,1024,64 max per dim (Kepler), but product num_threads < 1024
         printf(" Tile dimensions exceed Cuda limits\n");
         Kokkos::abort(" Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
         //Kokkos::Impl::throw_runtime_exception( " Cuda ExecSpace Error: MDRange tile dims exceed maximum number of threads per block - choose smaller tile dims");
       }
     }
     #endif
-#endif
   }
 
-
-  point_type m_lower;
-  point_type m_upper;
-  tile_type  m_tile;
-  point_type m_tile_end;
-  index_type m_num_tiles;
 };
 // ------------------------------------------------------------------ //
 
 // ------------------------------------------------------------------ //
-//md_parallel_for
+//md_parallel_for - deprecated, use parallel_for
 // ------------------------------------------------------------------ //
 template 
 void md_parallel_for( MDRange const& range
@@ -335,7 +349,6 @@ void md_parallel_for( MDRange const& range
 {
   Impl::MDFunctor g(range, f);
 
-  //using range_policy = typename MDRange::range_policy;
   using range_policy = typename MDRange::impl_range_policy;
 
   Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
@@ -354,7 +367,6 @@ void md_parallel_for( const std::string& str
 {
   Impl::MDFunctor g(range, f);
 
-  //using range_policy = typename MDRange::range_policy;
   using range_policy = typename MDRange::impl_range_policy;
 
   Kokkos::parallel_for( range_policy(0, range.m_num_tiles).set_chunk_size(1), g, str );
@@ -395,7 +407,7 @@ void md_parallel_for( MDRange const& range
 // ------------------------------------------------------------------ //
 
 // ------------------------------------------------------------------ //
-//md_parallel_reduce
+//md_parallel_reduce - deprecated, use parallel_reduce
 // ------------------------------------------------------------------ //
 template 
 void md_parallel_reduce( MDRange const& range
@@ -409,9 +421,8 @@ void md_parallel_reduce( MDRange const& range
                       ) >::type* = 0
                     )
 {
-  Impl::MDFunctor g(range, f, v);
+  Impl::MDFunctor g(range, f);
 
-  //using range_policy = typename MDRange::range_policy;
   using range_policy = typename MDRange::impl_range_policy;
   Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
 }
@@ -428,48 +439,14 @@ void md_parallel_reduce( const std::string& str
                       ) >::type* = 0
                     )
 {
-  Impl::MDFunctor g(range, f, v);
+  Impl::MDFunctor g(range, f);
 
-  //using range_policy = typename MDRange::range_policy;
   using range_policy = typename MDRange::impl_range_policy;
 
   Kokkos::parallel_reduce( str, range_policy(0, range.m_num_tiles).set_chunk_size(1), g, v );
 }
 
-// Cuda - parallel_reduce not implemented yet
-/*
-template 
-void md_parallel_reduce( MDRange const& range
-                    , Functor const& f
-                    , ValueType & v
-                    , const std::string& str = ""
-                    , typename std::enable_if<( true
-                      #if defined( KOKKOS_ENABLE_CUDA)
-                      && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
-                      #endif
-                      ) >::type* = 0
-                    )
-{
-  Impl::DeviceIterateTile closure(range, f, v);
-  closure.execute();
-}
-
-template 
-void md_parallel_reduce( const std::string& str
-                    , MDRange const& range
-                    , Functor const& f
-                    , ValueType & v
-                    , typename std::enable_if<( true
-                      #if defined( KOKKOS_ENABLE_CUDA)
-                      && std::is_same< typename MDRange::range_policy::execution_space, Kokkos::Cuda>::value
-                      #endif
-                      ) >::type* = 0
-                    )
-{
-  Impl::DeviceIterateTile closure(range, f, v);
-  closure.execute();
-}
-*/
+// Cuda - md_parallel_reduce not implemented - use parallel_reduce
 
 }} // namespace Kokkos::Experimental
 
diff --git a/lib/kokkos/core/src/Kokkos_Atomic.hpp b/lib/kokkos/core/src/Kokkos_Atomic.hpp
index 3ecae24da4..3c8673c66a 100644
--- a/lib/kokkos/core/src/Kokkos_Atomic.hpp
+++ b/lib/kokkos/core/src/Kokkos_Atomic.hpp
@@ -114,40 +114,9 @@
 #endif /* Not pre-selected atomic implementation */
 #endif
 
-//----------------------------------------------------------------------------
-
-// Forward decalaration of functions supporting arbitrary sized atomics
-// This is necessary since Kokkos_Atomic.hpp is internally included very early
-// through Kokkos_HostSpace.hpp as well as the allocation tracker.
 #ifdef KOKKOS_ENABLE_CUDA
-namespace Kokkos {
-namespace Impl {
-/// \brief Aquire a lock for the address
-///
-/// This function tries to aquire the lock for the hash value derived
-/// from the provided ptr. If the lock is successfully aquired the
-/// function returns true. Otherwise it returns false.
-#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-extern
+#include 
 #endif
-__device__ inline
-bool lock_address_cuda_space(void* ptr);
-
-/// \brief Release lock for the address
-///
-/// This function releases the lock for the hash value derived
-/// from the provided ptr. This function should only be called
-/// after previously successfully aquiring a lock with
-/// lock_address.
-#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
-extern
-#endif
-__device__ inline
-void unlock_address_cuda_space(void* ptr);
-}
-}
-#endif
-
 
 namespace Kokkos {
 template 
diff --git a/lib/kokkos/core/src/Kokkos_Concepts.hpp b/lib/kokkos/core/src/Kokkos_Concepts.hpp
index 9a2b53e157..5480dbf40c 100644
--- a/lib/kokkos/core/src/Kokkos_Concepts.hpp
+++ b/lib/kokkos/core/src/Kokkos_Concepts.hpp
@@ -79,6 +79,21 @@ struct IndexType
   using type = T;
 };
 
+/**\brief Specify Launch Bounds for CUDA execution.
+ *
+ *  The "best" defaults may be architecture specific.
+ */
+template< unsigned int maxT = 1024 /* Max threads per block */
+        , unsigned int minB = 1    /* Min blocks per SM */
+        >
+struct LaunchBounds
+{
+  using launch_bounds = LaunchBounds;
+  using type = LaunchBounds;
+  static unsigned int constexpr maxTperB {maxT};
+  static unsigned int constexpr minBperSM {minB};
+};
+
 } // namespace Kokkos
 
 //----------------------------------------------------------------------------
@@ -119,6 +134,7 @@ using Kokkos::is_array_layout ;
 KOKKOS_IMPL_IS_CONCEPT( iteration_pattern )
 KOKKOS_IMPL_IS_CONCEPT( schedule_type )
 KOKKOS_IMPL_IS_CONCEPT( index_type )
+KOKKOS_IMPL_IS_CONCEPT( launch_bounds )
 
 }
 
diff --git a/lib/kokkos/core/src/Kokkos_Core.hpp b/lib/kokkos/core/src/Kokkos_Core.hpp
index 19de791c0f..ddb11d2894 100644
--- a/lib/kokkos/core/src/Kokkos_Core.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core.hpp
@@ -96,11 +96,13 @@ struct InitArguments {
   int num_numa;
   int device_id;
 
-  InitArguments() {
-    num_threads = -1;
-    num_numa = -1;
-    device_id = -1;
-  }
+  InitArguments( int nt = -1
+               , int nn = -1
+               , int dv = -1)
+    : num_threads( nt )
+    , num_numa( nn )
+    , device_id( dv )
+  {}
 };
 
 void initialize(int& narg, char* arg[]);
@@ -168,6 +170,9 @@ void * kokkos_realloc( void * arg_alloc , const size_t arg_alloc_size )
 
 } // namespace Kokkos
 
+#include 
+#include 
+
 //----------------------------------------------------------------------------
 //----------------------------------------------------------------------------
 
diff --git a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
index 09081d2387..8c080f7a8f 100644
--- a/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
+++ b/lib/kokkos/core/src/Kokkos_Core_fwd.hpp
@@ -51,6 +51,9 @@
 #include 
 #include 
 
+#include 
+#include 
+
 //----------------------------------------------------------------------------
 // Have assumed a 64bit build (8byte pointers) throughout the code base.
 
diff --git a/lib/kokkos/core/src/Kokkos_Crs.hpp b/lib/kokkos/core/src/Kokkos_Crs.hpp
new file mode 100644
index 0000000000..93b3fa5ca9
--- /dev/null
+++ b/lib/kokkos/core/src/Kokkos_Crs.hpp
@@ -0,0 +1,333 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#ifndef KOKKOS_CRS_HPP
+#define KOKKOS_CRS_HPP
+
+namespace Kokkos {
+namespace Experimental {
+
+/// \class Crs
+/// \brief Compressed row storage array.
+///
+/// \tparam DataType The type of stored entries.  If a Crs is
+///   used as the graph of a sparse matrix, then this is usually an
+///   integer type, the type of the column indices in the sparse
+///   matrix.
+///
+/// \tparam Arg1Type The second template parameter, corresponding
+///   either to the Device type (if there are no more template
+///   parameters) or to the Layout type (if there is at least one more
+///   template parameter).
+///
+/// \tparam Arg2Type The third template parameter, which if provided
+///   corresponds to the Device type.
+///
+/// \tparam SizeType The type of row offsets.  Usually the default
+///   parameter suffices.  However, setting a nondefault value is
+///   necessary in some cases, for example, if you want to have
+///   sparse matrices with dimensions (and therefore column indices)
+///   that fit in \c int, but want to store more than INT_MAX
+///   entries in the sparse matrix.
+///
+/// A row has a range of entries:
+/// <ul>
+/// <li> <tt> row_map[i0] <= entry < row_map[i0+1] </tt> </li>
+/// <li> <tt> 0 <= i1 < row_map[i0+1] - row_map[i0] </tt> </li>
+/// <li> <tt> entries( entry , i2 , i3 , ... ); </tt> </li>
+/// <li> <tt> entries( row_map[i0] + i1 , i2 , i3 , ... ); </tt> </li>
+/// </ul>
+template< class DataType, + class Arg1Type, + class Arg2Type = void, + typename SizeType = typename ViewTraits::size_type> +class Crs { +protected: + typedef ViewTraits traits; + +public: + typedef DataType data_type; + typedef typename traits::array_layout array_layout; + typedef typename traits::execution_space execution_space; + typedef typename traits::memory_space memory_space; + typedef typename traits::device_type device_type; + typedef SizeType size_type; + + typedef Crs< DataType , Arg1Type , Arg2Type , SizeType > staticcrsgraph_type; + typedef Crs< DataType , array_layout , typename traits::host_mirror_space , SizeType > HostMirror; + typedef View row_map_type; + typedef View entries_type; + + entries_type entries; + row_map_type row_map; + + //! Construct an empty view. + Crs () : entries(), row_map() {} + + //! Copy constructor (shallow copy). + Crs (const Crs& rhs) : entries (rhs.entries), row_map (rhs.row_map) + {} + + template + Crs (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_) + {} + + /** \brief Assign to a view of the rhs array. + * If the old view is the last view + * then allocated memory is deallocated. + */ + Crs& operator= (const Crs& rhs) { + entries = rhs.entries; + row_map = rhs.row_map; + return *this; + } + + /** \brief Destroy this view of the array. + * If the last view then allocated memory is deallocated. + */ + ~Crs() {} + + /** \brief Return number of rows in the graph + */ + KOKKOS_INLINE_FUNCTION + size_type numRows() const { + return (row_map.dimension_0 () != 0) ? 
+ row_map.dimension_0 () - static_cast (1) : + static_cast (0); + } +}; + +/*--------------------------------------------------------------------------*/ + +template< class OutCounts, + class DataType, + class Arg1Type, + class Arg2Type, + class SizeType> +void get_crs_transpose_counts( + OutCounts& out, + Crs const& in, + std::string const& name = "transpose_counts"); + +template< class OutCounts, + class InCrs> +void get_crs_row_map_from_counts( + OutCounts& out, + InCrs const& in, + std::string const& name = "row_map"); + +template< class DataType, + class Arg1Type, + class Arg2Type, + class SizeType> +void transpose_crs( + Crs& out, + Crs const& in); + +}} // namespace Kokkos::Experimental + +/*--------------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { +namespace Experimental { + +template +class GetCrsTransposeCounts { + public: + using execution_space = typename InCrs::execution_space; + using self_type = GetCrsTransposeCounts; + using index_type = typename InCrs::size_type; + private: + InCrs in; + OutCounts out; + public: + KOKKOS_INLINE_FUNCTION + void operator()(index_type i) const { + atomic_increment( &out[in.entries(i)] ); + } + GetCrsTransposeCounts(InCrs const& arg_in, OutCounts const& arg_out): + in(arg_in),out(arg_out) { + using policy_type = RangePolicy; + using closure_type = Kokkos::Impl::ParallelFor; + const closure_type closure(*this, policy_type(0, index_type(in.entries.size()))); + closure.execute(); + execution_space::fence(); + } +}; + +template +class CrsRowMapFromCounts { + public: + using execution_space = typename InCounts::execution_space; + using value_type = typename OutRowMap::value_type; + using index_type = typename InCounts::size_type; + private: + InCounts in; + OutRowMap out; + public: + KOKKOS_INLINE_FUNCTION + void operator()(index_type i, value_type& update, bool final_pass) const { + 
update += in(i); + if (final_pass) { + out(i + 1) = update; + if (i == 0) { + out(0) = 0; + } + } + } + KOKKOS_INLINE_FUNCTION + void init(value_type& update) const { update = 0; } + KOKKOS_INLINE_FUNCTION + void join(volatile value_type& update, const volatile value_type& input) const { + update += input; + } + using self_type = CrsRowMapFromCounts; + CrsRowMapFromCounts(InCounts const& arg_in, OutRowMap const& arg_out): + in(arg_in),out(arg_out) { + using policy_type = RangePolicy; + using closure_type = Kokkos::Impl::ParallelScan; + closure_type closure(*this, policy_type(0, in.size())); + closure.execute(); + execution_space::fence(); + } +}; + +template +class FillCrsTransposeEntries { + public: + using execution_space = typename InCrs::execution_space; + using memory_space = typename InCrs::memory_space; + using value_type = typename OutCrs::entries_type::value_type; + using index_type = typename InCrs::size_type; + private: + using counters_type = View; + InCrs in; + OutCrs out; + counters_type counters; + public: + KOKKOS_INLINE_FUNCTION + void operator()(index_type i) const { + auto begin = in.row_map(i); + auto end = in.row_map(i + 1); + for (auto j = begin; j < end; ++j) { + auto ti = in.entries(j); + auto tbegin = out.row_map(ti); + auto tj = atomic_fetch_add( &counters(ti), 1 ); + out.entries( tbegin + tj ) = i; + } + } + using self_type = FillCrsTransposeEntries; + FillCrsTransposeEntries(InCrs const& arg_in, OutCrs const& arg_out): + in(arg_in),out(arg_out), + counters("counters", arg_out.numRows()) { + using policy_type = RangePolicy; + using closure_type = Kokkos::Impl::ParallelFor; + const closure_type closure(*this, policy_type(0, index_type(in.numRows()))); + closure.execute(); + execution_space::fence(); + } +}; + +}}} // namespace Kokkos::Impl::Experimental + +/*--------------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { 
+namespace Experimental { + +template< class OutCounts, + class DataType, + class Arg1Type, + class Arg2Type, + class SizeType> +void get_crs_transpose_counts( + OutCounts& out, + Crs const& in, + std::string const& name) { + using InCrs = Crs; + out = OutCounts(name, in.numRows()); + Kokkos::Impl::Experimental:: + GetCrsTransposeCounts functor(in, out); +} + +template< class OutRowMap, + class InCounts> +void get_crs_row_map_from_counts( + OutRowMap& out, + InCounts const& in, + std::string const& name) { + out = OutRowMap(ViewAllocateWithoutInitializing(name), in.size() + 1); + Kokkos::Impl::Experimental:: + CrsRowMapFromCounts functor(in, out); +} + +template< class DataType, + class Arg1Type, + class Arg2Type, + class SizeType> +void transpose_crs( + Crs& out, + Crs const& in) +{ + typedef Crs crs_type ; + typedef typename crs_type::memory_space memory_space ; + typedef View counts_type ; + { + counts_type counts; + Kokkos::Experimental::get_crs_transpose_counts(counts, in); + Kokkos::Experimental::get_crs_row_map_from_counts(out.row_map, counts, + "tranpose_row_map"); + } + out.entries = decltype(out.entries)("transpose_entries", in.entries.size()); + Kokkos::Impl::Experimental:: + FillCrsTransposeEntries entries_functor(in, out); +} + +}} // namespace Kokkos::Experimental + +#endif /* #define KOKKOS_CRS_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_Cuda.hpp b/lib/kokkos/core/src/Kokkos_Cuda.hpp index f0f0f87458..197831dee5 100644 --- a/lib/kokkos/core/src/Kokkos_Cuda.hpp +++ b/lib/kokkos/core/src/Kokkos_Cuda.hpp @@ -217,8 +217,8 @@ public: private: - cudaStream_t m_stream ; int m_device ; + cudaStream_t m_stream ; }; } // namespace Kokkos @@ -295,6 +295,7 @@ struct VerifyExecutionCanAccessMemorySpace #include #include #include +#include #include //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp index 307ab193b1..fb5985e164 100644 --- 
a/lib/kokkos/core/src/Kokkos_CudaSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_CudaSpace.hpp @@ -90,7 +90,7 @@ public: , const size_t arg_alloc_size ) const ; /**\brief Return Name of the MemorySpace */ - static constexpr const char* name(); + static constexpr const char* name() { return m_name; } /*--------------------------------*/ /** \brief Error reporting for HostSpace attempt to access CudaSpace */ @@ -186,7 +186,7 @@ public: , const size_t arg_alloc_size ) const ; /**\brief Return Name of the MemorySpace */ - static constexpr const char* name(); + static constexpr const char* name() { return m_name; } /*--------------------------------*/ @@ -234,7 +234,7 @@ public: , const size_t arg_alloc_size ) const ; /**\brief Return Name of the MemorySpace */ - static constexpr const char* name(); + static constexpr const char* name() { return m_name; } private: diff --git a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp index 375a2d3744..a8c4d77c62 100644 --- a/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp +++ b/lib/kokkos/core/src/Kokkos_ExecPolicy.hpp @@ -384,6 +384,7 @@ Impl::PerThreadValue PerThread(const int& arg); * WorkTag (none): Tag which is used as the first argument for the functor operator. * Schedule (Schedule): Scheduling Policy (Dynamic, or Static). * IndexType (IndexType: Integer Index type used to iterate over the Index space. + * LaunchBounds (LaunchBounds<1024,1>: Launch Bounds for CUDA compilation. */ template< class ... 
Properties> class TeamPolicy: public @@ -561,6 +562,45 @@ KOKKOS_INLINE_FUNCTION Impl::ThreadVectorRangeBoundariesStruct ThreadVectorRange( const TeamMemberType&, const iType& count ); +#if defined(KOKKOS_ENABLE_PROFILING) +namespace Impl { + +template::value > +struct ParallelConstructName; + +template +struct ParallelConstructName { + ParallelConstructName(std::string const& label):label_ref(label) { + if (label.empty()) { + default_name = std::string(typeid(FunctorType).name()) + "/" + + typeid(TagType).name(); + } + } + std::string const& get() { + return (label_ref.empty()) ? default_name : label_ref; + } + std::string const& label_ref; + std::string default_name; +}; + +template +struct ParallelConstructName { + ParallelConstructName(std::string const& label):label_ref(label) { + if (label.empty()) { + default_name = std::string(typeid(FunctorType).name()); + } + } + std::string const& get() { + return (label_ref.empty()) ? default_name : label_ref; + } + std::string const& label_ref; + std::string default_name; +}; + +} // namespace Impl +#endif /* defined KOKKOS_ENABLE_PROFILING */ + } // namespace Kokkos #endif /* #define KOKKOS_EXECPOLICY_HPP */ diff --git a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp index e224cd4e84..9c9af0dd8b 100644 --- a/lib/kokkos/core/src/Kokkos_HBWSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HBWSpace.hpp @@ -126,14 +126,6 @@ public: //! 
This memory space preferred device_type typedef Kokkos::Device< execution_space, memory_space > device_type; - /*--------------------------------*/ - /* Functions unique to the HBWSpace */ - static int in_parallel(); - - static void register_in_parallel( int (*)() ); - - /*--------------------------------*/ - /**\brief Default memory space instance */ HBWSpace(); HBWSpace( const HBWSpace & rhs ) = default; diff --git a/lib/kokkos/core/src/Kokkos_HostSpace.hpp b/lib/kokkos/core/src/Kokkos_HostSpace.hpp index d00cce8f60..431635047a 100644 --- a/lib/kokkos/core/src/Kokkos_HostSpace.hpp +++ b/lib/kokkos/core/src/Kokkos_HostSpace.hpp @@ -130,14 +130,6 @@ public: //! This memory space preferred device_type typedef Kokkos::Device< execution_space, memory_space > device_type; - /*--------------------------------*/ - /* Functions unique to the HostSpace */ - static int in_parallel(); - - static void register_in_parallel( int (*)() ); - - /*--------------------------------*/ - /**\brief Default memory space instance */ HostSpace(); HostSpace( HostSpace && rhs ) = default; @@ -161,7 +153,7 @@ public: , const size_t arg_alloc_size ) const; /**\brief Return Name of the MemorySpace */ - static constexpr const char* name(); + static constexpr const char* name() { return m_name; } private: AllocationMechanism m_alloc_mech; diff --git a/lib/kokkos/core/src/Kokkos_Layout.hpp b/lib/kokkos/core/src/Kokkos_Layout.hpp index f300a6d9f6..87c705153e 100644 --- a/lib/kokkos/core/src/Kokkos_Layout.hpp +++ b/lib/kokkos/core/src/Kokkos_Layout.hpp @@ -156,6 +156,8 @@ struct LayoutStride { for ( int r = 0 ; r < ARRAY_LAYOUT_MAX_RANK ; ++r ) { tmp.dimension[r] = 0 ; tmp.stride[r] = 0 ; + } + for ( int r = 0 ; r < rank ; ++r ) { check_input &= ~int( 1 << order[r] ); } if ( 0 == check_input ) { diff --git a/lib/kokkos/core/src/Kokkos_Macros.hpp b/lib/kokkos/core/src/Kokkos_Macros.hpp index 1439dbd3f8..250ef6630a 100644 --- a/lib/kokkos/core/src/Kokkos_Macros.hpp +++ 
b/lib/kokkos/core/src/Kokkos_Macros.hpp @@ -297,6 +297,10 @@ #endif #endif + #if defined( KOKKOS_ARCH_AVX512MIC ) + #define KOKKOS_ENABLE_RFO_PREFETCH 1 + #endif + #if defined( __MIC__ ) // Compiling for Xeon Phi #endif @@ -344,13 +348,18 @@ //#define KOKKOS_ENABLE_PRAGMA_VECTOR 1 //#define KOKKOS_ENABLE_PRAGMA_SIMD 1 + #if defined( KOKKOS_ARCH_AVX512MIC ) + #define KOKKOS_ENABLE_RFO_PREFETCH 1 + #endif + #if !defined( KOKKOS_FORCEINLINE_FUNCTION ) #define KOKKOS_FORCEINLINE_FUNCTION inline __attribute__((always_inline)) #endif #if !defined( KOKKOS_ENABLE_ASM ) && !defined( __PGIC__ ) && \ ( defined( __amd64 ) || defined( __amd64__ ) || \ - defined( __x86_64 ) || defined( __x86_64__ ) ) + defined( __x86_64 ) || defined( __x86_64__ ) || \ + defined(__PPC64__) ) #define KOKKOS_ENABLE_ASM 1 #endif #endif diff --git a/lib/kokkos/core/src/Kokkos_MasterLock.hpp b/lib/kokkos/core/src/Kokkos_MasterLock.hpp new file mode 100644 index 0000000000..81564b8eac --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_MasterLock.hpp @@ -0,0 +1,73 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_MASTER_LOCK_HPP +#define KOKKOS_MASTER_LOCK_HPP + +#include + +namespace Kokkos { namespace Experimental { + +// my be used to coordinate work between master instances +// SHOULD NOT be used within a parallel algorithm +// +// This lock should be used with with a scoped lock guard +// i.e. 
std::unique_lock, std::lock_guard +// +// cannot be copied or moved +// has the following functions available +// +// Lock() +// ~Lock() +// +// void lock() +// void unlock() +// bool try_lock() +// +template +class MasterLock; + +}} // namespace Kokkos::Experimental + +#endif //KOKKOS_MASTER_LOCK_HPP + diff --git a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp index dbf1ad8057..1da936067d 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryPool.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryPool.hpp @@ -66,11 +66,6 @@ private: enum : uint32_t { max_bit_count_lg2 = CB::max_bit_count_lg2 }; enum : uint32_t { max_bit_count = CB::max_bit_count }; - /* Defaults for min block, max block, and superblock sizes */ - enum : uint32_t { MIN_BLOCK_SIZE_LG2 = 6 /* 64 bytes */ }; - enum : uint32_t { MAX_BLOCK_SIZE_LG2 = 12 /* 4k bytes */ }; - enum : uint32_t { SUPERBLOCK_SIZE_LG2 = 16 /* 64k bytes */ }; - enum : uint32_t { HINT_PER_BLOCK_SIZE = 2 }; /* Each superblock has a concurrent bitset state @@ -85,6 +80,14 @@ private: * is concurrently updated. */ + /* Mapping between block_size <-> block_state + * + * block_state = ( m_sb_size_lg2 - block_size_lg2 ) << state_shift + * block_size = m_sb_size_lg2 - ( block_state >> state_shift ) + * + * Thus A_block_size < B_block_size <=> A_block_state > B_block_state + */ + typedef typename DeviceType::memory_space base_memory_space ; enum { accessible = @@ -251,10 +254,10 @@ public: * significant runtime performance improvements. 
*/ MemoryPool( const base_memory_space & memspace - , const size_t min_total_alloc_size - , const uint32_t min_block_alloc_size // = 1 << MIN_BLOCK_SIZE_LG2 - , const uint32_t max_block_alloc_size // = 1 << MAX_BLOCK_SIZE_LG2 - , const uint32_t min_superblock_size // = 1 << SUPERBLOCK_SIZE_LG2 + , const size_t min_total_alloc_size + , size_t min_block_alloc_size = 0 + , size_t max_block_alloc_size = 0 + , size_t min_superblock_size = 0 ) : m_tracker() , m_sb_state_array(0) @@ -267,8 +270,43 @@ public: , m_data_offset(0) , m_unused_padding(0) { - const uint32_t int_align_lg2 = 3 ; /* align as int[8] */ - const uint32_t int_align_mask = ( 1u << int_align_lg2 ) - 1 ; + const uint32_t int_align_lg2 = 3 ; /* align as int[8] */ + const uint32_t int_align_mask = ( 1u << int_align_lg2 ) - 1 ; + + // Constraints and defaults: + // min_block_alloc_size <= max_block_alloc_size + // max_block_alloc_size <= min_superblock_size + // min_superblock_size <= min_total_alloc_size + + const uint32_t MIN_BLOCK_SIZE = 1u << 6 /* 64 bytes */ ; + const uint32_t MAX_BLOCK_SIZE = 1u << 12 /* 4k bytes */ ; + + if ( 0 == min_block_alloc_size ) min_block_alloc_size = MIN_BLOCK_SIZE ; + + if ( 0 == max_block_alloc_size ) { + + max_block_alloc_size = MAX_BLOCK_SIZE ; + + // Upper bound of total allocation size + max_block_alloc_size = std::min( size_t(max_block_alloc_size) + , min_total_alloc_size ); + + // Lower bound of minimum block size + max_block_alloc_size = std::max( max_block_alloc_size + , min_block_alloc_size ); + } + + if ( 0 == min_superblock_size ) { + min_superblock_size = max_block_alloc_size ; + + // Upper bound of total allocation size + min_superblock_size = std::min( size_t(min_superblock_size) + , min_total_alloc_size ); + + // Lower bound of maximum block size + min_superblock_size = std::max( min_superblock_size + , max_block_alloc_size ); + } // Block and superblock size is power of two: @@ -435,6 +473,8 @@ public: void * allocate( size_t alloc_size , int32_t 
attempt_limit = 1 ) const noexcept { + if ( 0 == alloc_size ) return (void*) 0 ; + void * p = 0 ; const uint32_t block_size_lg2 = get_block_size_lg2( alloc_size ); @@ -444,10 +484,9 @@ public: // Allocation will fit within a superblock // that has block sizes ( 1 << block_size_lg2 ) - const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2 ; - const uint32_t block_state = block_count_lg2 << state_shift ; - const uint32_t block_count = 1u << block_count_lg2 ; - const uint32_t block_count_mask = block_count - 1 ; + const uint32_t block_count_lg2 = m_sb_size_lg2 - block_size_lg2 ; + const uint32_t block_state = block_count_lg2 << state_shift ; + const uint32_t block_count = 1u << block_count_lg2 ; // Superblock hints for this block size: // hint_sb_id_ptr[0] is the dynamically changing hint @@ -465,7 +504,7 @@ public: // the guess for which block within a superblock should // be claimed. If not available then a search occurs. - const uint32_t block_id_hint = block_count_mask & + const uint32_t block_id_hint = (uint32_t)( Kokkos::Impl::clock_tic() #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) // Spread out potentially concurrent access @@ -474,6 +513,9 @@ public: #endif ); + // expected state of superblock for allocation + uint32_t sb_state = block_state ; + int32_t sb_id = -1 ; volatile uint32_t * sb_state_array = 0 ; @@ -484,6 +526,8 @@ public: if ( sb_id < 0 ) { + // No superblock specified, try the hint for this block size + sb_id = hint_sb_id = int32_t( *hint_sb_id_ptr ); sb_state_array = m_sb_state_array + ( sb_id * m_sb_state_size ); @@ -493,16 +537,20 @@ public: // 0 <= sb_id // sb_state_array == m_sb_state_array + m_sb_state_size * sb_id - if ( block_state == ( state_header_mask & *sb_state_array ) ) { + if ( sb_state == ( state_header_mask & *sb_state_array ) ) { - // This superblock state is assigned to this block size. - // Try to claim a bit. + // This superblock state is as expected, for the moment. + // Attempt to claim a bit. 
The attempt updates the state + // so have already made sure the state header is as expected. + + const uint32_t count_lg2 = sb_state >> state_shift ; + const uint32_t mask = ( 1u << count_lg2 ) - 1 ; const Kokkos::pair result = CB::acquire_bounded_lg2( sb_state_array - , block_count_lg2 - , block_id_hint - , block_state + , count_lg2 + , block_id_hint & mask + , sb_state ); // If result.first < 0 then failed to acquire @@ -512,16 +560,18 @@ public: if ( 0 <= result.first ) { // acquired a bit + const uint32_t size_lg2 = m_sb_size_lg2 - count_lg2 ; + // Set the allocated block pointer p = ((char*)( m_sb_state_array + m_data_offset )) + ( uint32_t(sb_id) << m_sb_size_lg2 ) // superblock memory - + ( result.first << block_size_lg2 ); // block memory + + ( result.first << size_lg2 ); // block memory break ; // Success } -// printf(" acquire block_count_lg2(%d) block_state(0x%x) sb_id(%d) result(%d,%d)\n" , block_count_lg2 , block_state , sb_id , result.first , result.second ); +// printf(" acquire count_lg2(%d) sb_state(0x%x) sb_id(%d) result(%d,%d)\n" , count_lg2 , sb_state , sb_id , result.first , result.second ); } //------------------------------------------------------------------ @@ -529,12 +579,18 @@ public: // Must find a new superblock. // Start searching at designated index for this block size. - // Look for a partially full superblock of this block size. - // Look for an empty superblock just in case cannot find partfull. 
+ // Look for superblock that, in preferential order, + // 1) part-full superblock of this block size + // 2) empty superblock to claim for this block size + // 3) part-full superblock of the next larger block size + sb_state = block_state ; // Expect to find the desired state sb_id = -1 ; + bool update_hint = false ; int32_t sb_id_empty = -1 ; + int32_t sb_id_large = -1 ; + uint32_t sb_state_large = 0 ; sb_state_array = m_sb_state_array + sb_id_begin * m_sb_state_size ; @@ -544,38 +600,54 @@ public: // Note that the state may change at any moment // as concurrent allocations and deallocations occur. - const uint32_t state = *sb_state_array ; - const uint32_t used = state & state_used_mask ; + const uint32_t full_state = *sb_state_array ; + const uint32_t used = full_state & state_used_mask ; + const uint32_t state = full_state & state_header_mask ; - if ( block_state == ( state & state_header_mask ) ) { + if ( state == block_state ) { // Superblock is assigned to this block size - if ( used < block_count ) { + if ( used < block_count ) { // There is room to allocate one block sb_id = id ; - if ( used + 1 < block_count ) { + // Is there room to allocate more than one block? - // There is room to allocate more than one block - - Kokkos::atomic_compare_exchange - ( hint_sb_id_ptr , uint32_t(hint_sb_id) , uint32_t(sb_id) ); - } + update_hint = used + 1 < block_count ; break ; } } - else if ( ( used == 0 ) && ( sb_id_empty == -1 ) ) { + else if ( 0 == used ) { - // Superblock is not assigned to this block size - // and is the first empty superblock encountered. - // Save this id to use if a partfull superblock is not found. + // Superblock is empty - sb_id_empty = id ; + if ( -1 == sb_id_empty ) { + + // Superblock is not assigned to this block size + // and is the first empty superblock encountered. + // Save this id to use if a partfull superblock is not found. 
+ + sb_id_empty = id ; + } } + else if ( ( -1 == sb_id_empty /* have not found an empty */ ) && + ( -1 == sb_id_large /* have not found a larger */ ) && + ( state < block_state /* a larger block */ ) && + // is not full: + ( used < ( 1u << ( state >> state_shift ) ) ) ) { + // First superblock encountered that is + // larger than this block size and + // has room for an allocation. + // Save this id to use of partfull or empty superblock not found + sb_id_large = id ; + sb_state_large = state ; + } + + // Iterate around the superblock array: if ( ++id < m_sb_count ) { sb_state_array += m_sb_state_size ; @@ -586,7 +658,7 @@ public: } } -// printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d)\n" , m_sb_count , sb_id , sb_id_empty ); +// printf(" search m_sb_count(%d) sb_id(%d) sb_id_empty(%d) sb_id_large(%d)\n" , m_sb_count , sb_id , sb_id_empty , sb_id_large); if ( sb_id < 0 ) { @@ -609,21 +681,31 @@ public: const uint32_t state_empty = state_header_mask & *sb_state_array ; - if ( state_empty == - Kokkos::atomic_compare_exchange - (sb_state_array,state_empty,block_state) ) { + // If this thread claims the empty block then update the hint + update_hint = + state_empty == + Kokkos::atomic_compare_exchange + (sb_state_array,state_empty,block_state); + } + else if ( 0 <= sb_id_large ) { - // If this thread claimed the block then update the hint + // Found a larger superblock with space available - Kokkos::atomic_compare_exchange - ( hint_sb_id_ptr , uint32_t(hint_sb_id) , uint32_t(sb_id) ); - } + sb_id = sb_id_large ; + sb_state = sb_state_large ; + + sb_state_array = m_sb_state_array + ( sb_id * m_sb_state_size ); } else { // Did not find a potentially usable superblock --attempt_limit ; } } + + if ( update_hint ) { + Kokkos::atomic_compare_exchange + ( hint_sb_id_ptr , uint32_t(hint_sb_id) , uint32_t(sb_id) ); + } } // end allocation attempt loop //-------------------------------------------------------------------- @@ -646,6 +728,8 @@ public: 
KOKKOS_INLINE_FUNCTION void deallocate( void * p , size_t /* alloc_size */ ) const noexcept { + if ( 0 == p ) return ; + // Determine which superblock and block const ptrdiff_t d = ((char*)p) - ((char*)( m_sb_state_array + m_data_offset )); diff --git a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp index 94b58b8aff..af9c8ea782 100644 --- a/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp +++ b/lib/kokkos/core/src/Kokkos_MemoryTraits.hpp @@ -72,11 +72,11 @@ struct MemoryTraits { //! Tag this class as a kokkos memory traits: typedef MemoryTraits memory_traits ; - enum { Unmanaged = T & unsigned(Kokkos::Unmanaged) }; - enum { RandomAccess = T & unsigned(Kokkos::RandomAccess) }; - enum { Atomic = T & unsigned(Kokkos::Atomic) }; - enum { Restrict = T & unsigned(Kokkos::Restrict) }; - enum { Aligned = T & unsigned(Kokkos::Aligned) }; + enum : bool { Unmanaged = (unsigned(0) != (T & unsigned(Kokkos::Unmanaged))) }; + enum : bool { RandomAccess = (unsigned(0) != (T & unsigned(Kokkos::RandomAccess))) }; + enum : bool { Atomic = (unsigned(0) != (T & unsigned(Kokkos::Atomic))) }; + enum : bool { Restrict = (unsigned(0) != (T & unsigned(Kokkos::Restrict))) }; + enum : bool { Aligned = (unsigned(0) != (T & unsigned(Kokkos::Aligned))) }; }; @@ -109,7 +109,11 @@ enum { MEMORY_ALIGNMENT = #else ( 1 << Kokkos::Impl::integral_power_of_two( 128 ) ) #endif - , MEMORY_ALIGNMENT_THRESHOLD = 4 +#if defined( KOKKOS_MEMORY_ALIGNMENT_THRESHOLD ) + , MEMORY_ALIGNMENT_THRESHOLD = KOKKOS_MEMORY_ALIGNMENT_THRESHOLD +#else + , MEMORY_ALIGNMENT_THRESHOLD = 4 +#endif }; diff --git a/lib/kokkos/core/src/Kokkos_OpenMP.hpp b/lib/kokkos/core/src/Kokkos_OpenMP.hpp index 3e11621ce6..d5de01cf2f 100644 --- a/lib/kokkos/core/src/Kokkos_OpenMP.hpp +++ b/lib/kokkos/core/src/Kokkos_OpenMP.hpp @@ -47,10 +47,6 @@ #include #if defined( KOKKOS_ENABLE_OPENMP) -#if !defined(_OPENMP) -#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!" 
-#endif - #include #include @@ -67,95 +63,144 @@ #include #include +#include + /*--------------------------------------------------------------------------*/ namespace Kokkos { +namespace Impl { +class OpenMPExec; +} + /// \class OpenMP /// \brief Kokkos device for multicore processors in the host memory space. class OpenMP { public: - //------------------------------------ - //! \name Type declarations that all Kokkos devices must provide. - //@{ - //! Tag this class as a kokkos execution space using execution_space = OpenMP; + + using memory_space = #ifdef KOKKOS_ENABLE_HBWSPACE - using memory_space = Experimental::HBWSpace; + Experimental::HBWSpace; #else - using memory_space = HostSpace; + HostSpace; #endif + //! This execution space preferred device_type - using device_type = Kokkos::Device; - - using array_layout = LayoutRight; - using size_type = memory_space::size_type; - + using device_type = Kokkos::Device< execution_space, memory_space >; + using array_layout = LayoutRight; + using size_type = memory_space::size_type; using scratch_memory_space = ScratchMemorySpace< OpenMP >; - //@} - //------------------------------------ - //! \name Functions that all Kokkos execution spaces must implement. - //@{ + /// \brief Get a handle to the default execution space instance + inline + OpenMP() noexcept; - inline static bool in_parallel(); + // Using omp_get_max_threads(); is problematic + // On Intel (essentially an initial call to the OpenMP runtime + // without a parallel region before will set a process mask for a single core + // The runtime will than bind threads for a parallel region to other cores on the + // entering the first parallel region and make the process mask the aggregate of + // the thread masks. 
The intent seems to be to make serial code run fast, if you + // compile with OpenMP enabled but don't actually use parallel regions or so + // static int omp_max_threads = omp_get_max_threads(); + static int get_current_max_threads() noexcept; - /** \brief Set the device in a "sleep" state. A noop for OpenMP. */ - static bool sleep(); + /// \brief Initialize the default execution space + /// + /// if ( thread_count == -1 ) + /// then use the number of threads that openmp defaults to + /// if ( thread_count == 0 && Kokkos::hwloc::available() ) + /// then use hwloc to choose the number of threads and change + /// the default number of threads + /// if ( thread_count > 0 ) + /// then force openmp to use the given number of threads and change + /// the default number of threads + static void initialize( int thread_count = -1 ); - /** \brief Wake the device from the 'sleep' state. A noop for OpenMP. */ - static bool wake(); - - /** \brief Wait until all dispatched functors complete. A noop for OpenMP. */ - static void fence() {} - - /// \brief Print configuration information to the given output stream. - static void print_configuration( std::ostream & , const bool detail = false ); - - /// \brief Free any resources being consumed by the device. + /// \brief Free any resources being consumed by the default execution space static void finalize(); - /** \brief Initialize the device. - * - * 1) If the hardware locality library is enabled and OpenMP has not - * already bound threads then bind OpenMP threads to maximize - * core utilization and group for memory hierarchy locality. - * - * 2) Allocate a HostThread for each OpenMP thread to hold its - * topology and fan in/out data. 
- */ - static void initialize( unsigned thread_count = 0 , - unsigned use_numa_count = 0 , - unsigned use_cores_per_numa = 0 ); + /// \brief is the default execution space initialized for current 'master' thread + static bool is_initialized() noexcept; - static int is_initialized(); + /// \brief Print configuration information to the given output stream. + static void print_configuration( std::ostream & , const bool verbose = false ); - /** \brief Return the maximum amount of concurrency. */ - static int concurrency(); + /// \brief is the instance running a parallel algorithm + inline + static bool in_parallel( OpenMP const& = OpenMP() ) noexcept; - //@} - //------------------------------------ - /** \brief This execution space has a topological thread pool which can be queried. - * - * All threads within a pool have a common memory space for which they are cache coherent. - * depth = 0 gives the number of threads in the whole pool. - * depth = 1 gives the number of threads in a NUMA region, typically sharing L3 cache. - * depth = 2 gives the number of threads at the finest granularity, typically sharing L1 cache. 
- */ - inline static int thread_pool_size( int depth = 0 ); + /// \brief Wait until all dispatched functors complete on the given instance + /// + /// This is a no-op on OpenMP + inline + static void fence( OpenMP const& = OpenMP() ) noexcept; + + /// \brief Does the given instance return immediately after launching + /// a parallel algorithm + /// + /// This always returns false on OpenMP + inline + static bool is_asynchronous( OpenMP const& = OpenMP() ) noexcept; + + + /// \brief Partition the default instance into new instances without creating + /// new masters + /// + /// This is a no-op on OpenMP since the default instance cannot be partitioned + /// without promoting other threads to 'master' + static std::vector partition(...); + + /// Non-default instances should be ref-counted so that when the last + /// is destroyed the instance resources are released + /// + /// This is a no-op on OpenMP since a non default instance cannot be created + static OpenMP create_instance(...); + + /// \brief Partition the default instance and call 'f' on each new 'master' thread + /// + /// F is a functor with the following signature + /// void( int partition_id, int num_partitions ) + template + static void partition_master( F const& f + , int requested_num_partitions = 0 + , int requested_partition_size = 0 + ); + + inline + static int thread_pool_size() noexcept; /** \brief The rank of the executing thread in this thread pool */ - KOKKOS_INLINE_FUNCTION static int thread_pool_rank(); + KOKKOS_INLINE_FUNCTION + static int thread_pool_rank() noexcept; - //------------------------------------ +#if !defined( KOKKOS_DISABLE_DEPRECATED ) + /// \brief Initialize the default execution space + static void initialize( int thread_count, + int use_numa_count, + int use_cores_per_numa = 0); - inline static unsigned max_hardware_threads() { return thread_pool_size(0); } + inline + static int thread_pool_size( int depth ); - KOKKOS_INLINE_FUNCTION static - unsigned 
hardware_thread_id() { return thread_pool_rank(); } + static void sleep() {}; + static void wake() {}; - static const char* name(); + // use UniqueToken + static int concurrency(); + + // use UniqueToken + inline + static int max_hardware_threads() noexcept; + + // use UniqueToken + KOKKOS_INLINE_FUNCTION + static int hardware_thread_id() noexcept; +#endif + + static constexpr const char* name() noexcept { return "OpenMP"; } }; } // namespace Kokkos @@ -195,6 +240,7 @@ struct VerifyExecutionCanAccessMemorySpace /*--------------------------------------------------------------------------*/ #include +#include #include #include diff --git a/lib/kokkos/core/src/Kokkos_Parallel.hpp b/lib/kokkos/core/src/Kokkos_Parallel.hpp index e412e608b2..fc8d6bec81 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel.hpp @@ -177,22 +177,23 @@ void parallel_for( const ExecPolicy & policy ) { #if defined(KOKKOS_ENABLE_PROFILING) - uint64_t kpID = 0; - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelFor("" == str ? 
typeid(FunctorType).name() : str, 0, &kpID); - } + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Impl::ParallelConstructName name(str); + Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID); + } #endif - Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Kokkos::Impl::shared_allocation_tracking_disable(); Impl::ParallelFor< FunctorType , ExecPolicy > closure( functor , policy ); - Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + Kokkos::Impl::shared_allocation_tracking_enable(); closure.execute(); #if defined(KOKKOS_ENABLE_PROFILING) - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelFor(kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelFor(kpID); + } #endif } @@ -210,14 +211,15 @@ void parallel_for( const size_t work_count #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelFor("" == str ? typeid(FunctorType).name() : str, 0, &kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Impl::ParallelConstructName name(str); + Kokkos::Profiling::beginParallelFor(name.get(), 0, &kpID); + } #endif - Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Kokkos::Impl::shared_allocation_tracking_disable(); Impl::ParallelFor< FunctorType , policy > closure( functor , policy(0,work_count) ); - Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + Kokkos::Impl::shared_allocation_tracking_enable(); closure.execute(); @@ -420,21 +422,22 @@ void parallel_scan( const ExecutionPolicy & policy { #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelScan("" == str ? 
typeid(FunctorType).name() : str, 0, &kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Impl::ParallelConstructName name(str); + Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID); + } #endif - Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Kokkos::Impl::shared_allocation_tracking_disable(); Impl::ParallelScan< FunctorType , ExecutionPolicy > closure( functor , policy ); - Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + Kokkos::Impl::shared_allocation_tracking_enable(); closure.execute(); #if defined(KOKKOS_ENABLE_PROFILING) - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelScan(kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelScan(kpID); + } #endif } @@ -453,21 +456,22 @@ void parallel_scan( const size_t work_count #if defined(KOKKOS_ENABLE_PROFILING) uint64_t kpID = 0; - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelScan("" == str ? 
typeid(FunctorType).name() : str, 0, &kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Impl::ParallelConstructName name(str); + Kokkos::Profiling::beginParallelScan(name.get(), 0, &kpID); + } #endif - Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Kokkos::Impl::shared_allocation_tracking_disable(); Impl::ParallelScan< FunctorType , policy > closure( functor , policy(0,work_count) ); - Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + Kokkos::Impl::shared_allocation_tracking_enable(); closure.execute(); #if defined(KOKKOS_ENABLE_PROFILING) - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelScan(kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelScan(kpID); + } #endif } diff --git a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp index 8ea5183e35..9df6d4ba09 100644 --- a/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp +++ b/lib/kokkos/core/src/Kokkos_Parallel_Reduce.hpp @@ -872,13 +872,14 @@ namespace Impl { const FunctorType& functor, ReturnType& return_value) { #if defined(KOKKOS_ENABLE_PROFILING) - uint64_t kpID = 0; - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::beginParallelReduce("" == label ? 
typeid(FunctorType).name() : label, 0, &kpID); - } + uint64_t kpID = 0; + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Impl::ParallelConstructName name(label); + Kokkos::Profiling::beginParallelReduce(name.get(), 0, &kpID); + } #endif - Kokkos::Impl::shared_allocation_tracking_claim_and_disable(); + Kokkos::Impl::shared_allocation_tracking_disable(); #ifdef KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER Impl::ParallelReduce closure(functor_adaptor::functor(functor), @@ -890,13 +891,13 @@ namespace Impl { policy, return_value_adapter::return_value(return_value,functor)); #endif - Kokkos::Impl::shared_allocation_tracking_release_and_enable(); + Kokkos::Impl::shared_allocation_tracking_enable(); closure.execute(); #if defined(KOKKOS_ENABLE_PROFILING) - if(Kokkos::Profiling::profileLibraryLoaded()) { - Kokkos::Profiling::endParallelReduce(kpID); - } + if(Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endParallelReduce(kpID); + } #endif } diff --git a/lib/kokkos/core/src/Kokkos_Serial.hpp b/lib/kokkos/core/src/Kokkos_Serial.hpp index 73e8ae3030..539761a1f9 100644 --- a/lib/kokkos/core/src/Kokkos_Serial.hpp +++ b/lib/kokkos/core/src/Kokkos_Serial.hpp @@ -66,6 +66,7 @@ #include +#include namespace Kokkos { @@ -526,6 +527,7 @@ public: } }; + /*--------------------------------------------------------------------------*/ template< class FunctorType , class ... Traits > @@ -604,6 +606,178 @@ public: {} }; +} // namespace Impl +} // namespace Kokkos + + +/*--------------------------------------------------------------------------*/ +/*--------------------------------------------------------------------------*/ +/* Parallel patterns for Kokkos::Serial with MDRangePolicy */ + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType , + Kokkos::Experimental::MDRangePolicy< Traits ... > , + Kokkos::Serial + > +{ +private: + + typedef Kokkos::Experimental::MDRangePolicy< Traits ... 
> MDRangePolicy ; + typedef typename MDRangePolicy::impl_range_policy Policy ; + + typedef typename Kokkos::Experimental::Impl::HostIterateTile< MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void > iterate_type; + + const FunctorType m_functor ; + const MDRangePolicy m_mdr_policy ; + const Policy m_policy ; + + void + exec() const + { + const typename Policy::member_type e = m_policy.end(); + for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { + iterate_type( m_mdr_policy, m_functor )( i ); + } + } + +public: + + inline + void execute() const + { this->exec(); } + + inline + ParallelFor( const FunctorType & arg_functor + , const MDRangePolicy & arg_policy ) + : m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + {} +}; + + +template< class FunctorType , class ReducerType , class ... Traits > +class ParallelReduce< FunctorType + , Kokkos::Experimental::MDRangePolicy< Traits ... > + , ReducerType + , Kokkos::Serial + > +{ +private: + + typedef Kokkos::Experimental::MDRangePolicy< Traits ... 
> MDRangePolicy ; + typedef typename MDRangePolicy::impl_range_policy Policy ; + + typedef typename MDRangePolicy::work_tag WorkTag ; + + typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef typename ReducerTypeFwd::value_type ValueType; + + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd , WorkTag > ValueInit ; + + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; + + + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRangePolicy + , FunctorType + , WorkTag + , ValueType + >; + + + const FunctorType m_functor ; + const MDRangePolicy m_mdr_policy ; + const Policy m_policy ; + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + + inline + void + exec( reference_type update ) const + { + const typename Policy::member_type e = m_policy.end(); + for ( typename Policy::member_type i = m_policy.begin() ; i < e ; ++i ) { + iterate_type( m_mdr_policy, m_functor, update )( i ); + } + } + +public: + + inline + void execute() const + { + const size_t pool_reduce_size = + Analysis::value_size( ReducerConditional::select(m_functor , m_reducer) ); + const size_t team_reduce_size = 0 ; // Never shrinks + const size_t team_shared_size = 0 ; // Never shrinks + const size_t thread_local_size = 0 ; // Never shrinks + + serial_resize_thread_team_data( pool_reduce_size + , team_reduce_size + , team_shared_size + , thread_local_size ); + + HostThreadTeamData & data = *serial_get_thread_team_data(); + + pointer_type ptr = + m_result_ptr ? 
m_result_ptr : pointer_type(data.pool_reduce_local()); + + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + this-> exec( update ); + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >:: + final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + } + + template< class HostViewType > + ParallelReduce( const FunctorType & arg_functor , + const MDRangePolicy & arg_policy , + const HostViewType & arg_result_view , + typename std::enable_if< + Kokkos::is_view< HostViewType >::value && + !Kokkos::is_reducer_type::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result_view.data() ) + { + static_assert( Kokkos::is_view< HostViewType >::value + , "Kokkos::Serial reduce result must be a View" ); + + static_assert( std::is_same< typename HostViewType::memory_space , HostSpace >::value + , "Kokkos::Serial reduce result must be a View in HostSpace" ); + } + + inline + ParallelReduce( const FunctorType & arg_functor + , MDRangePolicy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + , m_reducer( reducer ) + , m_result_ptr( reducer.view().data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } +}; + + + } // namespace Impl } // namespace Kokkos @@ -819,6 +993,60 @@ public: /*--------------------------------------------------------------------------*/ /*--------------------------------------------------------------------------*/ +namespace Kokkos { namespace Experimental { + +template<> +class UniqueToken< Serial, UniqueTokenScope::Instance> +{ +public: + using execution_space = Serial; + 
using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken( execution_space const& = execution_space() ) noexcept {} + + /// \brief upper bound for acquired values, i.e. 0 <= value < size() + inline + int size() const noexcept { return 1; } + + /// \brief acquire value such that 0 <= value < size() + inline + int acquire() const noexcept { return 0; } + + /// \brief release a value acquired by generate + inline + void release( int ) const noexcept {} +}; + +template<> +class UniqueToken< Serial, UniqueTokenScope::Global> +{ +public: + using execution_space = Serial; + using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken( execution_space const& = execution_space() ) noexcept {} + + /// \brief upper bound for acquired values, i.e. 0 <= value < size() + inline + int size() const noexcept { return 1; } + + /// \brief acquire value such that 0 <= value < size() + inline + int acquire() const noexcept { return 0; } + + /// \brief release a value acquired by generate + inline + void release( int ) const noexcept {} +}; + +}} // namespace Kokkos::Experimental + #include #endif // defined( KOKKOS_ENABLE_SERIAL ) diff --git a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp index 7edda7aa75..fcfc91a4ee 100644 --- a/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp +++ b/lib/kokkos/core/src/Kokkos_TaskScheduler.hpp @@ -148,7 +148,7 @@ private: typename std::conditional< Arg2_is_space , Arg2 , void >::type >::type ; - using task_base = Impl::TaskBase< Space , ValueType , void > ; + using task_base = Impl::TaskBase< void , void , void > ; using queue_type = Impl::TaskQueue< Space > ; task_base * m_task ; @@ -293,13 +293,17 @@ public: //---------------------------------------- KOKKOS_INLINE_FUNCTION - typename 
task_base::get_return_type + int is_ready() const noexcept + { return ( 0 == m_task ) || ( ((task_base*) task_base::LockTag) == m_task->m_wait ); } + + KOKKOS_INLINE_FUNCTION + const typename Impl::TaskResult< ValueType >::reference_type get() const { if ( 0 == m_task ) { Kokkos::abort( "Kokkos:::Future::get ERROR: is_null()"); } - return m_task->get(); + return Impl::TaskResult< ValueType >::get( m_task ); } }; @@ -396,7 +400,7 @@ private: using track_type = Kokkos::Impl::SharedAllocationTracker ; using queue_type = Kokkos::Impl::TaskQueue< ExecSpace > ; - using task_base = Impl::TaskBase< ExecSpace , void , void > ; + using task_base = Impl::TaskBase< void , void , void > ; track_type m_track ; queue_type * m_queue ; @@ -464,29 +468,19 @@ public: KOKKOS_INLINE_FUNCTION memory_pool * memory() const noexcept - { return m_queue ? m_queue->m_memory : (memory_pool*) 0 ; } + { return m_queue ? &( m_queue->m_memory ) : (memory_pool*) 0 ; } //---------------------------------------- /**\brief Allocation size for a spawned task */ template< typename FunctorType > KOKKOS_FUNCTION size_t spawn_allocation_size() const - { - using task_type = Impl::TaskBase< execution_space - , typename FunctorType::value_type - , FunctorType > ; - - return m_queue->allocate_block_size( sizeof(task_type) ); - } + { return m_queue->template spawn_allocation_size< FunctorType >(); } /**\brief Allocation size for a when_all aggregate */ KOKKOS_FUNCTION size_t when_all_allocation_size( int narg ) const - { - using task_base = Kokkos::Impl::TaskBase< ExecSpace , void , void > ; - - return m_queue->allocate_block_size( sizeof(task_base) + narg * sizeof(task_base*) ); - } + { return m_queue->when_all_allocation_size( narg ); } //---------------------------------------- @@ -507,7 +501,7 @@ public: queue_type * const queue = arg_policy.m_scheduler ? arg_policy.m_scheduler->m_queue : ( arg_policy.m_dependence.m_task - ? arg_policy.m_dependence.m_task->m_queue + ? 
static_cast(arg_policy.m_dependence.m_task->m_queue) : (queue_type*) 0 ); if ( 0 == queue ) { @@ -530,8 +524,12 @@ public: future_type f ; // Allocate task from memory pool + + const size_t alloc_size = + queue->template spawn_allocation_size< FunctorType >(); + f.m_task = - reinterpret_cast< task_type * >(queue->allocate(sizeof(task_type))); + reinterpret_cast< task_type * >(queue->allocate(alloc_size) ); if ( f.m_task ) { @@ -539,15 +537,17 @@ public: // Reference count starts at two: // +1 for the matching decrement when task is complete // +1 for the future - new ( f.m_task ) - task_type( arg_function - , queue - , arg_policy.m_dependence.m_task /* dependence */ - , 2 /* reference count */ - , int(sizeof(task_type)) /* allocation size */ - , int(arg_policy.m_task_type) - , int(arg_policy.m_priority) - , std::move(arg_functor) ); + new ( f.m_task ) task_type( std::move(arg_functor) ); + + f.m_task->m_apply = arg_function ; + f.m_task->m_queue = queue ; + f.m_task->m_next = arg_policy.m_dependence.m_task ; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = alloc_size ; + f.m_task->m_task_type = arg_policy.m_task_type ; + f.m_task->m_priority = arg_policy.m_priority ; + + Kokkos::memory_fence(); // The dependence (if any) is processed immediately // within the schedule function, as such the dependence's @@ -586,6 +586,30 @@ public: // Postcondition: task is in Executing-Respawn state } + template< typename FunctorType > + KOKKOS_FUNCTION static + void + respawn( FunctorType * arg_self + , TaskScheduler const & + , TaskPriority const & arg_priority + ) + { + // Precondition: task is in Executing state + + using value_type = typename FunctorType::value_type ; + using task_type = Impl::TaskBase< execution_space + , value_type + , FunctorType > ; + + task_type * const task = static_cast< task_type * >( arg_self ); + + task->m_priority = static_cast(arg_priority); + + task->add_dependence( (task_base*) 0 ); + + // Postcondition: task is in Executing-Respawn state 
+ } + //---------------------------------------- /**\brief Return a future that is complete * when all input futures are complete. @@ -596,7 +620,7 @@ public: when_all( Future< A1 , A2 > const arg[] , int narg ) { using future_type = Future< execution_space > ; - using task_base = Kokkos::Impl::TaskBase< execution_space , void , void > ; + using task_base = Kokkos::Impl::TaskBase< void , void , void > ; future_type f ; @@ -610,9 +634,9 @@ public: // Increment reference count to track subsequent assignment. Kokkos::atomic_increment( &(t->m_ref_count) ); if ( queue == 0 ) { - queue = t->m_queue ; + queue = static_cast< queue_type * >( t->m_queue ); } - else if ( queue != t->m_queue ) { + else if ( queue != static_cast< queue_type * >( t->m_queue ) ) { Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" ); } } @@ -620,28 +644,34 @@ public: if ( queue != 0 ) { - size_t const size = sizeof(task_base) + narg * sizeof(task_base*); + size_t const alloc_size = queue->when_all_allocation_size( narg ); f.m_task = - reinterpret_cast< task_base * >( queue->allocate( size ) ); + reinterpret_cast< task_base * >( queue->allocate( alloc_size ) ); if ( f.m_task ) { // Reference count starts at two: // +1 to match decrement when task completes // +1 for the future - new( f.m_task ) task_base( queue - , 2 /* reference count */ - , size /* allocation size */ - , narg /* dependence count */ - ); + + new( f.m_task ) task_base(); + + f.m_task->m_queue = queue ; + f.m_task->m_ref_count = 2 ; + f.m_task->m_alloc_size = alloc_size ; + f.m_task->m_dep_count = narg ; + f.m_task->m_task_type = task_base::Aggregate ; // Assign dependences, reference counts were already incremented - task_base ** const dep = f.m_task->aggregate_dependences(); + task_base * volatile * const dep = + f.m_task->aggregate_dependences(); for ( int i = 0 ; i < narg ; ++i ) { dep[i] = arg[i].m_task ; } + Kokkos::memory_fence(); + queue->schedule_aggregate( f.m_task ); // this when_all may be processed 
at any moment } diff --git a/lib/kokkos/core/src/Kokkos_UniqueToken.hpp b/lib/kokkos/core/src/Kokkos_UniqueToken.hpp new file mode 100644 index 0000000000..1ffb07a6db --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_UniqueToken.hpp @@ -0,0 +1,88 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_UNIQUE_TOKEN_HPP +#define KOKKOS_UNIQUE_TOKEN_HPP + +#include + +namespace Kokkos { namespace Experimental { + +enum class UniqueTokenScope : int +{ + Instance, + Global +}; + +/// \brief class to generate unique ids based on the required amount of concurrency +/// +/// This object should behave like a ref-counted object, so that when the last +/// instance is destroyed resources are freed if needed +template +class UniqueToken +{ +public: + using execution_space = ExecutionSpace; + using size_type = typename execution_space::size_type; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken( execution_space const& = execution_space() ); + + /// \brief upper bound for acquired values, i.e. 
0 <= value < size() + KOKKOS_INLINE_FUNCTION + size_type size() const ; + + /// \brief acquire value such that 0 <= value < size() + KOKKOS_INLINE_FUNCTION + size_type acquire() const ; + + /// \brief release a value acquired by generate + KOKKOS_INLINE_FUNCTION + void release( size_type ) const ; +}; + +}} // namespace Kokkos::Experimental + +#endif //KOKKOS_UNIQUE_TOKEN_HPP diff --git a/lib/kokkos/core/src/Kokkos_View.hpp b/lib/kokkos/core/src/Kokkos_View.hpp index 3312aa6a96..1754e4a8fb 100644 --- a/lib/kokkos/core/src/Kokkos_View.hpp +++ b/lib/kokkos/core/src/Kokkos_View.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -54,11 +54,14 @@ #include #include +#if defined(KOKKOS_ENABLE_PROFILING) +#include +#endif + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { template< class DataType > @@ -73,16 +76,6 @@ struct ViewDataAnalysis ; template< class , class ... 
> class ViewMapping { public: enum { is_assignable = false }; }; -} /* namespace Impl */ -} /* namespace Experimental */ -} /* namespace Kokkos */ - -namespace Kokkos { -namespace Impl { - -using Kokkos::Experimental::Impl::ViewMapping ; -using Kokkos::Experimental::Impl::ViewDataAnalysis ; - } /* namespace Impl */ } /* namespace Kokkos */ @@ -1563,12 +1556,12 @@ namespace Kokkos { namespace Impl { inline -void shared_allocation_tracking_claim_and_disable() -{ Kokkos::Impl::SharedAllocationRecord::tracking_claim_and_disable(); } +void shared_allocation_tracking_disable() +{ Kokkos::Impl::SharedAllocationRecord::tracking_disable(); } inline -void shared_allocation_tracking_release_and_enable() -{ Kokkos::Impl::SharedAllocationRecord::tracking_release_and_enable(); } +void shared_allocation_tracking_enable() +{ Kokkos::Impl::SharedAllocationRecord::tracking_enable(); } } /* namespace Impl */ } /* namespace Kokkos */ @@ -1795,6 +1788,20 @@ void deep_copy if ( (void *) dst.data() != (void*) src.data() ) { +#if defined(KOKKOS_ENABLE_PROFILING) + if (Kokkos::Profiling::profileLibraryLoaded()) { + const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span(); + Kokkos::Profiling::beginDeepCopy( + Kokkos::Profiling::SpaceHandle(dst_memory_space::name()), + dst.label(), + dst.data(), + Kokkos::Profiling::SpaceHandle(src_memory_space::name()), + src.label(), + src.data(), + nbytes); + } +#endif + // Concern: If overlapping views then a parallel copy will be erroneous. // ... 
@@ -1882,7 +1889,14 @@ void deep_copy else { Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation"); } - } + +#if defined(KOKKOS_ENABLE_PROFILING) + if (Kokkos::Profiling::profileLibraryLoaded()) { + Kokkos::Profiling::endDeepCopy(); + } +#endif + + } // ( (void *) dst.data() != (void*) src.data() ) } } /* namespace Kokkos */ @@ -2249,6 +2263,82 @@ resize( Kokkos::View & v , static_assert( Kokkos::ViewTraits::is_managed , "Can only resize managed views" ); + // Fix #904 by checking dimensions before actually resizing. + // + // Rank is known at compile time, so hopefully the compiler will + // remove branches that are compile-time false. The upcoming "if + // constexpr" language feature would make this certain. + if (view_type::Rank == 1 && + n0 == static_cast (v.extent(0))) { + return; + } + if (view_type::Rank == 2 && + n0 == static_cast (v.extent(0)) && + n1 == static_cast (v.extent(1))) { + return; + } + if (view_type::Rank == 3 && + n0 == static_cast (v.extent(0)) && + n1 == static_cast (v.extent(1)) && + n2 == static_cast (v.extent(2))) { + return; + } + if (view_type::Rank == 4 && + n0 == static_cast (v.extent(0)) && + n1 == static_cast (v.extent(1)) && + n2 == static_cast (v.extent(2)) && + n3 == static_cast (v.extent(3))) { + return; + } + if (view_type::Rank == 5 && + n0 == static_cast (v.extent(0)) && + n1 == static_cast (v.extent(1)) && + n2 == static_cast (v.extent(2)) && + n3 == static_cast (v.extent(3)) && + n4 == static_cast (v.extent(4))) { + return; + } + if (view_type::Rank == 6 && + n0 == static_cast (v.extent(0)) && + n1 == static_cast (v.extent(1)) && + n2 == static_cast (v.extent(2)) && + n3 == static_cast (v.extent(3)) && + n4 == static_cast (v.extent(4)) && + n5 == static_cast (v.extent(5))) { + return; + } + if (view_type::Rank == 7 && + n0 == static_cast (v.extent(0)) && + n1 == static_cast (v.extent(1)) && + n2 == static_cast (v.extent(2)) && + n3 == static_cast (v.extent(3)) && + n4 == 
static_cast (v.extent(4)) && + n5 == static_cast (v.extent(5)) && + n6 == static_cast (v.extent(6))) { + return; + } + if (view_type::Rank == 8 && + n0 == static_cast (v.extent(0)) && + n1 == static_cast (v.extent(1)) && + n2 == static_cast (v.extent(2)) && + n3 == static_cast (v.extent(3)) && + n4 == static_cast (v.extent(4)) && + n5 == static_cast (v.extent(5)) && + n6 == static_cast (v.extent(6)) && + n7 == static_cast (v.extent(7))) { + return; + } + // If Kokkos ever supports Views of rank > 8, the above code won't + // be incorrect, because avoiding reallocation in resize() is just + // an optimization. + + // TODO (mfh 27 Jun 2017) If the old View has enough space but just + // different dimensions (e.g., if the product of the dimensions, + // including extra space for alignment, will not change), then + // consider just reusing storage. For now, Kokkos always + // reallocates if any of the dimensions change, even if the old View + // has enough space. + view_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6, n7 ); Kokkos::Impl::ViewRemap< view_type , view_type >( v_resized , v ); @@ -2317,6 +2407,106 @@ void realloc( Kokkos::View & v , } } /* namespace Kokkos */ +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { namespace Impl { + +template < class Specialize, typename A, typename B > +struct CommonViewValueType; + +template < typename A, typename B > +struct CommonViewValueType< void, A, B > +{ + using value_type = typename std::common_type< A , B >::type; +}; + + +template < class Specialize, class ValueType > +struct CommonViewAllocProp; + +template < class ValueType > +struct CommonViewAllocProp< void, ValueType > +{ + using value_type = ValueType; + + template < class ... Views > + CommonViewAllocProp( const Views & ... ) {} +}; + + +template < class ... 
Views > +struct DeduceCommonViewAllocProp; + +// Base case must provide types for: +// 1. specialize 2. value_type 3. is_view 4. prop_type +template < class FirstView > +struct DeduceCommonViewAllocProp< FirstView > +{ + using specialize = typename FirstView::traits::specialize; + + using value_type = typename FirstView::traits::value_type; + + enum : bool { is_view = is_view< FirstView >::value }; + + using prop_type = CommonViewAllocProp< specialize, value_type >; +}; + + +template < class FirstView, class ... NextViews > +struct DeduceCommonViewAllocProp< FirstView, NextViews... > +{ + using NextTraits = DeduceCommonViewAllocProp< NextViews... >; + + using first_specialize = typename FirstView::traits::specialize; + using first_value_type = typename FirstView::traits::value_type; + + enum : bool { first_is_view = is_view< FirstView >::value }; + + using next_specialize = typename NextTraits::specialize; + using next_value_type = typename NextTraits::value_type; + + enum : bool { next_is_view = NextTraits::is_view }; + + // common types + + // determine specialize type + // if first and next specialize differ, but are not the same specialize, error out + static_assert( !(!std::is_same< first_specialize, next_specialize >::value && !std::is_same< first_specialize, void>::value && !std::is_same< void, next_specialize >::value) , "Kokkos DeduceCommonViewAllocProp ERROR: Only one non-void specialize trait allowed" ); + + // otherwise choose non-void specialize if either/both are non-void + using specialize = typename std::conditional< std::is_same< first_specialize, next_specialize >::value + , first_specialize + , typename std::conditional< ( std::is_same< first_specialize, void >::value + && !std::is_same< next_specialize, void >::value) + , next_specialize + , first_specialize + >::type + >::type; + + using value_type = typename CommonViewValueType< specialize, first_value_type, next_value_type >::value_type; + + enum : bool { is_view = (first_is_view && 
next_is_view) }; + + using prop_type = CommonViewAllocProp< specialize, value_type >; +}; + +} // end namespace Impl + +template < class ... Views > +using DeducedCommonPropsType = typename Impl::DeduceCommonViewAllocProp::prop_type ; + +// User function +template < class ... Views > +DeducedCommonPropsType +common_view_alloc_prop( Views const & ... views ) +{ + return DeducedCommonPropsType( views... ); +} + +} // namespace Kokkos + + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- // For backward compatibility: @@ -2350,6 +2540,9 @@ using Kokkos::Impl::WithoutInitializing_t ; using Kokkos::Impl::AllowPadding_t ; using Kokkos::Impl::SharedAllocationRecord ; using Kokkos::Impl::SharedAllocationTracker ; +using Kokkos::Impl::ViewMapping ; +using Kokkos::Impl::ViewDataAnalysis ; + } /* namespace Impl */ } /* namespace Experimental */ diff --git a/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp b/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp new file mode 100644 index 0000000000..58b0f72f51 --- /dev/null +++ b/lib/kokkos/core/src/Kokkos_WorkGraphPolicy.hpp @@ -0,0 +1,265 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_WORKGRAPHPOLICY_HPP +#define KOKKOS_WORKGRAPHPOLICY_HPP + +namespace Kokkos { +namespace Impl { +namespace Experimental { + +template< class functor_type , class execution_space, class ... policy_args > +class WorkGraphExec; + +}}} // namespace Kokkos::Impl::Experimental + +namespace Kokkos { +namespace Experimental { + +template< class ... 
Properties > +class WorkGraphPolicy +{ +public: + + using self_type = WorkGraphPolicy; + using traits = Kokkos::Impl::PolicyTraits; + using index_type = typename traits::index_type; + using execution_space = typename traits::execution_space; + using work_tag = typename traits::work_tag; + using memory_space = typename execution_space::memory_space; + using graph_type = Kokkos::Experimental::Crs; + using member_type = index_type; + +private: + + graph_type m_graph; + + using ints_type = Kokkos::View; + using range_type = Kokkos::pair; + using ranges_type = Kokkos::View; + const std::int32_t m_total_work; + ints_type m_counts; + ints_type m_queue; + ranges_type m_ranges; + +public: + + struct TagZeroRanges {}; + KOKKOS_INLINE_FUNCTION + void operator()(TagZeroRanges, std::int32_t i) const { + m_ranges[i] = range_type(0, 0); + } + void zero_ranges() { + using policy_type = RangePolicy; + using closure_type = Kokkos::Impl::ParallelFor; + const closure_type closure(*this, policy_type(0, 1)); + closure.execute(); + execution_space::fence(); + } + + struct TagFillQueue {}; + KOKKOS_INLINE_FUNCTION + void operator()(TagFillQueue, std::int32_t i) const { + if (*((volatile std::int32_t*)(&m_counts(i))) == 0) push_work(i); + } + void fill_queue() { + using policy_type = RangePolicy; + using closure_type = Kokkos::Impl::ParallelFor; + const closure_type closure(*this, policy_type(0, m_total_work)); + closure.execute(); + execution_space::fence(); + } + +private: + + inline + void setup() { + if (m_graph.numRows() > std::numeric_limits::max()) { + Kokkos::abort("WorkGraphPolicy work must be indexable using int32_t"); + } + get_crs_transpose_counts(m_counts, m_graph); + m_queue = ints_type(ViewAllocateWithoutInitializing("queue"), m_total_work); + deep_copy(m_queue, std::int32_t(-1)); + m_ranges = ranges_type("ranges", 1); + fill_queue(); + } + + KOKKOS_INLINE_FUNCTION + std::int32_t pop_work() const { + range_type w(-1,-1); + while (true) { + const range_type w_new( w.first + 1 
, w.second ); + w = atomic_compare_exchange( &m_ranges(0) , w , w_new ); + if ( w.first < w.second ) { // there was work in the queue + if ( w_new.first == w.first + 1 && w_new.second == w.second ) { + // we got a work item + std::int32_t i; + // the push_work function may have incremented the end counter + // but not yet written the work index into the queue. + // wait until the entry is valid. + while ( -1 == ( i = *((volatile std::int32_t*)(&m_queue( w.first ))) ) ); + return i; + } // we got a work item + } else { // there was no work in the queue +#ifdef KOKKOS_DEBUG + if ( w_new.first == w.first + 1 && w_new.second == w.second ) { + Kokkos::abort("bug in pop_work"); + } +#endif + if (w.first == m_total_work) { // all work is done + return -1; + } else { // need to wait for more work to be pushed + // take a guess that one work item will be pushed + // the key thing is we can't leave (w) alone, because + // otherwise the next compare_exchange may succeed in + // popping work from an empty queue + w.second++; + } + } // there was no work in the queue + } // while (true) + } + + KOKKOS_INLINE_FUNCTION + void push_work(std::int32_t i) const { + range_type w(-1,-1); + while (true) { + const range_type w_new( w.first , w.second + 1 ); + // try to increment the end counter + w = atomic_compare_exchange( &m_ranges(0) , w , w_new ); + // stop trying if the increment was successful + if ( w.first == w_new.first && w.second + 1 == w_new.second ) break; + } + // write the work index into the claimed spot in the queue + *((volatile std::int32_t*)(&m_queue( w.second ))) = i; + // push this write out into the memory system + memory_fence(); + } + + template< class functor_type , class execution_space, class ... 
policy_args > + friend class Kokkos::Impl::Experimental::WorkGraphExec; + +public: + + WorkGraphPolicy(graph_type arg_graph) + : m_graph(arg_graph) + , m_total_work( arg_graph.numRows() ) + { + setup(); + } + +}; + +}} // namespace Kokkos::Experimental + +/*--------------------------------------------------------------------------*/ + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { +namespace Experimental { + +template< class functor_type , class execution_space, class ... policy_args > +class WorkGraphExec +{ + public: + + using self_type = WorkGraphExec< functor_type, execution_space, policy_args ... >; + using policy_type = Kokkos::Experimental::WorkGraphPolicy< policy_args ... >; + using member_type = typename policy_type::member_type; + using memory_space = typename execution_space::memory_space; + + protected: + + const functor_type m_functor; + const policy_type m_policy; + + protected: + + KOKKOS_INLINE_FUNCTION + std::int32_t before_work() const { + return m_policy.pop_work(); + } + + KOKKOS_INLINE_FUNCTION + void after_work(std::int32_t i) const { + /* fence any writes that were done by the work item itself + (usually writing its result to global memory) */ + memory_fence(); + const std::int32_t begin = m_policy.m_graph.row_map( i ); + const std::int32_t end = m_policy.m_graph.row_map( i + 1 ); + for (std::int32_t j = begin; j < end; ++j) { + const std::int32_t next = m_policy.m_graph.entries( j ); + const std::int32_t old_count = atomic_fetch_add( &(m_policy.m_counts(next)), -1 ); + if ( old_count == 1 ) m_policy.push_work( next ); + } + } + + inline + WorkGraphExec( const functor_type & arg_functor + , const policy_type & arg_policy ) + : m_functor( arg_functor ) + , m_policy( arg_policy ) + { + } +}; + +}}} // namespace Kokkos::Impl::Experimental + +#ifdef KOKKOS_ENABLE_SERIAL +#include "impl/Kokkos_Serial_WorkGraphPolicy.hpp" +#endif + +#ifdef KOKKOS_ENABLE_OPENMP +#include 
"OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp" +#endif + +#ifdef KOKKOS_ENABLE_CUDA +#include "Cuda/Kokkos_Cuda_WorkGraphPolicy.hpp" +#endif + +#ifdef KOKKOS_ENABLE_THREADS +#include "Threads/Kokkos_Threads_WorkGraphPolicy.hpp" +#endif + +#endif /* #define KOKKOS_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp index 4e0ea93920..915fbe52c1 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp @@ -45,75 +45,100 @@ #if defined( KOKKOS_ENABLE_OPENMP ) #include +#include + #include #include #include + #include + #include -#include #include #include namespace Kokkos { namespace Impl { -namespace { -KOKKOS_INLINE_FUNCTION -int kokkos_omp_in_parallel(); +int g_openmp_hardware_max_threads = 1; -int kokkos_omp_in_critical_region = ( Kokkos::HostSpace::register_in_parallel( kokkos_omp_in_parallel ) , 0 ); +__thread int t_openmp_hardware_id = 0; +__thread Impl::OpenMPExec * t_openmp_instance = nullptr; -KOKKOS_INLINE_FUNCTION -int kokkos_omp_in_parallel() +void OpenMPExec::validate_partition( const int nthreads + , int & num_partitions + , int & partition_size + ) { -#ifndef __CUDA_ARCH__ - return omp_in_parallel() && ! 
kokkos_omp_in_critical_region ; -#else - return 0; -#endif + if (nthreads == 1) { + num_partitions = 1; + partition_size = 1; + } + else if( num_partitions < 1 && partition_size < 1) { + int idle = nthreads; + for (int np = 2; np <= nthreads ; ++np) { + for (int ps = 1; ps <= nthreads/np; ++ps) { + if (nthreads - np*ps < idle) { + idle = nthreads - np*ps; + num_partitions = np; + partition_size = ps; + } + if (idle == 0) { + break; + } + } + } + } + else if( num_partitions < 1 && partition_size > 0 ) { + if ( partition_size <= nthreads ) { + num_partitions = nthreads / partition_size; + } + else { + num_partitions = 1; + partition_size = nthreads; + } + } + else if( num_partitions > 0 && partition_size < 1 ) { + if ( num_partitions <= nthreads ) { + partition_size = nthreads / num_partitions; + } + else { + num_partitions = nthreads; + partition_size = 1; + } + } + else if ( num_partitions * partition_size > nthreads ) { + int idle = nthreads; + const int NP = num_partitions; + const int PS = partition_size; + for (int np = NP; np > 0; --np) { + for (int ps = PS; ps > 0; --ps) { + if ( (np*ps <= nthreads) + && (nthreads - np*ps < idle) ) { + idle = nthreads - np*ps; + num_partitions = np; + partition_size = ps; + } + if (idle == 0) { + break; + } + } + } + } + } -bool s_using_hwloc = false; - -} // namespace -} // namespace Impl -} // namespace Kokkos - - -namespace Kokkos { -namespace Impl { - -int OpenMPExec::m_map_rank[ OpenMPExec::MAX_THREAD_COUNT ] = { 0 }; - -int OpenMPExec::m_pool_topo[ 4 ] = { 0 }; - -HostThreadTeamData * OpenMPExec::m_pool[ OpenMPExec::MAX_THREAD_COUNT ] = { 0 }; - -void OpenMPExec::verify_is_process( const char * const label ) +void OpenMPExec::verify_is_master( const char * const label ) { - if ( omp_in_parallel() ) { + if ( !t_openmp_instance ) + { std::string msg( label ); - msg.append( " ERROR: in parallel" ); + msg.append( " ERROR: in parallel or not initialized" ); Kokkos::Impl::throw_runtime_exception( msg ); } } -void 
OpenMPExec::verify_initialized( const char * const label ) -{ - if ( 0 == m_pool[0] ) { - std::string msg( label ); - msg.append( " ERROR: not initialized" ); - Kokkos::Impl::throw_runtime_exception( msg ); - } - - if ( omp_get_max_threads() != Kokkos::OpenMP::thread_pool_size(0) ) { - std::string msg( label ); - msg.append( " ERROR: Initialized but threads modified inappropriately" ); - Kokkos::Impl::throw_runtime_exception( msg ); - } - -} } // namespace Impl } // namespace Kokkos @@ -133,11 +158,11 @@ void OpenMPExec::clear_thread_data() const int old_alloc_bytes = m_pool[0] ? ( member_bytes + m_pool[0]->scratch_bytes() ) : 0 ; - Kokkos::HostSpace space ; + OpenMP::memory_space space ; -#pragma omp parallel + #pragma omp parallel num_threads( m_pool_size ) { - const int rank = m_map_rank[ omp_get_thread_num() ]; + const int rank = omp_get_thread_num(); if ( 0 != m_pool[rank] ) { @@ -189,13 +214,13 @@ void OpenMPExec::resize_thread_data( size_t pool_reduce_bytes , team_shared_bytes , thread_local_bytes ); - const int pool_size = omp_get_max_threads(); + OpenMP::memory_space space ; - Kokkos::HostSpace space ; + memory_fence(); -#pragma omp parallel + #pragma omp parallel num_threads(m_pool_size) { - const int rank = m_map_rank[ omp_get_thread_num() ]; + const int rank = omp_get_thread_num(); if ( 0 != m_pool[rank] ) { @@ -214,11 +239,14 @@ void OpenMPExec::resize_thread_data( size_t pool_reduce_bytes , pool_reduce_bytes , team_reduce_bytes , team_shared_bytes - , thread_local_bytes ); + , thread_local_bytes + ); + + memory_fence(); } /* END #pragma omp parallel */ - HostThreadTeamData::organize_pool( m_pool , pool_size ); + HostThreadTeamData::organize_pool( m_pool , m_pool_size ); } } @@ -232,16 +260,8 @@ namespace Kokkos { //---------------------------------------------------------------------------- -int OpenMP::is_initialized() -{ return 0 != Impl::OpenMPExec::m_pool[0]; } - -void OpenMP::initialize( unsigned thread_count , - unsigned use_numa_count , - 
unsigned use_cores_per_numa ) +int OpenMP::get_current_max_threads() noexcept { - // Before any other call to OMP query the maximum number of threads - // and save the value for re-initialization unit testing. - // Using omp_get_max_threads(); is problematic in conjunction with // Hwloc on Intel (essentially an initial call to the OpenMP runtime // without a parallel region before will set a process mask for a single core @@ -250,110 +270,99 @@ void OpenMP::initialize( unsigned thread_count , // the thread masks. The intend seems to be to make serial code run fast, if you // compile with OpenMP enabled but don't actually use parallel regions or so // static int omp_max_threads = omp_get_max_threads(); - int nthreads = 0; + + int count = 0; #pragma omp parallel { #pragma omp atomic - nthreads++; + ++count; } + return count; +} - static int omp_max_threads = nthreads; - - const bool is_initialized = 0 != Impl::OpenMPExec::m_pool[0] ; - - bool thread_spawn_failed = false ; - - if ( ! is_initialized ) { - - // Use hwloc thread pinning if concerned with locality. - // If spreading threads across multiple NUMA regions. - // If hyperthreading is enabled. - Impl::s_using_hwloc = hwloc::available() && ( - ( 1 < Kokkos::hwloc::get_available_numa_count() ) || - ( 1 < Kokkos::hwloc::get_available_threads_per_core() ) ); - - std::pair threads_coord[ Impl::OpenMPExec::MAX_THREAD_COUNT ]; - - // If hwloc available then use it's maximum value. - - if ( thread_count == 0 ) { - thread_count = Impl::s_using_hwloc - ? 
Kokkos::hwloc::get_available_numa_count() * - Kokkos::hwloc::get_available_cores_per_numa() * - Kokkos::hwloc::get_available_threads_per_core() - : omp_max_threads ; - } - - if(Impl::s_using_hwloc) - hwloc::thread_mapping( "Kokkos::OpenMP::initialize" , - false /* do not allow asynchronous */ , - thread_count , - use_numa_count , - use_cores_per_numa , - threads_coord ); - - // Spawn threads: - - omp_set_num_threads( thread_count ); - - // Verify OMP interaction: - if ( int(thread_count) != omp_get_max_threads() ) { - thread_spawn_failed = true ; - } - - // Verify spawning and bind threads: -#pragma omp parallel - { -#pragma omp critical - { - if ( int(thread_count) != omp_get_num_threads() ) { - thread_spawn_failed = true ; - } - - // Call to 'bind_this_thread' is not thread safe so place this whole block in a critical region. - // Call to 'new' may not be thread safe as well. - - const unsigned omp_rank = omp_get_thread_num(); - const unsigned thread_r = Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() - ? Kokkos::hwloc::bind_this_thread( thread_count , threads_coord ) - : omp_rank ; - - Impl::OpenMPExec::m_map_rank[ omp_rank ] = thread_r ; - } -/* END #pragma omp critical */ - } -/* END #pragma omp parallel */ - - if ( ! thread_spawn_failed ) { - Impl::OpenMPExec::m_pool_topo[0] = thread_count ; - Impl::OpenMPExec::m_pool_topo[1] = Impl::s_using_hwloc ? thread_count / use_numa_count : thread_count; - Impl::OpenMPExec::m_pool_topo[2] = Impl::s_using_hwloc ? 
thread_count / ( use_numa_count * use_cores_per_numa ) : 1; - - // New, unified host thread team data: - { - size_t pool_reduce_bytes = 32 * thread_count ; - size_t team_reduce_bytes = 32 * thread_count ; - size_t team_shared_bytes = 1024 * thread_count ; - size_t thread_local_bytes = 1024 ; - - Impl::OpenMPExec::resize_thread_data( pool_reduce_bytes - , team_reduce_bytes - , team_shared_bytes - , thread_local_bytes - ); - } - } - } - - if ( is_initialized || thread_spawn_failed ) { - std::string msg("Kokkos::OpenMP::initialize ERROR"); - - if ( is_initialized ) { msg.append(" : already initialized"); } - if ( thread_spawn_failed ) { msg.append(" : failed spawning threads"); } +void OpenMP::initialize( int thread_count ) +{ + if ( omp_in_parallel() ) { + std::string msg("Kokkos::OpenMP::initialize ERROR : in parallel"); Kokkos::Impl::throw_runtime_exception(msg); } + if ( Impl::t_openmp_instance ) + { + finalize(); + } + + { + if (nullptr == std::getenv("OMP_PROC_BIND") ) { + printf("Kokkos::OpenMP::initialize WARNING: OMP_PROC_BIND environment variable not set\n"); + printf(" In general, for best performance with OpenMP 4.0 or better set OMP_PROC_BIND=spread and OMP_PLACES=threads\n"); + printf(" For best performance with OpenMP 3.1 set OMP_PROC_BIND=true\n"); + printf(" For unit testing set OMP_PROC_BIND=false\n"); + } + + OpenMP::memory_space space ; + + // Before any other call to OMP query the maximum number of threads + // and save the value for re-initialization unit testing. 
+ + Impl::g_openmp_hardware_max_threads = get_current_max_threads(); + + int process_num_threads = Impl::g_openmp_hardware_max_threads; + + if ( Kokkos::hwloc::available() ) { + process_num_threads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core(); + } + + // if thread_count < 0, use g_openmp_hardware_max_threads; + // if thread_count == 0, set g_openmp_hardware_max_threads to process_num_threads + // if thread_count > 0, set g_openmp_hardware_max_threads to thread_count + if (thread_count < 0 ) { + thread_count = Impl::g_openmp_hardware_max_threads; + } + else if( thread_count == 0 && Impl::g_openmp_hardware_max_threads != process_num_threads ) { + Impl::g_openmp_hardware_max_threads = process_num_threads; + omp_set_num_threads(Impl::g_openmp_hardware_max_threads); + } + else { + if( thread_count > process_num_threads ) { + printf( "Kokkos::OpenMP::initialize WARNING: You are likely oversubscribing your CPU cores.\n"); + printf( " process threads available : %3d, requested thread : %3d\n", process_num_threads, thread_count ); + } + Impl::g_openmp_hardware_max_threads = thread_count; + omp_set_num_threads(Impl::g_openmp_hardware_max_threads); + } + + // setup thread local + #pragma omp parallel num_threads(Impl::g_openmp_hardware_max_threads) + { + Impl::t_openmp_instance = nullptr; + Impl::t_openmp_hardware_id = omp_get_thread_num(); + Impl::SharedAllocationRecord< void, void >::tracking_enable(); + } + + void * const ptr = space.allocate( sizeof(Impl::OpenMPExec) ); + + Impl::t_openmp_instance = new (ptr) Impl::OpenMPExec( Impl::g_openmp_hardware_max_threads ); + + // New, unified host thread team data: + { + size_t pool_reduce_bytes = 32 * thread_count ; + size_t team_reduce_bytes = 32 * thread_count ; + size_t team_shared_bytes = 1024 * thread_count ; + size_t thread_local_bytes = 1024 ; + + Impl::t_openmp_instance->resize_thread_data( pool_reduce_bytes + , 
team_reduce_bytes + , team_shared_bytes + , thread_local_bytes + ); + } + } + + // Check for over-subscription //if( Impl::mpi_ranks_per_node() * long(thread_count) > Impl::processors_per_node() ) { // std::cout << "Kokkos::OpenMP::initialize WARNING: You are likely oversubscribing your CPU cores." << std::endl; @@ -373,20 +382,38 @@ void OpenMP::initialize( unsigned thread_count , void OpenMP::finalize() { - Impl::OpenMPExec::verify_initialized( "OpenMP::finalize" ); - Impl::OpenMPExec::verify_is_process( "OpenMP::finalize" ); + if ( omp_in_parallel() ) + { + std::string msg("Kokkos::OpenMP::finalize ERROR "); + if( !Impl::t_openmp_instance ) msg.append(": not initialized"); + if( omp_in_parallel() ) msg.append(": in parallel"); + Kokkos::Impl::throw_runtime_exception(msg); + } - // New, unified host thread team data: - Impl::OpenMPExec::clear_thread_data(); + if ( Impl::t_openmp_instance ) { - Impl::OpenMPExec::m_pool_topo[0] = 0 ; - Impl::OpenMPExec::m_pool_topo[1] = 0 ; - Impl::OpenMPExec::m_pool_topo[2] = 0 ; + const int nthreads = Impl::t_openmp_instance->m_pool_size <= Impl::g_openmp_hardware_max_threads + ? 
Impl::g_openmp_hardware_max_threads + : Impl::t_openmp_instance->m_pool_size; - omp_set_num_threads(1); + using Exec = Impl::OpenMPExec; + Exec * instance = Impl::t_openmp_instance; + instance->~Exec(); - if ( Impl::s_using_hwloc && Kokkos::hwloc::can_bind_threads() ) { - hwloc::unbind_this_thread(); + OpenMP::memory_space space; + space.deallocate( instance, sizeof(Exec) ); + + #pragma omp parallel num_threads(nthreads) + { + Impl::t_openmp_hardware_id = 0; + Impl::t_openmp_instance = nullptr; + Impl::SharedAllocationRecord< void, void >::tracking_disable(); + } + + // allow main thread to track + Impl::SharedAllocationRecord< void, void >::tracking_enable(); + + Impl::g_openmp_hardware_max_threads = 1; } #if defined(KOKKOS_ENABLE_PROFILING) @@ -396,70 +423,48 @@ void OpenMP::finalize() //---------------------------------------------------------------------------- -void OpenMP::print_configuration( std::ostream & s , const bool detail ) +void OpenMP::print_configuration( std::ostream & s , const bool verbose ) { - Impl::OpenMPExec::verify_is_process( "OpenMP::print_configuration" ); - s << "Kokkos::OpenMP" ; -#if defined( KOKKOS_ENABLE_OPENMP ) - s << " KOKKOS_ENABLE_OPENMP" ; -#endif -#if defined( KOKKOS_ENABLE_HWLOC ) - - const unsigned numa_count_ = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); - - s << " hwloc[" << numa_count_ << "x" << cores_per_numa << "x" << threads_per_core << "]" - << " hwloc_binding_" << ( Impl::s_using_hwloc ? 
"enabled" : "disabled" ) - ; -#endif - - const bool is_initialized = 0 != Impl::OpenMPExec::m_pool[0] ; + const bool is_initialized = Impl::t_openmp_instance != nullptr; if ( is_initialized ) { - const int numa_count = Kokkos::Impl::OpenMPExec::m_pool_topo[0] / Kokkos::Impl::OpenMPExec::m_pool_topo[1] ; - const int core_per_numa = Kokkos::Impl::OpenMPExec::m_pool_topo[1] / Kokkos::Impl::OpenMPExec::m_pool_topo[2] ; - const int thread_per_core = Kokkos::Impl::OpenMPExec::m_pool_topo[2] ; + Impl::OpenMPExec::verify_is_master( "OpenMP::print_configuration" ); + + const int numa_count = 1; + const int core_per_numa = Impl::g_openmp_hardware_max_threads; + const int thread_per_core = 1; s << " thread_pool_topology[ " << numa_count << " x " << core_per_numa << " x " << thread_per_core << " ]" << std::endl ; - - if ( detail ) { - std::vector< std::pair > coord( Kokkos::Impl::OpenMPExec::m_pool_topo[0] ); - -#pragma omp parallel - { -#pragma omp critical - { - coord[ omp_get_thread_num() ] = hwloc::get_this_thread_coordinate(); - } -/* END #pragma omp critical */ - } -/* END #pragma omp parallel */ - - for ( unsigned i = 0 ; i < coord.size() ; ++i ) { - s << " thread omp_rank[" << i << "]" - << " kokkos_rank[" << Impl::OpenMPExec::m_map_rank[ i ] << "]" - << " hwloc_coord[" << coord[i].first << "." << coord[i].second << "]" - << std::endl ; - } - } } else { s << " not initialized" << std::endl ; } } +std::vector OpenMP::partition(...) +{ return std::vector(1); } + +OpenMP OpenMP::create_instance(...) 
{ return OpenMP(); } + + +#if !defined( KOKKOS_DISABLE_DEPRECATED ) + int OpenMP::concurrency() { - return thread_pool_size(0); + return Impl::g_openmp_hardware_max_threads; } -const char* OpenMP::name() { return "OpenMP"; } +void OpenMP::initialize( int thread_count , int, int ) +{ + initialize(thread_count); +} + +#endif } // namespace Kokkos diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp index 75b7f5da4a..37d2ac8318 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Exec.hpp @@ -47,6 +47,10 @@ #include #if defined( KOKKOS_ENABLE_OPENMP ) +#if !defined(_OPENMP) +#error "You enabled Kokkos OpenMP support without enabling OpenMP in the compiler!" +#endif + #include #include @@ -54,6 +58,8 @@ #include +#include + #include #include #include @@ -63,8 +69,14 @@ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { -namespace Impl { +namespace Kokkos { namespace Impl { + +class OpenMPExec; + +extern int g_openmp_hardware_max_threads; + +extern __thread int t_openmp_hardware_id; +extern __thread OpenMPExec * t_openmp_instance; //---------------------------------------------------------------------------- /** \brief Data for OpenMP thread execution */ @@ -74,279 +86,279 @@ public: friend class Kokkos::OpenMP ; - enum { MAX_THREAD_COUNT = 4096 }; + enum { MAX_THREAD_COUNT = 512 }; + + void clear_thread_data(); + + static void validate_partition( const int nthreads + , int & num_partitions + , int & partition_size + ); private: + OpenMPExec( int arg_pool_size ) + : m_pool_size{ arg_pool_size } + , m_level{ omp_get_level() } + , m_pool() + {} - static int m_pool_topo[ 4 ]; - static int m_map_rank[ MAX_THREAD_COUNT ]; + ~OpenMPExec() + { + clear_thread_data(); + } - static HostThreadTeamData * m_pool[ MAX_THREAD_COUNT ]; + int 
m_pool_size; + int m_level; - static - void clear_thread_data(); + HostThreadTeamData * m_pool[ MAX_THREAD_COUNT ]; public: - // Topology of a cache coherent thread pool: - // TOTAL = NUMA x GRAIN - // pool_size( depth = 0 ) - // pool_size(0) = total number of threads - // pool_size(1) = number of threads per NUMA - // pool_size(2) = number of threads sharing finest grain memory hierarchy + static void verify_is_master( const char * const ); - inline static - int pool_size( int depth = 0 ) { return m_pool_topo[ depth ]; } - - static void finalize(); - - static void initialize( const unsigned team_count , - const unsigned threads_per_team , - const unsigned numa_count , - const unsigned cores_per_numa ); - - static void verify_is_process( const char * const ); - static void verify_initialized( const char * const ); - - - static void resize_thread_data( size_t pool_reduce_bytes , size_t team_reduce_bytes , size_t team_shared_bytes , size_t thread_local_bytes ); - inline static - HostThreadTeamData * get_thread_data() noexcept - { return m_pool[ m_map_rank[ omp_get_thread_num() ] ]; } + inline + HostThreadTeamData * get_thread_data() const noexcept + { return m_pool[ m_level == omp_get_level() ? 0 : omp_get_thread_num() ]; } - inline static - HostThreadTeamData * get_thread_data( int i ) noexcept - { return m_pool[i]; } + inline + HostThreadTeamData * get_thread_data( int i ) const noexcept + { return m_pool[i]; } }; -} // namespace Impl -} // namespace Kokkos - -//---------------------------------------------------------------------------- -//---------------------------------------------------------------------------- - -namespace Kokkos { -namespace Impl { - -template< class ... Properties > -class TeamPolicyInternal< Kokkos::OpenMP, Properties ... >: public PolicyTraits -{ -public: - - //! 
Tag this class as a kokkos execution policy - typedef TeamPolicyInternal execution_policy ; - - typedef PolicyTraits traits; - - TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { - m_league_size = p.m_league_size; - m_team_size = p.m_team_size; - m_team_alloc = p.m_team_alloc; - m_team_iter = p.m_team_iter; - m_team_scratch_size[0] = p.m_team_scratch_size[0]; - m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; - m_team_scratch_size[1] = p.m_team_scratch_size[1]; - m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; - m_chunk_size = p.m_chunk_size; - return *this; - } - - //---------------------------------------- - - template< class FunctorType > - inline static - int team_size_max( const FunctorType & ) { - int pool_size = traits::execution_space::thread_pool_size(1); - int max_host_team_size = Impl::HostThreadTeamData::max_team_members; - return pool_size - inline static - int team_size_recommended( const FunctorType & ) - { return traits::execution_space::thread_pool_size(2); } - - template< class FunctorType > - inline static - int team_size_recommended( const FunctorType &, const int& ) - { return traits::execution_space::thread_pool_size(2); } - - //---------------------------------------- - -private: - - int m_league_size ; - int m_team_size ; - int m_team_alloc ; - int m_team_iter ; - - size_t m_team_scratch_size[2]; - size_t m_thread_scratch_size[2]; - - int m_chunk_size; - - inline void init( const int league_size_request - , const int team_size_request ) - { - const int pool_size = traits::execution_space::thread_pool_size(0); - const int max_host_team_size = Impl::HostThreadTeamData::max_team_members; - const int team_max = pool_size 0) { - if(!Impl::is_integral_power_of_two( m_chunk_size )) - Kokkos::abort("TeamPolicy blocking granularity must be power of two" ); - } - - int new_chunk_size = 1; - while(new_chunk_size*100*concurrency < m_league_size) - new_chunk_size *= 2; - if(new_chunk_size < 128) { - new_chunk_size = 1; - while( 
(new_chunk_size*40*concurrency < m_league_size ) && (new_chunk_size<128) ) - new_chunk_size*=2; - } - m_chunk_size = new_chunk_size; - } - -public: - typedef Impl::HostThreadTeamMember< Kokkos::OpenMP > member_type ; -}; -} // namespace Impl - -} // namespace Kokkos +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -inline -bool OpenMP::in_parallel() -{ return omp_in_parallel(); } +inline OpenMP::OpenMP() noexcept +{} inline -int OpenMP::thread_pool_size( int depth ) +bool OpenMP::is_initialized() noexcept +{ return Impl::t_openmp_instance != nullptr; } + +inline +bool OpenMP::in_parallel( OpenMP const& ) noexcept { - return Impl::OpenMPExec::pool_size(depth); + //t_openmp_instance is only non-null on a master thread + return !Impl::t_openmp_instance + || Impl::t_openmp_instance->m_level < omp_get_level() + ; +} + +inline +int OpenMP::thread_pool_size() noexcept +{ + return OpenMP::in_parallel() + ? omp_get_num_threads() + : Impl::t_openmp_instance->m_pool_size + ; } KOKKOS_INLINE_FUNCTION -int OpenMP::thread_pool_rank() +int OpenMP::thread_pool_rank() noexcept { #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - return Impl::OpenMPExec::m_map_rank[ omp_get_thread_num() ]; + return Impl::t_openmp_instance ? 
0 : omp_get_thread_num(); #else return -1 ; #endif } +inline +void OpenMP::fence( OpenMP const& instance ) noexcept {} + +inline +bool OpenMP::is_asynchronous( OpenMP const& instance ) noexcept +{ return false; } + +template +void OpenMP::partition_master( F const& f + , int num_partitions + , int partition_size + ) +{ + if (omp_get_nested()) { + using Exec = Impl::OpenMPExec; + + Exec * prev_instance = Impl::t_openmp_instance; + + Exec::validate_partition( prev_instance->m_pool_size, num_partitions, partition_size ); + + OpenMP::memory_space space; + + #pragma omp parallel num_threads(num_partitions) + { + void * const ptr = space.allocate( sizeof(Exec) ); + + Impl::t_openmp_instance = new (ptr) Exec( partition_size ); + + size_t pool_reduce_bytes = 32 * partition_size ; + size_t team_reduce_bytes = 32 * partition_size ; + size_t team_shared_bytes = 1024 * partition_size ; + size_t thread_local_bytes = 1024 ; + + Impl::t_openmp_instance->resize_thread_data( pool_reduce_bytes + , team_reduce_bytes + , team_shared_bytes + , thread_local_bytes + ); + + f( omp_get_thread_num(), omp_get_num_threads() ); + + Impl::t_openmp_instance->~Exec(); + space.deallocate( Impl::t_openmp_instance, sizeof(Exec) ); + Impl::t_openmp_instance = nullptr; + } + + Impl::t_openmp_instance = prev_instance; + } + else { + // nested openmp not enabled + f(0,1); + } +} + + +namespace Experimental { + +template<> +class MasterLock +{ +public: + void lock() { omp_set_lock( &m_lock ); } + void unlock() { omp_unset_lock( &m_lock ); } + bool try_lock() { return static_cast(omp_test_lock( &m_lock )); } + + MasterLock() { omp_init_lock( &m_lock ); } + ~MasterLock() { omp_destroy_lock( &m_lock ); } + + MasterLock( MasterLock const& ) = delete; + MasterLock( MasterLock && ) = delete; + MasterLock & operator=( MasterLock const& ) = delete; + MasterLock & operator=( MasterLock && ) = delete; + +private: + omp_lock_t m_lock; + +}; + +template<> +class UniqueToken< OpenMP, UniqueTokenScope::Instance> +{ 
+public: + using execution_space = OpenMP; + using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken( execution_space const& = execution_space() ) noexcept {} + + /// \brief upper bound for acquired values, i.e. 0 <= value < size() + KOKKOS_INLINE_FUNCTION + int size() const noexcept + { + #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + return Kokkos::OpenMP::thread_pool_size(); + #else + return 0 ; + #endif + } + + /// \brief acquire value such that 0 <= value < size() + KOKKOS_INLINE_FUNCTION + int acquire() const noexcept + { + #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + return Kokkos::OpenMP::thread_pool_rank(); + #else + return 0 ; + #endif + } + + /// \brief release a value acquired by generate + KOKKOS_INLINE_FUNCTION + void release( int ) const noexcept {} +}; + +template<> +class UniqueToken< OpenMP, UniqueTokenScope::Global> +{ +public: + using execution_space = OpenMP; + using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken( execution_space const& = execution_space() ) noexcept {} + + /// \brief upper bound for acquired values, i.e. 
0 <= value < size() + KOKKOS_INLINE_FUNCTION + int size() const noexcept + { + #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + return Kokkos::Impl::g_openmp_hardware_max_threads ; + #else + return 0 ; + #endif + } + + /// \brief acquire value such that 0 <= value < size() + KOKKOS_INLINE_FUNCTION + int acquire() const noexcept + { + #if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + return Kokkos::Impl::t_openmp_hardware_id ; + #else + return 0 ; + #endif + } + + /// \brief release a value acquired by generate + KOKKOS_INLINE_FUNCTION + void release( int ) const noexcept {} +}; + +} // namespace Experimental + + +#if !defined( KOKKOS_DISABLE_DEPRECATED ) + +inline +int OpenMP::thread_pool_size( int depth ) +{ + return depth < 2 + ? thread_pool_size() + : 1; +} + +KOKKOS_INLINE_FUNCTION +int OpenMP::hardware_thread_id() noexcept +{ +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + return Impl::t_openmp_hardware_id; +#else + return -1 ; +#endif +} + +inline +int OpenMP::max_hardware_threads() noexcept +{ + return Impl::g_openmp_hardware_max_threads; +} + +#endif // KOKKOS_DISABLE_DEPRECATED + } // namespace Kokkos #endif diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp index c47e0fc654..b54abb0068 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Parallel.hpp @@ -52,6 +52,8 @@ #include #include +#include + //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -71,8 +73,9 @@ private: typedef typename Policy::WorkRange WorkRange ; typedef typename Policy::member_type Member ; - const FunctorType m_functor ; - const Policy m_policy ; + OpenMPExec * m_instance ; + const FunctorType m_functor ; + const Policy m_policy ; template< class TagType > inline static @@ -110,16 +113,120 @@ private: public: 
inline void execute() const + { + enum { is_dynamic = std::is_same< typename Policy::schedule_type::type + , Kokkos::Dynamic >::value + }; + + if ( OpenMP::in_parallel() ) { + exec_range< WorkTag >( m_functor + , m_policy.begin() + , m_policy.end() ); + } + else { + + OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); + + const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(pool_size) + { + HostThreadTeamData & data = *(m_instance->get_thread_data()); + + data.set_work_partition( m_policy.end() - m_policy.begin() + , m_policy.chunk_size() ); + + if ( is_dynamic ) { + // Make sure work partition is set before stealing + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); + } + + std::pair range(0,0); + + do { + + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelFor::template + exec_range< WorkTag >( m_functor + , range.first + m_policy.begin() + , range.second + m_policy.begin() ); + + } while ( is_dynamic && 0 <= range.first ); + } + } + } + + inline + ParallelFor( const FunctorType & arg_functor + , Policy arg_policy ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) + , m_policy( arg_policy ) + {} +}; + + +// MDRangePolicy impl +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType + , Kokkos::Experimental::MDRangePolicy< Traits ... > + , Kokkos::OpenMP + > +{ +private: + + typedef Kokkos::Experimental::MDRangePolicy< Traits ... 
> MDRangePolicy ; + typedef typename MDRangePolicy::impl_range_policy Policy ; + typedef typename MDRangePolicy::work_tag WorkTag ; + + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef typename Kokkos::Experimental::Impl::HostIterateTile< MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void > iterate_type; + + OpenMPExec * m_instance ; + const FunctorType m_functor ; + const MDRangePolicy m_mdr_policy ; + const Policy m_policy ; // construct as RangePolicy( 0, num_tiles ).set_chunk_size(1) in ctor + + inline static + void + exec_range( const MDRangePolicy & mdr_policy + , const FunctorType & functor + , const Member ibeg , const Member iend ) { + #ifdef KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION + #ifdef KOKKOS_ENABLE_PRAGMA_IVDEP + #pragma ivdep + #endif + #endif + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + iterate_type( mdr_policy, functor )( iwork ); + } + } + +public: + + inline void execute() const + { enum { is_dynamic = std::is_same< typename Policy::schedule_type::type , Kokkos::Dynamic >::value }; - OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_for"); - OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_for"); + if ( OpenMP::in_parallel() ) { + ParallelFor::exec_range ( m_mdr_policy + , m_functor + , m_policy.begin() + , m_policy.end() ); + } + else { -#pragma omp parallel + OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); + + const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(pool_size) { - HostThreadTeamData & data = *OpenMPExec::get_thread_data(); + HostThreadTeamData & data = *(m_instance->get_thread_data()); data.set_work_partition( m_policy.end() - m_policy.begin() , m_policy.chunk_size() ); @@ -136,8 +243,8 @@ public: range = is_dynamic ? 
data.get_work_stealing_chunk() : data.get_work_partition(); - ParallelFor::template - exec_range< WorkTag >( m_functor + ParallelFor::exec_range( m_mdr_policy + , m_functor , range.first + m_policy.begin() , range.second + m_policy.begin() ); @@ -145,12 +252,15 @@ public: } // END #pragma omp parallel } + } inline ParallelFor( const FunctorType & arg_functor - , Policy arg_policy ) - : m_functor( arg_functor ) - , m_policy( arg_policy ) + , MDRangePolicy arg_policy ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) {} }; @@ -191,10 +301,11 @@ private: typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; - const FunctorType m_functor ; - const Policy m_policy ; - const ReducerType m_reducer ; - const pointer_type m_result_ptr ; + OpenMPExec * m_instance; + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; template< class TagType > inline static @@ -228,21 +339,21 @@ public: enum { is_dynamic = std::is_same< typename Policy::schedule_type::type , Kokkos::Dynamic >::value }; - OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_reduce"); const size_t pool_reduce_bytes = Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); - OpenMPExec::resize_thread_data( pool_reduce_bytes + m_instance->resize_thread_data( pool_reduce_bytes , 0 // team_reduce_bytes , 0 // team_shared_bytes , 0 // thread_local_bytes ); -#pragma omp parallel + const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(pool_size) { - HostThreadTeamData & data = *OpenMPExec::get_thread_data(); + HostThreadTeamData & data = *(m_instance->get_thread_data()); data.set_work_partition( 
m_policy.end() - m_policy.begin() , m_policy.chunk_size() ); @@ -271,16 +382,15 @@ public: } while ( is_dynamic && 0 <= range.first ); } -// END #pragma omp parallel // Reduction: - const pointer_type ptr = pointer_type( OpenMPExec::get_thread_data(0)->pool_reduce_local() ); + const pointer_type ptr = pointer_type( m_instance->get_thread_data(0)->pool_reduce_local() ); - for ( int i = 1 ; i < OpenMPExec::pool_size() ; ++i ) { + for ( int i = 1 ; i < pool_size ; ++i ) { ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr - , OpenMPExec::get_thread_data(i)->pool_reduce_local() ); + , m_instance->get_thread_data(i)->pool_reduce_local() ); } Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); @@ -303,7 +413,8 @@ public: Kokkos::is_view< ViewType >::value && !Kokkos::is_reducer_type::value ,void*>::type = NULL) - : m_functor( arg_functor ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_view.data() ) @@ -317,7 +428,8 @@ public: ParallelReduce( const FunctorType & arg_functor , Policy arg_policy , const ReducerType& reducer ) - : m_functor( arg_functor ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.view().data() ) @@ -329,6 +441,173 @@ public: }; + +// MDRangePolicy impl +template< class FunctorType , class ReducerType, class ... Traits > +class ParallelReduce< FunctorType + , Kokkos::Experimental::MDRangePolicy< Traits ...> + , ReducerType + , Kokkos::OpenMP + > +{ +private: + + typedef Kokkos::Experimental::MDRangePolicy< Traits ... 
> MDRangePolicy ; + typedef typename MDRangePolicy::impl_range_policy Policy ; + + typedef typename MDRangePolicy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef FunctorAnalysis< FunctorPatternInterface::REDUCE , Policy , FunctorType > Analysis ; + + typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef typename ReducerTypeFwd::value_type ValueType; + + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + typedef Kokkos::Impl::FunctorValueJoin< ReducerTypeFwd, WorkTag > ValueJoin ; + + typedef typename Analysis::pointer_type pointer_type ; + typedef typename Analysis::reference_type reference_type ; + + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRangePolicy + , FunctorType + , WorkTag + , ValueType + >; + + OpenMPExec * m_instance ; + const FunctorType m_functor ; + const MDRangePolicy m_mdr_policy ; + const Policy m_policy ; // construct as RangePolicy( 0, num_tiles ).set_chunk_size(1) in ctor + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + + inline static + void + exec_range( const MDRangePolicy & mdr_policy + , const FunctorType & functor + , const Member ibeg , const Member iend + , reference_type update ) + { + for ( Member iwork = ibeg ; iwork < iend ; ++iwork ) { + iterate_type( mdr_policy, functor, update )( iwork ); + } + } + +public: + + inline void execute() const + { + enum { is_dynamic = std::is_same< typename Policy::schedule_type::type + , Kokkos::Dynamic >::value }; + + OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_reduce"); + + const size_t pool_reduce_bytes = + Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); + + m_instance->resize_thread_data( pool_reduce_bytes + , 0 // team_reduce_bytes + , 0 // team_shared_bytes + , 0 // thread_local_bytes + 
); + + const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(pool_size) + { + HostThreadTeamData & data = *(m_instance->get_thread_data()); + + data.set_work_partition( m_policy.end() - m_policy.begin() + , m_policy.chunk_size() ); + + if ( is_dynamic ) { + // Make sure work partition is set before stealing + if ( data.pool_rendezvous() ) data.pool_rendezvous_release(); + } + + reference_type update = + ValueInit::init( ReducerConditional::select(m_functor , m_reducer) + , data.pool_reduce_local() ); + + std::pair range(0,0); + + do { + + range = is_dynamic ? data.get_work_stealing_chunk() + : data.get_work_partition(); + + ParallelReduce::exec_range ( m_mdr_policy, m_functor + , range.first + m_policy.begin() + , range.second + m_policy.begin() + , update ); + + } while ( is_dynamic && 0 <= range.first ); + } +// END #pragma omp parallel + + // Reduction: + + const pointer_type ptr = pointer_type( m_instance->get_thread_data(0)->pool_reduce_local() ); + + for ( int i = 1 ; i < pool_size ; ++i ) { + ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) + , ptr + , m_instance->get_thread_data(i)->pool_reduce_local() ); + } + + Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); + + if ( m_result_ptr ) { + const int n = Analysis::value_count( ReducerConditional::select(m_functor , m_reducer) ); + + for ( int j = 0 ; j < n ; ++j ) { m_result_ptr[j] = ptr[j] ; } + } + } + + //---------------------------------------- + + template< class ViewType > + inline + ParallelReduce( const FunctorType & arg_functor + , MDRangePolicy arg_policy + , const ViewType & arg_view + , typename std::enable_if< + Kokkos::is_view< ViewType >::value && + !Kokkos::is_reducer_type::value + ,void*>::type = NULL) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + , 
m_reducer( InvalidType() ) + , m_result_ptr( arg_view.data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } + + inline + ParallelReduce( const FunctorType & arg_functor + , MDRangePolicy arg_policy + , const ReducerType& reducer ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + , m_reducer( reducer ) + , m_result_ptr( reducer.view().data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } + +}; + } // namespace Impl } // namespace Kokkos @@ -361,8 +640,9 @@ private: typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; - const FunctorType m_functor ; - const Policy m_policy ; + OpenMPExec * m_instance; + const FunctorType m_functor; + const Policy m_policy; template< class TagType > inline static @@ -394,23 +674,23 @@ public: inline void execute() const { - OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_scan"); - OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_scan"); + OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_scan"); const int value_count = Analysis::value_count( m_functor ); const size_t pool_reduce_bytes = 2 * Analysis::value_size( m_functor ); - OpenMPExec::resize_thread_data( pool_reduce_bytes + m_instance->resize_thread_data( pool_reduce_bytes , 0 // team_reduce_bytes , 0 // team_shared_bytes , 0 // thread_local_bytes ); -#pragma omp parallel + const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(pool_size) { - HostThreadTeamData & data = *OpenMPExec::get_thread_data(); + HostThreadTeamData & data = *(m_instance->get_thread_data()); - const 
WorkRange range( m_policy, data.pool_rank(), data.pool_size() ); + const WorkRange range( m_policy, omp_get_thread_num(), omp_get_num_threads() ); reference_type update_sum = ValueInit::init( m_functor , data.pool_reduce_local() ); @@ -422,7 +702,7 @@ public: pointer_type ptr_prev = 0 ; - const int n = data.pool_size(); + const int n = omp_get_num_threads(); for ( int i = 0 ; i < n ; ++i ) { @@ -452,7 +732,6 @@ public: ParallelScan::template exec_range< WorkTag > ( m_functor , range.begin() , range.end() , update_base , true ); } -/* END #pragma omp parallel */ } @@ -461,7 +740,8 @@ public: inline ParallelScan( const FunctorType & arg_functor , const Policy & arg_policy ) - : m_functor( arg_functor ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) , m_policy( arg_policy ) {} @@ -492,9 +772,10 @@ private: typedef typename Policy::schedule_type::type SchedTag ; typedef typename Policy::member_type Member ; - const FunctorType m_functor ; - const Policy m_policy ; - const int m_shmem_size ; + OpenMPExec * m_instance; + const FunctorType m_functor; + const Policy m_policy; + const int m_shmem_size; template< class TagType > inline static @@ -548,22 +829,22 @@ public: { enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; - OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_for"); - OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_for"); + OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_for"); const size_t pool_reduce_size = 0 ; // Never shrinks const size_t team_reduce_size = TEAM_REDUCE_SIZE * m_policy.team_size(); const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); const size_t thread_local_size = 0 ; // Never shrinks - OpenMPExec::resize_thread_data( pool_reduce_size + m_instance->resize_thread_data( pool_reduce_size , team_reduce_size , team_shared_size , thread_local_size ); -#pragma omp parallel + const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel 
num_threads(pool_size) { - HostThreadTeamData & data = *OpenMPExec::get_thread_data(); + HostThreadTeamData & data = *(m_instance->get_thread_data()); const int active = data.organize_team( m_policy.team_size() ); @@ -598,14 +879,14 @@ public: data.disband_team(); } -// END #pragma omp parallel } inline ParallelFor( const FunctorType & arg_functor , const Policy & arg_policy ) - : m_functor( arg_functor ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) , m_policy( arg_policy ) , m_shmem_size( arg_policy.scratch_size(0) + arg_policy.scratch_size(1) + @@ -646,11 +927,12 @@ private: typedef typename Analysis::pointer_type pointer_type ; typedef typename Analysis::reference_type reference_type ; - const FunctorType m_functor ; - const Policy m_policy ; - const ReducerType m_reducer ; - const pointer_type m_result_ptr ; - const int m_shmem_size ; + OpenMPExec * m_instance; + const FunctorType m_functor; + const Policy m_policy; + const ReducerType m_reducer; + const pointer_type m_result_ptr; + const int m_shmem_size; template< class TagType > inline static @@ -706,8 +988,7 @@ public: { enum { is_dynamic = std::is_same< SchedTag , Kokkos::Dynamic >::value }; - OpenMPExec::verify_is_process("Kokkos::OpenMP parallel_reduce"); - OpenMPExec::verify_initialized("Kokkos::OpenMP parallel_reduce"); + OpenMPExec::verify_is_master("Kokkos::OpenMP parallel_reduce"); const size_t pool_reduce_size = Analysis::value_size( ReducerConditional::select(m_functor, m_reducer)); @@ -716,14 +997,15 @@ public: const size_t team_shared_size = m_shmem_size + m_policy.scratch_size(1); const size_t thread_local_size = 0 ; // Never shrinks - OpenMPExec::resize_thread_data( pool_reduce_size + m_instance->resize_thread_data( pool_reduce_size , team_reduce_size , team_shared_size , thread_local_size ); -#pragma omp parallel + const int pool_size = OpenMP::thread_pool_size(); + #pragma omp parallel num_threads(pool_size) { - HostThreadTeamData & data = *OpenMPExec::get_thread_data(); 
+ HostThreadTeamData & data = *(m_instance->get_thread_data()); const int active = data.organize_team( m_policy.team_size() ); @@ -763,17 +1045,26 @@ public: } data.disband_team(); + + // This thread has updated 'pool_reduce_local()' with its + // contributions to the reduction. The parallel region is + // about to terminate and the master thread will load and + // reduce each 'pool_reduce_local()' contribution. + // Must 'memory_fence()' to guarantee that storing the update to + // 'pool_reduce_local()' will complete before this thread + // exits the parallel region. + + memory_fence(); } -// END #pragma omp parallel // Reduction: - const pointer_type ptr = pointer_type( OpenMPExec::get_thread_data(0)->pool_reduce_local() ); + const pointer_type ptr = pointer_type( m_instance->get_thread_data(0)->pool_reduce_local() ); - for ( int i = 1 ; i < OpenMPExec::pool_size() ; ++i ) { + for ( int i = 1 ; i < pool_size ; ++i ) { ValueJoin::join( ReducerConditional::select(m_functor , m_reducer) , ptr - , OpenMPExec::get_thread_data(i)->pool_reduce_local() ); + , m_instance->get_thread_data(i)->pool_reduce_local() ); } Kokkos::Impl::FunctorFinal< ReducerTypeFwd , WorkTag >::final( ReducerConditional::select(m_functor , m_reducer) , ptr ); @@ -796,7 +1087,8 @@ public: Kokkos::is_view< ViewType >::value && !Kokkos::is_reducer_type::value ,void*>::type = NULL) - : m_functor( arg_functor ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( InvalidType() ) , m_result_ptr( arg_result.ptr_on_device() ) @@ -810,7 +1102,8 @@ public: ParallelReduce( const FunctorType & arg_functor , Policy arg_policy , const ReducerType& reducer ) - : m_functor( arg_functor ) + : m_instance( t_openmp_instance ) + , m_functor( arg_functor ) , m_policy( arg_policy ) , m_reducer( reducer ) , m_result_ptr( reducer.view().data() ) diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp index 
d4ade211f8..77363876b0 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.cpp @@ -105,7 +105,7 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::execute { using execution_space = Kokkos::OpenMP ; using queue_type = TaskQueue< execution_space > ; - using task_root_type = TaskBase< execution_space , void , void > ; + using task_root_type = TaskBase< void , void , void > ; using Member = Impl::HostThreadTeamMember< execution_space > ; static task_root_type * const end = @@ -115,23 +115,19 @@ void TaskQueueSpecialization< Kokkos::OpenMP >::execute HostThreadTeamData & team_data_single = HostThreadTeamDataSingleton::singleton(); - const int team_size = Impl::OpenMPExec::pool_size(2); // Threads per core - // const int team_size = Impl::OpenMPExec::pool_size(1); // Threads per NUMA + Impl::OpenMPExec * instance = t_openmp_instance; + const int pool_size = OpenMP::thread_pool_size(); -#if 0 -fprintf(stdout,"TaskQueue execute %d\n", team_size ); -fflush(stdout); -#endif + const int team_size = 1; // Threads per core + instance->resize_thread_data( 0 /* global reduce buffer */ + , 512 * team_size /* team reduce buffer */ + , 0 /* team shared buffer */ + , 0 /* thread local buffer */ + ); - OpenMPExec::resize_thread_data( 0 /* global reduce buffer */ - , 512 * team_size /* team reduce buffer */ - , 0 /* team shared buffer */ - , 0 /* thread local buffer */ - ); - -#pragma omp parallel + #pragma omp parallel num_threads(pool_size) { - Impl::HostThreadTeamData & self = *Impl::OpenMPExec::get_thread_data(); + Impl::HostThreadTeamData & self = *(instance->get_thread_data()); // Organizing threads into a team performs a barrier across the // entire pool to insure proper initialization of the team @@ -142,18 +138,6 @@ fflush(stdout); Member single_exec( team_data_single ); Member team_exec( self ); -#if 0 -fprintf(stdout,"TaskQueue pool(%d of %d) team(%d of %d) league(%d of %d) running\n" - , self.pool_rank() - , 
self.pool_size() - , team_exec.team_rank() - , team_exec.team_size() - , team_exec.league_rank() - , team_exec.league_size() - ); -fflush(stdout); -#endif - // Loop until all queues are empty and no tasks in flight task_root_type * task = 0 ; @@ -197,15 +181,6 @@ fflush(stdout); // if a single thread task then execute now -#if 0 -fprintf(stdout,"TaskQueue pool(%d of %d) executing single task 0x%lx\n" - , self.pool_rank() - , self.pool_size() - , int64_t(task) - ); -fflush(stdout); -#endif - (*task->m_apply)( task , & single_exec ); leader_loop = true ; @@ -220,57 +195,14 @@ fflush(stdout); if ( 0 != task ) { // Thread Team Task -#if 0 -fprintf(stdout,"TaskQueue pool(%d of %d) team((%d of %d) league(%d of %d) executing team task 0x%lx\n" - , self.pool_rank() - , self.pool_size() - , team_exec.team_rank() - , team_exec.team_size() - , team_exec.league_rank() - , team_exec.league_size() - , int64_t(task) - ); -fflush(stdout); -#endif - (*task->m_apply)( task , & team_exec ); // The m_apply function performs a barrier } } while( 0 != task ); - -#if 0 -fprintf(stdout,"TaskQueue pool(%d of %d) team(%d of %d) league(%d of %d) ending\n" - , self.pool_rank() - , self.pool_size() - , team_exec.team_rank() - , team_exec.team_size() - , team_exec.league_rank() - , team_exec.league_size() - ); -fflush(stdout); -#endif - } - self.disband_team(); - -#if 0 -fprintf(stdout,"TaskQueue pool(%d of %d) disbanded\n" - , self.pool_rank() - , self.pool_size() - ); -fflush(stdout); -#endif - } -// END #pragma omp parallel - -#if 0 -fprintf(stdout,"TaskQueue execute %d end\n", team_size ); -fflush(stdout); -#endif - } void TaskQueueSpecialization< Kokkos::OpenMP >:: @@ -279,10 +211,10 @@ void TaskQueueSpecialization< Kokkos::OpenMP >:: { using execution_space = Kokkos::OpenMP ; using queue_type = TaskQueue< execution_space > ; - using task_root_type = TaskBase< execution_space , void , void > ; + using task_root_type = TaskBase< void , void , void > ; using Member = 
Impl::HostThreadTeamMember< execution_space > ; - if ( 1 == omp_get_num_threads() ) { + if ( 1 == OpenMP::thread_pool_size() ) { task_root_type * const end = (task_root_type *) task_root_type::EndTag ; diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp index 82fbef255b..dfa1635e08 100644 --- a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Task.hpp @@ -45,7 +45,7 @@ #define KOKKOS_IMPL_OPENMP_TASK_HPP #include -#if defined( KOKKOS_ENABLE_TASKDAG ) +#if defined( KOKKOS_ENABLE_OPENMP ) && defined( KOKKOS_ENABLE_TASKDAG ) //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -60,7 +60,7 @@ public: using execution_space = Kokkos::OpenMP ; using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; - using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ; using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; // Must specify memory space diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp new file mode 100644 index 0000000000..743e6b6e62 --- /dev/null +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_Team.hpp @@ -0,0 +1,245 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. 
Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMP_TEAM_HPP +#define KOKKOS_OPENMP_TEAM_HPP + +#include +#if defined( KOKKOS_ENABLE_OPENMP ) + +#include + +namespace Kokkos { namespace Impl { + +template< class ... Properties > +class TeamPolicyInternal< Kokkos::OpenMP, Properties ... >: public PolicyTraits +{ +public: + + //! 
Tag this class as a kokkos execution policy + typedef TeamPolicyInternal execution_policy ; + + typedef PolicyTraits traits; + + TeamPolicyInternal& operator = (const TeamPolicyInternal& p) { + m_league_size = p.m_league_size; + m_team_size = p.m_team_size; + m_team_alloc = p.m_team_alloc; + m_team_iter = p.m_team_iter; + m_team_scratch_size[0] = p.m_team_scratch_size[0]; + m_thread_scratch_size[0] = p.m_thread_scratch_size[0]; + m_team_scratch_size[1] = p.m_team_scratch_size[1]; + m_thread_scratch_size[1] = p.m_thread_scratch_size[1]; + m_chunk_size = p.m_chunk_size; + return *this; + } + + //---------------------------------------- + + template< class FunctorType > + inline static + int team_size_max( const FunctorType & ) { + int pool_size = traits::execution_space::thread_pool_size(1); + int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + return pool_size + inline static + int team_size_recommended( const FunctorType & ) + { return traits::execution_space::thread_pool_size(2); } + + template< class FunctorType > + inline static + int team_size_recommended( const FunctorType &, const int& ) + { return traits::execution_space::thread_pool_size(2); } + + //---------------------------------------- + +private: + + int m_league_size ; + int m_team_size ; + int m_team_alloc ; + int m_team_iter ; + + size_t m_team_scratch_size[2]; + size_t m_thread_scratch_size[2]; + + int m_chunk_size; + + inline void init( const int league_size_request + , const int team_size_request ) + { + const int pool_size = traits::execution_space::thread_pool_size(0); + const int max_host_team_size = Impl::HostThreadTeamData::max_team_members; + const int team_max = pool_size 0) { + if(!Impl::is_integral_power_of_two( m_chunk_size )) + Kokkos::abort("TeamPolicy blocking granularity must be power of two" ); + } + + int new_chunk_size = 1; + while(new_chunk_size*100*concurrency < m_league_size) + new_chunk_size *= 2; + if(new_chunk_size < 128) { + new_chunk_size = 1; + while( 
(new_chunk_size*40*concurrency < m_league_size ) && (new_chunk_size<128) ) + new_chunk_size*=2; + } + m_chunk_size = new_chunk_size; + } + +public: + typedef Impl::HostThreadTeamMember< Kokkos::OpenMP > member_type ; +}; + +}} // namespace Kokkos::Impl + +#endif +#endif /* KOKKOS_OPENMP_TEAM_HPP */ + + diff --git a/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp new file mode 100644 index 0000000000..289ad15451 --- /dev/null +++ b/lib/kokkos/core/src/OpenMP/Kokkos_OpenMP_WorkGraphPolicy.hpp @@ -0,0 +1,107 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_OPENMP_WORKGRAPHPOLICY_HPP +#define KOKKOS_OPENMP_WORKGRAPHPOLICY_HPP + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType , + Kokkos::Experimental::WorkGraphPolicy< Traits ... > , + Kokkos::OpenMP + > + : public Kokkos::Impl::Experimental:: + WorkGraphExec< FunctorType, + Kokkos::OpenMP, + Traits ... + > +{ +private: + + typedef Kokkos::Experimental::WorkGraphPolicy< Traits ... > Policy ; + typedef Kokkos::Impl::Experimental:: + WorkGraphExec Base ; + + template< class TagType > + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_one(const typename Policy::member_type& i) const { + Base::m_functor( i ); + } + + template< class TagType > + typename std::enable_if< ! 
std::is_same< TagType , void >::value >::type + exec_one(const typename Policy::member_type& i) const { + const TagType t{} ; + Base::m_functor( t , i ); + } + +public: + + inline + void execute() + { + const int pool_size = OpenMP::thread_pool_size(); + + #pragma omp parallel num_threads(pool_size) + { + for (std::int32_t i; (-1 != (i = Base::before_work())); ) { + exec_one< typename Policy::work_tag >( i ); + Base::after_work(i); + } + } + } + + inline + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy ) + : Base( arg_functor, arg_policy ) + { + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* #define KOKKOS_OPENMP_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp index bec7844ed6..258a9d2ff7 100644 --- a/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp +++ b/lib/kokkos/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -45,7 +45,7 @@ #define KOKKOS_OPENMPTARGETEXEC_HPP #include -#include +#include #include #include @@ -59,10 +59,10 @@ namespace Impl { class OpenMPTargetExec { -public: +public: enum { MAX_ACTIVE_THREADS = 256*8*56*4 }; enum { MAX_ACTIVE_TEAMS = MAX_ACTIVE_THREADS/32 }; - + private: static void* scratch_ptr; @@ -70,7 +70,7 @@ public: static void verify_is_process( const char * const ); static void verify_initialized( const char * const ); - static void* get_scratch_ptr(); + static void* get_scratch_ptr(); static void clear_scratch(); static void resize_scratch( int64_t reduce_bytes , int64_t team_reduce_bytes, int64_t team_shared_bytes, int64_t thread_local_bytes ); @@ -159,7 +159,7 @@ public: KOKKOS_INLINE_FUNCTION void team_barrier() const { - #pragma omp barrier + #pragma omp barrier } template @@ -191,13 +191,13 @@ public: typedef ValueType value_type; const JoinLambdaAdapter op(op_in); - + // Make sure there is enough scratch space: typedef typename if_c< sizeof(value_type) < TEAM_REDUCE_SIZE , value_type , void >::type type ; const int n_values = TEAM_REDUCE_SIZE/sizeof(value_type); - type * team_scratch = (type*) ((char*)m_glb_scratch + TEAM_REDUCE_SIZE*omp_get_team_num()); + type * team_scratch = (type*) ((char*)m_glb_scratch + TEAM_REDUCE_SIZE*omp_get_team_num()); for(int i = m_team_rank; i < n_values; i+= m_team_size) { team_scratch[i] = value_type(); } @@ -209,7 +209,7 @@ public: team_scratch[m_team_rank%n_values]+=value; #pragma omp barrier } - + for(int d = 1; d #if defined( KOKKOS_ENABLE_QTHREADS ) -#include +#include //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp index 4c805310cc..35b2163ae5 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp +++ 
b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.cpp @@ -45,14 +45,14 @@ #include #if defined( KOKKOS_ENABLE_THREADS ) -#include - #include #include #include #include #include + #include + #include #include #include @@ -80,9 +80,7 @@ const void * volatile s_current_function_arg = 0 ; struct Sentinel { Sentinel() - { - HostSpace::register_in_parallel( ThreadsExec::in_parallel ); - } + {} ~Sentinel() { @@ -122,6 +120,8 @@ void execute_function_noop( ThreadsExec & , const void * ) {} void ThreadsExec::driver(void) { + SharedAllocationRecord< void, void >::tracking_enable(); + ThreadsExec this_thread ; while ( ThreadsExec::Active == this_thread.m_pool_state ) { @@ -726,6 +726,8 @@ void ThreadsExec::initialize( unsigned thread_count , // Init the array for used for arbitrarily sized atomics Impl::init_lock_array_host_space(); + Impl::SharedAllocationRecord< void, void >::tracking_enable(); + #if defined(KOKKOS_ENABLE_PROFILING) Kokkos::Profiling::initialize(); #endif diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp index 74de3a2596..7557bad7d9 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsExec.hpp @@ -50,11 +50,12 @@ #include #include -#include +#include #include #include +#include //---------------------------------------------------------------------------- namespace Kokkos { @@ -275,6 +276,17 @@ public: if ( ! rev_rank ) { Final::final( f , reduce_memory() ); } + + // This thread has updated 'reduce_memory()' and upon returning + // from this function will set 'm_pool_state' to inactive. + // If this is a non-root thread then setting 'm_pool_state' + // to inactive triggers another thread to exit a spinwait + // and read the 'reduce_memory'. + // Must 'memory_fence()' to guarantee that storing the update to + // 'reduce_memory()' will complete before storing the the update to + // 'm_pool_state'. 
+ + memory_fence(); } inline @@ -627,6 +639,62 @@ inline void Threads::fence() } /* namespace Kokkos */ +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { namespace Experimental { + +template<> +class UniqueToken< Threads, UniqueTokenScope::Instance> +{ +public: + using execution_space = Threads; + using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken( execution_space const& = execution_space() ) noexcept {} + + /// \brief upper bound for acquired values, i.e. 0 <= value < size() + inline + int size() const noexcept { return Threads::thread_pool_size(); } + + /// \brief acquire value such that 0 <= value < size() + inline + int acquire() const noexcept { return Threads::thread_pool_rank(); } + + /// \brief release a value acquired by generate + inline + void release( int ) const noexcept {} +}; + +template<> +class UniqueToken< Threads, UniqueTokenScope::Global> +{ +public: + using execution_space = Threads; + using size_type = int; + + /// \brief create object size for concurrency on the given instance + /// + /// This object should not be shared between instances + UniqueToken( execution_space const& = execution_space() ) noexcept {} + + /// \brief upper bound for acquired values, i.e. 
0 <= value < size() + inline + int size() const noexcept { return Threads::thread_pool_size(); } + + /// \brief acquire value such that 0 <= value < size() + inline + int acquire() const noexcept { return Threads::thread_pool_rank(); } + + /// \brief release a value acquired by generate + inline + void release( int ) const noexcept {} +}; + +}} // namespace Kokkos::Experimental //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- #endif diff --git a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp index c12019413b..6060bf191f 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_ThreadsTeam.hpp @@ -50,7 +50,7 @@ #include #include -#include +#include #include #include @@ -482,6 +482,8 @@ public: void next_static() { if ( m_league_rank < m_league_end ) { + // Make sure all stores are complete before entering the barrier + memory_fence(); team_barrier(); set_team_shared(); } @@ -518,6 +520,8 @@ public: return; if ( m_league_rank < m_league_chunk_end ) { + // Make sure all stores are complete before entering the barrier + memory_fence(); team_barrier(); set_team_shared(); } diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp index 0ee0cd3280..18ac7d26ad 100644 --- a/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_Parallel.hpp @@ -55,6 +55,8 @@ #include #include +#include + //---------------------------------------------------------------------------- namespace Kokkos { @@ -174,6 +176,108 @@ public: {} }; + +// MDRangePolicy impl +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType + , Kokkos::Experimental::MDRangePolicy< Traits ... 
> + , Kokkos::Threads + > +{ +private: + typedef Kokkos::Experimental::MDRangePolicy< Traits ... > MDRangePolicy ; + typedef typename MDRangePolicy::impl_range_policy Policy ; + + typedef typename MDRangePolicy::work_tag WorkTag ; + + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef typename Kokkos::Experimental::Impl::HostIterateTile< MDRangePolicy, FunctorType, typename MDRangePolicy::work_tag, void > iterate_type; + + const FunctorType m_functor ; + const MDRangePolicy m_mdr_policy ; + const Policy m_policy ; // construct as RangePolicy( 0, num_tiles ).set_chunk_size(1) in ctor + + inline static + void + exec_range( const MDRangePolicy & mdr_policy + , const FunctorType & functor + , const Member ibeg , const Member iend ) + { + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + iterate_type( mdr_policy, functor )( i ); + } + } + + static void exec( ThreadsExec & exec , const void * arg ) + { + exec_schedule(exec,arg); + } + + template + static + typename std::enable_if< std::is_same::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelFor & self = * ((const ParallelFor *) arg ); + + WorkRange range( self.m_policy , exec.pool_rank() , exec.pool_size() ); + + ParallelFor::exec_range + ( self.m_mdr_policy, self.m_functor , range.begin() , range.end() ); + + exec.fan_in(); + } + + template + static + typename std::enable_if< std::is_same::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelFor & self = * ((const ParallelFor *) arg ); + + WorkRange range( self.m_policy , exec.pool_rank() , exec.pool_size() ); + + exec.set_work_range(range.begin(),range.end(),self.m_policy.chunk_size()); + exec.reset_steal_target(); + exec.barrier(); + + long work_index = exec.get_work_index(); + + while(work_index != -1) { + const 
Member begin = static_cast(work_index) * self.m_policy.chunk_size(); + const Member end = begin + self.m_policy.chunk_size() < self.m_policy.end()?begin+self.m_policy.chunk_size():self.m_policy.end(); + + ParallelFor::exec_range + ( self.m_mdr_policy, self.m_functor , begin , end ); + work_index = exec.get_work_index(); + } + + exec.fan_in(); + } + +public: + + inline + void execute() const + { + ThreadsExec::start( & ParallelFor::exec , this ); + ThreadsExec::fence(); + } + + ParallelFor( const FunctorType & arg_functor + , const MDRangePolicy & arg_policy ) + : m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + {} +}; + //---------------------------------------------------------------------------- /* ParallelFor Kokkos::Threads with TeamPolicy */ @@ -440,6 +544,169 @@ public: }; + +// MDRangePolicy impl +template< class FunctorType , class ReducerType, class ... Traits > +class ParallelReduce< FunctorType + , Kokkos::Experimental::MDRangePolicy< Traits ... > + , ReducerType + , Kokkos::Threads + > +{ +private: + + typedef Kokkos::Experimental::MDRangePolicy< Traits ... 
> MDRangePolicy ; + typedef typename MDRangePolicy::impl_range_policy Policy ; + + typedef typename MDRangePolicy::work_tag WorkTag ; + typedef typename Policy::WorkRange WorkRange ; + typedef typename Policy::member_type Member ; + + typedef Kokkos::Impl::if_c< std::is_same::value, FunctorType, ReducerType> ReducerConditional; + typedef typename ReducerConditional::type ReducerTypeFwd; + + typedef typename ReducerTypeFwd::value_type ValueType; + + typedef Kokkos::Impl::FunctorValueTraits< ReducerTypeFwd, WorkTag > ValueTraits ; + typedef Kokkos::Impl::FunctorValueInit< ReducerTypeFwd, WorkTag > ValueInit ; + + typedef typename ValueTraits::pointer_type pointer_type ; + typedef typename ValueTraits::reference_type reference_type ; + + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRangePolicy + , FunctorType + , WorkTag + , ValueType + >; + + const FunctorType m_functor ; + const MDRangePolicy m_mdr_policy ; + const Policy m_policy ; // construct as RangePolicy( 0, num_tiles ).set_chunk_size(1) in ctor + const ReducerType m_reducer ; + const pointer_type m_result_ptr ; + + inline static + void + exec_range( const MDRangePolicy & mdr_policy + , const FunctorType & functor + , const Member & ibeg , const Member & iend + , reference_type update ) + { + #if defined( KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION ) && \ + defined( KOKKOS_ENABLE_PRAGMA_IVDEP ) + #pragma ivdep + #endif + for ( Member i = ibeg ; i < iend ; ++i ) { + iterate_type( mdr_policy, functor, update )( i ); + } + } + + static void + exec( ThreadsExec & exec , const void * arg ) { + exec_schedule(exec, arg); + } + + template + static + typename std::enable_if< std::is_same::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelReduce & self = * ((const ParallelReduce *) arg ); + const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() ); + + ParallelReduce::exec_range + ( self.m_mdr_policy, self.m_functor , range.begin() , 
range.end() + , ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ) ); + + exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + } + + template + static + typename std::enable_if< std::is_same::value >::type + exec_schedule( ThreadsExec & exec , const void * arg ) + { + const ParallelReduce & self = * ((const ParallelReduce *) arg ); + const WorkRange range( self.m_policy, exec.pool_rank(), exec.pool_size() ); + + exec.set_work_range(range.begin(),range.end(),self.m_policy.chunk_size()); + exec.reset_steal_target(); + exec.barrier(); + + long work_index = exec.get_work_index(); + reference_type update = ValueInit::init( ReducerConditional::select(self.m_functor , self.m_reducer) , exec.reduce_memory() ); + while(work_index != -1) { + const Member begin = static_cast(work_index) * self.m_policy.chunk_size(); + const Member end = begin + self.m_policy.chunk_size() < self.m_policy.end()?begin+self.m_policy.chunk_size():self.m_policy.end(); + ParallelReduce::exec_range + ( self.m_mdr_policy, self.m_functor , begin , end + , update ); + work_index = exec.get_work_index(); + } + + exec.template fan_in_reduce< ReducerTypeFwd , WorkTag >( ReducerConditional::select(self.m_functor , self.m_reducer) ); + } + +public: + + inline + void execute() const + { + ThreadsExec::resize_scratch( ValueTraits::value_size( ReducerConditional::select(m_functor , m_reducer) ) , 0 ); + + ThreadsExec::start( & ParallelReduce::exec , this ); + + ThreadsExec::fence(); + + if ( m_result_ptr ) { + + const pointer_type data = + (pointer_type) ThreadsExec::root_reduce_scratch(); + + const unsigned n = ValueTraits::value_count( ReducerConditional::select(m_functor , m_reducer) ); + for ( unsigned i = 0 ; i < n ; ++i ) { m_result_ptr[i] = data[i]; } + } + } + + template< class HostViewType > + ParallelReduce( const FunctorType & arg_functor , + const MDRangePolicy & arg_policy , + 
const HostViewType & arg_result_view , + typename std::enable_if< + Kokkos::is_view< HostViewType >::value && + !Kokkos::is_reducer_type::value + ,void*>::type = NULL) + : m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + , m_reducer( InvalidType() ) + , m_result_ptr( arg_result_view.ptr_on_device() ) + { + static_assert( Kokkos::is_view< HostViewType >::value + , "Kokkos::Threads reduce result must be a View" ); + + static_assert( std::is_same< typename HostViewType::memory_space , HostSpace >::value + , "Kokkos::Threads reduce result must be a View in HostSpace" ); + } + + inline + ParallelReduce( const FunctorType & arg_functor + , MDRangePolicy arg_policy + , const ReducerType& reducer ) + : m_functor( arg_functor ) + , m_mdr_policy( arg_policy ) + , m_policy( Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1) ) + , m_reducer( reducer ) + , m_result_ptr( reducer.view().data() ) + { + /*static_assert( std::is_same< typename ViewType::memory_space + , Kokkos::HostSpace >::value + , "Reduction result on Kokkos::OpenMP must be a Kokkos::View in HostSpace" );*/ + } + +}; + + //---------------------------------------------------------------------------- /* ParallelReduce with Kokkos::Threads and TeamPolicy */ diff --git a/lib/kokkos/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp b/lib/kokkos/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp new file mode 100644 index 0000000000..be904a1670 --- /dev/null +++ b/lib/kokkos/core/src/Threads/Kokkos_Threads_WorkGraphPolicy.hpp @@ -0,0 +1,115 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_THREADS_WORKGRAPHPOLICY_HPP +#define KOKKOS_THREADS_WORKGRAPHPOLICY_HPP + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType , + Kokkos::Experimental::WorkGraphPolicy< Traits ... 
> , + Kokkos::Threads + > + : public Kokkos::Impl::Experimental:: + WorkGraphExec< FunctorType, + Kokkos::Threads, + Traits ... + > +{ +private: + + typedef Kokkos::Experimental::WorkGraphPolicy< Traits ... > Policy ; + typedef Kokkos::Impl::Experimental:: + WorkGraphExec Base ; + typedef ParallelFor, + Kokkos::Threads> Self ; + + template< class TagType > + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_one(const typename Policy::member_type& i) const { + Base::m_functor( i ); + } + + template< class TagType > + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_one(const typename Policy::member_type& i) const { + const TagType t{} ; + Base::m_functor( t , i ); + } + + inline void exec_one_thread() const { + for (std::int32_t i; (-1 != (i = Base::before_work())); ) { + exec_one< typename Policy::work_tag >( i ); + Base::after_work(i); + } + } + + static inline void thread_main( ThreadsExec&, const void* arg ) { + const Self& self = *(static_cast(arg)); + self.exec_one_thread(); + } + +public: + + inline + void execute() + { + ThreadsExec::start( & Self::thread_main, this ); + ThreadsExec::fence(); + } + + inline + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy ) + : Base( arg_functor, arg_policy ) + { + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* #define KOKKOS_THREADS_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp index 77a1e8754d..0171b209e5 100644 --- a/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp +++ b/lib/kokkos/core/src/impl/KokkosExp_Host_IterateTile.hpp @@ -141,7 +141,6 @@ namespace Kokkos { namespace Experimental { namespace Impl { #define LOOP_ARGS_8 LOOP_ARGS_7, i7 + m_offset[7] - // New Loop Macros... // parallel_for, non-tagged #define APPLY( func, ... 
) \ @@ -1010,8 +1009,6 @@ namespace Kokkos { namespace Experimental { namespace Impl { // end tagged macros - - // Structs for calling loops template < int Rank, bool IsLeft, typename IType, typename Tagged, typename Enable = void > struct Tile_Loop_Type; @@ -1279,6 +1276,19 @@ struct Tile_Loop_Type<8, IsLeft, IType, Tagged, typename std::enable_if< !std::i template using is_void = std::is_same< T , void >; +template +struct is_type_array : std::false_type +{ + using value_type = T; +}; + +template +struct is_type_array< T[] > : std::true_type +{ + using value_type = T; +}; + + template < typename RP , typename Functor , typename Tag = void @@ -1761,18 +1771,17 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i RP const& m_rp; Functor const& m_func; typename std::conditional< std::is_same::value,int,Tag>::type m_tag; -// value_type & m_v; - }; -// ValueType: For reductions +// For ParallelReduce +// ValueType - scalar: For reductions template < typename RP , typename Functor , typename Tag , typename ValueType > -struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void::value >::type > +struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void::value && !is_type_array::value >::type > { using index_type = typename RP::index_type; using point_type = typename RP::point_type; @@ -2251,12 +2260,497 @@ struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_i }; +// For ParallelReduce +// Extra specialization for array reductions +// ValueType[]: For array reductions +template < typename RP + , typename Functor + , typename Tag + , typename ValueType + > +struct HostIterateTile < RP , Functor , Tag , ValueType , typename std::enable_if< !is_void::value && is_type_array::value >::type > +{ + using index_type = typename RP::index_type; + using point_type = typename RP::point_type; + + using value_type = typename is_type_array::value_type; // strip 
away the 'array-ness' [], only underlying type remains + + inline + HostIterateTile( RP const& rp, Functor const& func, value_type *v ) // v should be an array; treat as pointer for compatibility since size is not known nor needed here + : m_rp(rp) //Cuda 7.0 does not like braces... + , m_func(func) + , m_v(v) // use with non-void ValueType struct + {} + + inline + bool check_iteration_bounds( point_type& partial_tile , point_type& offset ) const { + bool is_full_tile = true; + + for ( int i = 0; i < RP::rank; ++i ) { + if ((offset[i] + m_rp.m_tile[i]) <= m_rp.m_upper[i]) { + partial_tile[i] = m_rp.m_tile[i] ; + } + else { + is_full_tile = false ; + partial_tile[i] = (m_rp.m_upper[i] - 1 - offset[i]) == 0 ? 1 + : (m_rp.m_upper[i] - m_rp.m_tile[i]) > 0 ? (m_rp.m_upper[i] - offset[i]) + : (m_rp.m_upper[i] - m_rp.m_lower[i]) ; // when single tile encloses range + } + } + + return is_full_tile ; + } // end check bounds + + + template + struct RankTag + { + typedef RankTag type; + enum { value = (int)Rank }; + }; + + +#if KOKKOS_ENABLE_NEW_LOOP_MACROS + template + inline + void + operator()(IType tile_idx) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + Tile_Loop_Type< RP::rank, (RP::inner_direction == RP::Left), index_type, Tag >::apply( m_v, m_func, full_tile, m_offset, m_rp.m_tile, m_tiledims ); + + } + +#else + template + inline + void + operator()(IType tile_idx) const + { operator_impl( tile_idx , RankTag() ); } + // added due to compiler error when using sfinae to choose operator based on rank + + + template + inline + void operator_impl( IType tile_idx , const RankTag<2> ) const + { + 
point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2L(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } else { +// #pragma simd + LOOP_2R(index_type, m_tiledims) { + apply( LOOP_ARGS_2 ); + } + } + } // end RP::Right + + } //end op() rank == 2 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<3> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3L(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } else { +// #pragma simd + LOOP_3R(index_type, m_tiledims) { + apply( LOOP_ARGS_3 ); + } + } + } // end RP::Right + + } //end op() rank == 3 + + + template + inline + void operator_impl( 
IType tile_idx , const RankTag<4> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4L(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } else { +// #pragma simd + LOOP_4R(index_type, m_tiledims) { + apply( LOOP_ARGS_4 ); + } + } + } // end RP::Right + + } //end op() rank == 4 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<5> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5L(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } else { +// #pragma simd + LOOP_5R(index_type, m_tiledims) { + apply( LOOP_ARGS_5 ); + } + } + } // end RP::Right + + } //end op() rank == 5 + 
+ + template + inline + void operator_impl( IType tile_idx , const RankTag<6> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6L(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } else { +// #pragma simd + LOOP_6R(index_type, m_tiledims) { + apply( LOOP_ARGS_6 ); + } + } + } // end RP::Right + + } //end op() rank == 6 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<7> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7L(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } else { +// #pragma simd + LOOP_7R(index_type, m_tiledims) { + apply( LOOP_ARGS_7 ); + } + } + } 
// end RP::Right + + } //end op() rank == 7 + + + template + inline + void operator_impl( IType tile_idx , const RankTag<8> ) const + { + point_type m_offset; + point_type m_tiledims; + + if (RP::outer_direction == RP::Left) { + for (int i=0; i=0; --i) { + m_offset[i] = (tile_idx % m_rp.m_tile_end[i]) * m_rp.m_tile[i] + m_rp.m_lower[i] ; + tile_idx /= m_rp.m_tile_end[i]; + } + } + + //Check if offset+tiledim in bounds - if not, replace tile dims with the partial tile dims + const bool full_tile = check_iteration_bounds(m_tiledims , m_offset) ; + + if (RP::inner_direction == RP::Left) { + if ( full_tile ) { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8L(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Left + else { + if ( full_tile ) { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } else { +// #pragma simd + LOOP_8R(index_type, m_tiledims) { + apply( LOOP_ARGS_8 ); + } + } + } // end RP::Right + + } //end op() rank == 8 +#endif + + + template + typename std::enable_if<( sizeof...(Args) == RP::rank && std::is_same::value), void>::type + apply(Args &&... args) const + { + m_func(args... , m_v); + } + + template + typename std::enable_if<( sizeof...(Args) == RP::rank && !std::is_same::value), void>::type + apply(Args &&... args) const + { + m_func( m_tag, args... 
, m_v); + } + + + RP const& m_rp; + Functor const& m_func; + value_type * m_v; + typename std::conditional< std::is_same::value,int,Tag>::type m_tag; + +}; + + // ------------------------------------------------------------------ // // MDFunctor - wraps the range_policy and functor to pass to IterateTile -// Serial, Threads, OpenMP +// Used for md_parallel_{for,reduce} with Serial, Threads, OpenMP // Cuda uses DeviceIterateTile directly within md_parallel_for -// ParallelReduce +// TODO Once md_parallel_{for,reduce} removed, this can be removed + +// ParallelReduce - scalar reductions template < typename MDRange, typename Functor, typename ValueType = void > struct MDFunctor { @@ -2273,7 +2767,7 @@ struct MDFunctor inline - MDFunctor( MDRange const& range, Functor const& f, ValueType & v ) + MDFunctor( MDRange const& range, Functor const& f ) : m_range( range ) , m_func( f ) {} @@ -2290,7 +2784,6 @@ struct MDFunctor inline MDFunctor& operator=( MDFunctor && ) = default; -// KOKKOS_FORCEINLINE_FUNCTION //Caused cuda warning - __host__ warning inline void operator()(index_type t, value_type & v) const { @@ -2301,6 +2794,56 @@ struct MDFunctor Functor m_func; }; + +// ParallelReduce - array reductions +template < typename MDRange, typename Functor, typename ValueType > +struct MDFunctor< MDRange, Functor, ValueType[] > +{ + using range_policy = MDRange; + using functor_type = Functor; + using value_type = ValueType[]; + using work_tag = typename range_policy::work_tag; + using index_type = typename range_policy::index_type; + using iterate_type = typename Kokkos::Experimental::Impl::HostIterateTile< MDRange + , Functor + , work_tag + , value_type + >; + + + inline + MDFunctor( MDRange const& range, Functor const& f ) + : m_range( range ) + , m_func( f ) + , value_count( f.value_count ) + {} + + inline + MDFunctor( MDFunctor const& ) = default; + + inline + MDFunctor& operator=( MDFunctor const& ) = default; + + inline + MDFunctor( MDFunctor && ) = default; + + inline 
+ MDFunctor& operator=( MDFunctor && ) = default; + + // FIXME Init and Join, as defined in m_func, are not working through the MDFunctor + // Best path forward is to eliminate need for MDFunctor, directly use MDRangePolicy within Parallel{For,Reduce} ?? + inline + void operator()(index_type t, value_type v) const + { + iterate_type(m_range, m_func, v)(t); + } + + MDRange m_range; + Functor m_func; + size_t value_count; +}; + + // ParallelFor template < typename MDRange, typename Functor > struct MDFunctor< MDRange, Functor, void > @@ -2349,4 +2892,3 @@ struct MDFunctor< MDRange, Functor, void > } } } //end namespace Kokkos::Experimental::Impl #endif - diff --git a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp index c5685c5b62..3fb15c8d1e 100644 --- a/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_AnalyzePolicy.hpp @@ -55,16 +55,19 @@ template < typename ExecutionSpace = void , typename WorkTag = void , typename IndexType = void , typename IterationPattern = void + , typename LaunchBounds = void > struct PolicyTraitsBase { - using type = PolicyTraitsBase< ExecutionSpace, Schedule, WorkTag, IndexType, IterationPattern>; + using type = PolicyTraitsBase< ExecutionSpace, Schedule, WorkTag, IndexType, + IterationPattern, LaunchBounds>; using execution_space = ExecutionSpace; using schedule_type = Schedule; using work_tag = WorkTag; using index_type = IndexType; using iteration_pattern = IterationPattern; + using launch_bounds = LaunchBounds; }; @@ -78,6 +81,7 @@ struct SetExecutionSpace , typename PolicyBase::work_tag , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern + , typename PolicyBase::launch_bounds >; }; @@ -91,6 +95,7 @@ struct SetSchedule , typename PolicyBase::work_tag , typename PolicyBase::index_type , typename PolicyBase::iteration_pattern + , typename PolicyBase::launch_bounds >; }; @@ -104,6 +109,7 @@ struct SetWorkTag , WorkTag , 
typename PolicyBase::index_type , typename PolicyBase::iteration_pattern + , typename PolicyBase::launch_bounds >; }; @@ -117,6 +123,7 @@ struct SetIndexType , typename PolicyBase::work_tag , IndexType , typename PolicyBase::iteration_pattern + , typename PolicyBase::launch_bounds >; }; @@ -131,6 +138,22 @@ struct SetIterationPattern , typename PolicyBase::work_tag , typename PolicyBase::index_type , IterationPattern + , typename PolicyBase::launch_bounds + >; +}; + + +template +struct SetLaunchBounds +{ + static_assert( is_void::value + , "Kokkos Error: More than one launch_bounds given" ); + using type = PolicyTraitsBase< typename PolicyBase::execution_space + , typename PolicyBase::schedule_type + , typename PolicyBase::work_tag + , typename PolicyBase::index_type + , typename PolicyBase::iteration_pattern + , LaunchBounds >; }; @@ -146,8 +169,9 @@ struct AnalyzePolicy : public , typename std::conditional< is_index_type::value , SetIndexType , typename std::conditional< std::is_integral::value , SetIndexType > , typename std::conditional< is_iteration_pattern::value, SetIterationPattern + , typename std::conditional< is_launch_bounds::value , SetLaunchBounds , SetWorkTag - >::type >::type >::type >::type>::type::type + >::type >::type >::type >::type >::type>::type::type , Traits... 
> {}; @@ -178,11 +202,18 @@ struct AnalyzePolicy , void // TODO set default iteration pattern , typename Base::iteration_pattern >::type; + + using launch_bounds = typename std::conditional< is_void< typename Base::launch_bounds >::value + , LaunchBounds<> + , typename Base::launch_bounds + >::type; + using type = PolicyTraitsBase< execution_space , schedule_type , work_tag , index_type , iteration_pattern + , launch_bounds >; }; diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp index 010b15064e..5b894b037b 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Compare_Exchange_Strong.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP ) #define KOKKOS_ATOMIC_COMPARE_EXCHANGE_STRONG_HPP @@ -126,11 +130,21 @@ T atomic_compare_exchange( volatile T * const dest , const T & compare , inline int atomic_compare_exchange( volatile int * const dest, const int compare, const int val) -{ return __sync_val_compare_and_swap(dest,compare,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_val_compare_and_swap(dest,compare,val); +} inline long atomic_compare_exchange( volatile long * const dest, const long compare, const long val ) -{ return __sync_val_compare_and_swap(dest,compare,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_val_compare_and_swap(dest,compare,val); +} #if defined( KOKKOS_ENABLE_GNU_ATOMICS ) @@ -159,6 +173,10 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare, KOKKOS_INLINE_FUNCTION U() {}; } tmp ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) 
dest, _MM_HINT_ET0 ); +#endif + tmp.i = __sync_val_compare_and_swap( (int*) dest , *((int*)&compare) , *((int*)&val) ); return tmp.t ; } @@ -175,6 +193,10 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare, KOKKOS_INLINE_FUNCTION U() {}; } tmp ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + tmp.i = __sync_val_compare_and_swap( (long*) dest , *((long*)&compare) , *((long*)&val) ); return tmp.t ; } @@ -193,6 +215,10 @@ T atomic_compare_exchange( volatile T * const dest, const T & compare, KOKKOS_INLINE_FUNCTION U() {}; } tmp ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + tmp.i = Impl::cas128( (Impl::cas128_t*) dest , *((Impl::cas128_t*)&compare) , *((Impl::cas128_t*)&val) ); return tmp.t ; } @@ -209,6 +235,10 @@ T atomic_compare_exchange( volatile T * const dest , const T compare , #endif , const T >::type& val ) { +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + while( !Impl::lock_address_host_space( (void*) dest ) ); T return_val = *dest; if( return_val == compare ) { diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp index 127de528f5..2a13a4865c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Decrement.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_DECREMENT_HPP ) #define KOKKOS_ATOMIC_DECREMENT_HPP @@ -54,6 +58,10 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_decrement(volatile char* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! 
defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif + __asm__ __volatile__( "lock decb %0" : /* no output registers */ @@ -69,6 +77,10 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_decrement(volatile short* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif + __asm__ __volatile__( "lock decw %0" : /* no output registers */ @@ -84,6 +96,10 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_decrement(volatile int* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif + __asm__ __volatile__( "lock decl %0" : /* no output registers */ @@ -99,6 +115,9 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_decrement(volatile long long int* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif __asm__ __volatile__( "lock decq %0" : /* no output registers */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp index a1ff47abce..9ba3cae9fc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Exchange.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_EXCHANGE_HPP ) #define KOKKOS_ATOMIC_EXCHANGE_HPP @@ -81,6 +85,10 @@ T atomic_exchange( typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(int) , const T & >::type val ) { // int tmp = __ullAtomicExch( (int*) dest , *((int*)&val) ); +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + int tmp = atomicExch( ((int*)dest) , *((int*)&val) ); return *((T*)&tmp); } @@ -93,6 +101,11 @@ T atomic_exchange( sizeof(T) == sizeof(unsigned long long int) , const T & >::type val ) { typedef unsigned long long int type ; + +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + // type tmp = __ullAtomicExch( (type*) dest , *((type*)&val) ); type tmp = atomicExch( ((type*)dest) , *((type*)&val) ); return *((T*)&tmp); @@ -108,6 +121,10 @@ T atomic_exchange( volatile T * const dest , { T return_val; // This is a way to (hopefully) avoid dead lock in a warp +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + int done = 0; unsigned int active = __ballot(1); unsigned int done_active = 0; @@ -173,6 +190,9 @@ T atomic_exchange( volatile T * const dest , , const T & >::type val ) { typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif const type v = *((type*)&val); // Extract to be sure the value doesn't change @@ -201,6 +221,10 @@ T atomic_exchange( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t) , const T & >::type val ) { +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + union U { Impl::cas128_t i ; T t ; @@ -260,6 +284,10 @@ void atomic_assign( volatile T * const dest , { typedef typename Kokkos::Impl::if_c< sizeof(T) == sizeof(int) , int , long >::type type ; +#if 
defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + const type v = *((type*)&val); // Extract to be sure the value doesn't change type assumed ; @@ -285,6 +313,10 @@ void atomic_assign( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) == sizeof(Impl::cas128_t) , const T & >::type val ) { +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + union U { Impl::cas128_t i ; T t ; diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp index 860c8e0e43..084c55efed 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Add.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP ) && ! defined( KOKKOS_ATOMIC_FETCH_ADD_HPP ) #define KOKKOS_ATOMIC_FETCH_ADD_HPP @@ -161,36 +165,60 @@ T atomic_fetch_add( volatile T * const dest , inline int atomic_fetch_add( volatile int * dest , const int val ) { - int original = val; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif - __asm__ __volatile__( - "lock xadd %1, %0" - : "+m" (*dest), "+r" (original) - : "m" (*dest), "r" (original) - : "memory" + int original = val; + + __asm__ __volatile__( + "lock xadd %1, %0" + : "+m" (*dest), "+r" (original) + : "m" (*dest), "r" (original) + : "memory" ); - return original; + return original; } #else inline int atomic_fetch_add( volatile int * const dest , const int val ) -{ return __sync_fetch_and_add(dest, val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_add(dest, val); +} #endif inline long int atomic_fetch_add( volatile long int * const dest , const long int val ) -{ return __sync_fetch_and_add(dest,val); } +{ +#if defined( 
KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_add(dest,val); +} #if defined( KOKKOS_ENABLE_GNU_ATOMICS ) inline unsigned int atomic_fetch_add( volatile unsigned int * const dest , const unsigned int val ) -{ return __sync_fetch_and_add(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_add(dest,val); +} inline unsigned long int atomic_fetch_add( volatile unsigned long int * const dest , const unsigned long int val ) -{ return __sync_fetch_and_add(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_add(dest,val); +} #endif @@ -205,6 +233,10 @@ T atomic_fetch_add( volatile T * const dest , inline U() {}; } assume , oldval , newval ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + oldval.t = *dest ; do { @@ -228,6 +260,10 @@ T atomic_fetch_add( volatile T * const dest , inline U() {}; } assume , oldval , newval ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + oldval.t = *dest ; do { @@ -253,6 +289,10 @@ T atomic_fetch_add( volatile T * const dest , inline U() {}; } assume , oldval , newval ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + oldval.t = *dest ; do { diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp index 83f5b2a5aa..6ecb65336c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_And.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_AND_HPP ) #define KOKKOS_ATOMIC_FETCH_AND_HPP @@ -76,21 +80,41 @@ unsigned long long int atomic_fetch_and( volatile unsigned long long int * const inline int atomic_fetch_and( volatile int * const dest , const int val ) -{ return __sync_fetch_and_and(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_and(dest,val); +} inline long int atomic_fetch_and( volatile long int * const dest , const long int val ) -{ return __sync_fetch_and_and(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_and(dest,val); +} #if defined( KOKKOS_ENABLE_GNU_ATOMICS ) inline unsigned int atomic_fetch_and( volatile unsigned int * const dest , const unsigned int val ) -{ return __sync_fetch_and_and(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_and(dest,val); +} inline unsigned long int atomic_fetch_and( volatile unsigned long int * const dest , const unsigned long int val ) -{ return __sync_fetch_and_and(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_and(dest,val); +} #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp index 8c73b4c3ef..ed3b438f89 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Or.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_OR_HPP ) #define KOKKOS_ATOMIC_FETCH_OR_HPP @@ -76,21 +80,41 @@ unsigned long long int atomic_fetch_or( volatile unsigned long long int * const inline int atomic_fetch_or( volatile int * const dest , const int val ) -{ return __sync_fetch_and_or(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_or(dest,val); +} inline long int atomic_fetch_or( volatile long int * const dest , const long int val ) -{ return __sync_fetch_and_or(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_or(dest,val); +} #if defined( KOKKOS_ENABLE_GNU_ATOMICS ) inline unsigned int atomic_fetch_or( volatile unsigned int * const dest , const unsigned int val ) -{ return __sync_fetch_and_or(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_or(dest,val); +} inline unsigned long int atomic_fetch_or( volatile unsigned long int * const dest , const unsigned long int val ) -{ return __sync_fetch_and_or(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_or(dest,val); +} #endif diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp index 504731d3a2..038cc13e9a 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Fetch_Sub.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP ) && ! 
defined( KOKKOS_ATOMIC_FETCH_SUB_HPP ) #define KOKKOS_ATOMIC_FETCH_SUB_HPP @@ -136,21 +140,41 @@ T atomic_fetch_sub( volatile T * const dest , inline int atomic_fetch_sub( volatile int * const dest , const int val ) -{ return __sync_fetch_and_sub(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_sub(dest,val); +} inline long int atomic_fetch_sub( volatile long int * const dest , const long int val ) -{ return __sync_fetch_and_sub(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_sub(dest,val); +} #if defined( KOKKOS_ENABLE_GNU_ATOMICS ) inline unsigned int atomic_fetch_sub( volatile unsigned int * const dest , const unsigned int val ) -{ return __sync_fetch_and_sub(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_sub(dest,val); +} inline unsigned long int atomic_fetch_sub( volatile unsigned long int * const dest , const unsigned long int val ) -{ return __sync_fetch_and_sub(dest,val); } +{ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + return __sync_fetch_and_sub(dest,val); +} #endif @@ -161,6 +185,10 @@ T atomic_fetch_sub( volatile T * const dest , { union { int i ; T t ; } assume , oldval , newval ; +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + oldval.t = *dest ; do { @@ -178,6 +206,10 @@ T atomic_fetch_sub( volatile T * const dest , typename Kokkos::Impl::enable_if< sizeof(T) != sizeof(int) && sizeof(T) == sizeof(long) , const T >::type val ) { +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + union { long i ; T t ; } assume , oldval , newval ; oldval.t = *dest ; @@ -202,6 +234,10 @@ T atomic_fetch_sub( volatile 
T * const dest , && ( sizeof(T) != 8 ) , const T >::type& val ) { +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) dest, _MM_HINT_ET0 ); +#endif + while( !Impl::lock_address_host_space( (void*) dest ) ); T return_val = *dest; *dest = return_val - val; diff --git a/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp b/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp index 2985fad95e..e7626603fc 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Atomic_Increment.hpp @@ -41,6 +41,10 @@ //@HEADER */ +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) +#include +#endif + #include #if defined( KOKKOS_ATOMIC_HPP) && ! defined( KOKKOS_ATOMIC_INCREMENT_HPP ) #define KOKKOS_ATOMIC_INCREMENT_HPP @@ -52,6 +56,9 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_increment(volatile char* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif __asm__ __volatile__( "lock incb %0" : /* no output registers */ @@ -67,6 +74,9 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_increment(volatile short* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif __asm__ __volatile__( "lock incw %0" : /* no output registers */ @@ -82,6 +92,9 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_increment(volatile int* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! 
defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif __asm__ __volatile__( "lock incl %0" : /* no output registers */ @@ -97,6 +110,9 @@ template<> KOKKOS_INLINE_FUNCTION void atomic_increment(volatile long long int* a) { #if defined( KOKKOS_ENABLE_ASM ) && defined( KOKKOS_ENABLE_ISA_X86_64 ) && ! defined(_WIN32) && ! defined(__CUDA_ARCH__) +#if defined( KOKKOS_ENABLE_RFO_PREFETCH ) + _mm_prefetch( (const char*) a, _MM_HINT_ET0 ); +#endif __asm__ __volatile__( "lock incq %0" : /* no output registers */ diff --git a/lib/kokkos/core/src/impl/Kokkos_Core.cpp b/lib/kokkos/core/src/impl/Kokkos_Core.cpp index f0ff6d78ec..f52cc469ac 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Core.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Core.cpp @@ -87,17 +87,12 @@ setenv("MEMKIND_HBW_NODES", "1", 0); #if defined( KOKKOS_ENABLE_OPENMP ) if( std::is_same< Kokkos::OpenMP , Kokkos::DefaultExecutionSpace >::value || std::is_same< Kokkos::OpenMP , Kokkos::HostSpace::execution_space >::value ) { - if(num_threads>0) { - if(use_numa>0) { - Kokkos::OpenMP::initialize(num_threads,use_numa); - } - else { - Kokkos::OpenMP::initialize(num_threads); - } - } else { - Kokkos::OpenMP::initialize(); + if(use_numa>0) { + Kokkos::OpenMP::initialize(num_threads,use_numa); + } + else { + Kokkos::OpenMP::initialize(num_threads); } - //std::cout << "Kokkos::initialize() fyi: OpenMP enabled and initialized" << std::endl ; } else { //std::cout << "Kokkos::initialize() fyi: OpenMP enabled but not initialized" << std::endl ; @@ -437,10 +432,7 @@ void initialize(int& narg, char* arg[]) iarg++; } - InitArguments arguments; - arguments.num_threads = num_threads; - arguments.num_numa = numa; - arguments.device_id = device; + InitArguments arguments{num_threads, numa, device}; Impl::initialize_internal(arguments); } diff --git a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp index 
dc75fb072f..fccd8e090f 100644 --- a/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_FunctorAdapter.hpp @@ -170,28 +170,31 @@ struct FunctorValueTraits< FunctorType , ArgTag , true /* == exists FunctorType: static_assert( 0 == ( sizeof(value_type) % sizeof(int) ) , "Reduction functor's declared value_type requires: 0 == sizeof(value_type) % sizeof(int)" ); + /* this cast to bool is needed for correctness by NVCC */ + enum : bool { IsArray = static_cast(Impl::is_array< typename FunctorType::value_type >::value) }; + // If not an array then what is the sizeof(value_type) - enum { StaticValueSize = Impl::is_array< typename FunctorType::value_type >::value ? 0 : sizeof(value_type) }; + enum { StaticValueSize = IsArray ? 0 : sizeof(value_type) }; typedef value_type * pointer_type ; // The reference_type for an array is 'value_type *' // The reference_type for a single value is 'value_type &' - typedef typename Impl::if_c< ! StaticValueSize , value_type * - , value_type & >::type reference_type ; + typedef typename Impl::if_c< IsArray , value_type * + , value_type & >::type reference_type ; // Number of values if single value template< class F > KOKKOS_FORCEINLINE_FUNCTION static - typename Impl::enable_if< std::is_same::value && StaticValueSize , unsigned >::type + typename Impl::enable_if< std::is_same::value && ! IsArray , unsigned >::type value_count( const F & ) { return 1 ; } // Number of values if an array, protect via templating because 'f.value_count' // will only exist when the functor declares the value_type to be an array. template< class F > KOKKOS_FORCEINLINE_FUNCTION static - typename Impl::enable_if< std::is_same::value && ! 
StaticValueSize , unsigned >::type + typename Impl::enable_if< std::is_same::value && IsArray , unsigned >::type value_count( const F & f ) { return f.value_count ; } // Total size of the value diff --git a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp index 8cb7430035..e11f8b6d34 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HBWSpace.cpp @@ -70,62 +70,6 @@ #ifdef KOKKOS_ENABLE_HBWSPACE #define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB) -namespace Kokkos { -namespace Experimental { -namespace { - -static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ; - -typedef int (* QuerySpaceInParallelPtr )(); - -QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ; -int s_in_parallel_query_count = 0 ; - -} // namespace - -void HBWSpace::register_in_parallel( int (*device_in_parallel)() ) -{ - if ( 0 == device_in_parallel ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel ERROR : given NULL" ) ); - } - - int i = -1 ; - - if ( ! (device_in_parallel)() ) { - for ( i = 0 ; i < s_in_parallel_query_count && ! (*(s_in_parallel_query[i]))() ; ++i ); - } - - if ( i < s_in_parallel_query_count ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : called in_parallel" ) ); - - } - - if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::Experimental::HBWSpace::register_in_parallel_query ERROR : exceeded maximum" ) ); - - } - - for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i ); - - if ( i == s_in_parallel_query_count ) { - s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ; - } -} - -int HBWSpace::in_parallel() -{ - const int n = s_in_parallel_query_count ; - - int i = 0 ; - - while ( i < n && ! 
(*(s_in_parallel_query[i]))() ) { ++i ; } - - return i < n ; -} - -} // namespace Experiemtal -} // namespace Kokkos - /*--------------------------------------------------------------------------*/ namespace Kokkos { diff --git a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp index 2a5c34c375..a5a73ddebb 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostSpace.cpp @@ -106,62 +106,6 @@ //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -namespace Kokkos { -namespace { - -static const int QUERY_SPACE_IN_PARALLEL_MAX = 16 ; - -typedef int (* QuerySpaceInParallelPtr )(); - -QuerySpaceInParallelPtr s_in_parallel_query[ QUERY_SPACE_IN_PARALLEL_MAX ] ; -int s_in_parallel_query_count = 0 ; - -} // namespace - -void HostSpace::register_in_parallel( int (*device_in_parallel)() ) -{ - if ( 0 == device_in_parallel ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel ERROR : given NULL" ) ); - } - - int i = -1 ; - - if ( ! (device_in_parallel)() ) { - for ( i = 0 ; i < s_in_parallel_query_count && ! 
(*(s_in_parallel_query[i]))() ; ++i ); - } - - if ( i < s_in_parallel_query_count ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : called in_parallel" ) ); - - } - - if ( QUERY_SPACE_IN_PARALLEL_MAX <= i ) { - Kokkos::Impl::throw_runtime_exception( std::string("Kokkos::HostSpace::register_in_parallel_query ERROR : exceeded maximum" ) ); - - } - - for ( i = 0 ; i < s_in_parallel_query_count && s_in_parallel_query[i] != device_in_parallel ; ++i ); - - if ( i == s_in_parallel_query_count ) { - s_in_parallel_query[s_in_parallel_query_count++] = device_in_parallel ; - } -} - -int HostSpace::in_parallel() -{ - const int n = s_in_parallel_query_count ; - - int i = 0 ; - - while ( i < n && ! (*(s_in_parallel_query[i]))() ) { ++i ; } - - return i < n ; -} - -} // namespace Kokkos - -/*--------------------------------------------------------------------------*/ - namespace Kokkos { /* Default allocation mechanism */ @@ -340,9 +284,6 @@ void HostSpace::deallocate( void * const arg_alloc_ptr , const size_t arg_alloc_ } } -constexpr const char* HostSpace::name() { - return m_name; -} } // namespace Kokkos //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp index ac200209c7..d2446bde09 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.cpp @@ -45,7 +45,7 @@ #include #include #include -#include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -58,9 +58,11 @@ void HostThreadTeamData::organize_pool { bool ok = true ; + memory_fence(); + // Verify not already a member of a pool: for ( int rank = 0 ; rank < size && ok ; ++rank ) { - ok = ( 0 != members[rank] ) && ( 0 == 
members[rank]->m_pool_scratch ); + ok = ( nullptr != members[rank] ) && ( 0 == members[rank]->m_pool_scratch ); } if ( ok ) { @@ -89,7 +91,6 @@ void HostThreadTeamData::organize_pool mem->m_team_alloc = 1 ; mem->m_league_rank = rank ; mem->m_league_size = size ; - mem->m_pool_rendezvous_step = 0 ; mem->m_team_rendezvous_step = 0 ; pool[ rank ] = mem ; } @@ -116,7 +117,6 @@ void HostThreadTeamData::disband_pool() m_team_alloc = 1 ; m_league_rank = 0 ; m_league_size = 1 ; - m_pool_rendezvous_step = 0 ; m_team_rendezvous_step = 0 ; } @@ -256,11 +256,6 @@ int HostThreadTeamData::rendezvous( int64_t * const buffer const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle ; - union { - int64_t full ; - int8_t byte[8] ; - } value ; - if ( rank ) { const int group_begin = rank << shift_byte ; // == rank * size_byte @@ -275,13 +270,14 @@ int HostThreadTeamData::rendezvous( int64_t * const buffer const int end = group_begin + size_byte < size ? size_byte : size - group_begin ; - value.full = 0 ; - for ( int i = 0 ; i < end ; ++i ) value.byte[i] = int8_t( step ); + int64_t value = 0 ; - store_fence(); // This should not be needed but fixes #742 + for ( int i = 0 ; i < end ; ++i ) { + ((int8_t*) & value )[i] = int8_t( step ); + } spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] - , value.full ); + , value ); } { @@ -316,10 +312,12 @@ int HostThreadTeamData::rendezvous( int64_t * const buffer const int end = size_byte < size ? 8 : size ; - value.full = 0 ; - for ( int i = 1 ; i < end ; ++i ) value.byte[i] = int8_t( step ); + int64_t value = 0 ; + for ( int i = 1 ; i < end ; ++i ) { + ((int8_t *) & value)[i] = int8_t( step ); + } - spinwait_until_equal( buffer[ sync_offset ], value.full ); + spinwait_until_equal( buffer[ sync_offset ], value ); } return rank ? 
0 : 1 ; diff --git a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp index c050a16eae..7facc0a410 100644 --- a/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_HostThreadTeam.hpp @@ -50,6 +50,7 @@ #include #include #include +#include //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -67,14 +68,12 @@ public: // Assume upper bounds on number of threads: // pool size <= 1024 threads - // pool rendezvous <= ( 1024 / 8 ) * 4 + 4 = 2052 // team size <= 64 threads - // team rendezvous <= ( 64 / 8 ) * 4 + 4 = 36 enum : int { max_pool_members = 1024 }; enum : int { max_team_members = 64 }; - enum : int { max_pool_rendezvous = ( max_pool_members / 8 ) * 4 + 4 }; - enum : int { max_team_rendezvous = ( max_team_members / 8 ) * 4 + 4 }; + enum : int { max_pool_rendezvous = rendezvous_buffer_size( max_pool_members ) }; + enum : int { max_team_rendezvous = rendezvous_buffer_size( max_team_members ) }; private: @@ -114,7 +113,6 @@ private: int m_league_size ; int m_work_chunk ; int m_steal_rank ; // work stealing rank - int mutable m_pool_rendezvous_step ; int mutable m_team_rendezvous_step ; HostThreadTeamData * team_member( int r ) const noexcept @@ -147,6 +145,7 @@ public: int team_rendezvous( int const root ) const noexcept { return 1 == m_team_size ? 1 : + HostThreadTeamData:: rendezvous( m_team_scratch + m_team_rendezvous , m_team_rendezvous_step , m_team_size @@ -157,6 +156,7 @@ public: int team_rendezvous() const noexcept { return 1 == m_team_size ? 
1 : + HostThreadTeamData:: rendezvous( m_team_scratch + m_team_rendezvous , m_team_rendezvous_step , m_team_size @@ -167,6 +167,7 @@ public: void team_rendezvous_release() const noexcept { if ( 1 < m_team_size ) { + HostThreadTeamData:: rendezvous_release( m_team_scratch + m_team_rendezvous , m_team_rendezvous_step ); } @@ -175,19 +176,30 @@ public: inline int pool_rendezvous() const noexcept { + static constexpr int yield_wait = + #if defined( KOKKOS_COMPILER_IBM ) + // If running on IBM POWER architecture the global + // level rendzvous should immediately yield when + // waiting for other threads in the pool to arrive. + 1 + #else + 0 + #endif + ; return 1 == m_pool_size ? 1 : + Kokkos::Impl:: rendezvous( m_pool_scratch + m_pool_rendezvous - , m_pool_rendezvous_step , m_pool_size - , m_pool_rank ); + , m_pool_rank + , yield_wait ); } inline void pool_rendezvous_release() const noexcept { if ( 1 < m_pool_size ) { - rendezvous_release( m_pool_scratch + m_pool_rendezvous - , m_pool_rendezvous_step ); + Kokkos::Impl:: + rendezvous_release( m_pool_scratch + m_pool_rendezvous ); } } @@ -213,7 +225,6 @@ public: , m_league_size(1) , m_work_chunk(0) , m_steal_rank(0) - , m_pool_rendezvous_step(0) , m_team_rendezvous_step(0) {} @@ -406,7 +417,7 @@ fflush(stdout); // Steal from next team, round robin // The next team is offset by m_team_alloc if it fits in the pool. - m_steal_rank = m_team_base + m_team_alloc + m_team_size <= m_pool_size ? + m_steal_rank = m_team_base + m_team_alloc + m_team_size <= m_pool_size ? 
m_team_base + m_team_alloc : 0 ; } diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp index 98482cfab6..608d514c79 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.cpp @@ -50,51 +50,70 @@ namespace Kokkos { namespace Profiling { +static initFunction initProfileLibrary = nullptr; +static finalizeFunction finalizeProfileLibrary = nullptr; + +static beginFunction beginForCallee = nullptr; +static beginFunction beginScanCallee = nullptr; +static beginFunction beginReduceCallee = nullptr; +static endFunction endForCallee = nullptr; +static endFunction endScanCallee = nullptr; +static endFunction endReduceCallee = nullptr; + +static pushFunction pushRegionCallee = nullptr; +static popFunction popRegionCallee = nullptr; + +static allocateDataFunction allocateDataCallee = nullptr; +static deallocateDataFunction deallocateDataCallee = nullptr; + +static beginDeepCopyFunction beginDeepCopyCallee = nullptr; +static endDeepCopyFunction endDeepCopyCallee = nullptr; + SpaceHandle::SpaceHandle(const char* space_name) { strncpy(name,space_name,64); } bool profileLibraryLoaded() { - return (NULL != initProfileLibrary); + return (nullptr != initProfileLibrary); } void beginParallelFor(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if(NULL != beginForCallee) { + if(nullptr != beginForCallee) { Kokkos::fence(); (*beginForCallee)(kernelPrefix.c_str(), devID, kernelID); } } void endParallelFor(const uint64_t kernelID) { - if(NULL != endForCallee) { + if(nullptr != endForCallee) { Kokkos::fence(); (*endForCallee)(kernelID); } } void beginParallelScan(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if(NULL != beginScanCallee) { + if(nullptr != beginScanCallee) { Kokkos::fence(); (*beginScanCallee)(kernelPrefix.c_str(), devID, kernelID); } } void endParallelScan(const uint64_t 
kernelID) { - if(NULL != endScanCallee) { + if(nullptr != endScanCallee) { Kokkos::fence(); (*endScanCallee)(kernelID); } } void beginParallelReduce(const std::string& kernelPrefix, const uint32_t devID, uint64_t* kernelID) { - if(NULL != beginReduceCallee) { + if(nullptr != beginReduceCallee) { Kokkos::fence(); (*beginReduceCallee)(kernelPrefix.c_str(), devID, kernelID); } } void endParallelReduce(const uint64_t kernelID) { - if(NULL != endReduceCallee) { + if(nullptr != endReduceCallee) { Kokkos::fence(); (*endReduceCallee)(kernelID); } @@ -102,31 +121,47 @@ void endParallelReduce(const uint64_t kernelID) { void pushRegion(const std::string& kName) { - if( NULL != pushRegionCallee ) { + if( nullptr != pushRegionCallee ) { Kokkos::fence(); (*pushRegionCallee)(kName.c_str()); } } void popRegion() { - if( NULL != popRegionCallee ) { + if( nullptr != popRegionCallee ) { Kokkos::fence(); (*popRegionCallee)(); } } void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { - if(NULL != allocateDataCallee) { + if(nullptr != allocateDataCallee) { (*allocateDataCallee)(space,label.c_str(),ptr,size); } } void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size) { - if(NULL != allocateDataCallee) { + if(nullptr != deallocateDataCallee) { (*deallocateDataCallee)(space,label.c_str(),ptr,size); } } +void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, const void* dst_ptr, + const SpaceHandle src_space, const std::string src_label, const void* src_ptr, + const uint64_t size) { + if(nullptr != beginDeepCopyCallee) { + (*beginDeepCopyCallee)(dst_space, dst_label.c_str(), dst_ptr, + src_space, src_label.c_str(), src_ptr, + size); + } +} + +void endDeepCopy() { + if(nullptr != endDeepCopyCallee) { + (*endDeepCopyCallee)(); + } +} + void initialize() { // Make sure initialize calls happens only once @@ -140,7 +175,7 @@ void initialize() { // If we do not find a 
profiling library in the environment then exit // early. - if( NULL == envProfileLibrary ) { + if( nullptr == envProfileLibrary ) { return ; } @@ -149,10 +184,10 @@ void initialize() { char* profileLibraryName = strtok(envProfileCopy, ";"); - if( (NULL != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) { + if( (nullptr != profileLibraryName) && (strcmp(profileLibraryName, "") != 0) ) { firstProfileLibrary = dlopen(profileLibraryName, RTLD_NOW | RTLD_GLOBAL); - if(NULL == firstProfileLibrary) { + if(nullptr == firstProfileLibrary) { std::cerr << "Error: Unable to load KokkosP library: " << profileLibraryName << std::endl; } else { @@ -191,14 +226,19 @@ void initialize() { auto p12 = dlsym(firstProfileLibrary, "kokkosp_deallocate_data"); deallocateDataCallee = *((deallocateDataFunction*) &p12); + auto p13 = dlsym(firstProfileLibrary, "kokkosp_begin_deep_copy"); + beginDeepCopyCallee = *((beginDeepCopyFunction*) &p13); + auto p14 = dlsym(firstProfileLibrary, "kokkosp_end_deep_copy"); + endDeepCopyCallee = *((endDeepCopyFunction*) &p14); + } } - if(NULL != initProfileLibrary) { + if(nullptr != initProfileLibrary) { (*initProfileLibrary)(0, (uint64_t) KOKKOSP_INTERFACE_VERSION, (uint32_t) 0, - NULL); + nullptr); } free(envProfileCopy); @@ -210,28 +250,30 @@ void finalize() { if(is_finalized) return; is_finalized = 1; - if(NULL != finalizeProfileLibrary) { + if(nullptr != finalizeProfileLibrary) { (*finalizeProfileLibrary)(); - // Set all profile hooks to NULL to prevent + // Set all profile hooks to nullptr to prevent // any additional calls. 
Once we are told to // finalize, we mean it - initProfileLibrary = NULL; - finalizeProfileLibrary = NULL; + initProfileLibrary = nullptr; + finalizeProfileLibrary = nullptr; - beginForCallee = NULL; - beginScanCallee = NULL; - beginReduceCallee = NULL; - endScanCallee = NULL; - endForCallee = NULL; - endReduceCallee = NULL; + beginForCallee = nullptr; + beginScanCallee = nullptr; + beginReduceCallee = nullptr; + endScanCallee = nullptr; + endForCallee = nullptr; + endReduceCallee = nullptr; - pushRegionCallee = NULL; - popRegionCallee = NULL; + pushRegionCallee = nullptr; + popRegionCallee = nullptr; - allocateDataCallee = NULL; - deallocateDataCallee = NULL; + allocateDataCallee = nullptr; + deallocateDataCallee = nullptr; + beginDeepCopyCallee = nullptr; + endDeepCopyCallee = nullptr; } } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp index f76e5dfa04..2c2e524d9d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Profiling_Interface.hpp @@ -81,23 +81,11 @@ typedef void (*popFunction)(); typedef void (*allocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); typedef void (*deallocateDataFunction)(const SpaceHandle, const char*, const void*, const uint64_t); - -static initFunction initProfileLibrary = NULL; -static finalizeFunction finalizeProfileLibrary = NULL; - -static beginFunction beginForCallee = NULL; -static beginFunction beginScanCallee = NULL; -static beginFunction beginReduceCallee = NULL; -static endFunction endForCallee = NULL; -static endFunction endScanCallee = NULL; -static endFunction endReduceCallee = NULL; - -static pushFunction pushRegionCallee = NULL; -static popFunction popRegionCallee = NULL; - -static allocateDataFunction allocateDataCallee = NULL; -static deallocateDataFunction deallocateDataCallee = NULL; - +typedef void (*beginDeepCopyFunction)( + SpaceHandle, const char*, 
const void*, + SpaceHandle, const char*, const void*, + uint64_t); +typedef void (*endDeepCopyFunction)(); bool profileLibraryLoaded(); @@ -114,35 +102,14 @@ void popRegion(); void allocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); void deallocateData(const SpaceHandle space, const std::string label, const void* ptr, const uint64_t size); +void beginDeepCopy(const SpaceHandle dst_space, const std::string dst_label, const void* dst_ptr, + const SpaceHandle src_space, const std::string src_label, const void* src_ptr, + const uint64_t size); +void endDeepCopy(); + void initialize(); void finalize(); -//Define finalize_fake inline to get rid of warnings for unused static variables -inline void finalize_fake() { - if(NULL != finalizeProfileLibrary) { - (*finalizeProfileLibrary)(); - - // Set all profile hooks to NULL to prevent - // any additional calls. Once we are told to - // finalize, we mean it - beginForCallee = NULL; - beginScanCallee = NULL; - beginReduceCallee = NULL; - endScanCallee = NULL; - endForCallee = NULL; - endReduceCallee = NULL; - - allocateDataCallee = NULL; - deallocateDataCallee = NULL; - - initProfileLibrary = NULL; - finalizeProfileLibrary = NULL; - pushRegionCallee = NULL; - popRegionCallee = NULL; - } -} - - } } diff --git a/lib/kokkos/core/src/impl/Kokkos_Rendezvous.cpp b/lib/kokkos/core/src/impl/Kokkos_Rendezvous.cpp new file mode 100644 index 0000000000..ac697fce4b --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Rendezvous.cpp @@ -0,0 +1,208 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include +#include +#include + +namespace Kokkos { namespace Impl { + +//---------------------------------------------------------------------------- +/* pattern for rendezvous + * + * if ( rendezvous() ) { + * ... all other threads are still in team_rendezvous() ... + * rendezvous_release(); + * ... 
all other threads are released from team_rendezvous() ... + * } + */ + +int rendezvous( volatile int64_t * const buffer + , int const size + , int const rank + , int const slow + ) noexcept +{ + enum : int { shift_byte = 3 }; + enum : int { size_byte = ( 01 << shift_byte ) }; // == 8 + enum : int { mask_byte = size_byte - 1 }; + + enum : int { shift_mem_cycle = 2 }; + enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 + enum : int { mask_mem_cycle = size_mem_cycle - 1 }; + + // Cycle step values: 1 <= step <= size_val_cycle + // An odd multiple of memory cycle so that when a memory location + // is reused it has a different value. + // Must be representable within a single byte: size_val_cycle < 16 + + enum : int { size_val_cycle = 3 * size_mem_cycle }; + + // Requires: + // Called by rank = [ 0 .. size ) + // buffer aligned to int64_t[4] + + // A sequence of rendezvous uses four cycled locations in memory + // and non-equal cycled synchronization values to + // 1) prevent rendezvous from overtaking one another and + // 2) give each spin wait location an int64_t[4] span + // so that it has its own cache line. + + const int64_t step = (buffer[0] % size_val_cycle ) + 1 ; + + // The leading int64_t[4] span is for thread 0 to write + // and all other threads to read spin-wait. + // sync_offset is the index into this array for this step. + + const int sync_offset = ( step & mask_mem_cycle ) + size_mem_cycle + size_mem_cycle ; + + if ( rank ) { + + const int group_begin = rank << shift_byte ; // == rank * size_byte + + if ( group_begin < size ) { + + // This thread waits for threads + // [ group_begin .. group_begin + 8 ) + // [ rank*8 .. rank*8 + 8 ) + // to write to their designated bytes. + + const int end = group_begin + size_byte < size + ? 
size_byte : size - group_begin ; + + int64_t value = 0; + for ( int i = 0 ; i < end ; ++i ) { + value |= step << (i * size_byte ); + } + + store_fence(); // This should not be needed but fixes #742 + + if ( slow ) { + yield_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] + , value ); + } + else { + spinwait_until_equal( buffer[ (rank << shift_mem_cycle) + sync_offset ] + , value ); + } + } + + { + // This thread sets its designated byte. + // ( rank % size_byte ) + + // ( ( rank / size_byte ) * size_byte * size_mem_cycle ) + + // ( sync_offset * size_byte ) + const int offset = ( rank & mask_byte ) + + ( ( rank & ~mask_byte ) << shift_mem_cycle ) + + ( sync_offset << shift_byte ); + + // All of this thread's previous memory stores must be complete before + // this thread stores the step value at this thread's designated byte + // in the shared synchronization array. + + Kokkos::memory_fence(); + + ((volatile int8_t*) buffer)[ offset ] = int8_t( step ); + + // Memory fence to push the previous store out + Kokkos::memory_fence(); + } + + // Wait for thread 0 to release all other threads + + if ( slow ) { + yield_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) ); + } + else { + spinwait_until_equal( buffer[ (step & mask_mem_cycle) + size_mem_cycle ] , int64_t(step) ); + } + } + else { + // Thread 0 waits for threads [1..7] + // to write to their designated bytes. + + const int end = size_byte < size ? 8 : size ; + + int64_t value = 0; + for ( int i = 1 ; i < end ; ++i ) { + value |= step << (i * size_byte ); + } + + if ( slow ) { + yield_until_equal( buffer[ sync_offset ], value ); + } + else { + spinwait_until_equal( buffer[ sync_offset ], value ); + } + } + + return rank ? 
0 : 1 ; +} + +void rendezvous_release( volatile int64_t * const buffer ) noexcept +{ + enum : int { shift_mem_cycle = 2 }; + enum : int { size_mem_cycle = ( 01 << shift_mem_cycle ) }; // == 4 + enum : int { mask_mem_cycle = size_mem_cycle - 1 }; + enum : int { size_val_cycle = 3 * size_mem_cycle }; + + // Requires: + // Called after team_rendezvous + // Called only by true == team_rendezvous(root) + + // update step + const int64_t step = (buffer[0] % size_val_cycle ) + 1; + buffer[0] = step; + + // Memory fence to be sure all previous writes are complete: + Kokkos::memory_fence(); + + buffer[ (step & mask_mem_cycle) + size_mem_cycle ] = step; + + // Memory fence to push the store out + Kokkos::memory_fence(); +} + +}} // namespace Kokkos::Impl + diff --git a/lib/kokkos/core/src/impl/Kokkos_Rendezvous.hpp b/lib/kokkos/core/src/impl/Kokkos_Rendezvous.hpp new file mode 100644 index 0000000000..57f8633bca --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Rendezvous.hpp @@ -0,0 +1,87 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_IMPL_RENDEZVOUS_HPP +#define KOKKOS_IMPL_RENDEZVOUS_HPP + +#include + +namespace Kokkos { namespace Impl { + +inline +constexpr int rendezvous_buffer_size( int max_members ) noexcept +{ + return (((max_members + 7) / 8) * 4) + 4 + 4; +} + +/** \brief Thread pool rendezvous + * + * Rendezvous pattern: + * if ( rendezvous(root) ) { + * ... only root thread here while all others wait ... + * rendezvous_release(); + * } + * else { + * ... all other threads release here ... + * } + * + * Requires: buffer[ rendezvous_buffer_size( max_threads ) ]; + * + * When slow != 0 the expectation is thread arrival will be + * slow so the threads that arrive early should quickly yield + * their core to the runtime thus possibly allowing the late + * arriving threads to have more resources + * (e.g., power and clock frequency). 
+ */ +int rendezvous( volatile int64_t * const buffer + , int const size + , int const rank + , int const slow = 0 ) noexcept ; + +void rendezvous_release( volatile int64_t * const buffer ) noexcept ; + + +}} // namespace Kokkos::Impl + +#endif // KOKKOS_IMPL_RENDEZVOUS_HPP + diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp index 755271c07e..dfbeba461e 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial.cpp @@ -50,6 +50,7 @@ #include #include +#include /*--------------------------------------------------------------------------*/ @@ -123,7 +124,6 @@ void serial_resize_thread_team_data( size_t pool_reduce_bytes } } -// Get thread team data structure for omp_get_thread_num() HostThreadTeamData * serial_get_thread_team_data() { return & g_serial_thread_team_data ; @@ -151,6 +151,8 @@ void Serial::initialize( unsigned threads_count (void) use_cores_per_numa; (void) allow_asynchronous_threadpool; + Impl::SharedAllocationRecord< void, void >::tracking_enable(); + // Init the array of locks used for arbitrarily sized atomics Impl::init_lock_array_host_space(); #if defined(KOKKOS_ENABLE_PROFILING) diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp index 76297161b1..0b6fbd9af0 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.cpp @@ -62,7 +62,7 @@ void TaskQueueSpecialization< Kokkos::Serial >::execute { using execution_space = Kokkos::Serial ; using queue_type = TaskQueue< execution_space > ; - using task_root_type = TaskBase< execution_space , void , void > ; + using task_root_type = TaskBase< void , void , void > ; using Member = Impl::HostThreadTeamMember< execution_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; @@ -122,7 +122,7 @@ void TaskQueueSpecialization< Kokkos::Serial > :: { using execution_space = 
Kokkos::Serial ; using queue_type = TaskQueue< execution_space > ; - using task_root_type = TaskBase< execution_space , void , void > ; + using task_root_type = TaskBase< void , void , void > ; using Member = Impl::HostThreadTeamMember< execution_space > ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp index 2eb2b5cf52..39deebbbf1 100644 --- a/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_Task.hpp @@ -65,7 +65,7 @@ public: using execution_space = Kokkos::Serial ; using memory_space = Kokkos::HostSpace ; using queue_type = Kokkos::Impl::TaskQueue< execution_space > ; - using task_base_type = Kokkos::Impl::TaskBase< execution_space , void , void > ; + using task_base_type = Kokkos::Impl::TaskBase< void , void , void > ; using member_type = Kokkos::Impl::HostThreadTeamMember< execution_space > ; static diff --git a/lib/kokkos/core/src/impl/Kokkos_Serial_WorkGraphPolicy.hpp b/lib/kokkos/core/src/impl/Kokkos_Serial_WorkGraphPolicy.hpp new file mode 100644 index 0000000000..dc30ffe9e0 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Serial_WorkGraphPolicy.hpp @@ -0,0 +1,102 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#ifndef KOKKOS_SERIAL_WORKGRAPHPOLICY_HPP +#define KOKKOS_SERIAL_WORKGRAPHPOLICY_HPP + +namespace Kokkos { +namespace Impl { + +template< class FunctorType , class ... Traits > +class ParallelFor< FunctorType , + Kokkos::Experimental::WorkGraphPolicy< Traits ... > , + Kokkos::Serial + > + : public Kokkos::Impl::Experimental:: + WorkGraphExec< FunctorType, + Kokkos::Serial, + Traits ... + > +{ +private: + + typedef Kokkos::Experimental::WorkGraphPolicy< Traits ... 
> Policy ; + typedef Kokkos::Impl::Experimental:: + WorkGraphExec Base ; + + template< class TagType > + typename std::enable_if< std::is_same< TagType , void >::value >::type + exec_one(const typename Policy::member_type& i) const { + Base::m_functor( i ); + } + + template< class TagType > + typename std::enable_if< ! std::is_same< TagType , void >::value >::type + exec_one(const typename Policy::member_type& i) const { + const TagType t{} ; + Base::m_functor( t , i ); + } + +public: + + inline + void execute() + { + for (std::int32_t i; (-1 != (i = Base::before_work())); ) { + exec_one< typename Policy::work_tag >( i ); + Base::after_work(i); + } + } + + inline + ParallelFor( const FunctorType & arg_functor + , const Policy & arg_policy ) + : Base( arg_functor, arg_policy ) + { + } +}; + +} // namespace Impl +} // namespace Kokkos + +#endif /* #define KOKKOS_SERIAL_WORKGRAPHPOLICY_HPP */ diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp index e28c1194a7..af79523e0c 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.cpp @@ -46,23 +46,23 @@ namespace Kokkos { namespace Impl { -int SharedAllocationRecord< void , void >::s_tracking_enabled = 1 ; +namespace { -void SharedAllocationRecord< void , void >::tracking_claim_and_disable() -{ - // A host thread claim and disable tracking flag +__thread int t_tracking_enabled = 1; - while ( ! Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 1, 0 ) ); } -void SharedAllocationRecord< void , void >::tracking_release_and_enable() -{ - // The host thread that claimed and disabled the tracking flag - // now release and enable tracking. +int SharedAllocationRecord< void , void >::tracking_enabled() +{ return t_tracking_enabled; } - if ( ! 
Kokkos::atomic_compare_exchange_strong( & s_tracking_enabled, 0, 1 ) ){ - Kokkos::Impl::throw_runtime_exception("Kokkos::Impl::SharedAllocationRecord<>::tracking_release_and_enable FAILED, this host process thread did not hold the lock" ); - } +void SharedAllocationRecord< void , void >::tracking_disable() +{ + t_tracking_enabled = 0; +} + +void SharedAllocationRecord< void , void >::tracking_enable() +{ + t_tracking_enabled = 1; } //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp index 4dc61bb02e..2e3cc1a163 100644 --- a/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_SharedAlloc.hpp @@ -71,6 +71,9 @@ public: KOKKOS_INLINE_FUNCTION static const SharedAllocationHeader * get_header( void * alloc_ptr ) { return reinterpret_cast( reinterpret_cast(alloc_ptr) - sizeof(SharedAllocationHeader) ); } + + KOKKOS_INLINE_FUNCTION + const char* label() const { return m_label; } }; template<> @@ -83,8 +86,6 @@ protected: typedef void (* function_type )( SharedAllocationRecord * ); - static int s_tracking_enabled ; - SharedAllocationHeader * const m_alloc_ptr ; size_t const m_alloc_size ; function_type const m_dealloc ; @@ -110,17 +111,17 @@ protected: public: inline std::string get_label() const { return std::string("Unmanaged"); } - static int tracking_enabled() { return s_tracking_enabled ; } + static int tracking_enabled(); /**\brief A host process thread claims and disables the * shared allocation tracking flag. */ - static void tracking_claim_and_disable(); + static void tracking_disable(); /**\brief A host process thread releases and enables the * shared allocation tracking flag. 
*/ - static void tracking_release_and_enable(); + static void tracking_enable(); ~SharedAllocationRecord() = default ; @@ -317,6 +318,11 @@ public: #endif } + KOKKOS_INLINE_FUNCTION + bool has_record() const { + return (m_record_bits & (~DO_NOT_DEREF_FLAG)) != 0; + } + KOKKOS_FORCEINLINE_FUNCTION ~SharedAllocationTracker() { KOKKOS_IMPL_SHARED_ALLOCATION_TRACKER_DECREMENT } diff --git a/lib/kokkos/core/src/impl/Kokkos_Spinwait.cpp b/lib/kokkos/core/src/impl/Kokkos_Spinwait.cpp new file mode 100644 index 0000000000..3d3f83ed85 --- /dev/null +++ b/lib/kokkos/core/src/impl/Kokkos_Spinwait.cpp @@ -0,0 +1,210 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +#include +#include +#include + +#if defined( KOKKOS_ENABLE_STDTHREAD ) + #include +#elif !defined( _WIN32 ) + #include + #include +#else + #include + #include + #include +#endif + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { +namespace { + +void host_thread_yield( const uint32_t i , const int force_yield ) +{ + static constexpr uint32_t sleep_limit = 1 << 13 ; + static constexpr uint32_t yield_limit = 1 << 12 ; + + const int c = Kokkos::Impl::bit_scan_reverse(i); + + if ( sleep_limit < i ) { + + // Attempt to put the thread to sleep for 'c' milliseconds + + #if defined( KOKKOS_ENABLE_STDTHREAD ) + std::this_thread::sleep_for( std::chrono::nanoseconds( c * 1000 ) ) + #elif !defined( _WIN32 ) + timespec req ; + req.tv_sec = 0 ; + req.tv_nsec = 1000 * c ; + nanosleep( &req, nullptr ); + #else /* defined( _WIN32 ) IS Microsoft Windows */ + Sleep(c); + #endif + } + + else if ( force_yield || yield_limit < i ) { + + // Attempt to yield thread resources to runtime + + #if defined( KOKKOS_ENABLE_STDTHREAD ) + std::this_thread::yield(); + #elif !defined( _WIN32 ) + sched_yield(); + #else /* defined( _WIN32 ) IS Microsoft Windows */ + 
YieldProcessor(); + #endif + } + + #if defined( KOKKOS_ENABLE_ASM ) + + else if ( (1u<<4) < i ) { + + // Insert a few no-ops to quiet the thread: + + for ( int k = 0 ; k < c ; ++k ) { + #if defined( __amd64 ) || defined( __amd64__ ) || \ + defined( __x86_64 ) || defined( __x86_64__ ) + #if !defined( _WIN32 ) /* IS NOT Microsoft Windows */ + asm volatile( "nop\n" ); + #else + __asm__ __volatile__( "nop\n" ); + #endif + #elif defined(__PPC64__) + asm volatile( "nop\n" ); + #endif + } + } + + { + // Insert memory pause + #if defined( __amd64 ) || defined( __amd64__ ) || \ + defined( __x86_64 ) || defined( __x86_64__ ) + #if !defined( _WIN32 ) /* IS NOT Microsoft Windows */ + asm volatile( "pause\n":::"memory" ); + #else + __asm__ __volatile__( "pause\n":::"memory" ); + #endif + #elif defined(__PPC64__) + asm volatile( "or 27, 27, 27" ::: "memory" ); + #endif + } + + #endif /* defined( KOKKOS_ENABLE_ASM ) */ +} + +}}} // namespace Kokkos::Impl::{anonymous} + +/*--------------------------------------------------------------------------*/ + +namespace Kokkos { +namespace Impl { + +void spinwait_while_equal( volatile int32_t & flag , const int32_t value ) +{ + Kokkos::store_fence(); + uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,0); + Kokkos::load_fence(); +} + +void spinwait_until_equal( volatile int32_t & flag , const int32_t value ) +{ + Kokkos::store_fence(); + uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,0); + Kokkos::load_fence(); +} + +void spinwait_while_equal( volatile int64_t & flag , const int64_t value ) +{ + Kokkos::store_fence(); + uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,0); + Kokkos::load_fence(); +} + +void spinwait_until_equal( volatile int64_t & flag , const int64_t value ) +{ + Kokkos::store_fence(); + uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,0); + Kokkos::load_fence(); +} + +void yield_while_equal( volatile int32_t & flag , const int32_t value ) +{ + Kokkos::store_fence(); + 
uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,1); + Kokkos::load_fence(); +} + +void yield_until_equal( volatile int32_t & flag , const int32_t value ) +{ + Kokkos::store_fence(); + uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,1); + Kokkos::load_fence(); +} + +void yield_while_equal( volatile int64_t & flag , const int64_t value ) +{ + Kokkos::store_fence(); + uint32_t i = 0 ; while( value == flag ) host_thread_yield(++i,1); + Kokkos::load_fence(); +} + +void yield_until_equal( volatile int64_t & flag , const int64_t value ) +{ + Kokkos::store_fence(); + uint32_t i = 0 ; while( value != flag ) host_thread_yield(++i,1); + Kokkos::load_fence(); +} + +} /* namespace Impl */ +} /* namespace Kokkos */ + +#else +void KOKKOS_CORE_SRC_IMPL_SPINWAIT_PREVENT_LINK_ERROR() {} +#endif + diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp b/lib/kokkos/core/src/impl/Kokkos_Spinwait.hpp similarity index 82% rename from lib/kokkos/core/src/impl/Kokkos_spinwait.hpp rename to lib/kokkos/core/src/impl/Kokkos_Spinwait.hpp index 6e34b8a943..b49e308566 100644 --- a/lib/kokkos/core/src/impl/Kokkos_spinwait.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_Spinwait.hpp @@ -59,6 +59,13 @@ void spinwait_until_equal( volatile int32_t & flag , const int32_t value ); void spinwait_while_equal( volatile int64_t & flag , const int64_t value ); void spinwait_until_equal( volatile int64_t & flag , const int64_t value ); + +void yield_while_equal( volatile int32_t & flag , const int32_t value ); +void yield_until_equal( volatile int32_t & flag , const int32_t value ); + +void yield_while_equal( volatile int64_t & flag , const int64_t value ); +void yield_until_equal( volatile int64_t & flag , const int64_t value ); + #else KOKKOS_INLINE_FUNCTION @@ -71,6 +78,16 @@ void spinwait_while_equal( volatile int64_t & , const int64_t ) {} KOKKOS_INLINE_FUNCTION void spinwait_until_equal( volatile int64_t & , const int64_t ) {} +KOKKOS_INLINE_FUNCTION +void yield_while_equal( 
volatile int32_t & , const int32_t ) {} +KOKKOS_INLINE_FUNCTION +void yield_until_equal( volatile int32_t & , const int32_t ) {} + +KOKKOS_INLINE_FUNCTION +void yield_while_equal( volatile int64_t & , const int64_t ) {} +KOKKOS_INLINE_FUNCTION +void yield_until_equal( volatile int64_t & , const int64_t ) {} + #endif } /* namespace Impl */ diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp index bee98e6745..5f8699302d 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue.hpp @@ -59,24 +59,15 @@ namespace Kokkos { namespace Impl { -/*\brief Implementation data for task data management, access, and execution. - * - * Curiously recurring template pattern (CRTP) - * to allow static_cast from the - * task root type and a task's FunctorType. - * - * TaskBase< Space , ResultType , FunctorType > - * : TaskBase< Space , ResultType , void > - * , FunctorType - * { ... }; - * - * TaskBase< Space , ResultType , void > - * : TaskBase< Space , void , void > - * { ... }; - */ -template< typename Space , typename ResultType , typename FunctorType > +template< class Space , typename ResultType , class FunctorType > class TaskBase ; +template< typename Space > +class TaskQueue ; + +template< typename Space > +class TaskQueueSpecialization ; + } /* namespace Impl */ } /* namespace Kokkos */ @@ -86,8 +77,217 @@ class TaskBase ; namespace Kokkos { namespace Impl { -template< typename Space > -class TaskQueueSpecialization ; +/** \brief Base class for task management, access, and execution. + * + * Inheritance structure to allow static_cast from the task root type + * and a task's FunctorType. + * + * // Enable a functor to access the base class + * // and provide memory for result value. + * TaskBase< Space , ResultType , FunctorType > + * : TaskBase< void , void , void > + * , FunctorType + * { ... }; + * Followed by memory allocated for result value. 
+ * + * + * States of a task: + * + * Constructing State, NOT IN a linked list + * m_wait == 0 + * m_next == 0 + * + * Scheduling transition : Constructing -> Waiting + * before: + * m_wait == 0 + * m_next == this task's initial dependence, 0 if none + * after: + * m_wait == EndTag + * m_next == EndTag + * + * Waiting State, IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == next of linked list of tasks + * + * transition : Waiting -> Executing + * before: + * m_next == EndTag + * after:: + * m_next == LockTag + * + * Executing State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == LockTag + * + * Respawn transition : Executing -> Executing-Respawn + * before: + * m_next == LockTag + * after: + * m_next == this task's updated dependence, 0 if none + * + * Executing-Respawn State, NOT IN a linked list + * m_apply != 0 + * m_queue != 0 + * m_ref_count > 0 + * m_wait == head of linked list of tasks waiting on this task + * m_next == this task's updated dependence, 0 if none + * + * transition : Executing -> Complete + * before: + * m_wait == head of linked list + * after: + * m_wait == LockTag + * + * Complete State, NOT IN a linked list + * m_wait == LockTag: cannot add dependence (<=> complete) + * m_next == LockTag: not a member of a wait queue + * + */ +template<> +class TaskBase< void , void , void > +{ +public: + + enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 }; + enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) }; + + template< typename > friend class Kokkos::TaskScheduler ; + + typedef TaskQueue< void > queue_type ; + + typedef void (* function_type) ( TaskBase * , void * ); + + // sizeof(TaskBase) == 48 + + function_type m_apply ; ///< Apply function pointer + queue_type * m_queue ; ///< Pointer to queue + TaskBase * m_wait 
; ///< Linked list of tasks waiting on this + TaskBase * m_next ; ///< Waiting linked-list next + int32_t m_ref_count ; ///< Reference count + int32_t m_alloc_size ; ///< Allocation size + int32_t m_dep_count ; ///< Aggregate's number of dependences + int16_t m_task_type ; ///< Type of task + int16_t m_priority ; ///< Priority of runnable task + + TaskBase( TaskBase && ) = delete ; + TaskBase( const TaskBase & ) = delete ; + TaskBase & operator = ( TaskBase && ) = delete ; + TaskBase & operator = ( const TaskBase & ) = delete ; + + KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; + + KOKKOS_INLINE_FUNCTION constexpr + TaskBase() + : m_apply( 0 ) + , m_queue( 0 ) + , m_wait( 0 ) + , m_next( 0 ) + , m_ref_count( 0 ) + , m_alloc_size( 0 ) + , m_dep_count( 0 ) + , m_task_type( 0 ) + , m_priority( 0 ) + {} + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + TaskBase * volatile * aggregate_dependences() volatile + { return reinterpret_cast( this + 1 ); } + + KOKKOS_INLINE_FUNCTION + bool requested_respawn() + { + // This should only be called when a task has finished executing and is + // in the transition to either the complete or executing-respawn state. + TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag ); + return lock != m_next; + } + + KOKKOS_INLINE_FUNCTION + void add_dependence( TaskBase* dep ) + { + // Precondition: lock == m_next + + TaskBase * const lock = (TaskBase *) LockTag ; + + // Assign dependence to m_next. It will be processed in the subsequent + // call to schedule. Error if the dependence is reset. + if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { + Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); + } + + if ( 0 != dep ) { + // The future may be destroyed upon returning from this call + // so increment reference count to track this assignment. 
+ Kokkos::atomic_increment( &(dep->m_ref_count) ); + } + } + + //---------------------------------------- + + KOKKOS_INLINE_FUNCTION + int32_t reference_count() const + { return *((int32_t volatile *)( & m_ref_count )); } + +}; + +static_assert( sizeof(TaskBase) == 48 + , "Verifying expected sizeof(TaskBase)" ); + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +template< typename ResultType > +struct TaskResult { + + enum : int32_t { size = sizeof(ResultType) }; + + using reference_type = ResultType & ; + + KOKKOS_INLINE_FUNCTION static + ResultType * ptr( TaskBase * task ) + { + return reinterpret_cast< ResultType * > + ( reinterpret_cast< char * >(task) + task->m_alloc_size - sizeof(ResultType) ); + } + + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskBase * task ) + { return *ptr( task ); } +}; + +template<> +struct TaskResult< void > { + + enum : int32_t { size = 0 }; + + using reference_type = void ; + + KOKKOS_INLINE_FUNCTION static + void * ptr( TaskBase * ) { return (void*) 0 ; } + + KOKKOS_INLINE_FUNCTION static + reference_type get( TaskBase * ) {} +}; + +} /* namespace Impl */ +} /* namespace Kokkos */ + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +namespace Kokkos { +namespace Impl { + +template<> +class TaskQueue< void > {}; /** \brief Manage task allocation, deallocation, and scheduling. * @@ -95,7 +295,7 @@ class TaskQueueSpecialization ; * All other aspects of task management have shared implementation. 
*/ template< typename ExecSpace > -class TaskQueue { +class TaskQueue : public TaskQueue { private: friend class TaskQueueSpecialization< ExecSpace > ; @@ -106,7 +306,7 @@ private: using memory_space = typename specialization::memory_space ; using device_type = Kokkos::Device< execution_space , memory_space > ; using memory_pool = Kokkos::MemoryPool< device_type > ; - using task_root_type = Kokkos::Impl::TaskBase ; + using task_root_type = Kokkos::Impl::TaskBase ; struct Destroy { TaskQueue * m_queue ; @@ -198,12 +398,10 @@ public: } // Assign task pointer with reference counting of assigned tasks - template< typename LV , typename RV > KOKKOS_FUNCTION static - void assign( TaskBase< execution_space,LV,void> ** const lhs - , TaskBase< execution_space,RV,void> * const rhs ) + void assign( task_root_type ** const lhs + , task_root_type * const rhs ) { - using task_lhs = TaskBase< execution_space,LV,void> ; #if 0 { printf( "assign( 0x%lx { 0x%lx %d %d } , 0x%lx { 0x%lx %d %d } )\n" @@ -225,7 +423,7 @@ public: // Force write of *lhs - *static_cast< task_lhs * volatile * >(lhs) = rhs ; + *static_cast< task_root_type * volatile * >(lhs) = rhs ; Kokkos::memory_fence(); } @@ -238,6 +436,38 @@ public: KOKKOS_FUNCTION void deallocate( void * p , size_t n ); ///< Deallocate to the memory pool + + + //---------------------------------------- + /**\brief Allocation size for a spawned task */ + + template< typename FunctorType > + KOKKOS_FUNCTION + size_t spawn_allocation_size() const + { + using value_type = typename FunctorType::value_type ; + + using task_type = Impl::TaskBase< execution_space + , value_type + , FunctorType > ; + + enum : size_t { align = ( 1 << 4 ) , align_mask = align - 1 }; + enum : size_t { task_size = sizeof(task_type) }; + enum : size_t { result_size = Impl::TaskResult< value_type >::size }; + enum : size_t { alloc_size = + ( ( task_size + align_mask ) & ~align_mask ) + + ( ( result_size + align_mask ) & ~align_mask ) }; + + return 
m_memory.allocate_block_size( task_size ); + } + + /**\brief Allocation size for a when_all aggregate */ + + KOKKOS_FUNCTION + size_t when_all_allocation_size( int narg ) const + { + return m_memory.allocate_block_size( sizeof(task_root_type) + narg * sizeof(task_root_type*) ); + } }; } /* namespace Impl */ @@ -249,261 +479,9 @@ public: namespace Kokkos { namespace Impl { -template<> -class TaskBase< void , void , void > { -public: - enum : int16_t { TaskTeam = 0 , TaskSingle = 1 , Aggregate = 2 }; - enum : uintptr_t { LockTag = ~uintptr_t(0) , EndTag = ~uintptr_t(1) }; -}; - -/** \brief Base class for task management, access, and execution. - * - * Inheritance structure to allow static_cast from the task root type - * and a task's FunctorType. - * - * // Enable a Future to access result data - * TaskBase< Space , ResultType , void > - * : TaskBase< void , void , void > - * { ... }; - * - * // Enable a functor to access the base class - * TaskBase< Space , ResultType , FunctorType > - * : TaskBase< Space , ResultType , void > - * , FunctorType - * { ... 
}; - * - * - * States of a task: - * - * Constructing State, NOT IN a linked list - * m_wait == 0 - * m_next == 0 - * - * Scheduling transition : Constructing -> Waiting - * before: - * m_wait == 0 - * m_next == this task's initial dependence, 0 if none - * after: - * m_wait == EndTag - * m_next == EndTag - * - * Waiting State, IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == next of linked list of tasks - * - * transition : Waiting -> Executing - * before: - * m_next == EndTag - * after:: - * m_next == LockTag - * - * Executing State, NOT IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == LockTag - * - * Respawn transition : Executing -> Executing-Respawn - * before: - * m_next == LockTag - * after: - * m_next == this task's updated dependence, 0 if none - * - * Executing-Respawn State, NOT IN a linked list - * m_apply != 0 - * m_queue != 0 - * m_ref_count > 0 - * m_wait == head of linked list of tasks waiting on this task - * m_next == this task's updated dependence, 0 if none - * - * transition : Executing -> Complete - * before: - * m_wait == head of linked list - * after: - * m_wait == LockTag - * - * Complete State, NOT IN a linked list - * m_wait == LockTag: cannot add dependence - * m_next == LockTag: not a member of a wait queue - * - */ -template< typename ExecSpace > -class TaskBase< ExecSpace , void , void > -{ -public: - - enum : int16_t { TaskTeam = TaskBase::TaskTeam - , TaskSingle = TaskBase::TaskSingle - , Aggregate = TaskBase::Aggregate }; - - enum : uintptr_t { LockTag = TaskBase::LockTag - , EndTag = TaskBase::EndTag }; - - using execution_space = ExecSpace ; - using queue_type = TaskQueue< execution_space > ; - - template< typename > friend class Kokkos::TaskScheduler ; - - typedef void (* function_type) ( TaskBase * , void * ); - - // 
sizeof(TaskBase) == 48 - - function_type m_apply ; ///< Apply function pointer - queue_type * m_queue ; ///< Queue in which this task resides - TaskBase * m_wait ; ///< Linked list of tasks waiting on this - TaskBase * m_next ; ///< Waiting linked-list next - int32_t m_ref_count ; ///< Reference count - int32_t m_alloc_size ; ///< Allocation size - int32_t m_dep_count ; ///< Aggregate's number of dependences - int16_t m_task_type ; ///< Type of task - int16_t m_priority ; ///< Priority of runnable task - - TaskBase() = delete ; - TaskBase( TaskBase && ) = delete ; - TaskBase( const TaskBase & ) = delete ; - TaskBase & operator = ( TaskBase && ) = delete ; - TaskBase & operator = ( const TaskBase & ) = delete ; - - KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; - - // Constructor for a runnable task - KOKKOS_INLINE_FUNCTION - constexpr TaskBase( function_type arg_apply - , queue_type * arg_queue - , TaskBase * arg_dependence - , int arg_ref_count - , int arg_alloc_size - , int arg_task_type - , int arg_priority - ) noexcept - : m_apply( arg_apply ) - , m_queue( arg_queue ) - , m_wait( 0 ) - , m_next( arg_dependence ) - , m_ref_count( arg_ref_count ) - , m_alloc_size( arg_alloc_size ) - , m_dep_count( 0 ) - , m_task_type( arg_task_type ) - , m_priority( arg_priority ) - {} - - // Constructor for an aggregate task - KOKKOS_INLINE_FUNCTION - constexpr TaskBase( queue_type * arg_queue - , int arg_ref_count - , int arg_alloc_size - , int arg_dep_count - ) noexcept - : m_apply( 0 ) - , m_queue( arg_queue ) - , m_wait( 0 ) - , m_next( 0 ) - , m_ref_count( arg_ref_count ) - , m_alloc_size( arg_alloc_size ) - , m_dep_count( arg_dep_count ) - , m_task_type( Aggregate ) - , m_priority( 0 ) - {} - - //---------------------------------------- - - KOKKOS_INLINE_FUNCTION - TaskBase ** aggregate_dependences() - { return reinterpret_cast( this + 1 ); } - - KOKKOS_INLINE_FUNCTION - bool requested_respawn() - { - // This should only be called when a task has finished executing and is 
- // in the transition to either the complete or executing-respawn state. - TaskBase * const lock = reinterpret_cast< TaskBase * >( LockTag ); - return lock != m_next; - } - - KOKKOS_INLINE_FUNCTION - void add_dependence( TaskBase* dep ) - { - // Precondition: lock == m_next - - TaskBase * const lock = (TaskBase *) LockTag ; - - // Assign dependence to m_next. It will be processed in the subsequent - // call to schedule. Error if the dependence is reset. - if ( lock != Kokkos::atomic_exchange( & m_next, dep ) ) { - Kokkos::abort("TaskScheduler ERROR: resetting task dependence"); - } - - if ( 0 != dep ) { - // The future may be destroyed upon returning from this call - // so increment reference count to track this assignment. - Kokkos::atomic_increment( &(dep->m_ref_count) ); - } - } - - using get_return_type = void ; - - KOKKOS_INLINE_FUNCTION - get_return_type get() const {} -}; - -template < typename ExecSpace , typename ResultType > -class TaskBase< ExecSpace , ResultType , void > - : public TaskBase< ExecSpace , void , void > -{ -private: - - using root_type = TaskBase ; - using function_type = typename root_type::function_type ; - using queue_type = typename root_type::queue_type ; - - static_assert( sizeof(root_type) == 48 , "" ); - - TaskBase() = delete ; - TaskBase( TaskBase && ) = delete ; - TaskBase( const TaskBase & ) = delete ; - TaskBase & operator = ( TaskBase && ) = delete ; - TaskBase & operator = ( const TaskBase & ) = delete ; - -public: - - ResultType m_result ; - - KOKKOS_INLINE_FUNCTION ~TaskBase() = default ; - - // Constructor for runnable task - KOKKOS_INLINE_FUNCTION - constexpr TaskBase( function_type arg_apply - , queue_type * arg_queue - , root_type * arg_dependence - , int arg_ref_count - , int arg_alloc_size - , int arg_task_type - , int arg_priority - ) - : root_type( arg_apply - , arg_queue - , arg_dependence - , arg_ref_count - , arg_alloc_size - , arg_task_type - , arg_priority - ) - , m_result() - {} - - using get_return_type = 
ResultType const & ; - - KOKKOS_INLINE_FUNCTION - get_return_type get() const { return m_result ; } -}; - -template< typename ExecSpace , typename ResultType , typename FunctorType > +template< class ExecSpace , typename ResultType , class FunctorType > class TaskBase - : public TaskBase< ExecSpace , ResultType , void > + : public TaskBase< void , void , void > , public FunctorType { private: @@ -516,50 +494,31 @@ private: public: - using root_type = TaskBase< ExecSpace , void , void > ; - using base_type = TaskBase< ExecSpace , ResultType , void > ; - using specialization = TaskQueueSpecialization< ExecSpace > ; - using function_type = typename root_type::function_type ; - using queue_type = typename root_type::queue_type ; - using member_type = typename specialization::member_type ; + using root_type = TaskBase< void , void , void > ; using functor_type = FunctorType ; using result_type = ResultType ; - template< typename Type > - KOKKOS_INLINE_FUNCTION static - void apply_functor - ( Type * const task - , typename std::enable_if - < std::is_same< typename Type::result_type , void >::value - , member_type * const - >::type member - ) - { - using fType = typename Type::functor_type ; - static_cast(task)->operator()( *member ); - } + using specialization = TaskQueueSpecialization< ExecSpace > ; + using member_type = typename specialization::member_type ; - template< typename Type > - KOKKOS_INLINE_FUNCTION static - void apply_functor - ( Type * const task - , typename std::enable_if - < ! 
std::is_same< typename Type::result_type , void >::value - , member_type * const - >::type member - ) - { - using fType = typename Type::functor_type ; - static_cast(task)->operator()( *member , task->m_result ); - } + KOKKOS_INLINE_FUNCTION + void apply_functor( member_type * const member , void * ) + { functor_type::operator()( *member ); } + + template< typename T > + KOKKOS_INLINE_FUNCTION + void apply_functor( member_type * const member + , T * const result ) + { functor_type::operator()( *member , *result ); } KOKKOS_FUNCTION static void apply( root_type * root , void * exec ) { TaskBase * const task = static_cast< TaskBase * >( root ); member_type * const member = reinterpret_cast< member_type * >( exec ); + result_type * const result = TaskResult< result_type >::ptr( task ); - TaskBase::template apply_functor( task , member ); + task->apply_functor( member , result ); // Task may be serial or team. // If team then must synchronize before querying if respawn was requested. @@ -576,26 +535,9 @@ public: } // Constructor for runnable task - KOKKOS_INLINE_FUNCTION - constexpr TaskBase( function_type arg_apply - , queue_type * arg_queue - , root_type * arg_dependence - , int arg_ref_count - , int arg_alloc_size - , int arg_task_type - , int arg_priority - , FunctorType && arg_functor - ) - : base_type( arg_apply - , arg_queue - , arg_dependence - , arg_ref_count - , arg_alloc_size - , arg_task_type - , arg_priority - ) - , functor_type( arg_functor ) - {} + KOKKOS_INLINE_FUNCTION constexpr + TaskBase( FunctorType && arg_functor ) + : root_type() , functor_type( std::move(arg_functor) ) {} KOKKOS_INLINE_FUNCTION ~TaskBase() {} diff --git a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp index aee381afad..1974f7e1ca 100644 --- a/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_TaskQueue_impl.hpp @@ -44,6 +44,8 @@ #include #if defined( KOKKOS_ENABLE_TASKDAG ) +#define 
KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING 0 + namespace Kokkos { namespace Impl { @@ -100,9 +102,11 @@ KOKKOS_FUNCTION void TaskQueue< ExecSpace >::decrement ( TaskQueue< ExecSpace >::task_root_type * task ) { - const int count = Kokkos::atomic_fetch_add(&(task->m_ref_count),-1); + task_root_type volatile & t = *task ; -#if 0 + const int count = Kokkos::atomic_fetch_add(&(t.m_ref_count),-1); + +#if KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING if ( 1 == count ) { printf( "decrement-destroy( 0x%lx { 0x%lx %d %d } )\n" , uintptr_t( task ) @@ -114,9 +118,13 @@ void TaskQueue< ExecSpace >::decrement #endif if ( ( 1 == count ) && - ( task->m_next == (task_root_type *) task_root_type::LockTag ) ) { + ( t.m_next == (task_root_type *) task_root_type::LockTag ) ) { // Reference count is zero and task is complete, deallocate. - task->m_queue->deallocate( task , task->m_alloc_size ); + + TaskQueue< ExecSpace > * const queue = + static_cast< TaskQueue< ExecSpace > * >( t.m_queue ); + + queue->deallocate( task , t.m_alloc_size ); } else if ( count <= 1 ) { Kokkos::abort("TaskScheduler task has negative reference count or is incomplete" ); @@ -171,7 +179,7 @@ bool TaskQueue< ExecSpace >::push_task // Fail the push attempt if the queue is locked; // otherwise retry until the push succeeds. 
-#if 0 +#if KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING printf( "push_task( 0x%lx { 0x%lx } 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" , uintptr_t(queue) , uintptr_t(*queue) @@ -186,9 +194,9 @@ bool TaskQueue< ExecSpace >::push_task task_root_type * const zero = (task_root_type *) 0 ; task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; - task_root_type * volatile * const next = & task->m_next ; + task_root_type * volatile & next = task->m_next ; - if ( zero != *next ) { + if ( zero != next ) { Kokkos::abort("TaskQueue::push_task ERROR: already a member of another queue" ); } @@ -196,9 +204,9 @@ bool TaskQueue< ExecSpace >::push_task while ( lock != y ) { - *next = y ; + next = y ; - // Do not proceed until '*next' has been stored. + // Do not proceed until 'next' has been stored. Kokkos::memory_fence(); task_root_type * const x = y ; @@ -211,9 +219,9 @@ bool TaskQueue< ExecSpace >::push_task // Failed, replace 'task->m_next' value since 'task' remains // not a member of a queue. - *next = zero ; + next = zero ; - // Do not proceed until '*next' has been stored. + // Do not proceed until 'next' has been stored. Kokkos::memory_fence(); return false ; @@ -270,11 +278,13 @@ TaskQueue< ExecSpace >::pop_ready_task // This thread has exclusive access to // the queue and the popped task's m_next. 
- *queue = task->m_next ; task->m_next = lock ; + task_root_type * volatile & next = task->m_next ; + + *queue = next ; next = lock ; Kokkos::memory_fence(); -#if 0 +#if KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING printf( "pop_ready_task( 0x%lx 0x%lx { 0x%lx 0x%lx %d %d %d } )\n" , uintptr_t(queue) , uintptr_t(task) @@ -323,7 +333,7 @@ void TaskQueue< ExecSpace >::schedule_runnable // task->m_wait == head of linked list (queue) // task->m_next == member of linked list (queue) -#if 0 +#if KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING printf( "schedule_runnable( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" , uintptr_t(task) , uintptr_t(task->m_wait) @@ -337,20 +347,22 @@ void TaskQueue< ExecSpace >::schedule_runnable task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + task_root_type volatile & t = *task ; + bool respawn = false ; //---------------------------------------- - if ( zero == task->m_wait ) { + if ( zero == t.m_wait ) { // Task in Constructing state // - Transition to Waiting state // Preconditions: // - call occurs exclusively within a single thread - task->m_wait = end ; + t.m_wait = end ; // Task in Waiting state } - else if ( lock != task->m_wait ) { + else if ( lock != t.m_wait ) { // Task in Executing state with Respawn request // - Update dependence // - Transition to Waiting state @@ -373,7 +385,9 @@ void TaskQueue< ExecSpace >::schedule_runnable // Exclusive access so don't need an atomic exchange // task_root_type * dep = Kokkos::atomic_exchange( & task->m_next , zero ); - task_root_type * dep = task->m_next ; task->m_next = zero ; + task_root_type * dep = t.m_next ; t.m_next = zero ; + + Kokkos::memory_fence(); const bool is_ready = ( 0 == dep ) || ( ! 
push_task( & dep->m_wait , task ) ); @@ -398,7 +412,7 @@ void TaskQueue< ExecSpace >::schedule_runnable Kokkos::atomic_increment( & m_ready_count ); task_root_type * volatile * const ready_queue = - & m_ready[ task->m_priority ][ task->m_task_type ]; + & m_ready[ t.m_priority ][ t.m_task_type ]; // A push_task fails if the ready queue is locked. // A ready queue is only locked during a push or pop; @@ -441,7 +455,7 @@ void TaskQueue< ExecSpace >::schedule_aggregate // task->m_wait == head of linked list (queue) // task->m_next == member of linked list (queue) -#if 0 +#if KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING printf( "schedule_aggregate( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" , uintptr_t(task) , uintptr_t(task->m_wait) @@ -455,18 +469,20 @@ void TaskQueue< ExecSpace >::schedule_aggregate task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; + task_root_type volatile & t = *task ; + //---------------------------------------- - if ( zero == task->m_wait ) { + if ( zero == t.m_wait ) { // Task in Constructing state // - Transition to Waiting state // Preconditions: // - call occurs exclusively within a single thread - task->m_wait = end ; + t.m_wait = end ; // Task in Waiting state } - else if ( lock == task->m_wait ) { + else if ( lock == t.m_wait ) { // Task in Complete state Kokkos::abort("TaskQueue::schedule_aggregate ERROR: task is complete"); } @@ -477,14 +493,14 @@ void TaskQueue< ExecSpace >::schedule_aggregate // (1) created or // (2) being removed from a completed task's wait list. - task_root_type ** const aggr = task->aggregate_dependences(); + task_root_type * volatile * const aggr = t.aggregate_dependences(); // Assume the 'when_all' is complete until a dependence is // found that is not complete. 
bool is_complete = true ; - for ( int i = task->m_dep_count ; 0 < i && is_complete ; ) { + for ( int i = t.m_dep_count ; 0 < i && is_complete ; ) { --i ; @@ -523,7 +539,7 @@ void TaskQueue< ExecSpace >::schedule_aggregate // Complete the when_all 'task' to schedule other tasks // that are waiting for the when_all 'task' to complete. - task->m_next = lock ; + t.m_next = lock ; complete( task ); @@ -573,7 +589,7 @@ void TaskQueue< ExecSpace >::complete task_root_type * const lock = (task_root_type *) task_root_type::LockTag ; task_root_type * const end = (task_root_type *) task_root_type::EndTag ; -#if 0 +#if KOKKOS_IMPL_DEBUG_TASKDAG_SCHEDULING printf( "complete( 0x%lx { 0x%lx 0x%lx %d %d %d }\n" , uintptr_t(task) , uintptr_t(task->m_wait) @@ -584,11 +600,13 @@ void TaskQueue< ExecSpace >::complete fflush( stdout ); #endif - const bool runnable = task_root_type::Aggregate != task->m_task_type ; + task_root_type volatile & t = *task ; + + const bool runnable = task_root_type::Aggregate != t.m_task_type ; //---------------------------------------- - if ( runnable && lock != task->m_next ) { + if ( runnable && lock != t.m_next ) { // Is a runnable task has finished executing and requested respawn. // Schedule the task for subsequent execution. @@ -607,7 +625,7 @@ void TaskQueue< ExecSpace >::complete // Stop other tasks from adding themselves to this task's wait queue // by locking the head of this task's wait queue. 
- task_root_type * x = Kokkos::atomic_exchange( & task->m_wait , lock ); + task_root_type * x = Kokkos::atomic_exchange( & t.m_wait , lock ); if ( x != (task_root_type *) lock ) { @@ -627,9 +645,13 @@ void TaskQueue< ExecSpace >::complete // Have exclusive access to 'x' until it is scheduled // Set x->m_next = zero <= no dependence, not a respawn - task_root_type * const next = x->m_next ; x->m_next = 0 ; + task_root_type volatile & vx = *x ; - if ( task_root_type::Aggregate != x->m_task_type ) { + task_root_type * const next = vx.m_next ; vx.m_next = 0 ; + + Kokkos::memory_fence(); + + if ( task_root_type::Aggregate != vx.m_task_type ) { schedule_runnable( x ); } else { diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp index c55636b64e..ed1a71bea7 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewArray.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -47,7 +47,6 @@ #include namespace Kokkos { -namespace Experimental { namespace Impl { template< class DataType , class ArrayLayout , class V , size_t N , class P > @@ -94,13 +93,12 @@ public: typedef typename ViewDataType< non_const_scalar_type , array_scalar_dimension >::type non_const_scalar_array_type ; }; -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { /** \brief View mapping for non-specialized data type and standard layout */ @@ -597,7 +595,7 @@ public: } }; -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp index 6381aee468..f32c6bb2ee 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewCtor.hpp @@ -96,6 +96,27 @@ struct is_view_label< const char[N] > : public std::true_type {}; template< typename ... 
P > struct ViewCtorProp ; +// Forward declare +template< typename Specialize , typename T > +struct CommonViewAllocProp ; + +/* Common value_type stored as ViewCtorProp + */ +template< typename Specialize , typename T > +struct ViewCtorProp< void , CommonViewAllocProp > +{ + ViewCtorProp() = default ; + ViewCtorProp( const ViewCtorProp & ) = default ; + ViewCtorProp & operator = ( const ViewCtorProp & ) = default ; + + using type = CommonViewAllocProp ; + + ViewCtorProp( const type & arg ) : value( arg ) {} + ViewCtorProp( type && arg ) : value( arg ) {} + + type value ; +}; + /* std::integral_constant are dummy arguments * that avoid duplicate base class errors */ diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp index 900bd88f1c..d346f9e639 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewMapping.hpp @@ -62,7 +62,6 @@ //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { template< unsigned I , size_t ... Args > @@ -250,7 +249,7 @@ struct ViewDimensionAssignable< ViewDimension< DstArgs ... > }; -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -266,14 +265,11 @@ struct ALL_t { }} // namespace Kokkos::Impl namespace Kokkos { -namespace Experimental { namespace Impl { -using Kokkos::Impl::ALL_t ; - template< class T > struct is_integral_extent_type -{ enum { value = std::is_same::value ? 1 : 0 }; }; +{ enum { value = std::is_same::value ? 
1 : 0 }; }; template< class iType > struct is_integral_extent_type< std::pair > @@ -314,10 +310,10 @@ struct SubviewLegalArgsCompileTime; template struct SubviewLegalArgsCompileTime { - enum { value =(((CurrentArg==RankDest-1) && (Kokkos::Experimental::Impl::is_integral_extent_type::value)) || + enum { value =(((CurrentArg==RankDest-1) && (Kokkos::Impl::is_integral_extent_type::value)) || ((CurrentArg>=RankDest) && (std::is_integral::value)) || ((CurrentArg::value)) || - ((CurrentArg==0) && (Kokkos::Experimental::Impl::is_integral_extent_type::value)) + ((CurrentArg==0) && (Kokkos::Impl::is_integral_extent_type::value)) ) && (SubviewLegalArgsCompileTime::value)}; }; @@ -331,7 +327,7 @@ struct SubviewLegalArgsCompileTime struct SubviewLegalArgsCompileTime { - enum { value =(((CurrentArg==RankSrc-RankDest) && (Kokkos::Experimental::Impl::is_integral_extent_type::value)) || + enum { value =(((CurrentArg==RankSrc-RankDest) && (Kokkos::Impl::is_integral_extent_type::value)) || ((CurrentArg::value)) || ((CurrentArg>=RankSrc-RankDest) && (std::is_same::value)) ) && (SubviewLegalArgsCompileTime::value)}; @@ -403,7 +399,7 @@ private: bool set( unsigned domain_rank , unsigned range_rank , const ViewDimension< DimArgs ... > & dim - , const Kokkos::Experimental::Impl::ALL_t + , const Kokkos::Impl::ALL_t , Args ... args ) { m_begin[ domain_rank ] = 0 ; @@ -519,7 +515,7 @@ private: , unsigned domain_rank , unsigned range_rank , const ViewDimension< DimArgs ... > & dim - , const Kokkos::Experimental::Impl::ALL_t + , const Kokkos::Impl::ALL_t , Args ... args ) const { const int n = std::min( buf_len , @@ -670,13 +666,12 @@ public: { return unsigned(i) < InternalRangeRank ? 
m_index[i] : ~0u ; } }; -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { /** \brief Given a value type and dimension generate the View data type */ @@ -814,13 +809,12 @@ public: typedef non_const_type non_const_scalar_array_type ; }; -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { template < class Dimension , class Layout , typename Enable = void > @@ -1228,14 +1222,14 @@ private: // If memory alignment is a multiple of the trivial scalar size then attempt to align. enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 }; - enum { div_ok = div ? div : 1 }; // To valid modulo zero in constexpr + enum { div_ok = (div != 0) ? div : 1 }; // To valid modulo zero in constexpr KOKKOS_INLINE_FUNCTION static constexpr size_t stride( size_t const N ) - { - return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) - ? N + align - ( N % div_ok ) : N ; - } + { + return ( (align != 0) && ((Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align) < N) && ((N % div_ok) != 0) ) + ? N + align - ( N % div_ok ) : N ; + } }; public: @@ -1707,12 +1701,12 @@ private: // If memory alignment is a multiple of the trivial scalar size then attempt to align. enum { align = 0 != TrivialScalarSize && 0 == mod ? div : 0 }; - enum { div_ok = div ? div : 1 }; // To valid modulo zero in constexpr + enum { div_ok = (div != 0) ? 
div : 1 }; // To valid modulo zero in constexpr KOKKOS_INLINE_FUNCTION static constexpr size_t stride( size_t const N ) { - return ( align && ( Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align < N ) && ( N % div_ok ) ) + return ( (align != 0) && ((Kokkos::Impl::MEMORY_ALIGNMENT_THRESHOLD * align) < N) && ((N % div_ok) != 0) ) ? N + align - ( N % div_ok ) : N ; } }; @@ -2225,13 +2219,12 @@ public: {} }; -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { /** \brief ViewDataHandle provides the type of the 'data handle' which the view @@ -2422,13 +2415,12 @@ struct ViewDataHandle< Traits , return handle_type( arg_data_ptr + offset ); } }; -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { //---------------------------------------------------------------------------- @@ -2451,8 +2443,9 @@ template< class ExecSpace , class ValueType > struct ViewValueFunctor< ExecSpace , ValueType , false /* is_scalar */ > { typedef Kokkos::RangePolicy< ExecSpace > PolicyType ; + typedef typename ExecSpace::execution_space Exec; - ExecSpace space ; + Exec space ; ValueType * ptr ; size_t n ; bool destroy ; @@ -2597,6 +2590,9 @@ private: public: + typedef void printable_label_typedef; + enum { is_managed = Traits::is_managed }; + //---------------------------------------- // Domain dimensions @@ -2944,7 +2940,7 @@ public: Kokkos::abort("View Assignment: trying to assign runtime dimension to non matching compile time dimension."); } dst.m_offset = dst_offset_type( src.m_offset ); - dst.m_handle = 
Kokkos::Experimental::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track ); + dst.m_handle = Kokkos::Impl::ViewDataHandle< DstTraits >::assign( src.m_handle , src_track ); } }; @@ -3102,7 +3098,7 @@ public: //---------------------------------------------------------------------------- -}}} // namespace Kokkos::Experimental::Impl +}} // namespace Kokkos::Impl //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- @@ -3151,6 +3147,77 @@ void view_error_operator_bounds view_error_operator_bounds(buf+n,len-n,map,args...); } +#if ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + +/* Check #3: is the View managed as determined by the MemoryTraits? */ +template< class MapType, + bool is_managed = (MapType::is_managed != 0) > +struct OperatorBoundsErrorOnDevice; + +template< class MapType > +struct OperatorBoundsErrorOnDevice< MapType, false > { +KOKKOS_INLINE_FUNCTION +static void run(MapType const&) { + Kokkos::abort("View bounds error"); +} +}; + +template< class MapType > +struct OperatorBoundsErrorOnDevice< MapType, true > { +KOKKOS_INLINE_FUNCTION +static void run(MapType const& map) { + char const* const user_alloc_start = reinterpret_cast(map.data()); + char const* const header_start = user_alloc_start - sizeof(SharedAllocationHeader); + SharedAllocationHeader const* const header = + reinterpret_cast(header_start); + char const* const label = header->label(); + enum { LEN = 128 }; + char msg[LEN]; + char const* const first_part = "View bounds error of view "; + char* p = msg; + char* const end = msg + LEN - 1; + for (char const* p2 = first_part; (*p2 != '\0') && (p < end); ++p, ++p2) { + *p = *p2; + } + for (char const* p2 = label; (*p2 != '\0') && (p < end); ++p, ++p2) { + *p = *p2; + } + *p = '\0'; + Kokkos::abort(msg); +} +}; + +/* Check #2: does the ViewMapping have the printable_label_typedef defined? 
+ See above that only the non-specialized standard-layout ViewMapping has + this defined by default. + The existence of this typedef indicates the existence of MapType::is_managed */ +template< class T, class Enable = void > +struct has_printable_label_typedef : public std::false_type {}; + +template +struct has_printable_label_typedef< + T, typename enable_if_type::type> + : public std::true_type +{}; + +template< class MapType > +KOKKOS_INLINE_FUNCTION +void operator_bounds_error_on_device( + MapType const&, + std::false_type) { + Kokkos::abort("View bounds error"); +} + +template< class MapType > +KOKKOS_INLINE_FUNCTION +void operator_bounds_error_on_device( + MapType const& map, + std::true_type) { + OperatorBoundsErrorOnDevice< MapType >::run(map); +} + +#endif // ! defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) + template< class MemorySpace , class MapType , class ... Args > KOKKOS_INLINE_FUNCTION void view_verify_operator_bounds @@ -3166,7 +3233,17 @@ void view_verify_operator_bounds view_error_operator_bounds<0>( buffer + n , LEN - n , map , args ... ); Kokkos::Impl::throw_runtime_exception(std::string(buffer)); #else - Kokkos::abort("View bounds error"); + /* Check #1: is there a SharedAllocationRecord? + (we won't use it, but if its not there then there isn't + a corresponding SharedAllocationHeader containing a label). + This check should cover the case of Views that don't + have the Unmanaged trait but were initialized by pointer. 
*/ + if (tracker.has_record()) { + operator_bounds_error_on_device( + map, has_printable_label_typedef()); + } else { + Kokkos::abort("View bounds error"); + } #endif } } diff --git a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp index ecbcf72fe0..5a8600e0ae 100644 --- a/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp +++ b/lib/kokkos/core/src/impl/Kokkos_ViewTile.hpp @@ -1,13 +1,13 @@ /* //@HEADER // ************************************************************************ -// +// // Kokkos v. 2.0 // Copyright (2014) Sandia Corporation -// +// // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, // the U.S. Government retains certain rights in this software. -// +// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -36,7 +36,7 @@ // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// +// // ************************************************************************ //@HEADER */ @@ -48,7 +48,6 @@ //---------------------------------------------------------------------------- namespace Kokkos { -namespace Experimental { namespace Impl { // View mapping for rank two tiled array @@ -195,11 +194,9 @@ struct ViewMapping }; } /* namespace Impl */ -} /* namespace Experimental */ } /* namespace Kokkos */ namespace Kokkos { -namespace Experimental { template< typename T , unsigned N0 , unsigned N1 , class ... 
P > KOKKOS_INLINE_FUNCTION @@ -217,7 +214,6 @@ tile_subview( const Kokkos::View,P...> & ( src , SrcLayout() , i_tile0 , i_tile1 ); } -} /* namespace Experimental */ } /* namespace Kokkos */ //---------------------------------------------------------------------------- diff --git a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp b/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp deleted file mode 100644 index 101b714fcd..0000000000 --- a/lib/kokkos/core/src/impl/Kokkos_spinwait.cpp +++ /dev/null @@ -1,183 +0,0 @@ -/* -//@HEADER -// ************************************************************************ -// -// Kokkos v. 2.0 -// Copyright (2014) Sandia Corporation -// -// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, -// the U.S. Government retains certain rights in this software. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// 1. Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// -// 2. Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// -// 3. Neither the name of the Corporation nor the names of the -// contributors may be used to endorse or promote products derived from -// this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF -// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -// -// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) -// -// ************************************************************************ -//@HEADER -*/ - -#include -#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST ) - -#include - -#include -#include - -/*--------------------------------------------------------------------------*/ - -#if !defined( _WIN32 ) - #if defined( KOKKOS_ENABLE_ASM ) - #if defined( __arm__ ) || defined( __aarch64__ ) - /* No-operation instruction to idle the thread. 
*/ - #define KOKKOS_INTERNAL_PAUSE - #else - /* Pause instruction to prevent excess processor bus usage */ - #define KOKKOS_INTERNAL_PAUSE asm volatile("pause\n":::"memory") - #endif - #define KOKKOS_INTERNAL_NOP2 asm volatile("nop\n" "nop\n") - #define KOKKOS_INTERNAL_NOP4 KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2 - #define KOKKOS_INTERNAL_NOP8 KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4; - #define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8; - #define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16; - namespace { - inline void kokkos_internal_yield( const unsigned i ) noexcept { - switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) { - case 0u: KOKKOS_INTERNAL_NOP2; break; - case 1u: KOKKOS_INTERNAL_NOP4; break; - case 2u: KOKKOS_INTERNAL_NOP8; break; - case 3u: KOKKOS_INTERNAL_NOP16; break; - default: KOKKOS_INTERNAL_NOP32; - } - KOKKOS_INTERNAL_PAUSE; - } - } - #else - #include - namespace { - inline void kokkos_internal_yield( const unsigned ) noexcept { - sched_yield(); - } - } - #endif -#else // defined( _WIN32 ) - #if defined ( KOKKOS_ENABLE_WINTHREAD ) - #include - namespace { - inline void kokkos_internal_yield( const unsigned ) noexcept { - Sleep(0); - } - } - #elif defined( _MSC_VER ) - #define NOMINMAX - #include - #include - namespace { - inline void kokkos_internal_yield( const unsigned ) noexcept { - YieldProcessor(); - } - } - #else - #define KOKKOS_INTERNAL_PAUSE __asm__ __volatile__("pause\n":::"memory") - #define KOKKOS_INTERNAL_NOP2 __asm__ __volatile__("nop\n" "nop") - #define KOKKOS_INTERNAL_NOP4 KOKKOS_INTERNAL_NOP2; KOKKOS_INTERNAL_NOP2 - #define KOKKOS_INTERNAL_NOP8 KOKKOS_INTERNAL_NOP4; KOKKOS_INTERNAL_NOP4; - #define KOKKOS_INTERNAL_NOP16 KOKKOS_INTERNAL_NOP8; KOKKOS_INTERNAL_NOP8; - #define KOKKOS_INTERNAL_NOP32 KOKKOS_INTERNAL_NOP16; KOKKOS_INTERNAL_NOP16; - namespace { - inline void kokkos_internal_yield( const unsigned i ) noexcept { - switch (Kokkos::Impl::bit_scan_reverse((i >> 2)+1u)) { - 
case 0: KOKKOS_INTERNAL_NOP2; break; - case 1: KOKKOS_INTERNAL_NOP4; break; - case 2: KOKKOS_INTERNAL_NOP8; break; - case 3: KOKKOS_INTERNAL_NOP16; break; - default: KOKKOS_INTERNAL_NOP32; - } - KOKKOS_INTERNAL_PAUSE; - } - } - #endif -#endif - - -/*--------------------------------------------------------------------------*/ - -namespace Kokkos { -namespace Impl { - -void spinwait_while_equal( volatile int32_t & flag , const int32_t value ) -{ - Kokkos::store_fence(); - unsigned i = 0; - while ( value == flag ) { - kokkos_internal_yield(i); - ++i; - } - Kokkos::load_fence(); -} - -void spinwait_until_equal( volatile int32_t & flag , const int32_t value ) -{ - Kokkos::store_fence(); - unsigned i = 0; - while ( value != flag ) { - kokkos_internal_yield(i); - ++i; - } - Kokkos::load_fence(); -} - -void spinwait_while_equal( volatile int64_t & flag , const int64_t value ) -{ - Kokkos::store_fence(); - unsigned i = 0; - while ( value == flag ) { - kokkos_internal_yield(i); - ++i; - } - Kokkos::load_fence(); -} - -void spinwait_until_equal( volatile int64_t & flag , const int64_t value ) -{ - Kokkos::store_fence(); - unsigned i = 0; - while ( value != flag ) { - kokkos_internal_yield(i); - ++i; - } - Kokkos::load_fence(); -} - -} /* namespace Impl */ -} /* namespace Kokkos */ - -#else -void KOKKOS_CORE_SRC_IMPL_SPINWAIT_PREVENT_LINK_ERROR() {} -#endif - diff --git a/lib/kokkos/core/unit_test/CMakeLists.txt b/lib/kokkos/core/unit_test/CMakeLists.txt index 5d6f25ac95..475b6bb48a 100644 --- a/lib/kokkos/core/unit_test/CMakeLists.txt +++ b/lib/kokkos/core/unit_test/CMakeLists.txt @@ -57,6 +57,7 @@ IF(Kokkos_ENABLE_Serial) serial/TestSerial_ViewMapping_b.cpp serial/TestSerial_ViewMapping_subview.cpp serial/TestSerial_ViewOfClass.cpp + serial/TestSerial_WorkGraph.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -102,6 +103,7 @@ IF(Kokkos_ENABLE_Pthread) threads/TestThreads_ViewMapping_b.cpp threads/TestThreads_ViewMapping_subview.cpp 
threads/TestThreads_ViewOfClass.cpp + threads/TestThreads_WorkGraph.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -147,6 +149,8 @@ IF(Kokkos_ENABLE_OpenMP) openmp/TestOpenMP_ViewMapping_b.cpp openmp/TestOpenMP_ViewMapping_subview.cpp openmp/TestOpenMP_ViewOfClass.cpp + openmp/TestOpenMP_WorkGraph.cpp + openmp/TestOpenMP_UniqueToken.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -237,6 +241,7 @@ IF(Kokkos_ENABLE_Cuda) cuda/TestCuda_ViewMapping_b.cpp cuda/TestCuda_ViewMapping_subview.cpp cuda/TestCuda_ViewOfClass.cpp + cuda/TestCuda_WorkGraph.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " @@ -253,6 +258,7 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( default/TestDefaultDeviceType_b.cpp default/TestDefaultDeviceType_c.cpp default/TestDefaultDeviceType_d.cpp + default/TestDefaultDeviceTypeResize.cpp COMM serial mpi NUM_MPI_PROCS 1 FAIL_REGULAR_EXPRESSION " FAILED " diff --git a/lib/kokkos/core/unit_test/Makefile b/lib/kokkos/core/unit_test/Makefile index 41f192a486..c877aa7dd2 100644 --- a/lib/kokkos/core/unit_test/Makefile +++ b/lib/kokkos/core/unit_test/Makefile @@ -62,8 +62,9 @@ endif OBJ_CUDA += TestCuda_TeamReductionScan.o OBJ_CUDA += TestCuda_Other.o OBJ_CUDA += TestCuda_MDRange.o - OBJ_CUDA += TestCuda_Task.o + OBJ_CUDA += TestCuda_Task.o TestCuda_WorkGraph.o OBJ_CUDA += TestCuda_Spaces.o + OBJ_CUDA += TestCuda_UniqueToken.o TARGETS += KokkosCore_UnitTest_Cuda @@ -121,7 +122,8 @@ endif OBJ_OPENMP += TestOpenMP_TeamReductionScan.o OBJ_OPENMP += TestOpenMP_Other.o OBJ_OPENMP += TestOpenMP_MDRange.o - OBJ_OPENMP += TestOpenMP_Task.o + OBJ_OPENMP += TestOpenMP_Task.o TestOpenMP_WorkGraph.o + OBJ_OPENMP += TestOpenMP_UniqueToken.o TARGETS += KokkosCore_UnitTest_OpenMP @@ -208,7 +210,7 @@ endif OBJ_SERIAL += TestSerial_TeamReductionScan.o OBJ_SERIAL += TestSerial_Other.o OBJ_SERIAL += TestSerial_MDRange.o - OBJ_SERIAL += TestSerial_Task.o + OBJ_SERIAL += TestSerial_Task.o TestSerial_WorkGraph.o 
TARGETS += KokkosCore_UnitTest_Serial diff --git a/lib/kokkos/core/unit_test/TestAggregate.hpp b/lib/kokkos/core/unit_test/TestAggregate.hpp index 6896a27bfb..87440c36be 100644 --- a/lib/kokkos/core/unit_test/TestAggregate.hpp +++ b/lib/kokkos/core/unit_test/TestAggregate.hpp @@ -58,7 +58,7 @@ template< class DeviceType > void TestViewAggregate() { typedef Kokkos::Array< double, 32 > value_type; - typedef Kokkos::Experimental::Impl::ViewDataAnalysis< value_type *, Kokkos::LayoutLeft, value_type > analysis_1d; + typedef Kokkos::Impl::ViewDataAnalysis< value_type *, Kokkos::LayoutLeft, value_type > analysis_1d; static_assert( std::is_same< typename analysis_1d::specialize, Kokkos::Array<> >::value, "" ); diff --git a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp index 401da58a58..68864c8d66 100644 --- a/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp +++ b/lib/kokkos/core/unit_test/TestDefaultDeviceTypeInit.hpp @@ -186,6 +186,21 @@ void check_correct_initialization( const Kokkos::InitArguments & argstruct ) { // Figure out the number of threads the HostSpace ExecutionSpace should have initialized to. 
int expected_nthreads = argstruct.num_threads; +#ifdef KOKKOS_ENABLE_OPENMP + if ( std::is_same< Kokkos::HostSpace::execution_space, Kokkos::OpenMP >::value ) { + // use openmp default num threads + if ( expected_nthreads < 0 || ( expected_nthreads == 0 && !Kokkos::hwloc::available() ) ) { + expected_nthreads = omp_get_max_threads(); + } + // use hwloc if available + else if ( expected_nthreads == 0 && Kokkos::hwloc::available() ) { + expected_nthreads = Kokkos::hwloc::get_available_numa_count() + * Kokkos::hwloc::get_available_cores_per_numa() + * Kokkos::hwloc::get_available_threads_per_core(); + } + } +#endif + if ( expected_nthreads < 1 ) { if ( Kokkos::hwloc::available() ) { expected_nthreads = Kokkos::hwloc::get_available_numa_count() @@ -193,12 +208,6 @@ void check_correct_initialization( const Kokkos::InitArguments & argstruct ) { * Kokkos::hwloc::get_available_threads_per_core(); } else { -#ifdef KOKKOS_ENABLE_OPENMP - if ( std::is_same< Kokkos::HostSpace::execution_space, Kokkos::OpenMP >::value ) { - expected_nthreads = omp_get_max_threads(); - } - else -#endif expected_nthreads = 1; } diff --git a/lib/kokkos/core/unit_test/TestMDRange.hpp b/lib/kokkos/core/unit_test/TestMDRange.hpp index 091591bcbf..f579ddf02c 100644 --- a/lib/kokkos/core/unit_test/TestMDRange.hpp +++ b/lib/kokkos/core/unit_test/TestMDRange.hpp @@ -51,6 +51,180 @@ namespace Test { namespace { +template +struct TestMDRange_ReduceArray_2D { + + using DataType = int; + using ViewType_2 = typename Kokkos::View< DataType**, ExecSpace >; + using HostViewType_2 = typename ViewType_2::HostMirror; + + ViewType_2 input_view; + + using scalar_type = double; + using value_type = scalar_type[]; + const unsigned value_count; + + TestMDRange_ReduceArray_2D( const int N0, const int N1, const unsigned array_size ) + : input_view( "input_view", N0, N1 ) + , value_count( array_size ) + {} + + KOKKOS_INLINE_FUNCTION + void init( scalar_type dst[] ) const + { + for ( unsigned i = 0; i < value_count; ++i ) { 
+ dst[i] = 0.0; + } + } + + KOKKOS_INLINE_FUNCTION + void join( volatile scalar_type dst[], + const volatile scalar_type src[] ) const + { + for ( unsigned i = 0; i < value_count; ++i ) { + dst[i] += src[i]; + } + } + + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j ) const + { + input_view( i, j ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, value_type lsum ) const + { + lsum[0] += input_view( i, j ) * 2; //+=6 each time if InitTag => N0*N1*6 + lsum[1] += input_view( i, j ) ; //+=3 each time if InitTag => N0*N1*3 + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j ) const + { + input_view( i, j ) = 3; + } + + static void test_arrayreduce2( const int N0, const int N1 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType, InitTag > range_type_init; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type_init range_init( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); + + const unsigned array_size = 2; + + TestMDRange_ReduceArray_2D functor( N0, N1, array_size ); + + parallel_for( range_init, functor ); // Init the view to 3's + + double sums[ array_size ]; + parallel_reduce( range, functor, sums ); + + // Check output + //printf("Array Reduce result. 
N0 = %d N1 = %d N0*N1 = %d sums[0] = %lf sums[1] = %lf \n", N0, N1, N0*N1, sums[0], sums[1]); + + ASSERT_EQ( sums[0], 6 * N0 * N1 ); + ASSERT_EQ( sums[1], 3 * N0 * N1 ); + } + } +}; + +template +struct TestMDRange_ReduceArray_3D { + + using DataType = int; + using ViewType_3 = typename Kokkos::View< DataType***, ExecSpace >; + using HostViewType_3 = typename ViewType_3::HostMirror; + + ViewType_3 input_view; + + using scalar_type = double; + using value_type = scalar_type[]; + const unsigned value_count; + + TestMDRange_ReduceArray_3D( const int N0, const int N1, const int N2, const unsigned array_size ) + : input_view( "input_view", N0, N1, N2 ) + , value_count( array_size ) + {} + + KOKKOS_INLINE_FUNCTION + void init( scalar_type dst[] ) const + { + for ( unsigned i = 0; i < value_count; ++i ) { + dst[i] = 0.0; + } + } + + KOKKOS_INLINE_FUNCTION + void join( volatile scalar_type dst[], + const volatile scalar_type src[] ) const + { + for ( unsigned i = 0; i < value_count; ++i ) { + dst[i] += src[i]; + } + } + + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k ) const + { + input_view( i, j, k ) = 1; + } + + KOKKOS_INLINE_FUNCTION + void operator()( const int i, const int j, const int k, value_type lsum ) const + { + lsum[0] += input_view( i, j, k ) * 2; //+=6 each time if InitTag => N0*N1*N2*6 + lsum[1] += input_view( i, j, k ) ; //+=3 each time if InitTag => N0*N1*N2*3 + } + + // tagged operators + struct InitTag {}; + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k ) const + { + input_view( i, j, k ) = 3; + } + + static void test_arrayreduce3( const int N0, const int N1, const int N2 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType, InitTag > range_type_init; + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType > range_type; + typedef typename 
range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type_init range_init( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); + + const unsigned array_size = 2; + + TestMDRange_ReduceArray_3D functor( N0, N1, N2, array_size ); + + parallel_for( range_init, functor ); // Init the view to 3's + + double sums[ array_size ]; + parallel_reduce( range, functor, sums ); + + ASSERT_EQ( sums[0], 6 * N0 * N1 * N2 ); + ASSERT_EQ( sums[1], 3 * N0 * N1 * N2 ); + } + } +}; + + template struct TestMDRange_2D { using DataType = int; @@ -58,6 +232,7 @@ struct TestMDRange_2D { using HostViewType = typename ViewType::HostMirror; ViewType input_view; + using value_type = double; TestMDRange_2D( const DataType N0, const DataType N1 ) : input_view( "input_view", N0, N1 ) {} @@ -68,7 +243,7 @@ struct TestMDRange_2D { } KOKKOS_INLINE_FUNCTION - void operator()( const int i, const int j, double &lsum ) const + void operator()( const int i, const int j, value_type &lsum ) const { lsum += input_view( i, j ) * 2; } @@ -81,6 +256,13 @@ struct TestMDRange_2D { input_view( i, j ) = 3; } + // reduction tagged operators + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, value_type &lsum ) const + { + lsum += input_view( i, j ) * 3; + } + static void test_reduce2( const int N0, const int N1 ) { using namespace Kokkos::Experimental; @@ -94,13 +276,85 @@ struct TestMDRange_2D { TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 ); } + // Test with reducers - scalar + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0 
}}, {{ N0, N1 }}, {{ 3, 3 }} ); + + TestMDRange_2D functor( N0, N1 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::Experimental::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + // Test with reducers - scalar view + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0 }}, {{ N0, N1 }}, {{ 3, 3 }} ); + + TestMDRange_2D functor( N0, N1 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::View< value_type, Kokkos::HostSpace > sum_view("sum_view"); + sum_view() = sum; + Kokkos::Experimental::Sum< value_type > reducer_view( sum_view ); + + parallel_reduce( range, functor, reducer_view); + sum = sum_view(); + + ASSERT_EQ( sum, 2 * N0 * N1 ); + } + + // Tagged operator test + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, Iterate::Default, Iterate::Default >, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 2, 4 } } ); + + TestMDRange_2D functor( N0, N1 ); + + parallel_for( range, functor ); + + // check parallel_for results correct with InitTag + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + { + if ( h_view( i, j ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_for3; mismatches = %d\n\n", counter ); + } + ASSERT_EQ( counter, 0 ); + + + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 9 * N0 * N1 ); + } + { typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<2, 
Iterate::Default, Iterate::Default>, Kokkos::IndexType > range_type; typedef typename range_type::tile_type tile_type; @@ -110,9 +364,9 @@ struct TestMDRange_2D { TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 ); } @@ -126,9 +380,9 @@ struct TestMDRange_2D { TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 ); } @@ -142,9 +396,9 @@ struct TestMDRange_2D { TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 ); } @@ -158,9 +412,9 @@ struct TestMDRange_2D { TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 ); } @@ -174,9 +428,9 @@ struct TestMDRange_2D { TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 ); } @@ -194,7 +448,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -223,7 +477,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } 
}, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -251,7 +505,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -280,7 +534,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -309,7 +563,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 4, 4 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -338,7 +592,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 3, 3 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -367,7 +621,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 7, 7 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = 
Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -396,7 +650,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 16, 16 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -425,7 +679,7 @@ struct TestMDRange_2D { range_type range( point_type{ { 0, 0 } }, point_type{ { N0, N1 } }, tile_type{ { 5, 16 } } ); TestMDRange_2D functor( N0, N1 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -455,6 +709,7 @@ struct TestMDRange_3D { using HostViewType = typename ViewType::HostMirror; ViewType input_view; + using value_type = double; TestMDRange_3D( const DataType N0, const DataType N1, const DataType N2 ) : input_view( "input_view", N0, N1, N2 ) {} @@ -478,6 +733,13 @@ struct TestMDRange_3D { input_view( i, j, k ) = 3; } + // reduction tagged operators + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, value_type &lsum ) const + { + lsum += input_view( i, j, k ) * 3; + } + static void test_reduce3( const int N0, const int N1, const int N2 ) { using namespace Kokkos::Experimental; @@ -491,13 +753,86 @@ struct TestMDRange_3D { TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } + // Test with reducers - scalar + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} ); + + 
TestMDRange_3D functor( N0, N1, N2 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::Experimental::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + // Test with reducers - scalar view + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0 }}, {{ N0, N1, N2 }}, {{ 3, 3, 3 }} ); + + TestMDRange_3D functor( N0, N1, N2 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::View< value_type, Kokkos::HostSpace > sum_view("sum_view"); + sum_view() = sum; + Kokkos::Experimental::Sum< value_type > reducer_view( sum_view ); + + parallel_reduce( range, functor, reducer_view); + sum = sum_view(); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); + } + + // Tagged operator test + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default >, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 6 } } ); + + TestMDRange_3D functor( N0, N1, N2 ); + + parallel_for( range, functor ); + + // check parallel_for results correct with InitTag + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + { + if ( h_view( i, j, k ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_for3; mismatches = %d\n\n", counter ); + } + ASSERT_EQ( counter, 0 ); + + + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 9 * N0 * N1 * N2 ); + } + { typedef typename 
Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<3, Iterate::Default, Iterate::Default >, Kokkos::IndexType > range_type; typedef typename range_type::tile_type tile_type; @@ -507,9 +842,9 @@ struct TestMDRange_3D { TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } @@ -523,9 +858,9 @@ struct TestMDRange_3D { TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } @@ -539,9 +874,9 @@ struct TestMDRange_3D { TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } @@ -555,9 +890,9 @@ struct TestMDRange_3D { TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } @@ -571,9 +906,9 @@ struct TestMDRange_3D { TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); double sum = 0.0; - md_parallel_reduce( range, functor, sum ); + parallel_reduce( range, functor, sum ); ASSERT_EQ( sum, 2 * N0 * N1 * N2 ); } @@ -590,7 +925,7 @@ struct TestMDRange_3D { range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } } ); TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, 
functor.input_view ); @@ -620,7 +955,7 @@ struct TestMDRange_3D { range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -651,7 +986,7 @@ struct TestMDRange_3D { TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -681,7 +1016,7 @@ struct TestMDRange_3D { range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 3, 3 } } ); TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -711,7 +1046,7 @@ struct TestMDRange_3D { range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 2 } } ); TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -741,7 +1076,7 @@ struct TestMDRange_3D { range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 3, 5, 7 } } ); TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -771,7 +1106,7 @@ struct TestMDRange_3D { range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 8, 8, 8 } } ); TestMDRange_3D functor( N0, N1, N2 ); - 
md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -801,7 +1136,7 @@ struct TestMDRange_3D { range_type range( point_type{ { 0, 0, 0 } }, point_type{ { N0, N1, N2 } }, tile_type{ { 2, 4, 2 } } ); TestMDRange_3D functor( N0, N1, N2 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -832,6 +1167,7 @@ struct TestMDRange_4D { using HostViewType = typename ViewType::HostMirror; ViewType input_view; + using value_type = double; TestMDRange_4D( const DataType N0, const DataType N1, const DataType N2, const DataType N3 ) : input_view( "input_view", N0, N1, N2, N3 ) {} @@ -855,6 +1191,191 @@ struct TestMDRange_4D { input_view( i, j, k, l ) = 3; } + // reduction tagged operators + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l, value_type &lsum ) const + { + lsum += input_view( i, j, k, l ) * 3; + } + + static void test_reduce4( const int N0, const int N1, const int N2, const int N3 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 3, 3, 3 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + + // Test with reducers - scalar + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0, 0 }}, 
{{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::Experimental::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + + // Test with reducers - scalar view + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0, 0 }}, {{ N0, N1, N2, N3 }}, {{ 3, 3, 3, 3 }} ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::View< value_type, Kokkos::HostSpace > sum_view("sum_view"); + sum_view() = sum; + Kokkos::Experimental::Sum< value_type > reducer_view( sum_view ); + + parallel_reduce( range, functor, reducer_view); + sum = sum_view(); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + + // Tagged operator test + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Default, Iterate::Default >, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 2, 4, 6, 2 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + + // check parallel_for results correct with InitTag + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + { + if ( h_view( i, j, k, l ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_reduce4 parallel_for init; mismatches = %d\n\n", counter ); + } + ASSERT_EQ( counter, 0 ); + + + 
double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 9 * N0 * N1 * N2 * N3 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Default, Iterate::Default >, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 2, 4, 6, 2 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Left, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 2, 4, 6, 2 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Left, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 2, 4, 6, 2 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Right, Iterate::Left>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + 
typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 2, 4, 6, 2 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<4, Iterate::Right, Iterate::Right>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 2, 4, 6, 2 } } ); + + TestMDRange_4D functor( N0, N1, N2, N3 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 ); + } + } // end test_reduce + + + static void test_for4( const int N0, const int N1, const int N2, const int N3 ) { using namespace Kokkos::Experimental; @@ -866,7 +1387,7 @@ struct TestMDRange_4D { range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } } ); TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -897,7 +1418,7 @@ struct TestMDRange_4D { range_type range( point_type{ { 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3 } }, tile_type{ { 3, 11, 3, 3 } } ); TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -929,7 +1450,7 @@ struct TestMDRange_4D { TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType 
h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -961,7 +1482,7 @@ struct TestMDRange_4D { TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -993,7 +1514,7 @@ struct TestMDRange_4D { TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1025,7 +1546,7 @@ struct TestMDRange_4D { TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1057,7 +1578,7 @@ struct TestMDRange_4D { TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1089,7 +1610,7 @@ struct TestMDRange_4D { TestMDRange_4D functor( N0, N1, N2, N3 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1121,6 +1642,7 @@ struct TestMDRange_5D { using HostViewType = typename ViewType::HostMirror; ViewType input_view; + using value_type = double; TestMDRange_5D( const DataType N0, const DataType N1, const DataType N2, const DataType N3, const DataType N4 ) : input_view( "input_view", N0, N1, N2, N3, N4 ) {} @@ -1131,7 +1653,7 @@ struct TestMDRange_5D { } KOKKOS_INLINE_FUNCTION - void operator()( const int i, const int j, const int k, const int l, const int m, double &lsum ) const + 
void operator()( const int i, const int j, const int k, const int l, const int m, value_type &lsum ) const { lsum += input_view( i, j, k, l, m ) * 2; } @@ -1144,6 +1666,110 @@ struct TestMDRange_5D { input_view( i, j, k, l, m ) = 3; } + // reduction tagged operators + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l, const int m, value_type &lsum ) const + { + lsum += input_view( i, j, k, l, m ) * 3; + } + + static void test_reduce5( const int N0, const int N1, const int N2, const int N3, const int N4 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 3 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); + } + + // Test with reducers - scalar + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 3, 3 }} ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::Experimental::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); + } + + // Test with reducers - scalar view + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4 }}, {{ 3, 3, 3, 3, 3 }} ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + parallel_for( 
range, functor ); + + value_type sum = 0.0; + Kokkos::View< value_type, Kokkos::HostSpace > sum_view("sum_view"); + sum_view() = sum; + Kokkos::Experimental::Sum< value_type > reducer_view( sum_view ); + + parallel_reduce( range, functor, reducer_view); + sum = sum_view(); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 ); + } + + // Tagged operator test + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<5, Iterate::Default, Iterate::Default >, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 2, 4, 6, 2, 2 } } ); + + TestMDRange_5D functor( N0, N1, N2, N3, N4 ); + + parallel_for( range, functor ); + + // check parallel_for results correct with InitTag + HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + { + if ( h_view( i, j, k, l, m ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_reduce5 parallel_for init; mismatches = %d\n\n", counter ); + } + ASSERT_EQ( counter, 0 ); + + + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 9 * N0 * N1 * N2 * N3 * N4 ); + } + } + static void test_for5( const int N0, const int N1, const int N2, const int N3, const int N4 ) { using namespace Kokkos::Experimental; @@ -1155,7 +1781,7 @@ struct TestMDRange_5D { range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } } ); TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( 
functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1184,10 +1810,10 @@ struct TestMDRange_5D { typedef typename range_type::tile_type tile_type; typedef typename range_type::point_type point_type; - range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 7 } } ); + range_type range( point_type{ { 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4 } }, tile_type{ { 3, 3, 3, 3, 5 } } ); TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1220,7 +1846,7 @@ struct TestMDRange_5D { TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1253,7 +1879,7 @@ struct TestMDRange_5D { TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1286,7 +1912,7 @@ struct TestMDRange_5D { TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1319,7 +1945,7 @@ struct TestMDRange_5D { TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1352,7 +1978,7 @@ struct TestMDRange_5D { TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, 
functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1385,7 +2011,7 @@ struct TestMDRange_5D { TestMDRange_5D functor( N0, N1, N2, N3, N4 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1418,6 +2044,7 @@ struct TestMDRange_6D { using HostViewType = typename ViewType::HostMirror; ViewType input_view; + using value_type = double; TestMDRange_6D( const DataType N0, const DataType N1, const DataType N2, const DataType N3, const DataType N4, const DataType N5 ) : input_view( "input_view", N0, N1, N2, N3, N4, N5 ) {} @@ -1428,7 +2055,7 @@ struct TestMDRange_6D { } KOKKOS_INLINE_FUNCTION - void operator()( const int i, const int j, const int k, const int l, const int m, const int n, double &lsum ) const + void operator()( const int i, const int j, const int k, const int l, const int m, const int n, value_type &lsum ) const { lsum += input_view( i, j, k, l, m, n ) * 2; } @@ -1441,6 +2068,111 @@ struct TestMDRange_6D { input_view( i, j, k, l, m, n ) = 3; } + // reduction tagged operators + KOKKOS_INLINE_FUNCTION + void operator()( const InitTag &, const int i, const int j, const int k, const int l, const int m, const int n, value_type &lsum ) const + { + lsum += input_view( i, j, k, l, m, n ) * 3; + } + + static void test_reduce6( const int N0, const int N1, const int N2, const int N3, const int N4, const int N5 ) + { + using namespace Kokkos::Experimental; + + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6>, Kokkos::IndexType > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 3, 2 } } ); + + TestMDRange_6D functor( N0, N1, 
N2, N3, N4, N5 ); + + parallel_for( range, functor ); + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); + } + + // Test with reducers - scalar + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 3, 3, 2 }} ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::Experimental::Sum< value_type > reducer_scalar( sum ); + + parallel_reduce( range, functor, reducer_scalar ); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); + } + + // Test with reducers - scalar view + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6>, Kokkos::IndexType > range_type; + range_type range( {{ 0, 0, 0, 0, 0, 0 }}, {{ N0, N1, N2, N3, N4, N5 }}, {{ 3, 3, 3, 3, 3, 2 }} ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + parallel_for( range, functor ); + + value_type sum = 0.0; + Kokkos::View< value_type, Kokkos::HostSpace > sum_view("sum_view"); + sum_view() = sum; + Kokkos::Experimental::Sum< value_type > reducer_view( sum_view ); + + parallel_reduce( range, functor, reducer_view); + sum = sum_view(); + + ASSERT_EQ( sum, 2 * N0 * N1 * N2 * N3 * N4 * N5 ); + } + + // Tagged operator test + { + typedef typename Kokkos::Experimental::MDRangePolicy< ExecSpace, Rank<6, Iterate::Default, Iterate::Default >, Kokkos::IndexType, InitTag > range_type; + typedef typename range_type::tile_type tile_type; + typedef typename range_type::point_type point_type; + + range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 2, 4, 6, 2, 2, 2 } } ); + + TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); + + parallel_for( range, functor ); + + // check parallel_for results correct with InitTag + HostViewType h_view = Kokkos::create_mirror_view( 
functor.input_view ); + Kokkos::deep_copy( h_view, functor.input_view ); + int counter = 0; + for ( int i = 0; i < N0; ++i ) + for ( int j = 0; j < N1; ++j ) + for ( int k = 0; k < N2; ++k ) + for ( int l = 0; l < N3; ++l ) + for ( int m = 0; m < N4; ++m ) + for ( int n = 0; n < N5; ++n ) + { + if ( h_view( i, j, k, l, m, n ) != 3 ) { + ++counter; + } + } + + if ( counter != 0 ) { + printf( "Defaults + InitTag op(): Errors in test_reduce6 parallel_for init; mismatches = %d\n\n", counter ); + } + ASSERT_EQ( counter, 0 ); + + + double sum = 0.0; + parallel_reduce( range, functor, sum ); + + ASSERT_EQ( sum, 9 * N0 * N1 * N2 * N3 * N4 * N5 ); + } + } + static void test_for6( const int N0, const int N1, const int N2, const int N3, const int N4, const int N5 ) { using namespace Kokkos::Experimental; @@ -1452,7 +2184,7 @@ struct TestMDRange_6D { range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } } ); TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1485,7 +2217,7 @@ struct TestMDRange_6D { range_type range( point_type{ { 0, 0, 0, 0, 0, 0 } }, point_type{ { N0, N1, N2, N3, N4, N5 } }, tile_type{ { 3, 3, 3, 3, 2, 3 } } ); //tile dims 3,3,3,3,3,3 more than cuda can handle with debugging TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1519,7 +2251,7 @@ struct TestMDRange_6D { TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1553,7 +2285,7 @@ struct 
TestMDRange_6D { TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1587,7 +2319,7 @@ struct TestMDRange_6D { TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1621,7 +2353,7 @@ struct TestMDRange_6D { TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1655,7 +2387,7 @@ struct TestMDRange_6D { TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1689,7 +2421,7 @@ struct TestMDRange_6D { TestMDRange_6D functor( N0, N1, N2, N3, N4, N5 ); - md_parallel_for( range, functor ); + parallel_for( range, functor ); HostViewType h_view = Kokkos::create_mirror_view( functor.input_view ); Kokkos::deep_copy( h_view, functor.input_view ); @@ -1726,11 +2458,19 @@ TEST_F( TEST_CATEGORY , mdrange_for ) { TestMDRange_6D< TEST_EXECSPACE >::test_for6( 10, 10, 10, 10, 5, 5 ); } -#ifndef KOKKOS_ENABLE_CUDA TEST_F( TEST_CATEGORY , mdrange_reduce ) { TestMDRange_2D< TEST_EXECSPACE >::test_reduce2( 100, 100 ); TestMDRange_3D< TEST_EXECSPACE >::test_reduce3( 100, 10, 100 ); + TestMDRange_4D< TEST_EXECSPACE >::test_reduce4( 100, 10, 10, 10 ); + TestMDRange_5D< TEST_EXECSPACE >::test_reduce5( 100, 10, 10, 10, 5 ); + TestMDRange_6D< TEST_EXECSPACE >::test_reduce6( 100, 10, 10, 10, 5, 5 ); } -#endif + +//#ifndef KOKKOS_ENABLE_CUDA 
+TEST_F( TEST_CATEGORY , mdrange_array_reduce ) { + TestMDRange_ReduceArray_2D< TEST_EXECSPACE >::test_arrayreduce2( 4, 5 ); + TestMDRange_ReduceArray_3D< TEST_EXECSPACE >::test_arrayreduce3( 4, 5, 10 ); +} +//#endif } // namespace Test diff --git a/lib/kokkos/core/unit_test/TestMemoryPool.hpp b/lib/kokkos/core/unit_test/TestMemoryPool.hpp index 941cd6c26d..9f708390c2 100644 --- a/lib/kokkos/core/unit_test/TestMemoryPool.hpp +++ b/lib/kokkos/core/unit_test/TestMemoryPool.hpp @@ -54,6 +54,96 @@ namespace TestMemoryPool { +template< typename MemSpace = Kokkos::HostSpace > +void test_host_memory_pool_defaults() +{ + typedef typename MemSpace::execution_space Space ; + typedef typename Kokkos::MemoryPool< Space > MemPool ; + + { + const size_t MemoryCapacity = 32000 ; + const size_t MinBlockSize = 64 ; + const size_t MaxBlockSize = 1024 ; + const size_t SuperBlockSize = 4096 ; + + MemPool pool( MemSpace() + , MemoryCapacity + , MinBlockSize + , MaxBlockSize + , SuperBlockSize + ); + + typename MemPool::usage_statistics stats ; + + pool.get_usage_statistics( stats ); + + ASSERT_LE( MemoryCapacity , stats.capacity_bytes ); + ASSERT_LE( MinBlockSize , stats.min_block_bytes ); + ASSERT_LE( MaxBlockSize , stats.max_block_bytes ); + ASSERT_LE( SuperBlockSize , stats.superblock_bytes ); + } + + { + const size_t MemoryCapacity = 10000 ; + + MemPool pool( MemSpace() + , MemoryCapacity + ); + + typename MemPool::usage_statistics stats ; + + pool.get_usage_statistics( stats ); + + ASSERT_LE( MemoryCapacity , stats.capacity_bytes ); + ASSERT_LE( 64u /* default */ , stats.min_block_bytes ); + ASSERT_LE( stats.min_block_bytes , stats.max_block_bytes ); + ASSERT_LE( stats.max_block_bytes , stats.superblock_bytes ); + ASSERT_LE( stats.superblock_bytes , stats.capacity_bytes ); + } + + { + const size_t MemoryCapacity = 10000 ; + const size_t MinBlockSize = 32 ; // power of two is exact + + MemPool pool( MemSpace() + , MemoryCapacity + , MinBlockSize + ); + + typename 
MemPool::usage_statistics stats ; + + pool.get_usage_statistics( stats ); + + ASSERT_LE( MemoryCapacity , stats.capacity_bytes ); + ASSERT_EQ( MinBlockSize , stats.min_block_bytes ); + ASSERT_LE( stats.min_block_bytes , stats.max_block_bytes ); + ASSERT_LE( stats.max_block_bytes , stats.superblock_bytes ); + ASSERT_LE( stats.superblock_bytes , stats.capacity_bytes ); + } + + { + const size_t MemoryCapacity = 32000 ; + const size_t MinBlockSize = 32 ; // power of two is exact + const size_t MaxBlockSize = 1024 ; // power of two is exact + + MemPool pool( MemSpace() + , MemoryCapacity + , MinBlockSize + , MaxBlockSize + ); + + typename MemPool::usage_statistics stats ; + + pool.get_usage_statistics( stats ); + + ASSERT_LE( MemoryCapacity , stats.capacity_bytes ); + ASSERT_EQ( MinBlockSize , stats.min_block_bytes ); + ASSERT_EQ( MaxBlockSize , stats.max_block_bytes ); + ASSERT_LE( stats.max_block_bytes , stats.superblock_bytes ); + ASSERT_LE( stats.superblock_bytes , stats.capacity_bytes ); + } +} + template< typename MemSpace = Kokkos::HostSpace > void test_host_memory_pool_stats() { @@ -188,8 +278,8 @@ void print_memory_pool_stats << " bytes reserved = " << stats.reserved_bytes << std::endl << " bytes free = " << ( stats.capacity_bytes - ( stats.consumed_bytes + stats.reserved_bytes ) ) << std::endl - << " alloc used = " << stats.consumed_blocks << std::endl - << " alloc reserved = " << stats.reserved_blocks << std::endl + << " block used = " << stats.consumed_blocks << std::endl + << " block reserved = " << stats.reserved_blocks << std::endl << " super used = " << stats.consumed_superblocks << std::endl << " super reserved = " << ( stats.capacity_superblocks - stats.consumed_superblocks ) << std::endl @@ -302,15 +392,147 @@ void test_memory_pool_v2( const bool print_statistics //---------------------------------------------------------------------------- //---------------------------------------------------------------------------- -} // namespace TestMemoryPool { 
+template< class DeviceType > +struct TestMemoryPoolCorners { + + typedef Kokkos::View< uintptr_t * , DeviceType > ptrs_type ; + typedef Kokkos::MemoryPool< DeviceType > pool_type ; + + pool_type pool ; + ptrs_type ptrs ; + uint32_t size ; + uint32_t stride ; + + TestMemoryPoolCorners( const pool_type & arg_pool + , const ptrs_type & arg_ptrs + , const uint32_t arg_base + , const uint32_t arg_stride + ) + : pool( arg_pool ) + , ptrs( arg_ptrs ) + , size( arg_base ) + , stride( arg_stride ) + {} + + // Specify reduction argument value_type to + // avoid confusion with tag-dispatch. + + using value_type = long ; + + KOKKOS_INLINE_FUNCTION + void operator()( int i , long & err ) const noexcept + { + unsigned alloc_size = size << ( i % stride ); + if ( 0 == ptrs(i) ) { + ptrs(i) = (uintptr_t) pool.allocate( alloc_size ); + if ( ptrs(i) && ! alloc_size ) { ++err ; } + } + } + + struct TagDealloc {}; + + KOKKOS_INLINE_FUNCTION + void operator()( int i ) const noexcept + { + unsigned alloc_size = size << ( i % stride ); + if ( ptrs(i) ) { pool.deallocate( (void*) ptrs(i) , alloc_size ); } + ptrs(i) = 0 ; + } +}; + +template< class DeviceType > +void test_memory_pool_corners( const bool print_statistics + , const bool print_superblocks ) +{ + typedef typename DeviceType::memory_space memory_space ; + typedef typename DeviceType::execution_space execution_space ; + typedef Kokkos::MemoryPool< DeviceType > pool_type ; + typedef TestMemoryPoolCorners< DeviceType > functor_type ; + typedef typename functor_type::ptrs_type ptrs_type ; + + { + // superblock size 1 << 14 + const size_t min_superblock_size = 1u << 14 ; + + // four superblocks + const size_t total_alloc_size = min_superblock_size * 4 ; + + // block sizes { 64 , 128 , 256 , 512 } + // block counts { 256 , 128 , 64 , 32 } + const unsigned min_block_size = 64 ; + const unsigned max_block_size = 512 ; + const unsigned num_blocks = 480 ; + + pool_type pool( memory_space() + , total_alloc_size + , min_block_size + , 
max_block_size + , min_superblock_size ); + + // Allocate one block from each superblock to lock that + // superblock into the block size. + + ptrs_type ptrs("ptrs",num_blocks); + + long err = 0 ; + + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< execution_space >(0,4) + , functor_type( pool , ptrs , 64 , 4 ) + , err + ); + + if ( print_statistics || err ) { + + typename pool_type::usage_statistics stats ; + + pool.get_usage_statistics( stats ); + + print_memory_pool_stats< pool_type >( stats ); + } + + if ( print_superblocks || err ) { + pool.print_state( std::cout ); + } + + // Now fill remaining allocations with small size + + Kokkos::parallel_reduce + ( Kokkos::RangePolicy< execution_space >(0,num_blocks) + , functor_type( pool , ptrs , 64 , 1 ) + , err + ); + + if ( print_statistics || err ) { + + typename pool_type::usage_statistics stats ; + + pool.get_usage_statistics( stats ); + + print_memory_pool_stats< pool_type >( stats ); + } + + if ( print_superblocks || err ) { + pool.print_state( std::cout ); + } + } +} + +//---------------------------------------------------------------------------- +//---------------------------------------------------------------------------- + +} // namespace TestMemoryPool namespace Test { TEST_F( TEST_CATEGORY, memory_pool ) { + TestMemoryPool::test_host_memory_pool_defaults<>(); TestMemoryPool::test_host_memory_pool_stats<>(); TestMemoryPool::test_memory_pool_v2< TEST_EXECSPACE >(false,false); + TestMemoryPool::test_memory_pool_corners< TEST_EXECSPACE >(false,false); } + } #endif diff --git a/lib/kokkos/core/unit_test/TestRange.hpp b/lib/kokkos/core/unit_test/TestRange.hpp index f55574761b..3cea1ad4a0 100644 --- a/lib/kokkos/core/unit_test/TestRange.hpp +++ b/lib/kokkos/core/unit_test/TestRange.hpp @@ -72,8 +72,33 @@ struct TestRange { typename view_type::HostMirror host_flags = Kokkos::create_mirror_view( m_flags ); Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType >( 0, N ), *this ); + +#if 
defined(KOKKOS_ENABLE_PROFILING) + { + typedef TestRange< ExecSpace, ScheduleType > ThisType; + std::string label("parallel_for"); + Kokkos::Impl::ParallelConstructName< ThisType, void> pcn(label); + ASSERT_EQ( pcn.get(), label ); + std::string empty_label(""); + Kokkos::Impl::ParallelConstructName< ThisType, void> empty_pcn(empty_label); + ASSERT_EQ( empty_pcn.get(), typeid(ThisType).name() ); + } +#endif + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace, ScheduleType, VerifyInitTag >( 0, N ), *this ); +#if defined(KOKKOS_ENABLE_PROFILING) + { + typedef TestRange< ExecSpace, ScheduleType > ThisType; + std::string label("parallel_for"); + Kokkos::Impl::ParallelConstructName< ThisType, VerifyInitTag> pcn(label); + ASSERT_EQ( pcn.get(), label ); + std::string empty_label(""); + Kokkos::Impl::ParallelConstructName< ThisType, VerifyInitTag> empty_pcn(empty_label); + ASSERT_EQ( empty_pcn.get(), std::string(typeid(ThisType).name()) + "/" + typeid(VerifyInitTag).name() ); + } +#endif + Kokkos::deep_copy( host_flags, m_flags ); int error_count = 0; diff --git a/lib/kokkos/core/unit_test/TestResize.hpp b/lib/kokkos/core/unit_test/TestResize.hpp new file mode 100644 index 0000000000..aaf0422b19 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestResize.hpp @@ -0,0 +1,140 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ +#ifndef TESTVIEWSUBVIEW_HPP_ +#define TESTVIEWSUBVIEW_HPP_ + +#include +#include + +namespace TestViewResize { + +template +void testResize () +{ + const int sizes[8] = {2, 3, 4, 5, 6, 7, 8, 9}; + + // Check #904 fix (no reallocation if dimensions didn't change). 
+ { + typedef Kokkos::View view_type; + view_type view_1d ("view_1d", sizes[0]); + const int* oldPointer = view_1d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_1d, sizes[0]); + const int* newPointer = view_1d.data (); + EXPECT_TRUE( oldPointer == newPointer ); + } + { + typedef Kokkos::View view_type; + view_type view_2d ("view_2d", sizes[0], sizes[1]); + const int* oldPointer = view_2d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_2d, sizes[0], sizes[1]); + const int* newPointer = view_2d.data (); + EXPECT_TRUE( oldPointer == newPointer ); + } + { + typedef Kokkos::View view_type; + view_type view_3d ("view_3d", sizes[0], sizes[1], sizes[2]); + const int* oldPointer = view_3d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_3d, sizes[0], sizes[1], sizes[2]); + const int* newPointer = view_3d.data (); + EXPECT_TRUE( oldPointer == newPointer ); + } + { + typedef Kokkos::View view_type; + view_type view_4d ("view_4d", sizes[0], sizes[1], sizes[2], sizes[3]); + const int* oldPointer = view_4d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_4d, sizes[0], sizes[1], sizes[2], sizes[3]); + const int* newPointer = view_4d.data (); + EXPECT_TRUE( oldPointer == newPointer ); + } + { + typedef Kokkos::View view_type; + view_type view_5d ("view_5d", sizes[0], sizes[1], sizes[2], sizes[3], + sizes[4]); + const int* oldPointer = view_5d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_5d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4]); + const int* newPointer = view_5d.data (); + EXPECT_TRUE( oldPointer == newPointer ); + } + { + typedef Kokkos::View view_type; + view_type view_6d ("view_6d", sizes[0], sizes[1], sizes[2], sizes[3], + sizes[4], sizes[5]); + const int* oldPointer = view_6d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_6d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], + sizes[5]); + const int* newPointer = view_6d.data (); + 
EXPECT_TRUE( oldPointer == newPointer ); + } + { + typedef Kokkos::View view_type; + view_type view_7d ("view_7d", sizes[0], sizes[1], sizes[2], sizes[3], + sizes[4], sizes[5], sizes[6]); + const int* oldPointer = view_7d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_7d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], + sizes[5], sizes[6]); + const int* newPointer = view_7d.data (); + EXPECT_TRUE( oldPointer == newPointer ); + } + { + typedef Kokkos::View view_type; + view_type view_8d ("view_8d", sizes[0], sizes[1], sizes[2], sizes[3], + sizes[4], sizes[5], sizes[6], sizes[7]); + const int* oldPointer = view_8d.data (); + EXPECT_TRUE( oldPointer != NULL ); + Kokkos::resize (view_8d, sizes[0], sizes[1], sizes[2], sizes[3], sizes[4], + sizes[5], sizes[6], sizes[7]); + const int* newPointer = view_8d.data (); + EXPECT_TRUE( oldPointer == newPointer ); + } +} + +} // namespace TestViewSubview + +#endif // TESTVIEWSUBVIEW_HPP_ diff --git a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp index 3a88475620..4e66543857 100644 --- a/lib/kokkos/core/unit_test/TestTaskScheduler.hpp +++ b/lib/kokkos/core/unit_test/TestTaskScheduler.hpp @@ -250,13 +250,21 @@ struct TestTaskDependence { const int n = CHUNK < m_count ? CHUNK : m_count; if ( 1 < m_count ) { - future_type f[ CHUNK ]; + // Test use of memory pool for temporary allocation: + + // Raw allocation: + future_type * const f = + (future_type *) m_sched.memory()->allocate( sizeof(future_type) * n ); + + // In-place construction: + for ( int i = 0; i < n; ++i ) new(f+i) future_type(); const int inc = ( m_count + n - 1 ) / n; for ( int i = 0; i < n; ++i ) { long begin = i * inc; long count = begin + inc < m_count ? 
inc : m_count - begin; + f[i] = Kokkos::task_spawn( Kokkos::TaskSingle( m_sched ) , TestTaskDependence( count, m_sched, m_accum ) ); } @@ -264,6 +272,12 @@ struct TestTaskDependence { m_count = 0; Kokkos::respawn( this, Kokkos::when_all( f, n ) ); + + // In-place destruction to release future: + for ( int i = 0; i < n; ++i ) (f+i)->~future_type(); + + // Raw deallocation: + m_sched.memory()->deallocate( f , sizeof(future_type) * n ); } else if ( 1 == m_count ) { Kokkos::atomic_increment( & m_accum() ); @@ -641,19 +655,12 @@ namespace Test { TEST_F( TEST_CATEGORY, task_fib ) { - const int N = 24 ; // 25 triggers tbd bug on Cuda/Pascal + const int N = 27 ; for ( int i = 0; i < N; ++i ) { - TestTaskScheduler::TestFib< TEST_EXECSPACE >::run( i , ( i + 1 ) * ( i + 1 ) * 10000 ); + TestTaskScheduler::TestFib< TEST_EXECSPACE >::run( i , ( i + 1 ) * ( i + 1 ) * 2000 ); } } -#if defined(KOKKOS_ARCH_MAXWELL) || defined(KOKKOS_ARCH_PASCAL) - // TODO: Resolve bug in task DAG for Pascal - #define KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL -#endif - -#ifndef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL - TEST_F( TEST_CATEGORY, task_depend ) { for ( int i = 0; i < 25; ++i ) { @@ -667,11 +674,8 @@ TEST_F( TEST_CATEGORY, task_team ) //TestTaskScheduler::TestTaskTeamValue< TEST_EXECSPACE >::run( 1000 ); // Put back after testing. 
} -#else //ndef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL -#undef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL -#endif //ndef KOKKOS_IMPL_DISABLE_UNIT_TEST_TASK_DAG_PASCAL - } + #endif // #if defined( KOKKOS_ENABLE_TASKDAG ) #endif // #ifndef KOKKOS_UNITTEST_TASKSCHEDULER_HPP diff --git a/lib/kokkos/core/unit_test/TestTeamVector.hpp b/lib/kokkos/core/unit_test/TestTeamVector.hpp index e9e2f7548a..7f4663d0f9 100644 --- a/lib/kokkos/core/unit_test/TestTeamVector.hpp +++ b/lib/kokkos/core/unit_test/TestTeamVector.hpp @@ -838,6 +838,18 @@ public: }, result ); const ScalarType solution = (ScalarType) nrows * (ScalarType) ncols; + + if ( int64_t(solution) != int64_t(result) ) { + printf( " TestTripleNestedReduce failed solution(%ld) != result(%ld), nrows(%d) ncols(%d) league_size(%d) team_size(%d)\n" + , int64_t(solution) + , int64_t(result) + , int32_t(nrows) + , int32_t(ncols) + , int32_t(nrows/chunk_size) + , int32_t(team_size) + ); + } + ASSERT_EQ( solution, result ); } }; diff --git a/lib/kokkos/core/unit_test/TestTile.hpp b/lib/kokkos/core/unit_test/TestTile.hpp index 8f57dfea75..f15667322f 100644 --- a/lib/kokkos/core/unit_test/TestTile.hpp +++ b/lib/kokkos/core/unit_test/TestTile.hpp @@ -94,7 +94,7 @@ struct ReduceTileErrors const size_t jtile = iwork / tile_dim0; if ( jtile < tile_dim1 ) { - tile_type tile = Kokkos::Experimental::tile_subview( m_array, itile, jtile ); + tile_type tile = Kokkos::tile_subview( m_array, itile, jtile ); if ( tile( 0, 0 ) != ptrdiff_t( ( itile + jtile * tile_dim0 ) * TileLayout::N0 * TileLayout::N1 ) ) { ++errors; diff --git a/lib/kokkos/core/unit_test/TestUniqueToken.hpp b/lib/kokkos/core/unit_test/TestUniqueToken.hpp new file mode 100644 index 0000000000..28add61a8a --- /dev/null +++ b/lib/kokkos/core/unit_test/TestUniqueToken.hpp @@ -0,0 +1,138 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +namespace Test { + +template< class Space > +class TestUniqueToken +{ +public: + typedef typename Space::execution_space execution_space; + typedef Kokkos::View< int * , execution_space > view_type ; + + Kokkos::Experimental::UniqueToken< execution_space , Kokkos::Experimental::UniqueTokenScope::Global > tokens ; + + view_type verify ; + view_type counts ; + view_type errors ; + + KOKKOS_INLINE_FUNCTION + void operator()( long ) const + { + const int32_t t = tokens.acquire(); + + bool ok = true ; + + ok = ok && 0 <= t ; + ok = ok && t < tokens.size(); + ok = ok && 0 == Kokkos::atomic_fetch_add( & verify(t) , 1 ); + + Kokkos::atomic_fetch_add( & counts(t) , 1 ); + + ok = ok && 1 == Kokkos::atomic_fetch_add( & verify(t) , -1 ); + + if ( ! ok ) { Kokkos::atomic_fetch_add( & errors(0) , 1 ) ; } + + tokens.release(t); + } + + TestUniqueToken() + : tokens( execution_space() ) + , verify( "TestUniqueTokenVerify" , tokens.size() ) + , counts( "TestUniqueTokenCounts" , tokens.size() ) + , errors( "TestUniqueTokenErrors" , 1 ) + {} + + static void run() + { + using policy = Kokkos::RangePolicy ; + + TestUniqueToken self ; + + { + const int duplicate = 100 ; + const long n = duplicate * self.tokens.size(); + + Kokkos::parallel_for( policy(0,n) , self ); + Kokkos::parallel_for( policy(0,n) , self ); + Kokkos::parallel_for( policy(0,n) , self ); + Kokkos::fence(); + } + + typename view_type::HostMirror host_counts = + Kokkos::create_mirror_view( self.counts ); + + Kokkos::deep_copy( host_counts , self.counts ); + + int32_t max = 0 ; + + { + const long n = host_counts.extent(0); + for ( long i = 0 ; i < n ; ++i ) { + if ( max < host_counts[i] ) max = host_counts[i] ; + } + } + + std::cout << "TestUniqueToken max reuse = " << max << std::endl ; + + typename view_type::HostMirror host_errors = + Kokkos::create_mirror_view( 
self.errors ); + + Kokkos::deep_copy( host_errors , self.errors ); + + ASSERT_EQ( host_errors(0) , 0 ); + } +}; + + +TEST_F( TEST_CATEGORY, unique_token ) +{ + TestUniqueToken< TEST_EXECSPACE >::run(); +} + +} // namespace Test + diff --git a/lib/kokkos/core/unit_test/TestViewCtorPropEmbeddedDim.hpp b/lib/kokkos/core/unit_test/TestViewCtorPropEmbeddedDim.hpp new file mode 100644 index 0000000000..305ddb2a1d --- /dev/null +++ b/lib/kokkos/core/unit_test/TestViewCtorPropEmbeddedDim.hpp @@ -0,0 +1,160 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include + +#include + +#include + +#include +#include + +namespace Test { + +namespace { + +template +struct TestViewCtorProp_EmbeddedDim { + + using ViewIntType = typename Kokkos::View< int**, ExecSpace >; + using ViewDoubleType = typename Kokkos::View< double*, ExecSpace >; + + // Cuda 7.0 has issues with using a lamda in parallel_for to initialize the view - replace with this functor + template < class ViewType > + struct Functor { + + ViewType v; + + Functor( const ViewType & v_ ) : v(v_) {} + + KOKKOS_INLINE_FUNCTION + void operator()( const int i ) const { + v(i) = i; + } + + }; + + + static void test_vcpt( const int N0, const int N1 ) + { + + // Create views to test + { + using VIT = typename TestViewCtorProp_EmbeddedDim::ViewIntType ; + using VDT = typename TestViewCtorProp_EmbeddedDim::ViewDoubleType ; + + VIT vi1("vi1", N0, N1); + VDT vd1("vd1", N0); + + // TEST: Test for common type between two views, one with type double, other with type int + // Deduce common value_type and construct a view with that type + { + // Two views + auto view_alloc_arg = Kokkos::common_view_alloc_prop(vi1, vd1); + typedef typename decltype( view_alloc_arg )::value_type CommonViewValueType; + typedef typename Kokkos::View< CommonViewValueType*, ExecSpace > CVT; + typedef typename 
CVT::HostMirror HostCVT; + + // Construct View using the common type; for case of specialization, an 'embedded_dim' would be stored by view_alloc_arg + CVT cv1( Kokkos::view_alloc( "cv1", view_alloc_arg ), N0*N1 ); + + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace >(0, N0*N1), + Functor(cv1) + ); + + HostCVT hcv1 = Kokkos::create_mirror_view( cv1 ); + Kokkos::deep_copy( hcv1, cv1 ); + + ASSERT_EQ( (std::is_same< CommonViewValueType, double >::value) , true ) ; + #if 0 + // debug output + for ( int i = 0; i < N0*N1; ++i ) { + printf(" Output check: hcv1(%d) = %lf\n ", i, hcv1(i) ); + } + + printf( " Common value type view: %s \n", typeid( CVT() ).name() ); + printf( " Common value type: %s \n", typeid( CommonViewValueType() ).name() ); + if ( std::is_same< CommonViewValueType, double >::value == true ) { + printf("Proper common value_type\n"); + } + else { + printf("WRONG common value_type\n"); + } + // end debug output + #endif + } + + { + // Single view + auto view_alloc_arg = Kokkos::common_view_alloc_prop(vi1); + typedef typename decltype( view_alloc_arg )::value_type CommonViewValueType; + typedef typename Kokkos::View< CommonViewValueType*, ExecSpace > CVT; + typedef typename CVT::HostMirror HostCVT; + + // Construct View using the common type; for case of specialization, an 'embedded_dim' would be stored by view_alloc_arg + CVT cv1( Kokkos::view_alloc( "cv1", view_alloc_arg ), N0*N1 ); + + Kokkos::parallel_for( Kokkos::RangePolicy< ExecSpace >(0, N0*N1), + Functor(cv1) + ); + + HostCVT hcv1 = Kokkos::create_mirror_view( cv1 ); + Kokkos::deep_copy( hcv1, cv1 ); + + ASSERT_EQ( (std::is_same< CommonViewValueType, int>::value) , true ) ; + } + + } + + } // end test_vcpt + +}; // end struct + +} // namespace + +TEST_F( TEST_CATEGORY , viewctorprop_embedded_dim ) { + TestViewCtorProp_EmbeddedDim< TEST_EXECSPACE >::test_vcpt( 2, 3 ); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp 
b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp index 6830c2e049..810ae72e73 100644 --- a/lib/kokkos/core/unit_test/TestViewMapping_a.hpp +++ b/lib/kokkos/core/unit_test/TestViewMapping_a.hpp @@ -56,24 +56,24 @@ void test_view_mapping() { typedef typename Space::execution_space ExecSpace; - typedef Kokkos::Experimental::Impl::ViewDimension<> dim_0; - typedef Kokkos::Experimental::Impl::ViewDimension< 2 > dim_s2; - typedef Kokkos::Experimental::Impl::ViewDimension< 2, 3 > dim_s2_s3; - typedef Kokkos::Experimental::Impl::ViewDimension< 2, 3, 4 > dim_s2_s3_s4; + typedef Kokkos::Impl::ViewDimension<> dim_0; + typedef Kokkos::Impl::ViewDimension< 2 > dim_s2; + typedef Kokkos::Impl::ViewDimension< 2, 3 > dim_s2_s3; + typedef Kokkos::Impl::ViewDimension< 2, 3, 4 > dim_s2_s3_s4; - typedef Kokkos::Experimental::Impl::ViewDimension< 0 > dim_s0; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 3 > dim_s0_s3; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 3, 4 > dim_s0_s3_s4; + typedef Kokkos::Impl::ViewDimension< 0 > dim_s0; + typedef Kokkos::Impl::ViewDimension< 0, 3 > dim_s0_s3; + typedef Kokkos::Impl::ViewDimension< 0, 3, 4 > dim_s0_s3_s4; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0 > dim_s0_s0; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 4 > dim_s0_s0_s4; + typedef Kokkos::Impl::ViewDimension< 0, 0 > dim_s0_s0; + typedef Kokkos::Impl::ViewDimension< 0, 0, 4 > dim_s0_s0_s4; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0 > dim_s0_s0_s0; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0 > dim_s0_s0_s0_s0; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0_s0; + typedef 
Kokkos::Impl::ViewDimension< 0, 0, 0 > dim_s0_s0_s0; + typedef Kokkos::Impl::ViewDimension< 0, 0, 0, 0 > dim_s0_s0_s0_s0; + typedef Kokkos::Impl::ViewDimension< 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0; + typedef Kokkos::Impl::ViewDimension< 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0; + typedef Kokkos::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0; + typedef Kokkos::Impl::ViewDimension< 0, 0, 0, 0, 0, 0, 0, 0 > dim_s0_s0_s0_s0_s0_s0_s0_s0; // Fully static dimensions should not be larger than an int. ASSERT_LE( sizeof( dim_0 ), sizeof( int ) ); @@ -186,12 +186,12 @@ void test_view_mapping() //---------------------------------------- - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s0, Kokkos::LayoutStride > stride_s0_s0_s0; + typedef Kokkos::Impl::ViewOffset< dim_s0_s0_s0, Kokkos::LayoutStride > stride_s0_s0_s0; //---------------------------------------- // Static dimension. { - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutLeft > left_s2_s3_s4; + typedef Kokkos::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutLeft > left_s2_s3_s4; ASSERT_EQ( sizeof( left_s2_s3_s4 ), sizeof( dim_s2_s3_s4 ) ); @@ -223,7 +223,7 @@ void test_view_mapping() //---------------------------------------- // Small dimension is unpadded. 
{ - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; + typedef Kokkos::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( 2, 3, 0, 0, 0, 0, 0, 0 ) ); @@ -275,7 +275,7 @@ void test_view_mapping() constexpr int N0 = 2000; constexpr int N1 = 300; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; + typedef Kokkos::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); @@ -314,7 +314,7 @@ void test_view_mapping() //---------------------------------------- // Static dimension. { - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutRight > right_s2_s3_s4; + typedef Kokkos::Impl::ViewOffset< dim_s2_s3_s4, Kokkos::LayoutRight > right_s2_s3_s4; ASSERT_EQ( sizeof( right_s2_s3_s4 ), sizeof( dim_s2_s3_s4 ) ); @@ -350,7 +350,7 @@ void test_view_mapping() //---------------------------------------- // Small dimension is unpadded. { - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; + typedef Kokkos::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( 2, 3, 0, 0, 0, 0, 0, 0 ) ); @@ -391,7 +391,7 @@ void test_view_mapping() constexpr int N0 = 2000; constexpr int N1 = 300; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; + typedef Kokkos::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); @@ -431,18 +431,18 @@ void test_view_mapping() // Subview. 
{ // Mapping rank 4 to rank 3 - typedef Kokkos::Experimental::Impl::SubviewExtents< 4, 3 > SubviewExtents; + typedef Kokkos::Impl::SubviewExtents< 4, 3 > SubviewExtents; constexpr int N0 = 1000; constexpr int N1 = 2000; constexpr int N2 = 3000; constexpr int N3 = 4000; - Kokkos::Experimental::Impl::ViewDimension< N0, N1, N2, N3 > dim; + Kokkos::Impl::ViewDimension< N0, N1, N2, N3 > dim; SubviewExtents tmp( dim , N0 / 2 - , Kokkos::Experimental::ALL + , Kokkos::ALL , std::pair< int, int >( N2 / 4, 10 + N2 / 4 ) , Kokkos::pair< int, int >( N3 / 4, 20 + N3 / 4 ) ); @@ -469,12 +469,12 @@ void test_view_mapping() constexpr int sub_N1 = 200; constexpr int sub_N2 = 4; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; + typedef Kokkos::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutLeft > left_s0_s0_s4; left_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutLeft( N0, N1, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::Experimental::Impl::SubviewExtents< 3, 3 > + Kokkos::Impl::SubviewExtents< 3, 3 > sub( dyn_off3.m_dim , Kokkos::pair< int, int >( 0, sub_N0 ) , Kokkos::pair< int, int >( 0, sub_N1 ) @@ -509,12 +509,12 @@ void test_view_mapping() constexpr int sub_N1 = 200; constexpr int sub_N2 = 4; - typedef Kokkos::Experimental::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; + typedef Kokkos::Impl::ViewOffset< dim_s0_s0_s4, Kokkos::LayoutRight > right_s0_s0_s4; right_s0_s0_s4 dyn_off3( std::integral_constant< unsigned, sizeof( int ) >() , Kokkos::LayoutRight( N0, N1, 0, 0, 0, 0, 0, 0 ) ); - Kokkos::Experimental::Impl::SubviewExtents< 3, 3 > + Kokkos::Impl::SubviewExtents< 3, 3 > sub( dyn_off3.m_dim , Kokkos::pair< int, int >( 0, sub_N0 ) , Kokkos::pair< int, int >( 0, sub_N1 ) @@ -544,7 +544,7 @@ void test_view_mapping() //---------------------------------------- // View data analysis. 
{ - using namespace Kokkos::Experimental::Impl; + using namespace Kokkos::Impl; static_assert( rank_dynamic<>::value == 0, "" ); static_assert( rank_dynamic< 1 >::value == 0, "" ); @@ -554,7 +554,7 @@ void test_view_mapping() } { - using namespace Kokkos::Experimental::Impl; + using namespace Kokkos::Impl; typedef ViewArrayAnalysis< int[] > a_int_r1; typedef ViewArrayAnalysis< int**[4][5][6] > a_int_r5; @@ -598,7 +598,7 @@ void test_view_mapping() } { - using namespace Kokkos::Experimental::Impl; + using namespace Kokkos::Impl; typedef int t_i4[4]; @@ -616,12 +616,12 @@ void test_view_mapping() } { - using namespace Kokkos::Experimental::Impl; + using namespace Kokkos::Impl; typedef ViewDataAnalysis< const int[], void > a_const_int_r1; static_assert( std::is_same< typename a_const_int_r1::specialize, void >::value, "" ); - static_assert( std::is_same< typename a_const_int_r1::dimension, Kokkos::Experimental::Impl::ViewDimension<0> >::value, "" ); + static_assert( std::is_same< typename a_const_int_r1::dimension, Kokkos::Impl::ViewDimension<0> >::value, "" ); static_assert( std::is_same< typename a_const_int_r1::type, const int * >::value, "" ); static_assert( std::is_same< typename a_const_int_r1::value_type, const int >::value, "" ); @@ -637,7 +637,7 @@ void test_view_mapping() static_assert( std::is_same< typename a_const_int_r3::specialize, void >::value, "" ); - static_assert( std::is_same< typename a_const_int_r3::dimension, Kokkos::Experimental::Impl::ViewDimension<0, 0, 4> >::value, "" ); + static_assert( std::is_same< typename a_const_int_r3::dimension, Kokkos::Impl::ViewDimension<0, 0, 4> >::value, "" ); static_assert( std::is_same< typename a_const_int_r3::type, const int**[4] >::value, "" ); static_assert( std::is_same< typename a_const_int_r3::value_type, const int >::value, "" ); @@ -786,7 +786,7 @@ void test_view_mapping() // The execution space of the memory space must be available for view data initialization. 
if ( std::is_same< ExecSpace, typename ExecSpace::memory_space::execution_space >::value ) { - using namespace Kokkos::Experimental; + using namespace Kokkos; typedef typename ExecSpace::memory_space memory_space; typedef View< int*, memory_space > V; @@ -811,8 +811,8 @@ void test_view_mapping() { typedef Kokkos::ViewTraits< int***, Kokkos::LayoutStride, ExecSpace > traits_t; - typedef Kokkos::Experimental::Impl::ViewDimension< 0, 0, 0 > dims_t; - typedef Kokkos::Experimental::Impl::ViewOffset< dims_t, Kokkos::LayoutStride > offset_t; + typedef Kokkos::Impl::ViewDimension< 0, 0, 0 > dims_t; + typedef Kokkos::Impl::ViewOffset< dims_t, Kokkos::LayoutStride > offset_t; Kokkos::LayoutStride stride; @@ -836,8 +836,8 @@ void test_view_mapping() ASSERT_EQ( offset.span(), 60 ); ASSERT_TRUE( offset.span_is_contiguous() ); - Kokkos::Experimental::Impl::ViewMapping< traits_t, void > - v( Kokkos::Experimental::Impl::ViewCtorProp< int* >( (int*) 0 ), stride ); + Kokkos::Impl::ViewMapping< traits_t, void > + v( Kokkos::Impl::ViewCtorProp< int* >( (int*) 0 ), stride ); } { @@ -849,8 +849,8 @@ void test_view_mapping() constexpr int N1 = 11; V a( "a", N0, N1 ); - M b = Kokkos::Experimental::create_mirror( a ); - M c = Kokkos::Experimental::create_mirror_view( a ); + M b = Kokkos::create_mirror( a ); + M c = Kokkos::create_mirror_view( a ); M d; for ( int i0 = 0; i0 < N0; ++i0 ) @@ -859,8 +859,8 @@ void test_view_mapping() b( i0, i1 ) = 1 + i0 + i1 * N0; } - Kokkos::Experimental::deep_copy( a, b ); - Kokkos::Experimental::deep_copy( c, a ); + Kokkos::deep_copy( a, b ); + Kokkos::deep_copy( c, a ); for ( int i0 = 0; i0 < N0; ++i0 ) for ( int i1 = 0; i1 < N1; ++i1 ) @@ -868,7 +868,7 @@ void test_view_mapping() ASSERT_EQ( b( i0, i1 ), c( i0, i1 ) ); } - Kokkos::Experimental::resize( b, 5, 6 ); + Kokkos::resize( b, 5, 6 ); for ( int i0 = 0; i0 < 5; ++i0 ) for ( int i1 = 0; i1 < 6; ++i1 ) @@ -878,8 +878,8 @@ void test_view_mapping() ASSERT_EQ( b( i0, i1 ), val ); } - 
Kokkos::Experimental::realloc( c, 5, 6 ); - Kokkos::Experimental::realloc( d, 5, 6 ); + Kokkos::realloc( c, 5, 6 ); + Kokkos::realloc( d, 5, 6 ); ASSERT_EQ( b.dimension_0(), 5 ); ASSERT_EQ( b.dimension_1(), 6 ); @@ -889,7 +889,7 @@ void test_view_mapping() ASSERT_EQ( d.dimension_1(), 6 ); layout_type layout( 7, 8 ); - Kokkos::Experimental::resize( b, layout ); + Kokkos::resize( b, layout ); for ( int i0 = 0; i0 < 7; ++i0 ) for ( int i1 = 6; i1 < 8; ++i1 ) { @@ -909,8 +909,8 @@ void test_view_mapping() ASSERT_EQ( b( i0, i1 ), val ); } - Kokkos::Experimental::realloc( c, layout ); - Kokkos::Experimental::realloc( d, layout ); + Kokkos::realloc( c, layout ); + Kokkos::realloc( d, layout ); ASSERT_EQ( b.dimension_0(), 7 ); ASSERT_EQ( b.dimension_1(), 8 ); @@ -932,8 +932,8 @@ void test_view_mapping() const int order[] = { 1, 0 }; V a( "a", Kokkos::LayoutStride::order_dimensions( 2, order, dimensions ) ); - M b = Kokkos::Experimental::create_mirror( a ); - M c = Kokkos::Experimental::create_mirror_view( a ); + M b = Kokkos::create_mirror( a ); + M c = Kokkos::create_mirror_view( a ); M d; for ( int i0 = 0; i0 < N0; ++i0 ) @@ -942,8 +942,8 @@ void test_view_mapping() b( i0, i1 ) = 1 + i0 + i1 * N0; } - Kokkos::Experimental::deep_copy( a, b ); - Kokkos::Experimental::deep_copy( c, a ); + Kokkos::deep_copy( a, b ); + Kokkos::deep_copy( c, a ); for ( int i0 = 0; i0 < N0; ++i0 ) for ( int i1 = 0; i1 < N1; ++i1 ) @@ -954,7 +954,7 @@ void test_view_mapping() const int dimensions2[] = { 7, 8 }; const int order2[] = { 1, 0 }; layout_type layout = layout_type::order_dimensions( 2, order2, dimensions2 ); - Kokkos::Experimental::resize( b, layout ); + Kokkos::resize( b, layout ); for ( int i0 = 0; i0 < 7; ++i0 ) for ( int i1 = 0; i1 < 8; ++i1 ) @@ -964,8 +964,8 @@ void test_view_mapping() ASSERT_EQ( b( i0, i1 ), val ); } - Kokkos::Experimental::realloc( c, layout ); - Kokkos::Experimental::realloc( d, layout ); + Kokkos::realloc( c, layout ); + Kokkos::realloc( d, layout ); 
ASSERT_EQ( b.dimension_0(), 7 ); ASSERT_EQ( b.dimension_1(), 8 ); diff --git a/lib/kokkos/core/unit_test/TestViewSubview.hpp b/lib/kokkos/core/unit_test/TestViewSubview.hpp index e3a12e684e..106323492a 100644 --- a/lib/kokkos/core/unit_test/TestViewSubview.hpp +++ b/lib/kokkos/core/unit_test/TestViewSubview.hpp @@ -915,134 +915,134 @@ void test_3d_subview_5d_impl_layout() { inline void test_subview_legal_args_right() { - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, 
Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( 
Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, 
int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, 
Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair 
>::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< 
Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< 
Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, 
Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, 
Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutRight, Kokkos::LayoutRight, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); } inline void test_subview_legal_args_left() { - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, 
Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair, int, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, 
( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, int, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, 
Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, int, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, 
Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::pair, int, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::pair, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< 
Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::pair, int >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t, int >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 
0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::Impl::ALL_t, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::Impl::ALL_t, int, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, Kokkos::pair, Kokkos::pair, int, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( 
Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( 
Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 5, 0, int, int, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); - ASSERT_EQ( 1, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, 
Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); - ASSERT_EQ( 0, ( Kokkos::Experimental::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::pair >::value ) ); + ASSERT_EQ( 1, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::Impl::ALL_t, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::Impl::ALL_t, Kokkos::pair, Kokkos::pair >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::Impl::ALL_t >::value ) ); + ASSERT_EQ( 0, ( Kokkos::Impl::SubviewLegalArgsCompileTime< Kokkos::LayoutLeft, Kokkos::LayoutLeft, 3, 3, 0, Kokkos::pair, Kokkos::pair, Kokkos::pair >::value ) ); } } // namespace Impl diff --git a/lib/kokkos/core/unit_test/TestWorkGraph.hpp b/lib/kokkos/core/unit_test/TestWorkGraph.hpp new file mode 100644 index 
0000000000..70cf6b47c0 --- /dev/null +++ b/lib/kokkos/core/unit_test/TestWorkGraph.hpp @@ -0,0 +1,172 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + +#include + +namespace Test { + +namespace { + +/* This test is meant to be the WorkGraph equivalent of the Task DAG Scheduler test, + please see TestTaskScheduler.hpp for that test. + The algorithm computes the N-th fibonacci number as follows: + - Each "task" or "work item" computes the i-th fibonacci number + - If a task has (i < 2), it will record the known answer ahead of time. + - If a task has (i >= 2), it will "spawn" two more tasks to compute + the (i - 1) and (i - 2) fibonacci numbers. + We do NOT do any de-duplication of these tasks. + De-duplication would result in only (N - 2) tasks which must be run in serial. + We allow duplicates both to increase the number of tasks and to increase the + amount of available parallelism. + */ + +template< class ExecSpace > +struct TestWorkGraph { + + using MemorySpace = typename ExecSpace::memory_space; + using Policy = Kokkos::Experimental::WorkGraphPolicy; + using Graph = typename Policy::graph_type; + using RowMap = typename Graph::row_map_type; + using Entries = typename Graph::entries_type; + using Values = Kokkos::View; + + long m_input; + Graph m_graph; + Graph m_transpose; + Values m_values; + + TestWorkGraph(long arg_input):m_input(arg_input) { + form_graph(); + transpose_crs(m_transpose, m_graph); + } + + inline + long full_fibonacci( long n ) { + constexpr long mask = 0x03; + long fib[4] = { 0, 1, 1, 2 }; + for ( long i = 2; i <= n; ++i ) { + fib[ i & mask ] = fib[ ( i - 1 ) & mask ] + fib[ ( i - 2 ) & mask ]; + } + return fib[ n & mask ]; + } + + struct HostEntry { + long input; + std::int32_t parent; + }; + std::vector form_host_graph() { + std::vector g; + g.push_back({ m_input , -1 }); + for (std::int32_t i = 0; i < std::int32_t(g.size()); ++i) { + auto e = g.at(std::size_t(i)); + if (e.input < 2) continue; + /* This part of the host graph formation
is the equivalent of task spawning + in the Task DAG system. Notice how each task which is not a base case + spawns two more tasks, without any de-duplication */ + g.push_back({ e.input - 1, i }); + g.push_back({ e.input - 2, i }); + } + return g; + } + + void form_graph() { + auto hg = form_host_graph(); + m_graph.row_map = RowMap("row_map", hg.size() + 1); // row map always has one more + m_graph.entries = Entries("entries", hg.size() - 1); // all but the first have a parent + m_values = Values("values", hg.size()); + auto h_row_map = Kokkos::create_mirror_view(m_graph.row_map); + auto h_entries = Kokkos::create_mirror_view(m_graph.entries); + auto h_values = Kokkos::create_mirror_view(m_values); + h_row_map(0) = 0; + for (std::int32_t i = 0; i < std::int32_t(hg.size()); ++i) { + auto& e = hg.at(std::size_t(i)); + h_row_map(i + 1) = i; + if (e.input < 2) { + h_values(i) = e.input; + } + if (e.parent == -1) continue; + h_entries(i - 1) = e.parent; + } + Kokkos::deep_copy(m_graph.row_map, h_row_map); + Kokkos::deep_copy(m_graph.entries, h_entries); + Kokkos::deep_copy(m_values, h_values); + } + + KOKKOS_INLINE_FUNCTION + void operator()(std::int32_t i) const { + auto begin = m_transpose.row_map(i); + auto end = m_transpose.row_map(i + 1); + for (auto j = begin; j < end; ++j) { + auto k = m_transpose.entries(j); + m_values(i) += m_values( k ); + } + } + + void test_for() { + Kokkos::parallel_for(Policy(m_graph), *this); + auto h_values = Kokkos::create_mirror_view(m_values); + Kokkos::deep_copy(h_values, m_values); + ASSERT_EQ( h_values(0), full_fibonacci(m_input) ); + } + +}; + +} // anonymous namespace + +TEST_F( TEST_CATEGORY, DISABLED_workgraph_fib ) +{ + #ifdef KOKKOS_IMPL_CUDA_CLANG_WORKAROUND + int limit = 15; + #else + int limit = 27; + #endif + for ( int i = 0; i < limit; ++i) { + TestWorkGraph< TEST_EXECSPACE > f(i); + f.test_for(); + } +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/UnitTestMain.cpp 
b/lib/kokkos/core/unit_test/UnitTestMain.cpp index 4f52fc9567..a7dc7c4973 100644 --- a/lib/kokkos/core/unit_test/UnitTestMain.cpp +++ b/lib/kokkos/core/unit_test/UnitTestMain.cpp @@ -42,6 +42,7 @@ */ #include +#include int main( int argc, char *argv[] ) { ::testing::InitGoogleTest( &argc, argv ); diff --git a/lib/kokkos/core/unit_test/UnitTestMainInit.cpp b/lib/kokkos/core/unit_test/UnitTestMainInit.cpp index 21f851274b..62a01e9033 100644 --- a/lib/kokkos/core/unit_test/UnitTestMainInit.cpp +++ b/lib/kokkos/core/unit_test/UnitTestMainInit.cpp @@ -42,6 +42,8 @@ */ #include +#include + #include int main( int argc, char *argv[] ) { diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp index ba06b71192..fa6722615c 100644 --- a/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_Other.cpp @@ -48,3 +48,5 @@ #include #include #include + +#include diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_UniqueToken.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_UniqueToken.cpp new file mode 100644 index 0000000000..8424ae10d6 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_UniqueToken.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. 
Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/core/unit_test/cuda/TestCuda_WorkGraph.cpp b/lib/kokkos/core/unit_test/cuda/TestCuda_WorkGraph.cpp new file mode 100644 index 0000000000..663ca1d560 --- /dev/null +++ b/lib/kokkos/core/unit_test/cuda/TestCuda_WorkGraph.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeResize.cpp b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeResize.cpp new file mode 100644 index 0000000000..c02905535b --- /dev/null +++ b/lib/kokkos/core/unit_test/default/TestDefaultDeviceTypeResize.cpp @@ -0,0 +1,57 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include "TestResize.hpp" + +namespace Test { + +TEST( kokkosresize, host_space_access ) +{ + // Test with the default device type. + using TestViewResize::testResize; + typedef Kokkos::View::device_type device_type; + testResize (); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp index 2f8daf7ad7..c12574a65a 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP.hpp @@ -86,25 +86,26 @@ class openmp : public ::testing::Test { protected: static void SetUpTestCase() { - const unsigned numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned cores_per_numa = Kokkos::hwloc::get_available_cores_per_numa(); - const unsigned threads_per_core = Kokkos::hwloc::get_available_threads_per_core(); + int threads_count = 0; + #pragma omp parallel + { + #pragma omp atomic + ++threads_count; + } - const unsigned threads_count = std::max( 1u, numa_count ) * - std::max( 2u, ( cores_per_numa * threads_per_core ) / 2 ); + if (threads_count > 3) { + threads_count /= 2; + } Kokkos::OpenMP::initialize( threads_count ); Kokkos::print_configuration( std::cout, true ); + srand( 10231 ); } static void TearDownTestCase() { 
Kokkos::OpenMP::finalize(); - - omp_set_num_threads( 1 ); - - ASSERT_EQ( 1, omp_get_max_threads() ); } }; diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp index 5e9535638d..33e7402ce6 100644 --- a/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_Other.cpp @@ -48,3 +48,93 @@ #include #include #include + +#include + +#include + +namespace Test { + +TEST_F( openmp, partition_master ) +{ + using Mutex = Kokkos::Experimental::MasterLock; + + Mutex mtx; + int errors = 0; + + auto master = [&errors, &mtx](int partition_id, int num_partitions) { + + const int pool_size = Kokkos::OpenMP::thread_pool_size(); + + { + std::unique_lock lock(mtx); + if ( Kokkos::OpenMP::in_parallel() ) { + ++errors; + } + if ( Kokkos::OpenMP::thread_pool_rank() != 0 ) { + ++errors; + } + } + + { + int local_errors = 0; + Kokkos::parallel_reduce( Kokkos::RangePolicy(0,1000) + , [pool_size]( const int , int & errs ) { + if ( Kokkos::OpenMP::thread_pool_size() != pool_size ) { + ++errs; + } + } + , local_errors + ); + Kokkos::atomic_add( &errors, local_errors ); + } + + Kokkos::Experimental::UniqueToken< Kokkos::OpenMP > token; + + Kokkos::View count( "", token.size() ); + + Kokkos::parallel_for( Kokkos::RangePolicy(0,1000), + [=] ( const int ) { + int i = token.acquire(); + ++count[i]; + token.release(i); + }); + + Kokkos::View sum (""); + Kokkos::parallel_for( Kokkos::RangePolicy(0,token.size()), + [=] ( const int i ) { + Kokkos::atomic_add( sum.data(), count[i] ); + }); + + if (sum() != 1000) { + Kokkos::atomic_add( &errors, 1 ); + } + }; + + master(0,1); + + ASSERT_EQ( errors, 0 ); + + Kokkos::OpenMP::partition_master( master ); + ASSERT_EQ( errors, 0 ); + + Kokkos::OpenMP::partition_master( master, 4, 0 ); + ASSERT_EQ( errors, 0 ); + + Kokkos::OpenMP::partition_master( master, 0, 4 ); + ASSERT_EQ( errors, 0 ); + + Kokkos::OpenMP::partition_master( master, 2, 2 ); + 
ASSERT_EQ( errors, 0 ); + + Kokkos::OpenMP::partition_master( master, 8, 0 ); + ASSERT_EQ( errors, 0 ); + + Kokkos::OpenMP::partition_master( master, 0, 8 ); + ASSERT_EQ( errors, 0 ); + + Kokkos::OpenMP::partition_master( master, 8, 8 ); + ASSERT_EQ( errors, 0 ); +} + +} // namespace Test diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_UniqueToken.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_UniqueToken.cpp new file mode 100644 index 0000000000..143a6d9910 --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_UniqueToken.cpp @@ -0,0 +1,46 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include + diff --git a/lib/kokkos/core/unit_test/openmp/TestOpenMP_WorkGraph.cpp b/lib/kokkos/core/unit_test/openmp/TestOpenMP_WorkGraph.cpp new file mode 100644 index 0000000000..ec6fa1653c --- /dev/null +++ b/lib/kokkos/core/unit_test/openmp/TestOpenMP_WorkGraph.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. 
Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp index a6a76a03bd..bc39b1e160 100644 --- a/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp +++ b/lib/kokkos/core/unit_test/serial/TestSerial_Other.cpp @@ -48,3 +48,5 @@ #include #include #include + +#include diff --git a/lib/kokkos/core/unit_test/serial/TestSerial_WorkGraph.cpp b/lib/kokkos/core/unit_test/serial/TestSerial_WorkGraph.cpp new file mode 100644 index 0000000000..de1638de5e --- /dev/null +++ b/lib/kokkos/core/unit_test/serial/TestSerial_WorkGraph.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 
2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. 
Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp index c11155c5c0..160b37a2c8 100644 --- a/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp +++ b/lib/kokkos/core/unit_test/threads/TestThreads_Other.cpp @@ -48,3 +48,5 @@ #include #include #include + +#include diff --git a/lib/kokkos/core/unit_test/threads/TestThreads_WorkGraph.cpp b/lib/kokkos/core/unit_test/threads/TestThreads_WorkGraph.cpp new file mode 100644 index 0000000000..6b7dbb26db --- /dev/null +++ b/lib/kokkos/core/unit_test/threads/TestThreads_WorkGraph.cpp @@ -0,0 +1,45 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include +#include diff --git a/lib/kokkos/example/cmake_build/CMakeLists.txt b/lib/kokkos/example/cmake_build/CMakeLists.txt index 4e149726ee..f92c5c6513 100644 --- a/lib/kokkos/example/cmake_build/CMakeLists.txt +++ b/lib/kokkos/example/cmake_build/CMakeLists.txt @@ -40,5 +40,7 @@ list(APPEND CMAKE_CXX_FLAGS ${CMAKE_CXX_FLAGS} -O3) add_subdirectory(${Example_SOURCE_DIR}/../.. 
${Example_BINARY_DIR}/kokkos) +include_directories(${Kokkos_INCLUDE_DIRS_RET}) + add_executable(example cmake_example.cpp) target_link_libraries(example kokkos) diff --git a/lib/kokkos/example/feint/main.cpp b/lib/kokkos/example/feint/main.cpp index 616e584bf6..57a8f8fafb 100644 --- a/lib/kokkos/example/feint/main.cpp +++ b/lib/kokkos/example/feint/main.cpp @@ -69,12 +69,26 @@ int main() #if defined( KOKKOS_ENABLE_OPENMP ) { - // Use 4 cores per NUMA region, unless fewer available - const unsigned use_numa_count = Kokkos::hwloc::get_available_numa_count(); - const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() ); + int num_threads = 0; + if ( Kokkos::hwloc::available() ) { + // Use 4 cores per NUMA region, unless fewer available + const unsigned use_numa_count = Kokkos::hwloc::get_available_numa_count(); + const unsigned use_cores_per_numa = std::min( 4u , Kokkos::hwloc::get_available_cores_per_numa() ); + num_threads = use_numa_count * use_cores_per_numa; - Kokkos::OpenMP::initialize( use_numa_count * use_cores_per_numa ); + } + else { + #pragma omp parallel + { + #pragma omp atomic + ++num_threads; + } + num_threads = std::max(4, num_threads/4); + } + + + Kokkos::OpenMP::initialize( num_threads ); std::cout << "feint< OpenMP , NotUsingAtomic >" << std::endl ; Kokkos::Example::feint< Kokkos::OpenMP , false >(); diff --git a/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp b/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp index fb33aef56e..b6b8b2f5e0 100644 --- a/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp +++ b/lib/kokkos/example/global_2_local_ids/G2L_Main.cpp @@ -138,7 +138,16 @@ int main(int argc, char *argv[]) #endif #ifdef KOKKOS_ENABLE_OPENMP - Kokkos::OpenMP::initialize( threads_count ); + int num_threads = 0; + #pragma omp parallel + { + #pragma omp atomic + ++num_threads; + } + if( num_threads > 3 ) { + num_threads = std::max(4, num_threads/4); + } + Kokkos::OpenMP::initialize( num_threads ); 
num_errors += G2L::run_openmp(num_ids,num_find_iterations); Kokkos::OpenMP::finalize(); #endif diff --git a/lib/kokkos/example/grow_array/main.cpp b/lib/kokkos/example/grow_array/main.cpp index e7438a9bf4..3f1d534d93 100644 --- a/lib/kokkos/example/grow_array/main.cpp +++ b/lib/kokkos/example/grow_array/main.cpp @@ -88,7 +88,7 @@ int main( int argc , char ** argv ) #if defined( KOKKOS_ENABLE_OPENMP ) { std::cout << "Kokkos::OpenMP" << std::endl ; - Kokkos::OpenMP::initialize( num_threads , use_numa , use_core ); + Kokkos::OpenMP::initialize(); Example::grow_array< Kokkos::OpenMP >( length_array , span_values ); Kokkos::OpenMP::finalize(); } diff --git a/lib/kokkos/example/tutorial/03_simple_view/Makefile b/lib/kokkos/example/tutorial/03_simple_view/Makefile index e716b765e7..32483a2555 100644 --- a/lib/kokkos/example/tutorial/03_simple_view/Makefile +++ b/lib/kokkos/example/tutorial/03_simple_view/Makefile @@ -33,6 +33,7 @@ include $(KOKKOS_PATH)/Makefile.kokkos build: $(EXE) +#for unit testing only, for best performance with OpenMP 4.0 or better test: $(EXE) ./$(EXE) diff --git a/lib/kokkos/example/tutorial/Advanced_Views/Makefile b/lib/kokkos/example/tutorial/Advanced_Views/Makefile index bc4012f68c..12ac5652e5 100644 --- a/lib/kokkos/example/tutorial/Advanced_Views/Makefile +++ b/lib/kokkos/example/tutorial/Advanced_Views/Makefile @@ -22,100 +22,102 @@ endif build: mkdir -p 01_data_layouts cd ./01_data_layouts; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} mkdir -p 02_memory_traits cd ./02_memory_traits; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} mkdir -p 03_subviews cd ./03_subviews; \ - make build -j 4 -f
${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} mkdir -p 04_dualviews cd ./04_dualviews; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} mkdir -p 05_NVIDIA_UVM cd ./05_NVIDIA_UVM; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} #mkdir -p 06_AtomicViews #cd ./06_AtomicViews; \ - #make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} + #$(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} #mkdir -p 07_Overlapping_DeepCopy #cd ./07_Overlapping_DeepCopy; \ - #make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} + #$(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} build-insource: cd ./01_data_layouts; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} #cd ./06_AtomicViews; \ - #make build -j 4 ${KOKKOS_SETTINGS} + #$(MAKE) build ${KOKKOS_SETTINGS} #cd ./07_Overlapping_DeepCopy; \ - #make build -j 4 ${KOKKOS_SETTINGS} + #$(MAKE) build ${KOKKOS_SETTINGS} + test: cd 
./01_data_layouts; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} #cd ./06_AtomicViews; \ - #make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} + #$(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} #cd ./07_Overlapping_DeepCopy; \ - #make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} + #$(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} test-insource: cd ./01_data_layouts; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test 
${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} #cd ./06_AtomicViews; \ - #make test -j 4 ${KOKKOS_SETTINGS} + #$(MAKE) test ${KOKKOS_SETTINGS} #cd ./07_Overlapping_DeepCopy; \ - #make test -j 4 ${KOKKOS_SETTINGS} + #$(MAKE) test ${KOKKOS_SETTINGS} + clean: cd ./01_data_layouts; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/01_data_layouts/Makefile ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/02_memory_traits/Makefile ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/03_subviews/Makefile ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/04_dualviews/Makefile ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/05_NVIDIA_UVM/Makefile ${KOKKOS_SETTINGS} #cd ./06_AtomicViews; \ - #make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} + #$(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/06_AtomicViews/Makefile ${KOKKOS_SETTINGS} #cd ./07_Overlapping_DeepCopy; \ - #make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} + #$(MAKE) clean 
-f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/07_Overlapping_DeepCopy/Makefile ${KOKKOS_SETTINGS} clean-insource: cd ./01_data_layouts; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./02_memory_traits; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./03_subviews; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./04_dualviews; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./05_NVIDIA_UVM; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} #cd ./06_AtomicViews; \ - #make clean ${KOKKOS_SETTINGS} + #$(MAKE) clean ${KOKKOS_SETTINGS} #cd ./07_Overlapping_DeepCopy; \ - #make clean ${KOKKOS_SETTINGS} + #$(MAKE) clean ${KOKKOS_SETTINGS} diff --git a/lib/kokkos/example/tutorial/Algorithms/Makefile b/lib/kokkos/example/tutorial/Algorithms/Makefile index ad0b76f9d6..4e70ba7d97 100644 --- a/lib/kokkos/example/tutorial/Algorithms/Makefile +++ b/lib/kokkos/example/tutorial/Algorithms/Makefile @@ -22,22 +22,22 @@ endif build: mkdir -p 01_random_numbers cd ./01_random_numbers; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} build-insource: cd ./01_random_numbers; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} test: cd ./01_random_numbers; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} test-insource: cd ./01_random_numbers; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} clean: cd ./01_random_numbers; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f 
${KOKKOS_PATH}/example/tutorial/Algorithms/01_random_numbers/Makefile ${KOKKOS_SETTINGS} clean-insource: cd ./01_random_numbers; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} diff --git a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile index 44fdf90f8a..4bf6d487ae 100644 --- a/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile +++ b/lib/kokkos/example/tutorial/Hierarchical_Parallelism/Makefile @@ -22,74 +22,74 @@ endif build: mkdir -p 01_thread_teams cd ./01_thread_teams; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} mkdir -p 01_thread_teams_lambda cd ./01_thread_teams_lambda; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} mkdir -p 02_nested_parallel_for cd ./02_nested_parallel_for; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} mkdir -p 03_vectorization cd ./03_vectorization; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} mkdir -p 04_team_scan cd ./04_team_scan; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f 
${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} build-insource: cd ./01_thread_teams; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} test: cd ./01_thread_teams; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} test-insource: cd 
./01_thread_teams; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} clean: cd ./01_thread_teams; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams/Makefile ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/01_thread_teams_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/02_nested_parallel_for/Makefile ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/03_vectorization/Makefile ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/04_team_scan/Makefile ${KOKKOS_SETTINGS} clean-insource: cd ./01_thread_teams; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./01_thread_teams_lambda; \ - make clean ${KOKKOS_SETTINGS} + 
$(MAKE) clean ${KOKKOS_SETTINGS} cd ./02_nested_parallel_for; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./03_vectorization; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./04_team_scan; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} diff --git a/lib/kokkos/example/tutorial/Makefile b/lib/kokkos/example/tutorial/Makefile index 063ace8aab..7b2732eeed 100644 --- a/lib/kokkos/example/tutorial/Makefile +++ b/lib/kokkos/example/tutorial/Makefile @@ -23,152 +23,152 @@ endif build: mkdir -p 01_hello_world cd ./01_hello_world; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} mkdir -p 01_hello_world_lambda cd ./01_hello_world_lambda; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} mkdir -p 02_simple_reduce cd ./02_simple_reduce; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} mkdir -p 02_simple_reduce_lambda cd ./02_simple_reduce_lambda; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} mkdir -p 03_simple_view cd ./03_simple_view; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} mkdir -p 03_simple_view_lambda cd ./03_simple_view_lambda; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f 
${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} mkdir -p 04_simple_memoryspaces cd ./04_simple_memoryspaces; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} mkdir -p 05_simple_atomics cd ./05_simple_atomics; \ - make build -j 4 -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} mkdir -p Advanced_Views cd ./Advanced_Views; \ - make build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' mkdir -p Algorithms cd ./Algorithms; \ - make build -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' mkdir -p Hierarchical_Parallelism cd ./Hierarchical_Parallelism; \ - make build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) build -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' build-insource: cd ./01_hello_world; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make build -j 4 
${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make build -j 4 ${KOKKOS_SETTINGS} + $(MAKE) build ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) build KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' test: cd ./01_hello_world; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} cd 
./04_simple_memoryspaces; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make test -j 4 -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make test -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) test -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' test-insource: cd ./01_hello_world; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make test -j 4 ${KOKKOS_SETTINGS} + $(MAKE) test ${KOKKOS_SETTINGS} cd 
./Advanced_Views; \ - make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) test KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' clean: cd ./01_hello_world; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world/Makefile ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/01_hello_world_lambda/Makefile ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce/Makefile ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/02_simple_reduce_lambda/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view/Makefile ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/03_simple_view_lambda/Makefile ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/04_simple_memoryspaces/Makefile ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make clean -f 
${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/05_simple_atomics/Makefile ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Advanced_Views/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Algorithms/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) clean -f ${KOKKOS_PATH}/example/tutorial/Hierarchical_Parallelism/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' clean-insource: cd ./01_hello_world; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./01_hello_world_lambda; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./02_simple_reduce; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./02_simple_reduce_lambda; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./03_simple_view; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./03_simple_view_lambda; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./04_simple_memoryspaces; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./05_simple_atomics; \ - make clean ${KOKKOS_SETTINGS} + $(MAKE) clean ${KOKKOS_SETTINGS} cd ./Advanced_Views; \ - make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Algorithms; \ - make clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' cd ./Hierarchical_Parallelism; \ - make clean 
KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' + $(MAKE) clean KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' diff --git a/lib/kokkos/example/tutorial/launch_bounds/CMakeLists.txt b/lib/kokkos/example/tutorial/launch_bounds/CMakeLists.txt new file mode 100644 index 0000000000..7c78db840f --- /dev/null +++ b/lib/kokkos/example/tutorial/launch_bounds/CMakeLists.txt @@ -0,0 +1,10 @@ + +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR}) +INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) + +# This is a tutorial, not a test, so we don't ask CTest to run it. +TRIBITS_ADD_EXECUTABLE( + tutorial_launch_bounds_reduce + SOURCES launch_bounds_reduce.cpp + COMM serial mpi + ) diff --git a/lib/kokkos/example/tutorial/launch_bounds/Makefile b/lib/kokkos/example/tutorial/launch_bounds/Makefile new file mode 100644 index 0000000000..5b605a4119 --- /dev/null +++ b/lib/kokkos/example/tutorial/launch_bounds/Makefile @@ -0,0 +1,56 @@ +KOKKOS_PATH = ../../.. +KOKKOS_SRC_PATH = ${KOKKOS_PATH} +SRC = $(wildcard ${KOKKOS_SRC_PATH}/example/tutorial/launch_bounds/*.cpp) +vpath %.cpp $(sort $(dir $(SRC))) + +default: build + echo "Start Build" + +ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES))) +CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = launch_bounds.cuda +KOKKOS_DEVICES = "Cuda,OpenMP" +KOKKOS_ARCH = "SNB,Kepler35" +else +CXX = g++ +CXXFLAGS = -O3 +LINK = ${CXX} +LINKFLAGS = +EXE = launch_bounds.host +KOKKOS_DEVICES = "OpenMP" +KOKKOS_ARCH = "SNB" +endif + +# WAR for "undefined memcpy" w/ Ubuntu + CUDA 7.5 +CXXFLAGS += -D_FORCE_INLINES +# Additional compile-time information +CXXFLAGS += -Xptxas=-v + +DEPFLAGS = -M + +OBJ = $(notdir $(SRC:.cpp=.o)) +LIB = + +include $(KOKKOS_PATH)/Makefile.kokkos + +temp: + echo $(KOKKOS_INTERNAL_USE_CUDA) $(CUDA_PATH) + +build: $(EXE) + +test: $(EXE) + ./$(EXE) + +$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS) + $(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE) + +clean: kokkos-clean + rm -f *.o *.cuda *.host + +#
Compilation rules + +%.o:%.cpp $(KOKKOS_CPP_DEPENDS) + $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $< -o $(notdir $@) diff --git a/lib/kokkos/example/tutorial/launch_bounds/launch_bounds_reduce.cpp b/lib/kokkos/example/tutorial/launch_bounds/launch_bounds_reduce.cpp new file mode 100644 index 0000000000..9a26eda507 --- /dev/null +++ b/lib/kokkos/example/tutorial/launch_bounds/launch_bounds_reduce.cpp @@ -0,0 +1,173 @@ +/* +//@HEADER +// ************************************************************************ +// +// Kokkos v. 2.0 +// Copyright (2014) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov) +// +// ************************************************************************ +//@HEADER +*/ + +#include <Kokkos_Core.hpp> +#include <cstdio> + +// +// First reduction (parallel_reduce) example: +// 1. Start up Kokkos +// 2. Execute a parallel_reduce loop in the default execution space, +// using a functor to define the loop body +// 3. Shut down Kokkos +// +struct collision { +// Reduction functor +// For each i, we generate 10 hashes, look for and count collisions +// We use parallel_reduce to count the total collisions +// Note that we're just counting collisions within the 10 hashes generated +// for one i. +// This function was chosen as one that very simply can increase the +// register count.
+ typedef int value_type; + + KOKKOS_INLINE_FUNCTION + int hash(int q) const { + // A simple hash by Justin Sobel + // Thanks to Arash Partow (partow.net) + char* fourchars = (char*)&q; + int hash = 1315423911; + for (int i=0; i<4; fourchars++, i++) { + hash ^= ((hash<<5) + *fourchars + (hash >> 2)); + } + return hash; + } + + KOKKOS_INLINE_FUNCTION + void operator () (const int i, int& lsum) const { + //This is a silly function which generates 10 hashes + // then checks for collisions + int a = hash(i)%64; + int b = hash(i*3)%64; + int c = hash(i*5)%64; + int d = hash(i*7)%64; + int e = hash(i*11)%64; + int f = hash(i*17)%64; + int g = hash(i*23)%64; + int h = hash(i*29)%64; + int j = hash(i*31)%64; + int k = hash(i*37)%64; + + + if (a==b) lsum++; + if (a==c) lsum++; + if (a==d) lsum++; + if (a==e) lsum++; + if (a==f) lsum++; + if (a==g) lsum++; + if (a==h) lsum++; + if (a==j) lsum++; + if (a==k) lsum++; + if (b==c) lsum++; + if (b==d) lsum++; + if (b==e) lsum++; + if (b==f) lsum++; + if (b==g) lsum++; + if (b==h) lsum++; + if (b==j) lsum++; + if (b==k) lsum++; + if (c==d) lsum++; + if (c==e) lsum++; + if (c==f) lsum++; + if (c==g) lsum++; + if (c==h) lsum++; + if (c==j) lsum++; + if (c==k) lsum++; + if (d==e) lsum++; + if (d==f) lsum++; + if (d==g) lsum++; + if (d==h) lsum++; + if (d==j) lsum++; + if (d==k) lsum++; + if (e==f) lsum++; + if (e==g) lsum++; + if (e==h) lsum++; + if (e==j) lsum++; + if (e==k) lsum++; + if (f==g) lsum++; + if (f==h) lsum++; + if (f==j) lsum++; + if (f==k) lsum++; + if (g==h) lsum++; + if (g==j) lsum++; + if (g==k) lsum++; + if (h==j) lsum++; + if (h==k) lsum++; + if (j==k) lsum++; + } + + + +}; + +int main (int argc, char* argv[]) { + Kokkos::initialize (argc, argv); + const int n = 10000; + + // Compute and count hash collisions in + // parallel, using Kokkos.
+ // This is not really a useful algorithm, but it demonstrates the + // LaunchBounds functionality + int sum1 = 0; + int sum2 = 0; + + //Without LaunchBounds, the kernel uses 56 registers + Kokkos::parallel_reduce (n, collision (), sum1); + + //With LaunchBounds, we can reduce the register usage to 32 + Kokkos::parallel_reduce (Kokkos::RangePolicy<Kokkos::LaunchBounds<512,4>>(0,n), collision (), sum2); + + printf ("Number of collisions, " + "computed in parallel, is %i\n", sum1); + + if (sum1 != sum2) { + printf( "Uh-oh! Results do not match\n"); + return -1; + } + + Kokkos::finalize(); + + + return 0; +} + diff --git a/lib/kokkos/generate_makefile.bash b/lib/kokkos/generate_makefile.bash index 5f2442102d..6d636dc7e4 100755 --- a/lib/kokkos/generate_makefile.bash +++ b/lib/kokkos/generate_makefile.bash @@ -1,7 +1,6 @@ #!/bin/bash KOKKOS_DEVICES="" -MAKE_J_OPTION="32" KOKKOS_DO_EXAMPLES="1" @@ -70,7 +69,8 @@ do KOKKOS_DEBUG=yes ;; --make-j*) - MAKE_J_OPTION="${key#*=}" + echo "Warning: ${key} is deprecated" + echo "Call make with appropriate -j flag" ;; --no-examples) KOKKOS_DO_EXAMPLES="0" @@ -110,23 +110,34 @@ do echo "--with-devices: Explicitly add a set of backends." echo "" echo "--arch=[OPT]: Set target architectures.
Options are:" + echo " [AMD]" + echo " AMDAVX = AMD CPU" + echo " [ARM]" echo " ARMv80 = ARMv8.0 Compatible CPU" echo " ARMv81 = ARMv8.1 Compatible CPU" echo " ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU" + echo " [IBM]" + echo " Power8 = IBM POWER8 CPUs" + echo " Power9 = IBM POWER9 CPUs" + echo " [Intel]" + echo " WSM = Intel Westmere CPUs" echo " SNB = Intel Sandy/Ivy Bridge CPUs" echo " HSW = Intel Haswell CPUs" echo " BDW = Intel Broadwell Xeon E-class CPUs" echo " SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)" + echo " [Intel Xeon Phi]" echo " KNC = Intel Knights Corner Xeon Phi" echo " KNL = Intel Knights Landing Xeon Phi" + echo " [NVIDIA]" echo " Kepler30 = NVIDIA Kepler generation CC 3.0" + echo " Kepler32 = NVIDIA Kepler generation CC 3.2" echo " Kepler35 = NVIDIA Kepler generation CC 3.5" echo " Kepler37 = NVIDIA Kepler generation CC 3.7" + echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" + echo " Maxwell52 = NVIDIA Maxwell generation CC 5.2" + echo " Maxwell53 = NVIDIA Maxwell generation CC 5.3" echo " Pascal60 = NVIDIA Pascal generation CC 6.0" echo " Pascal61 = NVIDIA Pascal generation CC 6.1" - echo " Maxwell50 = NVIDIA Maxwell generation CC 5.0" - echo " Power8 = IBM POWER8 CPUs" - echo " Power9 = IBM POWER9 CPUs" echo "" echo "--compiler=/Path/To/Compiler Set the compiler." echo "--debug,-dbg: Enable Debugging." @@ -142,10 +153,14 @@ do echo " tests.)" echo "--with-hwloc=/Path/To/Hwloc: Set path to hwloc." echo "--with-options=[OPT]: Additional options to Kokkos:" + echo " compiler_warnings" echo " aggressive_vectorization = add ivdep on loops" + echo " disable_profiling = do not compile with profiling hooks" + echo " " echo "--with-cuda-options=[OPT]: Additional options to CUDA:" echo " force_uvm, use_ldg, enable_lambda, rdc" - echo "--make-j=[NUM]: Set -j flag used during build." 
+ echo "--make-j=[NUM]: DEPRECATED: call make with appropriate" + echo " -j flag" exit 0 ;; *) @@ -237,27 +252,27 @@ else KOKKOS_INSTALL_PATH=${KOKKOS_TEST_INSTALL_PATH} fi -mkdir install +mkdir -p install echo "#Makefile to satisfy existens of target kokkos-clean before installing the library" > install/Makefile.kokkos echo "kokkos-clean:" >> install/Makefile.kokkos echo "" >> install/Makefile.kokkos -mkdir core -mkdir core/unit_test -mkdir core/perf_test -mkdir containers -mkdir containers/unit_tests -mkdir containers/performance_tests -mkdir algorithms -mkdir algorithms/unit_tests -mkdir algorithms/performance_tests -mkdir example -mkdir example/fixture -mkdir example/feint -mkdir example/fenl -mkdir example/tutorial +mkdir -p core +mkdir -p core/unit_test +mkdir -p core/perf_test +mkdir -p containers +mkdir -p containers/unit_tests +mkdir -p containers/performance_tests +mkdir -p algorithms +mkdir -p algorithms/unit_tests +mkdir -p algorithms/performance_tests +mkdir -p example +mkdir -p example/fixture +mkdir -p example/feint +mkdir -p example/fenl +mkdir -p example/tutorial if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then - mkdir example/ichol + mkdir -p example/ichol fi KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}" @@ -266,115 +281,115 @@ KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}" echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/unit_test/Makefile echo "" >> core/unit_test/Makefile echo "all:" >> core/unit_test/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS}" >> core/unit_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS}" >> core/unit_test/Makefile echo "" >> core/unit_test/Makefile echo "test: all" >> core/unit_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} test" >> core/unit_test/Makefile +echo -e "\t\$(MAKE) -f 
${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} test" >> core/unit_test/Makefile echo "" >> core/unit_test/Makefile echo "clean:" >> core/unit_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/unit_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/unit_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/unit_test/Makefile echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > core/perf_test/Makefile echo "" >> core/perf_test/Makefile echo "all:" >> core/perf_test/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS}" >> core/perf_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS}" >> core/perf_test/Makefile echo "" >> core/perf_test/Makefile echo "test: all" >> core/perf_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} test" >> core/perf_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} test" >> core/perf_test/Makefile echo "" >> core/perf_test/Makefile echo "clean:" >> core/perf_test/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/perf_test/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/perf_test/Makefile ${KOKKOS_SETTINGS} clean" >> core/perf_test/Makefile echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/unit_tests/Makefile echo "" >> containers/unit_tests/Makefile echo "all:" >> containers/unit_tests/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/unit_tests/Makefile echo "" >> containers/unit_tests/Makefile echo "test: all" >> containers/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> 
containers/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/unit_tests/Makefile echo "" >> containers/unit_tests/Makefile echo "clean:" >> containers/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/unit_tests/Makefile echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > containers/performance_tests/Makefile echo "" >> containers/performance_tests/Makefile echo "all:" >> containers/performance_tests/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/performance_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS}" >> containers/performance_tests/Makefile echo "" >> containers/performance_tests/Makefile echo "test: all" >> containers/performance_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/performance_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} test" >> containers/performance_tests/Makefile echo "" >> containers/performance_tests/Makefile echo "clean:" >> containers/performance_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/performance_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/containers/performance_tests/Makefile ${KOKKOS_SETTINGS} clean" >> containers/performance_tests/Makefile echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > algorithms/unit_tests/Makefile echo "" >> algorithms/unit_tests/Makefile echo "all:" >> algorithms/unit_tests/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f 
${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> algorithms/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS}" >> algorithms/unit_tests/Makefile echo "" >> algorithms/unit_tests/Makefile echo "test: all" >> algorithms/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> algorithms/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} test" >> algorithms/unit_tests/Makefile echo "" >> algorithms/unit_tests/Makefile echo "clean:" >> algorithms/unit_tests/Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> algorithms/unit_tests/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/algorithms/unit_tests/Makefile ${KOKKOS_SETTINGS} clean" >> algorithms/unit_tests/Makefile KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_TEST_INSTALL_PATH}" echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fixture/Makefile echo "" >> example/fixture/Makefile echo "all:" >> example/fixture/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS}" >> example/fixture/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS}" >> example/fixture/Makefile echo "" >> example/fixture/Makefile echo "test: all" >> example/fixture/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} test" >> example/fixture/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} test" >> example/fixture/Makefile echo "" >> example/fixture/Makefile echo "clean:" >> example/fixture/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} clean" >> example/fixture/Makefile +echo -e "\t\$(MAKE) -f 
${KOKKOS_PATH}/example/fixture/Makefile ${KOKKOS_SETTINGS} clean" >> example/fixture/Makefile echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/feint/Makefile echo "" >> example/feint/Makefile echo "all:" >> example/feint/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS}" >> example/feint/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS}" >> example/feint/Makefile echo "" >> example/feint/Makefile echo "test: all" >> example/feint/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} test" >> example/feint/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} test" >> example/feint/Makefile echo "" >> example/feint/Makefile echo "clean:" >> example/feint/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} clean" >> example/feint/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/feint/Makefile ${KOKKOS_SETTINGS} clean" >> example/feint/Makefile echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/fenl/Makefile echo "" >> example/fenl/Makefile echo "all:" >> example/fenl/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS}" >> example/fenl/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS}" >> example/fenl/Makefile echo "" >> example/fenl/Makefile echo "test: all" >> example/fenl/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} test" >> example/fenl/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} test" >> example/fenl/Makefile echo "" >> example/fenl/Makefile echo "clean:" >> example/fenl/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} clean" >> example/fenl/Makefile +echo -e "\t\$(MAKE) 
-f ${KOKKOS_PATH}/example/fenl/Makefile ${KOKKOS_SETTINGS} clean" >> example/fenl/Makefile echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/tutorial/Makefile echo "" >> example/tutorial/Makefile echo "build:" >> example/tutorial/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} build">> example/tutorial/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} build">> example/tutorial/Makefile echo "" >> example/tutorial/Makefile echo "test: build" >> example/tutorial/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} test" >> example/tutorial/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} test" >> example/tutorial/Makefile echo "" >> example/tutorial/Makefile echo "clean:" >> example/tutorial/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} clean" >> example/tutorial/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/tutorial/Makefile KOKKOS_SETTINGS='${KOKKOS_SETTINGS}' KOKKOS_PATH=${KOKKOS_PATH} clean" >> example/tutorial/Makefile if [ ${#KOKKOS_ENABLE_EXAMPLE_ICHOL} -gt 0 ]; then echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > example/ichol/Makefile echo "" >> example/ichol/Makefile echo "all:" >> example/ichol/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS}" >> example/ichol/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS}" >> example/ichol/Makefile echo "" >> example/ichol/Makefile echo "test: all" >> example/ichol/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f 
${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} test" >> example/ichol/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} test" >> example/ichol/Makefile echo "" >> example/ichol/Makefile echo "clean:" >> example/ichol/Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} clean" >> example/ichol/Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/example/ichol/Makefile ${KOKKOS_SETTINGS} clean" >> example/ichol/Makefile fi KOKKOS_SETTINGS="${KOKKOS_SETTINGS_NO_KOKKOS_PATH} KOKKOS_PATH=${KOKKOS_PATH}" @@ -385,62 +400,64 @@ echo "KOKKOS_SETTINGS=${KOKKOS_SETTINGS}" > Makefile echo "" >> Makefile echo "kokkoslib:" >> Makefile echo -e "\tcd core; \\" >> Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} build-lib" >> Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} build-lib" >> Makefile echo "" >> Makefile echo "install: kokkoslib" >> Makefile echo -e "\tcd core; \\" >> Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} install" >> Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_INSTALL_PATH} install" >> Makefile echo "" >> Makefile echo "kokkoslib-test:" >> Makefile echo -e "\tcd core; \\" >> Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_TEST_INSTALL_PATH} build-lib" >> Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_TEST_INSTALL_PATH} build-lib" >> Makefile echo "" >> Makefile echo "install-test: kokkoslib-test" >> Makefile echo -e "\tcd core; \\" >> Makefile -echo -e "\tmake -j ${MAKE_J_OPTION} -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} 
PREFIX=${KOKKOS_TEST_INSTALL_PATH} install" >> Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} PREFIX=${KOKKOS_TEST_INSTALL_PATH} install" >> Makefile echo "" >> Makefile echo "build-test: install-test" >> Makefile -echo -e "\tmake -C core/unit_test" >> Makefile -echo -e "\tmake -C core/perf_test" >> Makefile -echo -e "\tmake -C containers/unit_tests" >> Makefile -echo -e "\tmake -C containers/performance_tests" >> Makefile -echo -e "\tmake -C algorithms/unit_tests" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test" >> Makefile +echo -e "\t\$(MAKE) -C core/perf_test" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests" >> Makefile +echo -e "\t\$(MAKE) -C containers/performance_tests" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests" >> Makefile if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then -echo -e "\tmake -C example/fixture" >> Makefile -echo -e "\tmake -C example/feint" >> Makefile -echo -e "\tmake -C example/fenl" >> Makefile -echo -e "\tmake -C example/tutorial build" >> Makefile +echo -e "\t\$(MAKE) -C example/fixture" >> Makefile +echo -e "\t\$(MAKE) -C example/feint" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial build" >> Makefile fi echo "" >> Makefile echo "test: build-test" >> Makefile -echo -e "\tmake -C core/unit_test test" >> Makefile -echo -e "\tmake -C core/perf_test test" >> Makefile -echo -e "\tmake -C containers/unit_tests test" >> Makefile -echo -e "\tmake -C containers/performance_tests test" >> Makefile -echo -e "\tmake -C algorithms/unit_tests test" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile +echo -e "\t\$(MAKE) -C core/perf_test test" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests test" >> Makefile +echo -e "\t\$(MAKE) -C containers/performance_tests test" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests test" >> Makefile if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then -echo -e "\tmake -C 
example/fixture test" >> Makefile -echo -e "\tmake -C example/feint test" >> Makefile -echo -e "\tmake -C example/fenl test" >> Makefile -echo -e "\tmake -C example/tutorial test" >> Makefile +echo -e "\t\$(MAKE) -C example/fixture test" >> Makefile +echo -e "\t\$(MAKE) -C example/feint test" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl test" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial test" >> Makefile fi echo "" >> Makefile echo "unit-tests-only:" >> Makefile -echo -e "\tmake -C core/unit_test test" >> Makefile -echo -e "\tmake -C containers/unit_tests test" >> Makefile -echo -e "\tmake -C algorithms/unit_tests test" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test test" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests test" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests test" >> Makefile echo "" >> Makefile + echo "clean:" >> Makefile -echo -e "\tmake -C core/unit_test clean" >> Makefile -echo -e "\tmake -C core/perf_test clean" >> Makefile -echo -e "\tmake -C containers/unit_tests clean" >> Makefile -echo -e "\tmake -C containers/performance_tests clean" >> Makefile -echo -e "\tmake -C algorithms/unit_tests clean" >> Makefile +echo -e "\t\$(MAKE) -C core/unit_test clean" >> Makefile +echo -e "\t\$(MAKE) -C core/perf_test clean" >> Makefile +echo -e "\t\$(MAKE) -C containers/unit_tests clean" >> Makefile +echo -e "\t\$(MAKE) -C containers/performance_tests clean" >> Makefile +echo -e "\t\$(MAKE) -C algorithms/unit_tests clean" >> Makefile if [ ${KOKKOS_DO_EXAMPLES} -gt 0 ]; then -echo -e "\tmake -C example/fixture clean" >> Makefile -echo -e "\tmake -C example/feint clean" >> Makefile -echo -e "\tmake -C example/fenl clean" >> Makefile -echo -e "\tmake -C example/tutorial clean" >> Makefile +echo -e "\t\$(MAKE) -C example/fixture clean" >> Makefile +echo -e "\t\$(MAKE) -C example/feint clean" >> Makefile +echo -e "\t\$(MAKE) -C example/fenl clean" >> Makefile +echo -e "\t\$(MAKE) -C example/tutorial clean" >> Makefile fi echo 
-e "\tcd core; \\" >> Makefile -echo -e "\tmake -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} clean" >> Makefile +echo -e "\t\$(MAKE) -f ${KOKKOS_PATH}/core/src/Makefile ${KOKKOS_SETTINGS} clean" >> Makefile + diff --git a/lib/linalg/Install.py b/lib/linalg/Install.py deleted file mode 100644 index c7076ca52f..0000000000 --- a/lib/linalg/Install.py +++ /dev/null @@ -1,52 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do build of the linear algebra library -# used to automate the steps described in the README file in this dir - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# make the library - -print "Building lib%s.a ..." 
% lib -cmd = "make -f Makefile.%s clean; make -f Makefile.%s" % (machine,machine) -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) diff --git a/lib/linalg/Install.py b/lib/linalg/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/linalg/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/linalg/Makefile.gfortran b/lib/linalg/Makefile.gfortran index 89b7f2d7a0..7e1d97a5bc 100644 --- a/lib/linalg/Makefile.gfortran +++ b/lib/linalg/Makefile.gfortran @@ -18,10 +18,8 @@ OBJ = $(SRC:.f=.o) # ------ SETTINGS ------ FC = gfortran -FFLAGS = -O3 -fPIC -march=native -mpc64 \ - -ffast-math -funroll-loops -fstrict-aliasing -Wall -W -Wno-uninitialized -fno-second-underscore -FFLAGS0 = -O0 -fPIC -march=native -mpc64 \ - -Wall -W -Wno-uninitialized -fno-second-underscore +FFLAGS = -O3 -fPIC -ffast-math -fstrict-aliasing -fno-second-underscore +FFLAGS0 = -O0 -fPIC -fno-second-underscore ARCHIVE = ar AR = ar ARCHFLAG = -rcs @@ -47,7 +45,7 @@ dlamch.o: dlamch.f # ------ CLEAN ------ clean: - -rm *.o *.mod *~ $(LIB) + -rm -f *.o *.mod *~ $(LIB) tar: -tar -czvf ../linalg.tar.gz $(FILES) diff --git a/lib/linalg/Makefile.mingw32-cross b/lib/linalg/Makefile.mingw32-cross deleted file mode 100644 index 02aa3f71a3..0000000000 --- a/lib/linalg/Makefile.mingw32-cross +++ /dev/null @@ -1,67 +0,0 @@ -# -*- makefile -*- -# *_________________________________________________________________________* -# * Minimal BLAS/LAPACK Library for use by other LAMMPS packages - -SHELL = /bin/sh - -# ------ FILES ------ - -SRC = $(wildcard *.f) - -FILES = $(SRC) Makefile.* README - -# ------ DEFINITIONS ------ - -DIR = Obj_mingw32/ -LIB = $(DIR)liblinalg.a -OBJ = $(SRC:%.f=$(DIR)%.o) - -# ------ SETTINGS ------ - -FC = i686-w64-mingw32-gfortran -FFLAGS = -O3 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - 
-ffast-math -funroll-loops -fstrict-aliasing -Wall -W \ - -Wno-uninitialized -fno-second-underscore -FFLAGS0 = -O0 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - -Wall -W -Wno-uninitialized -fno-second-underscore -ARCHIVE = i686-w64-mingw32-ar -AR = i686-w64-mingw32-ar -ARCHFLAG = -rcs -USRLIB = -SYSLIB = - -.PHONY: default clean tar - -.SUFFIXES: -.SUFFIXES: .F .f .o - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - -$(DIR): - mkdir $(DIR) - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.F - $(F90) $(F90FLAGS) -c $< -o $@ - -$(DIR)%.o:%.f - $(FC) $(FFLAGS) -c $< -o $@ - -$(DIR)dlamch.o: dlamch.f - $(FC) $(FFLAGS0) -c $< -o $@ - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.mod *~ $(LIB) - -rmdir $(DIR) - -tar: - -tar -czvf ../linalg.tar.gz $(FILES) - diff --git a/lib/linalg/Makefile.mingw32-cross-mpi b/lib/linalg/Makefile.mingw32-cross-mpi deleted file mode 100644 index 1e35c5b461..0000000000 --- a/lib/linalg/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw32-cross - -rm -f Obj_mingw32-mpi - ln -s Obj_mingw32 Obj_mingw32-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw32-cross - -rm -f Obj_mingw32-mpi - diff --git a/lib/linalg/Makefile.mingw64-cross b/lib/linalg/Makefile.mingw64-cross deleted file mode 100644 index ee6eef819b..0000000000 --- a/lib/linalg/Makefile.mingw64-cross +++ /dev/null @@ -1,67 +0,0 @@ -# -*- makefile -*- -# *_________________________________________________________________________* -# * Minimal BLAS/LAPACK Library for use by other LAMMPS packages - -SHELL = /bin/sh - -# ------ FILES ------ - -SRC = $(wildcard *.f) - -FILES = $(SRC) Makefile.* README - -# ------ DEFINITIONS ------ - -DIR = Obj_mingw64/ -LIB = $(DIR)liblinalg.a -OBJ = $(SRC:%.f=$(DIR)%.o) - -# ------ SETTINGS ------ - -FC = x86_64-w64-mingw32-gfortran -FFLAGS = -O3 -march=core2 
-mtune=generic -msse2 -mpc64 \ - -ffast-math -funroll-loops -fstrict-aliasing -Wall -W \ - -Wno-uninitialized -fno-second-underscore -FFLAGS0 = -O0 -march=core2 -mtune=generic -msse2 -mpc64 \ - -Wall -W -Wno-uninitialized -fno-second-underscore -ARCHIVE = x86_64-w64-mingw32-ar -AR = x86_64-w64-mingw32-ar -ARCHFLAG = -rcs -USRLIB = -SYSLIB = - -.PHONY: default clean tar - -.SUFFIXES: -.SUFFIXES: .F .f .o - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - -$(DIR): - mkdir $(DIR) - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.F - $(F90) $(F90FLAGS) -c $< -o $@ - -$(DIR)%.o:%.f - $(FC) $(FFLAGS) -c $< -o $@ - -$(DIR)dlamch.o: dlamch.f - $(FC) $(FFLAGS0) -c $< -o $@ - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.mod *~ $(LIB) - -rmdir $(DIR) - -tar: - -tar -czvf ../linalg.tar.gz $(FILES) - diff --git a/lib/linalg/Makefile.mingw64-cross-mpi b/lib/linalg/Makefile.mingw64-cross-mpi deleted file mode 100644 index ca6f4a6d43..0000000000 --- a/lib/linalg/Makefile.mingw64-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw64-cross - -rm -f Obj_mingw64-mpi - ln -s Obj_mingw64 Obj_mingw64-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw64-cross - -rm -f Obj_mingw64-mpi - diff --git a/lib/linalg/Makefile.mpi b/lib/linalg/Makefile.mpi new file mode 100644 index 0000000000..dd22ff134c --- /dev/null +++ b/lib/linalg/Makefile.mpi @@ -0,0 +1,52 @@ +# -*- makefile -*- +# *_________________________________________________________________________* +# * Minimal BLAS/LAPACK Library for use by other LAMMPS packages + +SHELL = /bin/sh + +# ------ FILES ------ + +SRC = $(wildcard *.f) + +FILES = $(SRC) Makefile.* README + +# ------ DEFINITIONS ------ + +LIB = liblinalg.a +OBJ = $(SRC:.f=.o) + +# ------ SETTINGS ------ + +FC = mpifort +FFLAGS = -O3 -fPIC +FFLAGS0 = -O0 -fPIC +ARCHIVE = ar +AR = ar +ARCHFLAG = -rcs +USRLIB = 
+SYSLIB = + +# ------ MAKE PROCEDURE ------ + +lib: $(OBJ) + $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) + +# ------ COMPILE RULES ------ + +%.o:%.F + $(F90) $(F90FLAGS) -c $< + +%.o:%.f + $(FC) $(FFLAGS) -c $< + +dlamch.o: dlamch.f + $(FC) $(FFLAGS0) -c $< + +# ------ CLEAN ------ + +clean: + -rm -f *.o *.mod *~ $(LIB) + +tar: + -tar -czvf ../linalg.tar.gz $(FILES) + diff --git a/lib/linalg/Makefile.serial b/lib/linalg/Makefile.serial new file mode 120000 index 0000000000..c52fbcb986 --- /dev/null +++ b/lib/linalg/Makefile.serial @@ -0,0 +1 @@ +Makefile.gfortran \ No newline at end of file diff --git a/lib/meam/Install.py b/lib/meam/Install.py deleted file mode 100644 index 18b426f928..0000000000 --- a/lib/meam/Install.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file - -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix - does not alter existing Makefile.machine -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None -extraflag = 0 - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - elif args[iarg] == "-e": - if iarg+2 > nargs: error() - extraflag = 1 - suffix = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested 
- -if not os.path.exists("Makefile.%s" % machine): - error("lib/%s/Makefile.%s does not exist" % (lib,machine)) - -lines = open("Makefile.%s" % machine,'r').readlines() -fp = open("Makefile.auto",'w') - -for line in lines: - words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - -fp.close() - -# make the library via Makefile.auto - -print "Building lib%s.a ..." % lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/meam/Install.py b/lib/meam/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/meam/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/atc/Makefile.lammps b/lib/meam/Makefile.lammps.empty similarity index 52% rename from lib/atc/Makefile.lammps rename to lib/meam/Makefile.lammps.empty index c8cd66af26..10394b68ad 100644 --- a/lib/atc/Makefile.lammps +++ b/lib/meam/Makefile.lammps.empty @@ -1,5 +1,5 @@ # Settings that the LAMMPS build will import when this package library is used -user-atc_SYSINC = -user-atc_SYSLIB = -lblas -llapack -user-atc_SYSPATH = +meam_SYSINC = +meam_SYSLIB = +meam_SYSPATH = diff --git a/lib/meam/Makefile.mingw32-cross b/lib/meam/Makefile.mingw32-cross deleted file mode 100644 index d4d2dad093..0000000000 --- a/lib/meam/Makefile.mingw32-cross +++ /dev/null @@ -1,69 +0,0 @@ -# * -*- makefile -*- -# *_________________________________________________________________________* -# * MEAM: MODEFIED EMBEDDED ATOM METHOD * -# * DESCRIPTION: SEE READ-ME * -# * FILE NAME: Makefile * -# * AUTHORS: Greg Wagner, Sandia National Laboratories * -# 
* CONTACT: gjwagne@sandia.gov * -# *_________________________________________________________________________*/ - -SHELL = /bin/sh - -# which file will be copied to Makefile.lammps - -EXTRAMAKE = Makefile.lammps.gfortran - -# ------ FILES ------ - -SRC = meam_data.F meam_setup_done.F meam_setup_global.F meam_setup_param.F meam_dens_init.F meam_dens_final.F meam_force.F meam_cleanup.F - -FILES = $(SRC) Makefile - -# ------ DEFINITIONS ------ - -DIR = Obj_mingw32/ -LIB = $(DIR)libmeam.a -OBJ = $(SRC:%.F=$(DIR)%.o) $(DIR)fm_exp.o - -# ------ SETTINGS ------ - -F90 = i686-w64-mingw32-gfortran -F90FLAGS = -O3 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - -ffast-math -funroll-loops -fstrict-aliasing -J$(DIR) \ - -Wall -W -Wno-uninitialized -fno-second-underscore -#F90FLAGS = -O -ARCHIVE = i686-w64-mingw32-ar -ARCHFLAG = -rcs -LINK = i686-w64-mingw32-g++ -LINKFLAGS = -O -USRLIB = -SYSLIB = - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) - -$(DIR): - -mkdir $(DIR) - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.F - $(F90) $(F90FLAGS) -c $< -o $@ - -$(DIR)%.o:%.c - $(F90) $(F90FLAGS) -c $< -o $@ - -include .depend -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.mod *~ $(LIB) - -rmdir $(DIR) - -tar: - -tar -cvf ../MEAM.tar $(FILES) diff --git a/lib/meam/Makefile.mingw32-cross-mpi b/lib/meam/Makefile.mingw32-cross-mpi deleted file mode 100644 index 1e35c5b461..0000000000 --- a/lib/meam/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw32-cross - -rm -f Obj_mingw32-mpi - ln -s Obj_mingw32 Obj_mingw32-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw32-cross - -rm -f Obj_mingw32-mpi - diff --git a/lib/meam/Makefile.mingw64-cross-mpi b/lib/meam/Makefile.mingw64-cross-mpi deleted file mode 100644 index ca6f4a6d43..0000000000 --- 
a/lib/meam/Makefile.mingw64-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw64-cross - -rm -f Obj_mingw64-mpi - ln -s Obj_mingw64 Obj_mingw64-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw64-cross - -rm -f Obj_mingw64-mpi - diff --git a/lib/meam/Makefile.mingw64-cross b/lib/meam/Makefile.mpi similarity index 63% rename from lib/meam/Makefile.mingw64-cross rename to lib/meam/Makefile.mpi index 1a8e97febe..fd3dbde555 100644 --- a/lib/meam/Makefile.mingw64-cross +++ b/lib/meam/Makefile.mpi @@ -1,4 +1,4 @@ -# * -*- makefile -*- +# * # *_________________________________________________________________________* # * MEAM: MODEFIED EMBEDDED ATOM METHOD * # * DESCRIPTION: SEE READ-ME * @@ -11,7 +11,7 @@ SHELL = /bin/sh # which file will be copied to Makefile.lammps -EXTRAMAKE = Makefile.lammps.gfortran +EXTRAMAKE = Makefile.lammps.empty # ------ FILES ------ @@ -21,49 +21,41 @@ FILES = $(SRC) Makefile # ------ DEFINITIONS ------ -DIR = Obj_mingw64/ -LIB = $(DIR)libmeam.a -OBJ = $(SRC:%.F=$(DIR)%.o) $(DIR)fm_exp.o +LIB = libmeam.a +OBJ = $(SRC:.F=.o) fm_exp.o # ------ SETTINGS ------ -F90 = x86_64-w64-mingw32-gfortran -F90FLAGS = -O3 -march=core2 -mtune=core2 -msse2 -mpc64 \ - -ffast-math -funroll-loops -fstrict-aliasing -J$(DIR) \ - -Wall -W -Wno-uninitialized -fno-second-underscore +F90 = mpifort +CC = mpicc +F90FLAGS = -O3 -fPIC #F90FLAGS = -O -ARCHIVE = x86_64-w64-mingw32-ar -ARCHFLAG = -rcs -LINK = x86_64-w64-mingw32-g++ +ARCHIVE = ar +ARCHFLAG = -rc +LINK = mpicxx LINKFLAGS = -O USRLIB = SYSLIB = # ------ MAKE PROCEDURE ------ -default: $(DIR) $(LIB) - -$(DIR): - -mkdir $(DIR) - -$(LIB): $(OBJ) +lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) @cp $(EXTRAMAKE) Makefile.lammps # ------ COMPILE RULES ------ -$(DIR)%.o:%.F - $(F90) $(F90FLAGS) -c $< -o $@ +%.o:%.F + $(F90) $(F90FLAGS) -c $< -$(DIR)%.o:%.c - $(F90) $(F90FLAGS) -c $< -o $@ +%.o:%.c + $(CC) $(F90FLAGS) -c $< include 
.depend # ------ CLEAN ------ clean: - -rm $(DIR)*.o $(DIR)*.mod *~ $(LIB) - -rmdir $(DIR) + -rm *.o *.mod *~ $(LIB) tar: -tar -cvf ../MEAM.tar $(FILES) diff --git a/lib/meam/Makefile.serial b/lib/meam/Makefile.serial new file mode 120000 index 0000000000..c52fbcb986 --- /dev/null +++ b/lib/meam/Makefile.serial @@ -0,0 +1 @@ +Makefile.gfortran \ No newline at end of file diff --git a/lib/mscg/.gitignore b/lib/mscg/.gitignore new file mode 100644 index 0000000000..7d45bcb60a --- /dev/null +++ b/lib/mscg/.gitignore @@ -0,0 +1,4 @@ +# files to ignore +/liblink +/includelink +/MSCG-release-master diff --git a/lib/mscg/Install.py b/lib/mscg/Install.py index e547232614..76c986ef6d 100644 --- a/lib/mscg/Install.py +++ b/lib/mscg/Install.py @@ -3,46 +3,90 @@ # Install.py tool to download, unpack, build, and link to the MS-CG library # used to automate the steps described in the README file in this dir -import sys,os,re,commands +from __future__ import print_function +import sys,os,re,subprocess # help message help = """ -Syntax: python Install.py -h hpath hdir -g -b [suffix] -l - specify one or more options, order does not matter - -h = set home dir of MS-CG to be hpath/hdir - hpath can be full path, contain '~' or '.' chars - default hpath = . 
= lib/mscg - default hdir = MSCG-release-master = what GitHub zipfile unpacks to - -g = grab (download) zipfile from MS-CG GitHub website - unpack it to hpath/hdir - hpath must already exist - if hdir already exists, it will be deleted before unpack - -b = build MS-CG library in its src dir - optional suffix specifies which src/Make/Makefile.suffix to use +Syntax from src dir: make lib-mscg args="-p [path] -m [suffix]" + or: make lib-mscg args="-b -m [suffix]" +Syntax from lib dir: python Install.py -p [path] -m [suffix] +Syntax from lib dir: python Install.py -b -m [suffix] + +specify one or more options, order does not matter + + -b = download and build MS-CG library + -p = specify folder of existing MS-CG installation + -m = machine suffix specifies which src/Make/Makefile.suffix to use default suffix = g++_simple - -l = create 2 softlinks (includelink,liblink) in lib/mscg to MS-CG src dir + +Example: + +make lib-mscg args="-b -m serial " # download/build in lib/mscg/MSCG-release-master with settings compatible with "make serial" +make lib-mscg args="-b -m mpi " # download/build in lib/mscg/MSCG-release-master with settings compatible with "make mpi" +make lib-mscg args="-p /usr/local/mscg-release " # use existing MS-CG installation in /usr/local/mscg-release """ # settings -url = "https://github.com/uchicago-voth/MSCG-release/archive/master.zip" -zipfile = "MS-CG-master.zip" -zipdir = "MSCG-release-master" +url = "http://github.com/uchicago-voth/MSCG-release/archive/master.tar.gz" +tarfile = "MS-CG-master.tar.gz" +tardir = "MSCG-release-master" # print error message or help def error(str=None): - if not str: print help - else: print "ERROR",str + if not str: print(help) + else: print("ERROR",str) sys.exit() # expand to full path name # process leading '~' or relative path - + def fullpath(path): return os.path.abspath(os.path.expanduser(path)) - + +def which(program): + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, 
fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + path = path.strip('"') + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + +def geturl(url,fname): + success = False + + if which('curl') != None: + cmd = 'curl -L -o "%s" %s' % (fname,url) + try: + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = True + except subprocess.CalledProcessError as e: + print("Calling curl failed with: %s" % e.output.decode('UTF-8')) + + if not success and which('wget') != None: + cmd = 'wget -O "%s" %s' % (fname,url) + try: + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = True + except subprocess.CalledProcessError as e: + print("Calling wget failed with: %s" % e.output.decode('UTF-8')) + + if not success: + error("Failed to download source code with 'curl' or 'wget'") + return + # parse args args = sys.argv[1:] @@ -50,73 +94,93 @@ nargs = len(args) if nargs == 0: error() homepath = "." 
-homedir = zipdir +homedir = tardir -grabflag = 0 -buildflag = 0 +buildflag = False +pathflag = False +linkflag = True msuffix = "g++_simple" -linkflag = 0 iarg = 0 while iarg < nargs: - if args[iarg] == "-h": - if iarg+3 > nargs: error() - homepath = args[iarg+1] - homedir = args[iarg+2] - iarg += 3 - elif args[iarg] == "-g": - grabflag = 1 - iarg += 1 + if args[iarg] == "-p": + if iarg+2 > nargs: error() + mscgpath = fullpath(args[iarg+1]) + pathflag = True + iarg += 2 + elif args[iarg] == "-m": + if iarg+2 > nargs: error() + msuffix = args[iarg+1] + iarg += 2 elif args[iarg] == "-b": - buildflag = 1 - if iarg+1 < nargs and args[iarg+1][0] != '-': - msuffix = args[iarg+1] - iarg += 1 - iarg += 1 - elif args[iarg] == "-l": - linkflag = 1 + buildflag = True iarg += 1 else: error() homepath = fullpath(homepath) -if not os.path.isdir(homepath): error("MS-CG path does not exist") homedir = "%s/%s" % (homepath,homedir) -# download and unpack MS-CG zipfile +if (pathflag): + if not os.path.isdir(mscgpath): error("MS-CG path does not exist") + homedir = mscgpath -if grabflag: - print "Downloading MS-CG ..." - cmd = "curl -L %s > %s/%s" % (url,homepath,zipfile) - print cmd - print commands.getoutput(cmd) +if (buildflag and pathflag): + error("Cannot use -b and -p flag at the same time") - print "Unpacking MS-CG zipfile ..." 
- if os.path.exists("%s/%s" % (homepath,zipdir)): - commands.getoutput("rm -rf %s/%s" % (homepath,zipdir)) - cmd = "cd %s; unzip %s" % (homepath,zipfile) - commands.getoutput(cmd) - if os.path.basename(homedir) != zipdir: - if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir) - os.rename("%s/%s" % (homepath,zipdir),homedir) +if (not buildflag and not pathflag): + error("Have to use either -b or -p flag") + +# download and unpack MS-CG tarfile + +if buildflag: + print("Downloading MS-CG ...") + geturl(url,"%s/%s" % (homepath,tarfile)) + + print("Unpacking MS-CG tarfile ...") + if os.path.exists("%s/%s" % (homepath,tardir)): + cmd = 'rm -rf "%s/%s"' % (homepath,tardir) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + cmd = 'cd "%s"; tar -xzvf %s' % (homepath,tarfile) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + os.remove("%s/%s" % (homepath,tarfile)) + if os.path.basename(homedir) != tardir: + if os.path.exists(homedir): + cmd = 'rm -rf "%s"' % homedir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + os.rename("%s/%s" % (homepath,tardir),homedir) # build MS-CG if buildflag: - print "Building MS-CG ..." 
- cmd = "cd %s/src; cp Make/Makefile.%s .; make -f Makefile.%s" % \ - (homedir,msuffix,msuffix) - txt = commands.getoutput(cmd) - print txt + print("Building MS-CG ...") + if os.path.exists("%s/src/Make/Makefile.%s" % (homedir,msuffix)): + cmd = 'cd "%s/src"; cp Make/Makefile.%s .; make -f Makefile.%s' % \ + (homedir,msuffix,msuffix) + elif os.path.exists("Makefile.%s" % msuffix): + cmd = 'cd "%s/src"; cp ../../Makefile.%s .; make -f Makefile.%s' % \ + (homedir,msuffix,msuffix) + else: + error("Cannot find Makefile.%s" % msuffix) + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + print(txt.decode('UTF-8')) + if not os.path.exists("Makefile.lammps"): + print("Creating Makefile.lammps") + if os.path.exists("Makefile.lammps.%s" % msuffix): + cmd = 'cp Makefile.lammps.%s Makefile.lammps' % msuffix + else: + cmd = 'cp Makefile.lammps.default Makefile.lammps' + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + else: print("Makefile.lammps exists. Please check its settings") # create 2 links in lib/mscg to MS-CG src dir if linkflag: - print "Creating links to MS-CG include and lib files" + print("Creating links to MS-CG include and lib files") if os.path.isfile("includelink") or os.path.islink("includelink"): os.remove("includelink") if os.path.isfile("liblink") or os.path.islink("liblink"): os.remove("liblink") - cmd = "ln -s %s/src includelink" % homedir - commands.getoutput(cmd) - cmd = "ln -s %s/src liblink" % homedir - commands.getoutput(cmd) + cmd = 'ln -s "%s/src" includelink' % homedir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + cmd = 'ln -s "%s/src" liblink' % homedir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) diff --git a/lib/mscg/Makefile.lammps.default b/lib/mscg/Makefile.lammps.default new file mode 100644 index 0000000000..7f04ff2eac --- /dev/null +++ b/lib/mscg/Makefile.lammps.default @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import when this package 
library is used + +mscg_SYSINC = +mscg_SYSLIB = -lgsl -lgslcblas +mscg_SYSPATH = diff --git a/lib/mscg/Makefile.lammps b/lib/mscg/Makefile.lammps.g++_simple similarity index 100% rename from lib/mscg/Makefile.lammps rename to lib/mscg/Makefile.lammps.g++_simple diff --git a/lib/mscg/Makefile.lammps.mpi b/lib/mscg/Makefile.lammps.mpi new file mode 100644 index 0000000000..7f04ff2eac --- /dev/null +++ b/lib/mscg/Makefile.lammps.mpi @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import when this package library is used + +mscg_SYSINC = +mscg_SYSLIB = -lgsl -lgslcblas +mscg_SYSPATH = diff --git a/lib/mscg/Makefile.lammps.serial b/lib/mscg/Makefile.lammps.serial new file mode 100644 index 0000000000..7f04ff2eac --- /dev/null +++ b/lib/mscg/Makefile.lammps.serial @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import when this package library is used + +mscg_SYSINC = +mscg_SYSLIB = -lgsl -lgslcblas +mscg_SYSPATH = diff --git a/lib/mscg/Makefile.mpi b/lib/mscg/Makefile.mpi new file mode 100644 index 0000000000..d5088176b4 --- /dev/null +++ b/lib/mscg/Makefile.mpi @@ -0,0 +1,104 @@ +# This Makefile is meant for use after +# module load gsl/2.2.1+gcc-6.1 +# module load gcc/6.1 +# It also requires LAPACK +# Module names refer to those on any of RCC's clusters at UChicago. 
+ +# This makefile does NOT include GROMACS reading or MKL (sparse matrix) +# It uses the gcc/g++ compiler (v4.9+) for C++11 support + +# 1) Try this first (as it is the easiest) +NO_GRO_LIBS = -lgsl -lgslcblas + +# 2) If it does not find your libraries automatically, you can specify them manually +# # A) Set the GSL_LIB to the location of your GSL library's lib directory (must be V2+) +GSL_LIB = /software/gsl-2.2.1-el6-x86_64+gcc-6.1/lib +# # B) Set the LAPACK_DIR to the location of your LAPACK library base directory +LAPACK_LIB = $(HOME)/local/lapack-3.7.0 +# # C) Uncomment this next line and then run again (after cleaning up any object files) +#NO_GRO_LIBS = -L$(GSL_LIB) -L$(LAPACK_LIB) -lgsl -lgslcblas -llapack -lm + +OPT = -O2 +NO_GRO_LDFLAGS = $(OPT) +NO_GRO_CFLAGS = $(OPT) +DIMENSION = 3 +CC = mpicc + +COMMON_SOURCE = control_input.h fm_output.h force_computation.h geometry.h interaction_hashing.h interaction_model.h matrix.h splines.h topology.h trajectory_input.h misc.h mscg.h +NO_GRO_COMMON_OBJECTS = control_input.o fm_output.o force_computation.o geometry.o interaction_hashing.o interaction_model.o matrix.o splines.o topology.o trajectory_input_no_gro.o misc.o + +# Target executables +# The library for LAMMPS is lib_mscg.a +libmscg.a: mscg.o $(NO_GRO_COMMON_OBJECTS) + ar rvs libmscg.a *.o + +newfm_no_gro.x: newfm.o $(NO_GRO_COMMON_OBJECTS) + $(CC) $(NO_GRO_LDFLAGS) -o $@ newfm.o $(NO_GRO_COMMON_OBJECTS) -D"_exclude_gromacs=1" $(NO_GRO_LIBS) + +combinefm_no_gro.x: combinefm.o batch_fm_combination.o $(NO_GRO_COMMON_OBJECTS) + $(CC) $(NO_GRO_LDFLAGS) -o $@ combinefm.o batch_fm_combination.o $(NO_GRO_COMMON_OBJECTS) -D"_exclude_gromacs=1" $(NO_GRO_LIBS) + +rangefinder_no_gro.x: rangefinder.o range_finding.o $(NO_GRO_COMMON_OBJECTS) + $(CC) $(NO_GRO_LDFLAGS) -o $@ rangefinder.o range_finding.o $(NO_GRO_COMMON_OBJECTS) -D"_exclude_gromacs=1" $(NO_GRO_LIBS) + +# Target objects + +mscg.o: mscg.cpp $(COMMON_SOURCE) range_finding.o + $(CC) $(NO_GRO_CFLAGS) -c 
mscg.cpp -o mscg.o $(NO_GRO_LIBS) + +newfm.o: newfm.cpp $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c newfm.cpp + +combinefm.o: combinefm.cpp batch_fm_combination.h $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c combinefm.cpp + +rangefinder.o: rangefinder.cpp range_finding.h $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c rangefinder.cpp + +scalarfm.o: scalarfm.cpp $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c scalarfm.cpp + +batch_fm_combination.o: batch_fm_combination.cpp batch_fm_combination.h external_matrix_routines.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c batch_fm_combination.cpp + +control_input.o: control_input.cpp control_input.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c control_input.cpp + +geometry.o: geometry.cpp geometry.h + $(CC) $(NO_GRO_CFLAGS) -c geometry.cpp -DDIMENSION=$(DIMENSION) + +fm_output.o: fm_output.cpp fm_output.h force_computation.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c fm_output.cpp + +force_computation.o: force_computation.cpp force_computation.h interaction_model.h matrix.h trajectory_input.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c force_computation.cpp -DDIMENSION=$(DIMENSION) + +interaction_hashing.o: interaction_hashing.cpp interaction_hashing.h + $(CC) $(NO_GRO_CFLAGS) -c interaction_hashing.cpp + +interaction_model.o: interaction_model.cpp interaction_model.h control_input.h interaction_hashing.h topology.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c interaction_model.cpp -DDIMENSION=$(DIMENSION) + +matrix.o: matrix.cpp matrix.h control_input.h external_matrix_routines.h interaction_model.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c matrix.cpp -DDIMENSION=$(DIMENSION) + +misc.o: misc.cpp misc.h + $(CC) $(NO_GRO_CFLAGS) -c misc.cpp + +range_finding.o: range_finding.cpp range_finding.h force_computation.h interaction_model.h matrix.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c range_finding.cpp -DDIMENSION=$(DIMENSION) + +splines.o: splines.cpp splines.h interaction_model.h + $(CC) $(NO_GRO_CFLAGS) -c splines.cpp -DDIMENSION=$(DIMENSION) + +topology.o: topology.cpp topology.h 
interaction_model.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c topology.cpp -DDIMENSION=$(DIMENSION) + +trajectory_input_no_gro.o: trajectory_input.cpp trajectory_input.h control_input.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c trajectory_input.cpp -D"_exclude_gromacs=1" -o trajectory_input_no_gro.o + +# Other convenient commands +clean: + rm *.[o] + +all: libmscg.a newfm_no_gro.x rangefinder_no_gro.x combinefm_no_gro.x diff --git a/lib/mscg/Makefile.serial b/lib/mscg/Makefile.serial new file mode 100644 index 0000000000..4e34706f01 --- /dev/null +++ b/lib/mscg/Makefile.serial @@ -0,0 +1,104 @@ +# This Makefile is meant for use after +# module load gsl/2.2.1+gcc-6.1 +# module load gcc/6.1 +# It also requires LAPACK +# Module names refer to those on any of RCC's clusters at UChicago. + +# This makefile does NOT include GROMACS reading or MKL (sparse matrix) +# It uses the gcc/g++ compiler (v4.9+) for C++11 support + +# 1) Try this first (as it is the easiest) +NO_GRO_LIBS = -lgsl -lgslcblas + +# 2) If it does not find your libraries automatically, you can specify them manually +# # A) Set the GSL_LIB to the location of your GSL library's lib directory (must be V2+) +GSL_LIB = /software/gsl-2.2.1-el6-x86_64+gcc-6.1/lib +# # B) Set the LAPACK_DIR to the location of your LAPACK library base directory +LAPACK_LIB = $(HOME)/local/lapack-3.7.0 +# # C) Uncomment this next line and then run again (after cleaning up any object files) +#NO_GRO_LIBS = -L$(GSL_LIB) -L$(LAPACK_LIB) -lgsl -lgslcblas -llapack -lm + +OPT = -O2 +NO_GRO_LDFLAGS = $(OPT) +NO_GRO_CFLAGS = $(OPT) +DIMENSION = 3 +CC = g++ + +COMMON_SOURCE = control_input.h fm_output.h force_computation.h geometry.h interaction_hashing.h interaction_model.h matrix.h splines.h topology.h trajectory_input.h misc.h mscg.h +NO_GRO_COMMON_OBJECTS = control_input.o fm_output.o force_computation.o geometry.o interaction_hashing.o interaction_model.o matrix.o splines.o topology.o trajectory_input_no_gro.o misc.o + +# Target executables +# The 
library for LAMMPS is lib_mscg.a +libmscg.a: mscg.o $(NO_GRO_COMMON_OBJECTS) + ar rvs libmscg.a *.o + +newfm_no_gro.x: newfm.o $(NO_GRO_COMMON_OBJECTS) + $(CC) $(NO_GRO_LDFLAGS) -o $@ newfm.o $(NO_GRO_COMMON_OBJECTS) -D"_exclude_gromacs=1" $(NO_GRO_LIBS) + +combinefm_no_gro.x: combinefm.o batch_fm_combination.o $(NO_GRO_COMMON_OBJECTS) + $(CC) $(NO_GRO_LDFLAGS) -o $@ combinefm.o batch_fm_combination.o $(NO_GRO_COMMON_OBJECTS) -D"_exclude_gromacs=1" $(NO_GRO_LIBS) + +rangefinder_no_gro.x: rangefinder.o range_finding.o $(NO_GRO_COMMON_OBJECTS) + $(CC) $(NO_GRO_LDFLAGS) -o $@ rangefinder.o range_finding.o $(NO_GRO_COMMON_OBJECTS) -D"_exclude_gromacs=1" $(NO_GRO_LIBS) + +# Target objects + +mscg.o: mscg.cpp $(COMMON_SOURCE) range_finding.o + $(CC) $(NO_GRO_CFLAGS) -c mscg.cpp -o mscg.o $(NO_GRO_LIBS) + +newfm.o: newfm.cpp $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c newfm.cpp + +combinefm.o: combinefm.cpp batch_fm_combination.h $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c combinefm.cpp + +rangefinder.o: rangefinder.cpp range_finding.h $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c rangefinder.cpp + +scalarfm.o: scalarfm.cpp $(COMMON_SOURCE) + $(CC) $(NO_GRO_CFLAGS) -c scalarfm.cpp + +batch_fm_combination.o: batch_fm_combination.cpp batch_fm_combination.h external_matrix_routines.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c batch_fm_combination.cpp + +control_input.o: control_input.cpp control_input.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c control_input.cpp + +geometry.o: geometry.cpp geometry.h + $(CC) $(NO_GRO_CFLAGS) -c geometry.cpp -DDIMENSION=$(DIMENSION) + +fm_output.o: fm_output.cpp fm_output.h force_computation.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c fm_output.cpp + +force_computation.o: force_computation.cpp force_computation.h interaction_model.h matrix.h trajectory_input.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c force_computation.cpp -DDIMENSION=$(DIMENSION) + +interaction_hashing.o: interaction_hashing.cpp interaction_hashing.h + $(CC) $(NO_GRO_CFLAGS) -c interaction_hashing.cpp 
+ +interaction_model.o: interaction_model.cpp interaction_model.h control_input.h interaction_hashing.h topology.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c interaction_model.cpp -DDIMENSION=$(DIMENSION) + +matrix.o: matrix.cpp matrix.h control_input.h external_matrix_routines.h interaction_model.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c matrix.cpp -DDIMENSION=$(DIMENSION) + +misc.o: misc.cpp misc.h + $(CC) $(NO_GRO_CFLAGS) -c misc.cpp + +range_finding.o: range_finding.cpp range_finding.h force_computation.h interaction_model.h matrix.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c range_finding.cpp -DDIMENSION=$(DIMENSION) + +splines.o: splines.cpp splines.h interaction_model.h + $(CC) $(NO_GRO_CFLAGS) -c splines.cpp -DDIMENSION=$(DIMENSION) + +topology.o: topology.cpp topology.h interaction_model.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c topology.cpp -DDIMENSION=$(DIMENSION) + +trajectory_input_no_gro.o: trajectory_input.cpp trajectory_input.h control_input.h misc.h + $(CC) $(NO_GRO_CFLAGS) -c trajectory_input.cpp -D"_exclude_gromacs=1" -o trajectory_input_no_gro.o + +# Other convenient commands +clean: + rm *.[o] + +all: libmscg.a newfm_no_gro.x rangefinder_no_gro.x combinefm_no_gro.x diff --git a/lib/mscg/README b/lib/mscg/README index b73c8563cd..329eebba96 100755 --- a/lib/mscg/README +++ b/lib/mscg/README @@ -6,12 +6,11 @@ The MS-CG library is available at https://github.com/uchicago-voth/MSCG-release and was developed by Jacob Wagner in Greg Voth's group at the University of Chicago. -This library requires a compiler with C++11 support (e.g., g++ v4.9+), -LAPACK, and the GNU scientific library (GSL v 2.1+). +This library requires a the GNU scientific library (GSL v 2.1+). 
You can type "make lib-mscg" from the src directory to see help on how to download and build this library via make commands, or you can do -the same thing by typing "python Install.py" from within this +the same thing by typing "python Install.py -m " from within this directory, or you can do it manually by following the instructions below. @@ -21,17 +20,17 @@ You must perform the following steps yourself. 1. Download MS-CG at https://github.com/uchicago-voth/MSCG-release either as a tarball or via SVN, and unpack the tarball either in - this /lib/mscg directory or somewhere else on your system. + this lib/mscg directory or somewhere else on your system. + +2. Ensure that you have GSL installed and a compiler with support for C++11. -2. Ensure that you have LAPACK and GSL (or Intel MKL) as well as a compiler - with support for C++11. - 3. Compile MS-CG from within its home directory using your makefile of choice: - % make -f Makefile."name" libmscg.a - It is recommended that you start with Makefile.g++_simple - for most machines + % make -f Makefile. libmscg.a + It is recommended that you start with Makefile.g++_simple for + most machines. There are also two Makefile with settings matching + the "mpi" and "serial" makefiles in the main LAMMPS folder. -4. There is no need to install MS-CG if you only wish +4. There is no need to install MS-CG system-wide if you only wish to use it from LAMMPS. 5. Create two soft links in this dir (lib/mscg) to the MS-CG src @@ -43,6 +42,9 @@ You must perform the following steps yourself. % ln -s /usr/local/include includelink % ln -s /usr/local/lib liblink +6. Copy a suitable Makefile.lammps. to Makefile.lammps or + copy Makefile.lammps.default to Makefile.lammps and edit as needed. 
+ ----------------- When these steps are complete you can build LAMMPS with the MS-CG diff --git a/lib/poems/Install.py b/lib/poems/Install.py deleted file mode 100644 index 18b426f928..0000000000 --- a/lib/poems/Install.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file - -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix - does not alter existing Makefile.machine -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None -extraflag = 0 - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - elif args[iarg] == "-e": - if iarg+2 > nargs: error() - extraflag = 1 - suffix = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested - -if not os.path.exists("Makefile.%s" % machine): - error("lib/%s/Makefile.%s does not exist" % (lib,machine)) - -lines = open("Makefile.%s" % machine,'r').readlines() -fp = open("Makefile.auto",'w') - -for line in lines: - words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - -fp.close() - -# make the library via Makefile.auto - -print "Building lib%s.a ..." 
% lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/poems/Install.py b/lib/poems/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/poems/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/poems/Makefile.g++ b/lib/poems/Makefile.g++ index 54c897a22c..afcbc4a01d 100644 --- a/lib/poems/Makefile.g++ +++ b/lib/poems/Makefile.g++ @@ -68,7 +68,7 @@ OBJ = $(SRC:.cpp=.o) # ------ SETTINGS ------ CC = g++ -CCFLAGS = -O -g -fPIC -Wall #-Wno-deprecated +CCFLAGS = -O3 -g -fPIC -Wall #-Wno-deprecated ARCHIVE = ar ARCHFLAG = -rc DEPFLAGS = -M diff --git a/lib/poems/Makefile.mingw32-cross b/lib/poems/Makefile.mingw32-cross deleted file mode 100644 index 17e81b51f0..0000000000 --- a/lib/poems/Makefile.mingw32-cross +++ /dev/null @@ -1,110 +0,0 @@ -# * -# *_________________________________________________________________________* -# * POEMS: PARALLELIZABLE OPEN SOURCE EFFICIENT MULTIBODY SOFTWARE * -# * DESCRIPTION: SEE READ-ME * -# * FILE NAME: Makefile * -# * AUTHORS: See Author List * -# * GRANTS: See Grants List * -# * COPYRIGHT: (C) 2005 by Authors as listed in Author's List * -# * LICENSE: Please see License Agreement * -# * DOWNLOAD: Free at www.rpi.edu/~anderk5 * -# * ADMINISTRATOR: Prof. Kurt Anderson * -# * Computational Dynamics Lab * -# * Rensselaer Polytechnic Institute * -# * 110 8th St. 
Troy NY 12180 * -# * CONTACT: anderk5@rpi.edu * -# *_________________________________________________________________________*/ - -SHELL = /bin/sh - -# which file will be copied to Makefile.lammps - -EXTRAMAKE = Makefile.lammps.empty - -# ------ FILES ------ - -SRC_MAIN = workspace.cpp system.cpp poemsobject.cpp -INC_MAIN = workspace.h system.h poemsobject.h - -SRC_BODY = body.cpp rigidbody.cpp particle.cpp inertialframe.cpp -INC_BODY = bodies.h body.h rigidbody.h particle.h inertialframe.h - - -SRC_JOINT = joint.cpp revolutejoint.cpp prismaticjoint.cpp sphericaljoint.cpp \ - freebodyjoint.cpp body23joint.cpp mixedjoint.cpp -INC_JOINT = joints.h joint.h revolutejoint.h prismaticjoint.h sphericaljoint.h \ - freebodyjoint.h body23joint.h mixedjoint.h - -SRC_POINT = point.cpp fixedpoint.cpp -INC_POINT = points.h point.h fixedpoint.h - -SRC_SOLVE = solver.cpp -INC_SOLVE = solver.h - -SRC_ORDERN = onsolver.cpp onfunctions.cpp onbody.cpp -INC_ORDERN = onsolver.h onfunctions.h onbody.h - -SRC_MAT = virtualmatrix.cpp matrix.cpp matrixfun.cpp mat3x3.cpp virtualcolmatrix.cpp \ - colmatrix.cpp vect3.cpp virtualrowmatrix.cpp rowmatrix.cpp mat6x6.cpp vect6.cpp \ - fastmatrixops.cpp colmatmap.cpp eulerparameters.cpp vect4.cpp norm.cpp mat4x4.cpp \ - -INC_MAT = matrices.h virtualmatrix.h matrix.h matrixfun.h mat3x3.h virtualcolmatrix.h \ - colmatrix.h vect3.h virtualrowmatrix.h rowmatrix.h mat6x6.h vect6.h \ - fastmatrixops.h colmatmap.h eulerparameters.h vect4.h norm.h mat4x4.h - -SRC_MISC = poemstreenode.cpp -INC_MISC = poemslist.h poemstreenode.h poemstree.h poemsnodelib.h SystemProcessor.h defines.h POEMSChain.h - -SRC = $(SRC_MAIN) $(SRC_BODY) $(SRC_JOINT) $(SRC_POINT) $(SRC_SOLVE) $(SRC_ORDERN) $(SRC_MAT) $(SRC_MISC) -INC = $(INC_MAIN) $(INC_BODY) $(INC_JOINT) $(INC_POINT) $(INC_SOLVE) $(INC_ORDERN) $(INC_MAT) $(INC_MISC) - -FILES = $(SRC) $(INC) Makefile Authors_List.txt Grants_List.txt POEMS_License.txt README Copyright_Notice - -# ------ DEFINITIONS ------ - -DIR = 
Obj_mingw32/ -LIB = $(DIR)libpoems.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) - -# ------ SETTINGS ------ - -CC = i686-w64-mingw32-g++ -CCFLAGS = -O2 -march=i686 -mtune=generic -mfpmath=387 -mpc64 \ - -ffast-math -funroll-loops -finline-functions -fno-rtti \ - -fno-exceptions -fstrict-aliasing \ - -Wall -W -Wno-uninitialized -ARCHIVE = i686-w64-mingw32-ar -ARCHFLAG = -rcs -DEPFLAGS = -M -LINK = i686-w64-mingw32-g++ -LINKFLAGS = -O -USRLIB = -SYSLIB = - -# ------ MAKE PROCEDURE ------ - -default: $(DIR) $(LIB) - -$(DIR): - -mkdir $(DIR) - -$(LIB): $(OBJ) - $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) - @cp $(EXTRAMAKE) Makefile.lammps - -# ------ COMPILE RULES ------ - -$(DIR)%.o:%.cpp - $(CC) $(CCFLAGS) -c $< -o $@ - -# ------ DEPENDENCIES ------ - -include .depend - -# ------ CLEAN ------ - -clean: - -rm $(DIR)*.o $(DIR)*.d *~ $(LIB) - -tar: - -tar -cvf ../POEMS.tar $(FILES) diff --git a/lib/poems/Makefile.mingw32-cross-mpi b/lib/poems/Makefile.mingw32-cross-mpi deleted file mode 100644 index 1e35c5b461..0000000000 --- a/lib/poems/Makefile.mingw32-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw32-cross - -rm -f Obj_mingw32-mpi - ln -s Obj_mingw32 Obj_mingw32-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw32-cross - -rm -f Obj_mingw32-mpi - diff --git a/lib/poems/Makefile.mingw64-cross-mpi b/lib/poems/Makefile.mingw64-cross-mpi deleted file mode 100644 index ca6f4a6d43..0000000000 --- a/lib/poems/Makefile.mingw64-cross-mpi +++ /dev/null @@ -1,13 +0,0 @@ -# -*- makefile -*- wrapper for non-MPI libraries - -SHELL=/bin/sh - -all: - $(MAKE) $(MFLAGS) mingw64-cross - -rm -f Obj_mingw64-mpi - ln -s Obj_mingw64 Obj_mingw64-mpi - -clean: - $(MAKE) $(MFLAGS) clean-mingw64-cross - -rm -f Obj_mingw64-mpi - diff --git a/lib/poems/Makefile.mingw64-cross b/lib/poems/Makefile.mpi similarity index 86% rename from lib/poems/Makefile.mingw64-cross rename to lib/poems/Makefile.mpi index 2df43dea94..0f0546419c 
100644 --- a/lib/poems/Makefile.mingw64-cross +++ b/lib/poems/Makefile.mpi @@ -62,40 +62,31 @@ FILES = $(SRC) $(INC) Makefile Authors_List.txt Grants_List.txt POEMS_License.tx # ------ DEFINITIONS ------ -DIR = Obj_mingw64/ -LIB = $(DIR)libpoems.a -OBJ = $(SRC:%.cpp=$(DIR)%.o) +LIB = libpoems.a +OBJ = $(SRC:.cpp=.o) # ------ SETTINGS ------ -CC = x86_64-w64-mingw32-g++ -CCFLAGS = -O2 -march=core2 -mtune=core2 -msse2 -mpc64 \ - -ffast-math -funroll-loops -finline-functions -fno-rtti \ - -fno-exceptions -fstrict-aliasing \ - -Wall -W -Wno-uninitialized -ARCHIVE = x86_64-w64-mingw32-ar -ARCHFLAG = -rcs +CC = mpicxx +CCFLAGS = -O3 -g -fPIC -Wall #-Wno-deprecated +ARCHIVE = ar +ARCHFLAG = -rc DEPFLAGS = -M -LINK = x86_64-w64-mingw32-g++ +LINK = mpicxx LINKFLAGS = -O USRLIB = SYSLIB = # ------ MAKE PROCEDURE ------ -default: $(DIR) $(LIB) - -$(DIR): - -mkdir $(DIR) - -$(LIB): $(OBJ) +lib: $(OBJ) $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) @cp $(EXTRAMAKE) Makefile.lammps # ------ COMPILE RULES ------ -$(DIR)%.o:%.cpp - $(CC) $(CCFLAGS) -c $< -o $@ +%.o:%.cpp + $(CC) $(CCFLAGS) -c $< # ------ DEPENDENCIES ------ @@ -104,7 +95,7 @@ include .depend # ------ CLEAN ------ clean: - -rm $(DIR)*.o $(DIR)*.d *~ $(LIB) + -rm *.o *.d *~ $(LIB) tar: -tar -cvf ../POEMS.tar $(FILES) diff --git a/lib/poems/Makefile.serial b/lib/poems/Makefile.serial new file mode 120000 index 0000000000..9d7bb000f9 --- /dev/null +++ b/lib/poems/Makefile.serial @@ -0,0 +1 @@ +Makefile.g++ \ No newline at end of file diff --git a/lib/poems/poemsobject.cpp b/lib/poems/poemsobject.cpp index 4a5f903fca..7c3f1ca872 100644 --- a/lib/poems/poemsobject.cpp +++ b/lib/poems/poemsobject.cpp @@ -21,7 +21,7 @@ POEMSObject::POEMSObject(){ name = 0; - ChangeName("unnamed"); + ChangeName((const char*)"unnamed"); ID = -1; } @@ -29,7 +29,7 @@ POEMSObject::~POEMSObject(){ delete [] name; } -void POEMSObject::ChangeName(char* newname){ +void POEMSObject::ChangeName(const char* newname){ delete [] name; name = new 
char[strlen(newname)+1]; strcpy(name,newname); diff --git a/lib/poems/poemsobject.h b/lib/poems/poemsobject.h index d898ab3c66..63b2915638 100644 --- a/lib/poems/poemsobject.h +++ b/lib/poems/poemsobject.h @@ -26,7 +26,7 @@ class POEMSObject { public: POEMSObject(); virtual ~POEMSObject(); - void ChangeName(char* newname); + void ChangeName(const char* newname); char* GetName(); int GetID(); void SetID(int id); diff --git a/lib/qmmm/Install.py b/lib/qmmm/Install.py deleted file mode 100644 index 18b426f928..0000000000 --- a/lib/qmmm/Install.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file - -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix - does not alter existing Makefile.machine -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None -extraflag = 0 - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - elif args[iarg] == "-e": - if iarg+2 > nargs: error() - extraflag = 1 - suffix = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested - -if not os.path.exists("Makefile.%s" % machine): - error("lib/%s/Makefile.%s does not exist" % (lib,machine)) - -lines = open("Makefile.%s" % machine,'r').readlines() -fp = 
open("Makefile.auto",'w') - -for line in lines: - words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - -fp.close() - -# make the library via Makefile.auto - -print "Building lib%s.a ..." % lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/qmmm/Install.py b/lib/qmmm/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/qmmm/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/qmmm/Makefile.mpi b/lib/qmmm/Makefile.mpi new file mode 100644 index 0000000000..590b1047f8 --- /dev/null +++ b/lib/qmmm/Makefile.mpi @@ -0,0 +1,66 @@ +# -*- Makefile -*- for coupling LAMMPS to PWscf for QM/MM molecular dynamics + +# this file will be copied to Makefile.lammps +EXTRAMAKE = Makefile.lammps.empty + +# top level directory of Quantum ESPRESSO 5.4.1 or later +QETOPDIR=$(HOME)/compile/espresso + +# import compiler settings from Quantum ESPRESSO +sinclude $(QETOPDIR)/make.sys + +# FLAGS for c++ OpenMPI 1.8.8 or later when QE was compiled with GNU Fortran 4.x +MPICXX=mpicxx +MPICXXFLAGS= -DMPICH_SKIP_MPICXX -DOMPI_SKIP_MPICXX=1 -O2 -g -fPIC\ + -I../../src -I$(QETOPDIR)/COUPLE/include +MPILIBS=-fopenmp -lgfortran -ldl -ljpeg -lpng -lz -lmpi_mpifh -lmpi + +# location of required libraries +# part 1: hi-level libraries for building pw.x +PWOBJS = \ +$(QETOPDIR)/COUPLE/src/libqecouple.a \ +$(QETOPDIR)/PW/src/libpw.a \ +$(QETOPDIR)/Modules/libqemod.a +# part 2: lo-level libraries for all of Q-E +LIBOBJS = \ +$(QETOPDIR)/FFTXlib/libqefft.a \ +$(QETOPDIR)/LAXlib/libqela.a \ 
+$(QETOPDIR)/clib/clib.a \ +$(QETOPDIR)/iotk/src/libiotk.a + +# part 3: add-on libraries and main library for LAMMPS +sinclude ../../src/Makefile.package +LAMMPSCFG = mpi +LAMMPSLIB = ../../src/liblammps_$(LAMMPSCFG).a + +# part 4: local QM/MM library and programs +SRC=pwqmmm.c libqmmm.c +OBJ=$(SRC:%.c=%.o) + + +default: libqmmm.a + +all : tldeps libqmmm.a pwqmmm.x + +pwqmmm.x : pwqmmm.o $(OBJ) $(PWOBJS) $(LIBOBJS) $(LAMMPSLIB) + $(MPICXX) $(LDFLAGS) -o $@ $^ $(PKG_PATH) $(PKG_LIB) $(MPILIBS) $(LIBS) + +libqmmm.a: libqmmm.o + $(AR) $(ARFLAGS) $@ $^ + @cp $(EXTRAMAKE) Makefile.lammps + +%.o: %.c + $(MPICXX) -c $(LAMMPSFLAGS) $(MPICXXFLAGS) $< -o $@ + +tldeps: + ( cd $(QETOPDIR) ; $(MAKE) $(MFLAGS) couple || exit 1) + $(MAKE) -C ../../src $(MFLAGS) $(LAMMPSCFG) + $(MAKE) -C ../../src $(MFLAGS) mode=lib $(LAMMPSCFG) + +clean : + -rm -f *.x *.o *.a *~ *.F90 *.d *.mod *.i *.L + +# explicit dependencies + +pwqmmm.o: pwqmmm.c libqmmm.h +libqmmm.o: libqmmm.c libqmmm.h diff --git a/lib/qmmm/Makefile.serial b/lib/qmmm/Makefile.serial new file mode 100644 index 0000000000..f091482792 --- /dev/null +++ b/lib/qmmm/Makefile.serial @@ -0,0 +1,66 @@ +# -*- Makefile -*- for coupling LAMMPS to PWscf for QM/MM molecular dynamics + +# this file will be copied to Makefile.lammps +EXTRAMAKE = Makefile.lammps.empty + +# top level directory of Quantum ESPRESSO 5.4.1 or later +QETOPDIR=$(HOME)/compile/espresso + +# import compiler settings from Quantum ESPRESSO +sinclude $(QETOPDIR)/make.sys + +# FLAGS for GNU c++ with STUBS. 
non-functional for real coupling +MPICXX=g++ +MPICXXFLAGS= -I../../src/STUBS -O2 -g -fPIC\ + -I../../src -I$(QETOPDIR)/COUPLE/include +MPILIBS=-fopenmp -lgfortran -ldl -ljpeg -lpng -lz -lmpi_mpifh -lmpi + +# location of required libraries +# part 1: hi-level libraries for building pw.x +PWOBJS = \ +$(QETOPDIR)/COUPLE/src/libqecouple.a \ +$(QETOPDIR)/PW/src/libpw.a \ +$(QETOPDIR)/Modules/libqemod.a +# part 2: lo-level libraries for all of Q-E +LIBOBJS = \ +$(QETOPDIR)/FFTXlib/libqefft.a \ +$(QETOPDIR)/LAXlib/libqela.a \ +$(QETOPDIR)/clib/clib.a \ +$(QETOPDIR)/iotk/src/libiotk.a + +# part 3: add-on libraries and main library for LAMMPS +sinclude ../../src/Makefile.package +LAMMPSCFG = mpi +LAMMPSLIB = ../../src/liblammps_$(LAMMPSCFG).a + +# part 4: local QM/MM library and programs +SRC=pwqmmm.c libqmmm.c +OBJ=$(SRC:%.c=%.o) + + +default: libqmmm.a + +all : tldeps libqmmm.a pwqmmm.x + +pwqmmm.x : pwqmmm.o $(OBJ) $(PWOBJS) $(LIBOBJS) $(LAMMPSLIB) + $(MPICXX) $(LDFLAGS) -o $@ $^ $(PKG_PATH) $(PKG_LIB) $(MPILIBS) $(LIBS) + +libqmmm.a: libqmmm.o + $(AR) $(ARFLAGS) $@ $^ + @cp $(EXTRAMAKE) Makefile.lammps + +%.o: %.c + $(MPICXX) -c $(LAMMPSFLAGS) $(MPICXXFLAGS) $< -o $@ + +tldeps: + ( cd $(QETOPDIR) ; $(MAKE) $(MFLAGS) couple || exit 1) + $(MAKE) -C ../../src $(MFLAGS) $(LAMMPSCFG) + $(MAKE) -C ../../src $(MFLAGS) mode=lib $(LAMMPSCFG) + +clean : + -rm -f *.x *.o *.a *~ *.F90 *.d *.mod *.i *.L + +# explicit dependencies + +pwqmmm.o: pwqmmm.c libqmmm.h +libqmmm.o: libqmmm.c libqmmm.h diff --git a/lib/quip/.gitignore b/lib/quip/.gitignore new file mode 100644 index 0000000000..d6797a67fe --- /dev/null +++ b/lib/quip/.gitignore @@ -0,0 +1 @@ +/QUIP diff --git a/lib/quip/Makefile.lammps b/lib/quip/Makefile.lammps index 19ff20b073..e471d3f6f4 100644 --- a/lib/quip/Makefile.lammps +++ b/lib/quip/Makefile.lammps @@ -1,17 +1,26 @@ # Settings that the LAMMPS build will import when this package library is used -# include ${QUIP_ROOT}/Makefiles/Makefile.${QUIP_ARCH} - -F95=$(shell 
egrep 'F95[ ]*=' ${QUIP_ROOT}/arch/Makefile.${QUIP_ARCH} | sed 's/.*F95[ ]*=[ ]*//') +# try to guess settings assuming there is a configured QUIP git checkout inside the lib/quip directory +QUIPDIR=$(abspath ../../lib/quip/QUIP) +ifeq (${QUIP_ROOT},) + QUIP_ROOT=$(shell test -d $(QUIPDIR) && echo $(QUIPDIR)) + ifeq (${QUIP_ARCH},) + QUIP_ARCH=$(notdir $(wildcard $(QUIP_ROOT)/build/*)) + endif +else +# uncomment and set manually or set the corresponding environment variables +# QUIP_ROOT= +# QUIP_ARCH= +endif ifeq (${QUIP_ROOT},) -$(error Environment variable QUIP_ROOT must be set.) +$(error Environment or make variable QUIP_ROOT must be set.) endif - ifeq (${QUIP_ARCH},) -$(error Environment variable QUIP_ARCH must be set.) +$(error Environment or make variable QUIP_ARCH must be set.) endif +F95=$(shell egrep 'F95[ ]*=' ${QUIP_ROOT}/arch/Makefile.${QUIP_ARCH} | sed 's/.*F95[ ]*=[ ]*//') include ${QUIP_ROOT}/build/${QUIP_ARCH}/Makefile.inc include ${QUIP_ROOT}/Makefile.rules @@ -28,3 +37,4 @@ $(error fortran compiler >>${F95}<< not recognised. Edit lib/quip/Makefile.lammp endif quip_SYSPATH = -L${QUIP_ROOT}/build/${QUIP_ARCH} + diff --git a/lib/quip/README b/lib/quip/README index 94039cfa17..e6cc3903bd 100644 --- a/lib/quip/README +++ b/lib/quip/README @@ -17,7 +17,7 @@ Building LAMMPS with QUIP support: 1) Building QUIP 1.1) Obtaining QUIP -The most current release of QUIP can be obtained from github: +The most current release of QUIP can be obtained from github: $ git clone https://github.com/libAtoms/QUIP.git QUIP @@ -59,7 +59,7 @@ necessary libraries will be built. for example: $ cd QUIP -$ export QUIP_ROOT=/path/to/QUIP +$ export QUIP_ROOT=${PWD} $ export QUIP_ARCH=linux_x86_64_gfortran $ make config $ make libquip @@ -70,21 +70,32 @@ to run a test suite. 2) Building LAMMPS -LAMMPS is now shipped with the interface necessary to use QUIP potentials, but -it should be enabled first. 
Enter the LAMMPS directory: +Edit Makefile.lammps in the lib/quip folder, if necessary. If you +have cloned, configured, and built QUIP inside this folder, QUIP_ROOT +and QUIP_ARCH should be autodetected, even without having to set +the environment variables. Otherwise export the environment variables +as shown above or edit Makefile.lammps. + +LAMMPS ships with a user package containing the interface necessary +to use QUIP potentials, but it needs to be added to the compilation +first. To do that, enter the LAMMPS source directory and type: -$ cd LAMMPS -$ cd src $ make yes-user-quip 2.2) Build LAMMPS according to the instructions on the LAMMPS website. -3) There are two example sets in examples/USER/quip: +3) There are three example sets in examples/USER/quip: - a set of input files to compute the energy of an 8-atom cubic diamond cell of silicon with the Stillinger-Weber potential. Use this to benchmark that the interface is working correctly. +- a set of input files demonstrating the use of the QUIP pair style + for a molecular system with pair style hybrid/overlay and different + exclusion settings for different pair styles. This input is + for DEMONSTRATION purposes only, and does not simulate a physically + meaningful system. + - a set of input files to demonstrate how GAP potentials are specified in a LAMMPS input file to run a short MD. The GAP parameter file gap_example.xml is intended for TESTING purposes only. 
Potentials can be diff --git a/lib/reax/Install.py b/lib/reax/Install.py deleted file mode 100644 index 18b426f928..0000000000 --- a/lib/reax/Install.py +++ /dev/null @@ -1,82 +0,0 @@ -#!/usr/bin/env python - -# install.py tool to do a generic build of a library -# soft linked to by many of the lib/Install.py files -# used to automate the steps described in the corresponding lib/README - -import sys,commands,os - -# help message - -help = """ -Syntax: python Install.py -m machine -e suffix - specify -m and optionally -e, order does not matter - -m = peform a clean followed by "make -f Makefile.machine" - machine = suffix of a lib/Makefile.* file - -e = set EXTRAMAKE variable in Makefile.machine to Makefile.lammps.suffix - does not alter existing Makefile.machine -""" - -# print error message or help - -def error(str=None): - if not str: print help - else: print "ERROR",str - sys.exit() - -# parse args - -args = sys.argv[1:] -nargs = len(args) -if nargs == 0: error() - -machine = None -extraflag = 0 - -iarg = 0 -while iarg < nargs: - if args[iarg] == "-m": - if iarg+2 > nargs: error() - machine = args[iarg+1] - iarg += 2 - elif args[iarg] == "-e": - if iarg+2 > nargs: error() - extraflag = 1 - suffix = args[iarg+1] - iarg += 2 - else: error() - -# set lib from working dir - -cwd = os.getcwd() -lib = os.path.basename(cwd) - -# create Makefile.auto as copy of Makefile.machine -# reset EXTRAMAKE if requested - -if not os.path.exists("Makefile.%s" % machine): - error("lib/%s/Makefile.%s does not exist" % (lib,machine)) - -lines = open("Makefile.%s" % machine,'r').readlines() -fp = open("Makefile.auto",'w') - -for line in lines: - words = line.split() - if len(words) == 3 and extraflag and \ - words[0] == "EXTRAMAKE" and words[1] == '=': - line = line.replace(words[2],"Makefile.lammps.%s" % suffix) - print >>fp,line, - -fp.close() - -# make the library via Makefile.auto - -print "Building lib%s.a ..." 
% lib -cmd = "make -f Makefile.auto clean; make -f Makefile.auto" -txt = commands.getoutput(cmd) -print txt - -if os.path.exists("lib%s.a" % lib): print "Build was successful" -else: error("Build of lib/%s/lib%s.a was NOT successful" % (lib,lib)) -if not os.path.exists("Makefile.lammps"): - print "lib/%s/Makefile.lammps was NOT created" % lib diff --git a/lib/reax/Install.py b/lib/reax/Install.py new file mode 120000 index 0000000000..ffe709d44c --- /dev/null +++ b/lib/reax/Install.py @@ -0,0 +1 @@ +../Install.py \ No newline at end of file diff --git a/lib/reax/Makefile.gfortran b/lib/reax/Makefile.gfortran index b2b16fcc57..ab42301688 100644 --- a/lib/reax/Makefile.gfortran +++ b/lib/reax/Makefile.gfortran @@ -28,7 +28,7 @@ OBJ = $(SRC:.F=.o) # ------ SETTINGS ------ F90 = gfortran -F90FLAGS = -O -fPIC -fno-second-underscore +F90FLAGS = -O3 -fPIC -fno-second-underscore ARCHIVE = ar ARCHFLAG = -rc USRLIB = diff --git a/lib/reax/Makefile.lammps.empty b/lib/reax/Makefile.lammps.empty new file mode 100644 index 0000000000..758755f3c8 --- /dev/null +++ b/lib/reax/Makefile.lammps.empty @@ -0,0 +1,5 @@ +# Settings that the LAMMPS build will import when this package library is used + +reax_SYSINC = +reax_SYSLIB = +reax_SYSPATH = diff --git a/lib/reax/Makefile.mpi b/lib/reax/Makefile.mpi new file mode 100644 index 0000000000..142f7e9bc6 --- /dev/null +++ b/lib/reax/Makefile.mpi @@ -0,0 +1,51 @@ +# * +# *_________________________________________________________________________* +# * Fortran Library for Reactive Force Field * +# * DESCRIPTION: SEE READ-ME * +# * FILE NAME: Makefile * +# * CONTRIBUTING AUTHORS: Hansohl Cho(MIT), Aidan Thompson(SNL) * +# * and Greg Wagner(SNL) * +# * CONTACT: hansohl@mit.edu, athompson@sandia.gov, gjwagne@sandia.gov * +# *_________________________________________________________________________*/ + +SHELL = /bin/sh + +# which file will be copied to Makefile.lammps + +EXTRAMAKE = Makefile.lammps.empty + +# ------ FILES ------ + +SRC = 
reax_connect.F reax_inout.F reax_lammps.F reax_poten.F reax_reac.F reax_charges.F + +HEADERFILES = reax_defs.h *.blk + +# ------ DEFINITIONS ------ + +LIB = libreax.a +OBJ = $(SRC:.F=.o) + +# ------ SETTINGS ------ + +F90 = mpifort +F90FLAGS = -O3 -fPIC +ARCHIVE = ar +ARCHFLAG = -rc +USRLIB = +SYSLIB = + +# ------ MAKE PROCEDURE ------ + +lib: $(OBJ) + $(ARCHIVE) $(ARFLAGS) $(LIB) $(OBJ) + @cp $(EXTRAMAKE) Makefile.lammps + +# ------ COMPILE RULES ------ + +%.o:%.F $(HEADERFILES) + $(F90) $(F90FLAGS) -c $< + +# ------ CLEAN ------ + +clean: + -rm *.o $(LIB) diff --git a/lib/reax/Makefile.serial b/lib/reax/Makefile.serial new file mode 120000 index 0000000000..c52fbcb986 --- /dev/null +++ b/lib/reax/Makefile.serial @@ -0,0 +1 @@ +Makefile.gfortran \ No newline at end of file diff --git a/lib/smd/.gitignore b/lib/smd/.gitignore new file mode 100644 index 0000000000..4ab7a789ec --- /dev/null +++ b/lib/smd/.gitignore @@ -0,0 +1,5 @@ +# ignore these entries with git +/eigen.tar.gz +/eigen-eigen-* +/includelink +/eigen3 diff --git a/lib/smd/Install.py b/lib/smd/Install.py index dc0a3187ce..9247cb449b 100644 --- a/lib/smd/Install.py +++ b/lib/smd/Install.py @@ -3,42 +3,90 @@ # Install.py tool to download, unpack, and point to the Eigen library # used to automate the steps described in the README file in this dir -import sys,os,re,glob,commands +from __future__ import print_function +import sys,os,re,glob,subprocess # help message help = """ -Syntax: python Install.py -h hpath hdir -g -l - specify one or more options, order does not matter - -h = set home dir of Eigen to be hpath/hdir - hpath can be full path, contain '~' or '.' chars - default hpath = . 
= lib/smd - default hdir = "ee" = what tarball unpacks to (eigen-eigen-*) - -g = grab (download) tarball from http://eigen.tuxfamily.org website - unpack it to hpath/hdir - hpath must already exist - if hdir already exists, it will be deleted before unpack - -l = create softlink (includelink) in lib/smd to Eigen src dir +Syntax from src dir: make lib-smd args="-b" + or: make lib-smd args="-p /usr/include/eigen3" + +Syntax from lib dir: python Install.py -b + or: python Install.py -p /usr/include/eigen3" + or: python Install.py -v 3.3.4 -b + +specify one or more options, order does not matter + + -b = download and unpack/configure the Eigen library + -p = specify folder holding an existing installation of Eigen + -v = set version of Eigen library to download and set up (default = 3.3.4) + + +Example: + +make lib-smd args="-b" # download/build in default lib/smd/eigen-eigen-* +make lib-smd args="-p /usr/include/eigen3" # use existing Eigen installation in /usr/include/eigen3 """ # settings -url = "http://bitbucket.org/eigen/eigen/get/3.3.3.tar.gz" +version = '3.3.4' tarball = "eigen.tar.gz" # print error message or help def error(str=None): - if not str: print help - else: print "ERROR",str + if not str: print(help) + else: print("ERROR",str) sys.exit() # expand to full path name # process leading '~' or relative path - + def fullpath(path): return os.path.abspath(os.path.expanduser(path)) - + +def which(program): + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + path = path.strip('"') + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + +def geturl(url,fname): + success = False + + if which('curl') != None: + cmd = 'curl -L -o "%s" %s' % (fname,url) + try: + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = 
True + except subprocess.CalledProcessError as e: + print("Calling curl failed with: %s" % e.output.decode('UTF-8')) + + if not success and which('wget') != None: + cmd = 'wget -O "%s" %s' % (fname,url) + try: + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = True + except subprocess.CalledProcessError as e: + print("Calling wget failed with: %s" % e.output.decode('UTF-8')) + + if not success: + error("Failed to download source code with 'curl' or 'wget'") + return + # parse args args = sys.argv[1:] @@ -46,58 +94,65 @@ nargs = len(args) if nargs == 0: error() homepath = "." -homedir = "ee" +homedir = "eigen3" -grabflag = 0 -linkflag = 0 +buildflag = False +pathflag = False +linkflag = True iarg = 0 while iarg < nargs: - if args[iarg] == "-h": - if iarg+3 > nargs: error() - homepath = args[iarg+1] - homedir = args[iarg+2] - iarg += 3 - elif args[iarg] == "-g": - grabflag = 1 - iarg += 1 - elif args[iarg] == "-l": - linkflag = 1 + if args[iarg] == "-v": + if iarg+2 > nargs: error() + version = args[iarg+1] + iarg += 2 + elif args[iarg] == "-p": + if iarg+2 > nargs: error() + eigenpath = fullpath(args[iarg+1]) + pathflag = True + iarg += 2 + elif args[iarg] == "-b": + buildflag = True iarg += 1 else: error() homepath = fullpath(homepath) -if not os.path.isdir(homepath): error("Eigen path does not exist") + +if (pathflag): + if not os.path.isdir(eigenpath): error("Eigen path does not exist") + +if (buildflag and pathflag): + error("Cannot use -b and -p flag at the same time") + +if (not buildflag and not pathflag): + error("Have to use either -b or -p flag") # download and unpack Eigen tarball -# glob to find name of dir it unpacks to +# use glob to find name of dir it unpacks to -if grabflag: - print "Downloading Eigen ..." 
- cmd = "curl -L %s > %s/%s" % (url,homepath,tarball) - print cmd - print commands.getoutput(cmd) +if buildflag: + print("Downloading Eigen ...") + url = "http://bitbucket.org/eigen/eigen/get/%s.tar.gz" % version + geturl(url,"%s/%s" % (homepath,tarball)) - print "Unpacking Eigen tarball ..." + print("Unpacking Eigen tarball ...") edir = glob.glob("%s/eigen-eigen-*" % homepath) for one in edir: - if os.path.isdir(one): commands.getoutput("rm -rf %s" % one) - cmd = "cd %s; tar zxvf %s" % (homepath,tarball) - commands.getoutput(cmd) - if homedir != "ee": - if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir) - edir = glob.glob("%s/eigen-eigen-*" % homepath) - os.rename(edir[0],"%s/%s" % (homepath,homedir)) + if os.path.isdir(one): + subprocess.check_output('rm -rf "%s"' % one,stderr=subprocess.STDOUT,shell=True) + cmd = 'cd "%s"; tar -xzvf %s' % (homepath,tarball) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + edir = glob.glob("%s/eigen-eigen-*" % homepath) + os.rename(edir[0],"%s/%s" % (homepath,homedir)) + os.remove(tarball) # create link in lib/smd to Eigen src dir if linkflag: - print "Creating link to Eigen files" + print("Creating link to Eigen files") if os.path.isfile("includelink") or os.path.islink("includelink"): os.remove("includelink") - if homedir == "ee": - edir = glob.glob("%s/eigen-eigen-*" % homepath) - linkdir = edir[0] + if pathflag: linkdir = eigenpath else: linkdir = "%s/%s" % (homepath,homedir) - cmd = "ln -s %s includelink" % linkdir - commands.getoutput(cmd) + cmd = 'ln -s "%s" includelink' % linkdir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) diff --git a/lib/smd/Makefile.lammps b/lib/smd/Makefile.lammps index 7bbf3924ef..6951a1394c 100644 --- a/lib/smd/Makefile.lammps +++ b/lib/smd/Makefile.lammps @@ -1,5 +1,5 @@ # Settings that the LAMMPS build will import when this package library is used -user-smd_SYSINC = +user-smd_SYSINC = -I../../lib/includelink/eigen3 user-smd_SYSLIB = 
user-smd_SYSPATH = diff --git a/lib/voronoi/.gitignore b/lib/voronoi/.gitignore new file mode 100644 index 0000000000..6ca01c094f --- /dev/null +++ b/lib/voronoi/.gitignore @@ -0,0 +1,4 @@ +# files to ignore +/liblink +/includelink +/voro++-* diff --git a/lib/voronoi/Install.py b/lib/voronoi/Install.py index 7d847183b3..f40eb53bc6 100644 --- a/lib/voronoi/Install.py +++ b/lib/voronoi/Install.py @@ -3,25 +3,29 @@ # Install.py tool to download, unpack, build, and link to the Voro++ library # used to automate the steps described in the README file in this dir -import sys,os,re,urllib,commands +from __future__ import print_function +import sys,os,re,subprocess # help message help = """ -Syntax: python Install.py -v version -h hpath hdir -g -b -l - specify one or more options, order does not matter - -v = version of Voro++ to download and build - default version = voro++-0.4.6 (current as of Jan 2015) - -h = set home dir of Voro++ to be hpath/hdir - hpath can be full path, contain '~' or '.' chars - default hpath = . 
= lib/voronoi - default hdir = voro++-0.4.6 = what tarball unpacks to - -g = grab (download) tarball from math.lbl.gov/voro++ website - unpack it to hpath/hdir - hpath must already exist - if hdir already exists, it will be deleted before unpack - -b = build Voro++ library in its src dir - -l = create 2 softlinks (includelink,liblink) in lib/voronoi to Voro++ src dir +Syntax from src dir: make lib-voronoi args="-b" + or: make lib-voronoi args="-p /usr/local/voro++-0.4.6" + or: make lib-voronoi args="-b -v voro++-0.4.6" +Syntax from lib dir: python Install.py -b -v voro++-0.4.6 + or: python Install.py -b + or: python Install.py -p /usr/local/voro++-0.4.6 + +specify one or more options, order does not matter + + -b = download and build the Voro++ library + -p = specify folder of existing Voro++ installation + -v = set version of Voro++ to download and build (default voro++-0.4.6) + +Example: + +make lib-voronoi args="-b" # download/build in lib/voronoi/voro++-0.4.6 +make lib-voronoi args="-p $HOME/voro++-0.4.6" # use existing Voro++ installation in $HOME/voro++-0.4.6 """ # settings @@ -32,16 +36,56 @@ url = "http://math.lbl.gov/voro++/download/dir/%s.tar.gz" % version # print error message or help def error(str=None): - if not str: print help - else: print "ERROR",str + if not str: print(help) + else: print("ERROR",str) sys.exit() # expand to full path name # process leading '~' or relative path - + def fullpath(path): return os.path.abspath(os.path.expanduser(path)) - + +def which(program): + def is_exe(fpath): + return os.path.isfile(fpath) and os.access(fpath, os.X_OK) + + fpath, fname = os.path.split(program) + if fpath: + if is_exe(program): + return program + else: + for path in os.environ["PATH"].split(os.pathsep): + path = path.strip('"') + exe_file = os.path.join(path, program) + if is_exe(exe_file): + return exe_file + + return None + +def geturl(url,fname): + success = False + + if which('curl') != None: + cmd = 'curl -L -o "%s" %s' % (fname,url) + try: + 
subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = True + except subprocess.CalledProcessError as e: + print("Calling curl failed with: %s" % e.output.decode('UTF-8')) + + if not success and which('wget') != None: + cmd = 'wget -O "%s" %s' % (fname,url) + try: + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + success = True + except subprocess.CalledProcessError as e: + print("Calling wget failed with: %s" % e.output.decode('UTF-8')) + + if not success: + error("Failed to download source code with 'curl' or 'wget'") + return + # parse args args = sys.argv[1:] @@ -51,9 +95,9 @@ if nargs == 0: error() homepath = "." homedir = version -grabflag = 0 -buildflag = 0 -linkflag = 0 +buildflag = False +pathflag = False +linkflag = True iarg = 0 while iarg < nargs: @@ -61,58 +105,65 @@ while iarg < nargs: if iarg+2 > nargs: error() version = args[iarg+1] iarg += 2 - elif args[iarg] == "-h": - if iarg+3 > nargs: error() - homepath = args[iarg+1] - homedir = args[iarg+2] - iarg += 3 - elif args[iarg] == "-g": - grabflag = 1 - iarg += 1 + elif args[iarg] == "-p": + if iarg+2 > nargs: error() + voropath = fullpath(args[iarg+1]) + pathflag = True + iarg += 2 elif args[iarg] == "-b": - buildflag = 1 - iarg += 1 - elif args[iarg] == "-l": - linkflag = 1 + buildflag = True iarg += 1 else: error() homepath = fullpath(homepath) -if not os.path.isdir(homepath): error("Voro++ path does not exist") -homedir = "%s/%s" % (homepath,homedir) +homedir = "%s/%s" % (homepath,version) + +if (pathflag): + if not os.path.isdir(voropath): error("Voro++ path does not exist") + homedir = voropath + +if (buildflag and pathflag): + error("Cannot use -b and -p flag at the same time") + +if (not buildflag and not pathflag): + error("Have to use either -b or -p flag") # download and unpack Voro++ tarball -if grabflag: - print "Downloading Voro++ ..." - urllib.urlretrieve(url,"%s/%s.tar.gz" % (homepath,version)) - - print "Unpacking Voro++ tarball ..." 
+if buildflag: + print("Downloading Voro++ ...") + geturl(url,"%s/%s.tar.gz" % (homepath,version)) + + print("Unpacking Voro++ tarball ...") if os.path.exists("%s/%s" % (homepath,version)): - commands.getoutput("rm -rf %s/%s" % (homepath,version)) - cmd = "cd %s; tar zxvf %s.tar.gz" % (homepath,version) - commands.getoutput(cmd) + cmd = 'rm -rf "%s/%s"' % (homepath,version) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + cmd = 'cd "%s"; tar -xzvf %s.tar.gz' % (homepath,version) + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + os.remove("%s/%s.tar.gz" % (homepath,version)) if os.path.basename(homedir) != version: - if os.path.exists(homedir): commands.getoutput("rm -rf %s" % homedir) + if os.path.exists(homedir): + cmd = 'rm -rf "%s"' % homedir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) os.rename("%s/%s" % (homepath,version),homedir) # build Voro++ if buildflag: - print "Building Voro++ ..." - cmd = "cd %s; make" % homedir - txt = commands.getoutput(cmd) - print txt + print("Building Voro++ ...") + cmd = 'cd "%s"; make CXX=g++ CFLAGS="-fPIC -O3"' % homedir + txt = subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + print(txt.decode('UTF-8')) # create 2 links in lib/voronoi to Voro++ src dir if linkflag: - print "Creating links to Voro++ include and lib files" + print("Creating links to Voro++ include and lib files") if os.path.isfile("includelink") or os.path.islink("includelink"): os.remove("includelink") if os.path.isfile("liblink") or os.path.islink("liblink"): os.remove("liblink") - cmd = "ln -s %s/src includelink" % homedir - commands.getoutput(cmd) - cmd = "ln -s %s/src liblink" % homedir - commands.getoutput(cmd) + cmd = 'ln -s "%s/src" includelink' % homedir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) + cmd = 'ln -s "%s/src" liblink' % homedir + subprocess.check_output(cmd,stderr=subprocess.STDOUT,shell=True) diff --git a/lib/voronoi/README 
b/lib/voronoi/README index 9863632be0..2ca11c9221 100644 --- a/lib/voronoi/README +++ b/lib/voronoi/README @@ -22,7 +22,7 @@ Instructions: or somewhere else on your system. 2. compile Voro++ from within its home directory - % make + % make 3. There is no need to install Voro++ if you only wish to use it from LAMMPS. You can install it if you diff --git a/python/examples/pylammps/.gitignore b/python/examples/pylammps/.gitignore index 95ef7c6bd1..3f885f6a7a 100644 --- a/python/examples/pylammps/.gitignore +++ b/python/examples/pylammps/.gitignore @@ -1 +1,4 @@ *.orig +*-checkpoint.ipynb +*.png +*.mp4 diff --git a/python/examples/pylammps/dihedrals/dihedral.ipynb b/python/examples/pylammps/dihedrals/dihedral.ipynb index db7e81aaf6..6b919816d7 100644 --- a/python/examples/pylammps/dihedrals/dihedral.ipynb +++ b/python/examples/pylammps/dihedrals/dihedral.ipynb @@ -9,10 +9,8 @@ }, { "cell_type": "code", - "execution_count": 1, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "%matplotlib notebook" @@ -20,7 +18,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": { "collapsed": true }, @@ -31,7 +29,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "collapsed": true }, @@ -42,29 +40,17 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "LAMMPS output is captured by PyLammps wrapper\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "L = IPyLammps()" ] }, { "cell_type": "code", - "execution_count": 5, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "import math\n", @@ -80,47 +66,17 @@ }, { "cell_type": "code", - "execution_count": 6, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - 
"data": { - "text/plain": [ - "['Reading data file ...',\n", - " ' triclinic box = (-5 -5 -5) to (5 5 5) with tilt (0 0 0)',\n", - " ' 1 by 1 by 1 MPI processor grid',\n", - " ' reading atoms ...',\n", - " ' 4 atoms',\n", - " ' scanning dihedrals ...',\n", - " ' 1 = max dihedrals/atom',\n", - " ' reading dihedrals ...',\n", - " ' 1 dihedrals',\n", - " 'Finding 1-2 1-3 1-4 neighbors ...',\n", - " ' Special bond factors lj: 0 0 0 ',\n", - " ' Special bond factors coul: 0 0 0 ',\n", - " ' 0 = max # of 1-2 neighbors',\n", - " ' 0 = max # of 1-3 neighbors',\n", - " ' 0 = max # of 1-4 neighbors',\n", - " ' 1 = max # of special neighbors']" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "L.read_data(\"data.dihedral\")" ] }, { "cell_type": "code", - "execution_count": 7, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "L.pair_style(\"zero\", 5)\n", @@ -129,10 +85,8 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "L.mass(1, 1.0)" @@ -140,10 +94,8 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "L.velocity(\"all\", \"set\", 0.0, 0.0, 0.0)" @@ -151,10 +103,8 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "L.run(0);" @@ -162,55 +112,26 @@ }, { "cell_type": "code", - "execution_count": 11, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAgAAAAIACAIAAAB7GkOtAAAAG3RFWHRTb2Z0d2FyZQBMQU1NUFMg\nMTMgQXVnIDIwMTZFN+maAAAgAElEQVR42uzdd3Ck133m++/pRiPHQQ6DfjugkSbngBkOR8EyrWxJ\nlmzJCiSVLCddB117r7131/Z6b23trd0ql69lW7ZFUVmyEpUFTOQQM8BgBjk3co6d83v/6G4AHFOi\nSA4w6fcplIpV4mCK3eecp0+/73lepes6QgghHj0GeQmEEEICQAghhASAEEIICQAhhBASAEIIISQA\nhBBCSAAIIYSQABBCCCEBIIQQQgJACCGEBIAQQggJACGEEBIAQgghJACEEEJIAAghhJAAEEIIIQEg\nhBBCAkAIIYQEgBBCCAkAIYQQEgBCCCEkAIQQQkgACCGEkAAQQggJACGEEBIAQgghJACEEEJIAAgh\nhJAAEEIIIQEghBBCAkAIIYQEgBBCCAkAIYQQEgBCCCEkAIQQQkgACCGEkAAQQgghASCEEEICQAgh\nhASAEEIICQAhhBASAEIIISQAhBBCSAAIIYSQABBCCCEBIIQQQgJACCEkAIQQQkgACCGEkAAQQggh\nASCEEEICQAghhASAEEIICQAhhBASAEL8Yr29amhIBYOpfX3q859X8oIIcQ8pXdflVRA7Y2BAASsr\nnDz5RWiGF2BsdtY7MKCfOyfjUIidliIvgdgBnZ0qI4NYjKUlnE5OnkyDP4YMcJaXXyovvwBlPt+y\n0xm5fp0Pf1jCQAjZAYiHwqVLqqKCYJD5eUZH6erC7+ezn62DfXAajkMluKANmuE6jE9P+/v69Ne/\nXganEBIA4oHV2qoKC/H5mJtjeJjubkIhFhd5/HE0TWlaal1dYXq6HY5DEzRAKgzDRbgIvR7P6uho\npK2Nj3xEBqoQEgDiwXHrlsrOxu1mZobBQQYHCQaZmaGxEZeL73yHM2dobMRmw2w22mzZVVVVsB+a\n4CiUwwrcgGa4AZOTk4GeHv1Nb5IRK4QEgLi/9fSo1FTW15maor+f8XH8fkZG+OAH6evD5eIf/zEx\n9pRSp05x8CAOB5qmLJa0uroik8mR3BbUgREG4AJcgn6Xa21kJNrezlNPyegVQgJA3Gf6+5XBwMoK\nExP09TE7i9fLmTMUFamVFf2OANhq92516hSNjVitmM0pdntOefluOABNcARKYBFaoRnaYXpsLNDd\nzZvfLMNYCAkAca8ND6tYjFiM5WXGxujpYWUFl4svflH/7GfVywbABqNRNTVx4EBiW6Bp6Q0NxUrV\nwkk4BbWgQx+0wBUYWF1dHxmJdXTw9NMypIX4pchtoOJuil/yjURYWMDppKsLj4e1Nb761Ve8KEej\nm3/EalUnT/rr6yes1glNa7Hbc0tKquEQNMEH4A9grqDg2pEjLUeO3NT1dKczePs273iHJIEQEgBi\nR1y9qkpLCQSYm2NkhNu3CYdZWuLf//21LsSjo4nfYDKps2cj+/ev1NSsaFqnxfLluroSqE9uC94C\nEaV6rNYWq/UKFCwvu4aGYrdu8fGPSxgIIQEgtkd7uyotxetlZoahIfr7CQaZm+O55+7myhsOb/62\n2lp1/Livvn7MYhnTtJ/W1OQVFprhMDTBU/BHMF1Y+HxhYcuJEx2RSJrTGero4D3vkSQQQgJA3D1d\nXSovD5eLqSkGBxkZIRBgYoKf/WwbV9uBgcQvT01Vjz8e3rdvqaZmyWzusFiedThKoTG5LXgnhFJS\nbtfUtNTUPA95i4uewcHY6dOSBEICQIjXpq9PpaezssLkJP39TE3h89HdTXv7Dq2wodDm7aSNjbGj\nR7319aMWi9Ns/pHDkZefb4EjcAZ+Bz4Dk8XFV4qLW6AqFFpwOsNtbfzWb0kYCAkAIV7xx3BlMLC4\nyPg4vb0sLuLx8IUv3Jv1dOstbRkZ6vz50N69i3b7otncZrV+3
mYrgz1wGk7Ce8GXmnqrtra5tvYa\n5M7NeQYH9bNnJQmEBIAQL+f2bZWZSSzG4iJjY3R3s77O+jpf/vJ9sYb6/Zvbgv37Y0ePemprhy2W\nEU37vsNRkJNjg6PQBJ+GTBgrK7tcVnYBKvz+Jacz3NoqnXRCAkCIl3Lxoqqs3Ox36+wkEGBlha9/\n/b5bNLduC/Ly1LlzoT175m22eU1rtVo/p2kVsBdOw3n4bXBnZNxsaGhuaHgBsmdmfP39+vnzkgRC\nAkAIAFpbVWUlPh+zswwP09NDKMTCAt/5zv2+UK6vb24LjhyJHj7srq0d0LRBTftube2uzEw7HIMm\n+AykwWhFxcWKiotQ6vWujI5GbtyQTjohASAeYbduqcJC3G6mpxkaSvS7TU/zox89SCvj1m1BcbE6\ncya4Z8+szTZrNj9vs/3T7t2VsA+a4Al4Clazstr27m3eu/c6ZE1N+Xt79Te+UZJASACIR0lPj8rK\nYm2NyUkGBhL9bsPDXLnyAK+Gi4ub24ITJ6KHDq07HOua1mexfKuurjA1tSbZSfeXkAJDVVUXqqou\nQbHbvRrvpHvySQkDIQEgHmr9/cpkYnn5Rf1uzzzz8Kx9W7cFFRWqqSnQ0DBts02bzVfs9n+oqKhK\ndtK9Ez4Byzk5rQcONB840AaZ4+P+nh6eeEKSQEgAiIfL4GDi6e0LC5v9bm43X/ziQ7vezcwk/tMM\nBnX6dOTgwTWHY03TejTtG/X1RUZjLZyA0/A3oKDfbL5gNl+CwrW19ZGR6M2b0kknJADEg++FF1RR\nUeIy7+go3d2vvt/tQRSLbf5napo6edLf0DBptU6azRfs9pyysmo4CE3wPvhdWMjPf+Hw4ebDh9sh\nw+kMdHbytrdJEggJAPEAive7+f3MzzM8TGfnXet3exCNjSX+q1NS1JkzkQMHVmtqVjWty2L5an19\nMdQlyyeegCj0WiwtFssVKFhZcQ0Pxzo6+NjHJAyEBIB4EOxMv9uDKBLZfAVqatSJE776+nGLZVzT\nmu323OJiMxyCM/Ah+DTM7tr1/LFjLceOdUSj6U5n8NYt3vUuSQIhASDuV/F+t/V1pqcZGGB0lECA\n8XGam2XlepGhoc1OusceC+/fv2y3L2vaLYvlS7W1JdCQ3Ba8HUJGY5fd3mK3X4X8pSX30FCsvZ1P\nfUpeUiEBIO4bW/vd+vqYnsbno7OTjg5Zqn6ujU46oL5eHT/uratzWq1Os/nHNTV5u3ZZ4DCcgY/D\nn8BUUdHVoqKWkydvh8NpTmeovZ33vU9eXiEBIO6pwcH7qN/tgU3QxMuVlqbOnw/v27dkty+Zze1W\n6xfs9lLYA6fgJLwH/CbTbYej2eG4BrkLC56BAf3MGXm1hQSA2FkdHSo7m2iUxUWcTrq7cbnuo363\nB1EwuHnKbO/e2NGjnro6j8Uyqmk/cDjyc3OtyU6634c/h/GSksslJRegMhBYdDrD16/zwQ/Kiy8k\nAMQ2u3BBVVUl+t1GRujsJBhkeZlvfEMWoLtg6ymz7Gz1+OOhvXsXbLYFTbthtf6rxVKerKo+C+8H\nT3p6R319c339C5AzO+sdGNDPnZM3QkgAiG1w/bqqqnog+90eRB7P5rbg0KHo4cPuujq3pg1p2nO1\ntQVZWbZkJ90fQwaMlpdfLi+/AGU+33K8k06qqoUEgLg7bt9Wu3Yl+t0GBxkaIhhkaoof/1hWmZ3b\nFuzapR57LNjYOGe3z5nNL1it/2w2VyQ76d4AHwJXZmbbnj3Ne/a0Qtb0tL+vT3/96+U9EhIA4tXq\n6VGZmYl+t/5+Jibw+xka4upVWVl21MrK5rbg2LHo4cMuh8OlaQOa9u26usL09JrktuA/QSoMV1Ze\nrKy8CCUez8roaLStTaqqhQSAeCW29rv19jI397D1uz3o24KyMtXUFGxsnLHZZszmKzbbZ6uqqmA/\nNMFb4KOwkp19Y9++n+3bd
wMyJyYCvb36m94k76CQABC/cOk3GuFR6nd7EM3NbW4LTp2KHjq0XlOz\nbrH0ato36+qKTCZHspPuv4IRBqqrL1RXX4Iil2stXlX91FPyhgoJALHFtWuquHiz362rC6/3Eep3\ne9C3Bbt3q9OnAw0NU1brlNl8yW7PKS/fnayqfjf8Dizm5rYePNh88GAbZIyNBbq6eMtb5M0VEgCP\nvOefVyUl+P3MzSVu93yU+90eRJOTiXfKaFRNTZGDB+OddN2a9rWGhmKl6pLbgv8OOvRpWoumXYZd\nq6vr8U66j35U3mshAfDouXlTlZTg8ST63QYGCAaZneX735cV4cETjW6+a1ZrvKp6wmKZiHfSlZSY\n4SCcgQ/AH8BcQcG1o0dbjh69GYulj40Fb9/mHe+Q910CQDwaurpUbi7r60xNMTiY6HcbG6OlRVaB\nB97oaOJNNJnU2bORAwdW7PYVTbttsXy5rq4E6pOddG+BiMHQbbW2WK1XoWB52TU0FOvo4BOfkGEg\nASAeUhv9bhMT9PczPY3Xy+3b3L4t0/6hEg5vvqG1ter4cV99/ZjVOmY2/7SmJq+wUIPD0ARPwx/D\ndGHh1cLClhMnbkUiaU5nqKOD97xHhoQEgHiIbO136+lhaQm3m2eflXn+kBsY2Kyqfvzx8P798U66\nm1brF2pqSqEx2Un36xBMSemsqWmpqbkKeYuLnsHB2OnTMkIkAMSD7OZNlZMj/W6Puo2qaqVUY2Ps\n2DFvXd2oxeI0m3/kcOTl51vhCDTB78BnYLK4+EpxcQtUhUILo6Phtjbe/34ZMBIA4oFy8aKqrEw8\nxmt0VPrdxItuJ83IUOfPh/buXbTbFzXthsXybzZb+Zaq6veCLzX1Vl1dc13dNciZm/MODOiPPSaD\nRwJA3Pdu3FCVlYl+t6EhensJhZif57vflQksAPz+zW3BgQOxI0c8tbVDFsuwpj3ncBTk5NiSVdX/\nB2TAWFnZ5bKyC1Du9y85nZHWVumkkwAQ96Xbt1VBgfS7iVe8LcjLU+fOhfbsmbfZ5jWt1Wr9nKZV\nwF5ogvPw2+DOyGhvaGhuaGiF7JkZX3+/fv68jCsJAHF/iPe7ra4yNbXZ7zY4yPPPyywVL2N9fXNb\ncORI9PBhd23tgKYNatp36+p2ZWTYk510fwapMFJRcami4iKUer0r8apq6aSTABD3zMBAot9tfJy+\nPul3E3dhW1BcrM6eDTY2ztpss2bz8zbbP+3eXZmsqn4CnoLVrKy2vXub9+69DllTU/7eXv2Nb5RR\nJwEgdkpfn0pJQddZXGRsjO5uVldxufjSl2QeitdkcXFzW3DiRPTQoXWHY91i6dO0b9XVFaam1iTL\nJ/4SUmCwqupiVdUlKHa7V+OddE8+KYNQAkBsm3jDT/wyr9OZ6HdbXeVrX5OJJ7ZlW1BZqU6fDjQ2\nTlut02bzFbv9/6uoqEp20r0TPgHLOTmtBw40HzjQBpnj4/7ubn7t12RASgCIu+ratc1+t+FhuroI\nh1lc5Fvfkskmtsv0dGJ0GQyqqSly4MCaw7GmaT2a9o36+iKjsTa5LfgbUNBnNl8wmy9D4dra+shI\n9OZNnn5axqcEgHhtbt5UxcXS7ybumVhsc6Rpmjp1yl9fP2m1TmraBbs9p7S0OtlJ95vwe7CQn3/t\n8OGWw4fbId3pDHZ28ra3yViVABCvXHf3Zr/bwABOJ4EATicXLsiMEvfA2Fhi4KWkqLNnI/v3x6uq\nuyyWr9bXF0MdnITT8GsQhR6L5YLFcgUKVlZc8arqj31Mhq4EgPgl9PWp1FSWl5mcpK+PmRm8Xtrb\n6emRKSTusUhkcxDW1KgTJ3z19eNW67jZ3FxTk1tUZE520n0YPg0zu3ZdO3as+dixjmg0zekM3brF\nu94lw1gCQPwc8X63pSXGxujtlX43cf8aGtrspDt3Lrxv33JNzbLZfMti+WJtbQk0JMsn3g4
ho7HL\nbm+x269C/tKSe3Aw1t7O7/6uDGwJAAFAe7vKzSUaZWEh0e/mdrO2xle+IpNE3Nc2OumAhgZ17Ji3\nvt5psYyZzT92OPILCrRkJ93H4U9gqqjoalFRy6lTt8PhNKcz1N7O+94ng1wC4BF26ZKqqCAQYH4+\n8UDHYJCVFel3Ew+Y3t7EiE1LU+fPh/fti3fStVssz9jtZVuqqt8DfpPptsPR7HBcg9z5ec/goH7m\njAx4CYBHzI0bqqICn4+ZGYaHpd9NPAyCwc1TZvv2xY4c8dTVDVssI5r2A4cjPzfXmuyk+334cxgv\nLb1cWnoBKgKBJaczfP06H/ygjH8JgIddvN/N5Ur0uw0PEwwyOclPfiKjXzwMtp4yy8lR586F9u5d\nsNkWNO2G1fqvFks57IXTcBbeD5709Jv19c319S9Azuyst79ff/xxmQsSAA/nfjnR7zY5SX8/k5P4\nfAwOcu2ajHjxEHK7N7cFhw5Fjxxx19a6NW1I075XW1uQlWVPbgv+FNJhtLz8Unn5RSjz+ZbjnXRS\nVS0B8JAYGFApKSwtMTFBby/z89LvJh7FbUFhoTp7Nrhnz5zNNmc2X7PZ/rm6Ot5JdxreCB8GV2Zm\n2549zXv2tELW9LS/t1d/wxtkpkgAPJh6elRqKrrOwgJjY/T0SL+beHQtL29uC44dix4+7HI4XBZL\nv6Z9q7a2MD29Bo7Dafi/wARDlZUXKysvQYnHsxLvpJOqagmAB8bVq6q0lGCQhQVGR+nqwueTfjch\nXrQtKCtTTU3BxsYZm23GbL5is/1DVVUV7IcmeBt8DFays6/v39+8f/8NyJyYCPT06L/6qzKJJADu\nY9euqdJS6XcT4mXMzW1uC06fjh48uO5wrGtar6Z9s76+KCXFkeyk+69ggIHq6ovV1ZegaH19Ld5J\n99RTj+6ckgC4H3V0bPa7DQ4yOEgwyMwMP/iBrP5CvPy2oLpanToVaGiYslqnNO2SzZZTXr4bDkIT\nvAd+Bxbz8loPHWo+dKgNMsbGAl1dvOUtj9z8kgC473R3q5ycO/vdRke5eFFWfyF+KRMTicliNKoz\nZyIHDsQ76brjnXRK1SW3Bf8ddOjVtAvh8P/o71cjI4/WEwskAO4vG/1uExP09yf63T7/eVn6hXg1\notHNuWOzqRMn/A0NE1brhNncbLfnlpSY4dDly/9cXk4oxMICc3N88IPq3/7tUZlxEgD3kXi/2+Ii\n4+P09LC8LP1uQtw1IyOJqWQyqccei+zfv1JTs3LgQEd5OX4/s7MMD9Pdjcn0CL0mEgD3hbY2lZ8v\n/W5C7IRwODGtOjpUTg4eD9PTiYcphUJEIhIAYgddvqzKy+/sd1te5pvflNVfiO3S3a2ys1lbS1xs\nGxvD72d0FItFAkDs4Gf/8nK8XmZnGRqir49QiLk5vvc9Wf2F2C79/So1lZUVJiYSD1Py+ThyhLQ0\nAgEJALEjOjtVfv6d/W4TE/z0p7L6C7FdBgcVsLiYeJjS8jIuF+94h5qdldtAxU7p7VUZGXf2u/X3\n09oqq78Q2+L6dbVrF5EIi4uMjtLdjcfD2hpvfesj+oJIANwbAwPKaGRpifFx+vqYn8fj4QtfkKVf\niO1y5YoqKyMQYG4ucbEtFEpcbHv2WSUBIHZCd7dKS0PXEzvQ7m7W1qTfTYjt1damyspedLEtGGR+\n/lG/2CYBcA8+g2z0u3V24vdLv5sQ22vrxbaBAUZGCASYmOBnP3vU550EwM554QVVVvaiIyehEIuL\nfPvbsvoLsV3iF9tWVhIX26am8Pno7eXGDZl3EgA7paNDFRXdeeRkepof/lBGoRDbZevFtt5eFhbk\nYpsEwI57ySMnTqf0uwmxXTo7VUYGsRhLS4nT9evrrK/z5S/LpJMA2EF3HDmZnZV+NyG216VLqqIi\ncZk3/jAlv5+VFb7+dZl3EgA7aHBQKfWiIyfS7ybEtmp
tVRUV+HyJhynJxTYJgHvgxg1VUPASR06k\n302I7XPrliosxO2+82FKcrFNAmDn/IIjJ/LiCLFNenpUVlbiYlt/P+Pj+P2MjHD5ssw7CYCd8h+P\nnEi/mxDbrb9fmUyJhyltXGx75hmZdBIAO+glj5xMTkq/mxDbZXhYxWKQ7Hfr6WFlBZeLL35RJp0E\nwA7a6HeLP9AxfuSkr4/r12UgCrEtWltVYSGRSOJhSl1diYttX/2qTDoJgB0kR06E2GFXr6rS0s2L\nbbdvEw6ztMS//7vMOwmAnXLHkZOeHul3E2Lbtber0lK8XmZmGBqiv59gkLk5nntO5p0EwE6JP9BR\njpwIsZO6ulReHi4XU1MMDkq/mwTAvdDaqsrL5ciJEDuqr0+lp9/Z79bdTXu7zDsJgJ0iR06E2HkD\nA8pgYHExcbFtcVEutkkA7Lju7s0jJxv9bqOjXLokA1GIbXH7tsrMJBbbfJiS9LtJANwDd/S7zczg\n80m/mxDb6OJFVVm5ebGts5NAQC62SQDsrNFRFYmAHDkRYge1tqrKSny+xMOUenoIhVhY4DvfkXkn\nAbBTrl9Xu3ZtHjnp7sbtliMnQmyvl7zYNj3Nj34k804CYKdIv5sQO2+j321ykoGBRL/b8DBXrsi8\nkwDYKe3tiX63mRmGh+nrkyMnQmw76XeTALj3OjvlyIkQO2pwUMX/YWFh82Kb2y0X2yQAdla83+2O\nIyc9PbS1yUAUYlu88IIqKkpc5t36MCW52CYBsKPi/W6Li0xMSL+bEDsh3u/m9zM/z/AwnZ3S7yYB\nsOPkyIkQO0/63SQA7r1Ll1RFhRw5EWJHxfvd1tcTD1MaHSUQYHyc5maZdxIAO6W1VVVU3NnvJkdO\nhNhWW/vd+vqYnsbno7OTjg6ZdxIAO0WOnAix8wYHpd9NAuBe2zhyMjVFf3/iyMnICJcvy0AUYlt0\ndKjsbKJRFhcTp+tdLrnYJgGw4+TIiRA77MIFVVWVuNgWP10fDLK8zDe+IfNOAmCnDA0pXQdYWGB8\nXPrdhNgJ16+rqirpd5MAuKfiR07C4US/W1eXHDkRYtvdvq127cLtZnqawUGGhggGmZrixz+WeScB\nsFPiR07i/W5y5ESIndHTozIzE/1u/f1MTOD3MzTE1asy7yQAdoocORFi52292Nbby9ycXGyTANhx\ncuREiJ1f+o1GkH43CYB76yWPnHR1cfOmDEQhtsW1a6q4eLPfrasLr1cutkkA3AMH6uv3gXN+3hMI\n6D/5iRw5EWJ7Pf+8KinB7998mJJcbJMAuAeGh1Pt9qtQDmOlpZdLSy+cPdsVCCz19anWVj70IRmO\nQtxlN2+qkhI8nsTFtoEBgkFmZ/n+92W6SQDsrO7usN3+ftgLp+EcfAA86ek36+ub6+tfgOzZWV9/\nv/744zI0hbgLurpUbi7r64mHKcUvto2N0dIiU0wCYMctLPDNbw4pNWQ2f6+2tiAryw7HoAn+FNJh\npLz8cnn5BSjz+ZZHRyPXr/ORj8hIFeLV2LjYNjFBfz/T03i93L7N7dsypyQA7p3lZf3znw/u2TNn\ns82Zzddstn+qrq6EfdAEvwIfhvXMzLY9e5r37LkOWVNT/r4+/Q1vkFErxC9ra79bTw9LS7jdPPus\nTCIJgPuA1crcHH/1Vxw/Hj10yFVb69K0fk37dl3drrS0GjgOTfAXYIKhqqqLVVUXocTtXhkdjba1\n8eSTMo6FeGk3b6qcHOl3kwC47+n65ogsL1dNTYHGxhmrdUbTrtpsn62srIL90ARvg4/Bck7Ojf37\nf7Z/fxtkTkz4e3r41V+VMS3EposXVWVl4kxl/GFK0u8mAfAAmJ1NDFCl1OnTkYMH1xyONU3r0bRv\n1NcXpaTUwgk4DX8NCgaqqy9UV1+CovX1tZGR6M2bPPWUDHHxSLtxQ1VWJvrdhobo7SUUYn6e735X\npoYEwAO4LaiuVqd
OBRobp6zWKbP5ot2eU1a2Gw5CE/wGfAoW8/JeOHSo+dChdsgYGwt0dvLWt8pw\nF4+c27dVQYH0u0kAPEQmJhJj12hUZ85EDhxYralZ1bRui+WrDQ3FUAcn4RS8CXTo1bQWTbsCu1ZX\n14eHYx0dfPSjMvrFwy/e77a6mniYUrzfbXCQ55+X8S8B8OCLRjfHsc2mTp7019dPWK0TZnNLTU1O\ncbEZDkETfBD+EGYLCq4dPdpy9OjNWCzd6Qzevs073ykzQTycBgYS/W7j4/T1Sb+bBMBDbWQkMbJN\nJvXYY+H9+1dqalY07bbF8qXa2hJoSG4L3gZhg6HLZmux2a5C/tKSe3g4dvMmn/ykzA3xMOjrUykp\n6DqLi4yN0d3N6iouF1/6koxwCYCHXTi8Ocrr6tTx4776+jGLZUzTflJTk7drlwaH4Qx8FP4EpoqK\nrhYVtZw4cTscTnM6Qzdv8t73yjwRD6p4w0/8Mm/8YUpeL6urfO1rMqolAB4x/f2JQZ+Wph5/PLxv\n35LdvqRpNy2WL9TUlEEjnIKT8G4ImEy3HY4Wh+N5yFtYcA8O6k1NMmfEg+Tatc1+t+FhuroIh1lc\n5FvfkpEsAfAICwY3byfdsyd29Ki3rm7EYhnVtB86HPl5eRY4Ck3wu/B/wkRJyZWSkhaoDAYXnc7w\njRt84AMyhcR97eZNVVws/W4SAOLn23o7aVaWevzx0N69C3b7gtncZrX+q9VaDnvgNJyG3wRvWlpH\nXV1zXd01yJmb8w4M6I89JtNJ3He6uzf73QYGcDoJBHA6uXBBhqsEgHgpXu/mtuDgweiRI57a2iGL\nZdhsfq62tiA725bspPsjyABnWdnlsrILUO73L8U76T78YZld4t7r61OpqSwvJx6mNDOD10t7Oz09\nMj4lAMQr2RYUFKjHHgvt2TNvs82bza022+fM5grYC03wOvgguDIy2hsbmxsbWyF7etrX36+/7nUy\n08S9Ee93W1pibIzeXul3kwAQr8Hq6ua24OjR6OHDLofDZbEMaNp3amt3ZWTUJLcFfw6pMFxZeamy\n8iKUejwro6ORtjapqhY7pL1d5eYSjbKwkOh3c7tZW+MrX5ERKAEg7t62oKREnTkT3LNn1mqd1bSr\nVus/7t5dmeykezM8DavZ2Tf27Wvet+86ZE5OBnp79V/5FZmHYrtcuqQqKggEmJ9PPNAxGGRlRfrd\nJADE3bawsLktOHkyeujQusOxrml9mvbv9fWFJpMj2Un3f4MRBnfvvrB79yUodrlW4510UlUt7qIb\nN1RFBT4fMwcE/tsAACAASURBVDMMD0u/mwSA2PFtQVWVOn060NAwbbNNm82Xbba/r6jYDQegCd4F\nn4Sl3NzWgwebDx5sg4zx8UBXF29+s0xR8ZrE+91crkS/2/AwwSCTk/zkJzK0JADETpmaSsw3g0E1\nNUUOHlyrqVnTtG6L5ev19UUGQ11yW/DfAOg3m1vM5stQuLa2Pjwc7ejg6adlxopXprc30e82OUl/\nP5OT+HwMDnLtmowlCQBxL8Rim3PPYlEnT/obGiat1kmzucVuzy0trU5WVf8W/D7M5+dfO3Kk5ciR\ndl1PdzqDnZ28/e0ye8XLGxhQKSksLTExQW8v8/PS7yYBIO4nTmdiNqakqLNnIwcOxDvpOjXtK/X1\nxVCf7KR7M0SV6rZaW6zWq1CwsuIaGop1dPDxj8t8Fnfq6VGpqeg6CwuMjdHTI/1uEgDiPhaJbM5M\nh0OdOOGrrx+3WMbN5p/V1OQWFWlwCM7Ak/BHMLNr1/PHjzcfP34rGk1zOkMdHbz73TK3BcDVq6q0\nNHGZd3SUri58Pul3kwAQD4jBwcRETU1V586F9+9fttuXNa3DYvmiw1EKDclOundAyGjstNtb7Par\nkLe46Bkaip06JfP80XXtmiotlX43CQDx4AuFNm8nbWiIHTvmrasbtVicmvajmpr8g
gILHIEm+CT8\nKUwWF18tLm6BqlBowekMt7fzm78p0/4R0tGx2e82OMjgIMEgMzP84AcyDCQAxANr6+2k6enq/Pnw\nvn2Ldvui2dxmtX7eZiuDPcltwW+APzX1Vm1tc23tNcidn/cMDOhnz8oS8JDr7lY5OXf2u42OcvGi\nvPUSAOJhEQhsbgv2748dOeKpqxu2WEbM5u/X1hbk5FiTVdV/CP8JxkpLL5eWXoCKQGBpdDR8/Tof\n+pCsCA+bjX63iQn6+xP9bp//vLzREgDiEdgW5Oaqc+dCe/bM2+3zZvN1q/VfLJaKZFX1OfgAuNPT\nbzY0NDc0tEL27Kyvv19//HFZIB4G8X63xUXGx+npYXlZ+t0kAMSjxOXa3BYcPhw9fNhdWztgsQxq\n2vdqawsyM+3JTrrPQDqMlJdfKi+/CGVe77LTGbl+XTrpHkhtbSo/X/rdJACE+A/bgqIidfZssLFx\nzmab07RrVus/VVdXwj5ogjfBk7CWldW2Z0/znj3XIWtqyt/Xp7/hDbJ2PBguX1bl5Xf2uy0v881v\nyjsoASAeeUtLm9uC48ejhw65amtdmtavad+uq9uVluaA43Aa/gJMMFhVdbGq6hKUuN0ro6PRtjbp\npLuvP/uXl+P1MjvL0BB9fYRCzM3xve/JWyYBIMTP2RaUl6umpkBj44zVOqNpV2y2f6isrEpWVb8d\nPg7LOTnX9+9v3r+/DTInJvzd3TzxhCwr95HOTpWff2e/28QEP/2pvE0SAEL8fLOzm9uCpqbIgQNr\nDseapvVYLN+oqytKSalNdtL9NSjor66+WF19CYrW19fiVdXi3urtVRkZd/a79ffT2iqrvwSAEK98\nW2A2q1OnAg0NU1brlNl80W7PKSurhgNwBn4DPgULeXmthw41HzrUDjNOZ6Cri74+eRV32ka/2/g4\nfX3Mz+Px8IUvyNIvASDEqzU+nlhBjEZ19mxk//5Vh2PVbO6yWL7a0BDvpItvC94EOvRYLBcslstv\nfevQ6ur6jRuqo4OPflTWoO3V3a3S0jb73bq7WVuTfjchASDunmh0czWx29WJE/76+gmrdcJsbq6p\nyS0urobD0AQfhD+E2YKCa0ePNh892hGLpTudwVu3+PVfl/Xo7rtyRZWVEQyysMDoKJ2d+P3S7yYk\nAMS2GR5OLC4mkzp3Lrxv33JNzbKm3bZYvlhbWwr1yfKJt0HYYOiy2VpstquQv7TkjldVf/KTsjzd\nBS+8oMrK8PuZnWV4mO5uQiEWF/n2t+XlFRIAYpuFw5sLTX29OnbMV1/vtFicmvaTmpq8Xbu0ZCfd\nx+CPYbqo6GpRUcvJk7fC4TSnM3TzJu99ryxVr1JHhyoqwuNhepqhIQYGCIWk301IAIh7oa9P/+xn\nVVGRet/79McfD+/bt2S3L2naTYvlmZqaMmhMbgveDQGT6bbD0exwPA+5CwuewUG9qUmWrVegu1tl\nZ7O2luh3GxvD78fplH43IQEg7qm/+zv6+lhd5WMfY8+eeFX1iMUyajb/0OHIz8uzJrcFvwd/BhMl\nJZdLSi5AZTC46HSGr1/nt39bVrFfpL9fpaayssLEBH19zM5Kv5uQABD3ma23k2ZlqccfD+3du2C3\nL5jNN6zWf7Nay2AvnEo+5diblnazrq65ru4FyJmb8/b36+fOyaJ2p8FBpRSLi4yN0dsr/W5CAkDc\n97zezVNmBw9Gjxxx19a6LZZhTXvO4SjIzrYlq6r/GDLAWVZ2qazsIpT7/Uujo5Hr1/nwhx/1Ne7G\nDVVQQCTC4iJOJ11deDzS7yYkAMSDuS0oKFCPPRbcs2fOZpszm1+w2T5nNlfAPjgNb4APgSsjo72x\nsbmxsRWyp6d9fX3661//KK538ds9AwHm5hL9bqGQ9LsJCQDxwFpd3dwWHD0aPXw43kk3oGnfrq3d\nlZFRk+yk+3NIheHKykuVlReh1ONZGR2NtLU9K
lXVbW2qrEz63YQEgHjYtwWlperMmWBj46zVOqtp\nV222z1ZVVSWrqt8MT8Nqdvb1ffua9+27AZmTk4GeHv1Nb3pol8Kt/W4DA4yMEAgwOSn9bkICQDx0\n5uc3twWnTkUPHlx3ONY1rVfT/r2+vtBkciTLJ/4LGGFg9+6Lu3dfgmKXazXeSfcwVVVv9LvFH+g4\nNYXPR18f16/L6i8kAMSjsS2oqlKnTwcaGqZttmmz+bLd/vfl5bvhIDTBu+CTsJSb23rwYPPBg22Q\nMTYW6O7mzW9+sFfJgQFlNCb63Xp7WViQfjchASAePVNTm510G1XVZnO3xfK1+voig6EuuS34W9Ch\nT9MuaNplKFxbWxsejnV08PTTD9K62dmpMjKIxVhawumkp0f63YQEgHjkbe2ks1jUyZP+hoZJq3XS\nbG6x23NLS6vhEDTB++H3YT4//9qRI81HjtzU9XSnM9jZydvffr+vofEHOgaDzM8zOkpXF34/Kyt8\n/euy+gsJACEAcDoTC2JKinrsscj+/Ss1NSua1mmxfLmurgTq4BScgjdDVKluq7XFar0CBcvLrvi2\n4OMfv++W1NZWVV6Oz8fcnPS7CQkAIV5OJLK5ODoc6sQJX339mMUypmk/s9tzi4q0ZFX1k/BHMFNY\neLWwsOX48VuRSJrTGbp1i3e/+75YXm/dUoWFuN3MzDA4yOAgwSAzM/zwh7L6CwkAIV7O4GBirUxN\nVY8/Ht63b9luX9a0DovlWYejFBqSnXTvhFBKSmdNTXNNzfOQt7joGRyMnT59z5ba7m6VlXVnv9vo\nKJcuyeovJACEeCVCoc3bSRsa4p10oxaLU9N+7HDk5edbkp10vwOfgcni4ivFxRegKhRacDrDbW38\n1m/t3Mp7R7/bzAw+n/S7CQkAIV6brbeTpqer170utHfvot2+aDa3Wa2ft9nKYE9yW/Be8KemdtTW\nttTWXoPc+XnPwIB+9uw2LsSjoyoSARL9bj09rKzgcvHFL8rqLyQAhLh7AoHNbcH+/bEjRzx1dcMW\ny4jZ/P3a2oKcHGuyk+4P4T/BWGnp5dLSC1ARCCyNjoZbW+9yJ93162rXLiIRFhZwOunuxu1mbY2v\nflVWfyEBIMT2bwtyc9W5c6G9e+dttnmz+brN9i+aVgF74TScgw+AOz39ZkNDc0PDC5A9M+Pr79fP\nn3+ta7T0uwkJACHuMZdrc1tw+HD0yBG3wzFgsQxq2ndra3dlZtqSnXSfgXQYqai4VFFxEcq83uXR\n0ciNG6+mk669PdHvNjPD8DB9fQSDzM3x3HOy+gsJACHu6bagqEidPRtsbJy12WY17ZrV+o/V1ZWw\nH07Dm+BJWMvKurF3b/Pevdcha2rK39urv/GNv9Ty3dmp8vJwuZiaYnAw0e82McHPfiarv5AAEOJe\nW1ra3BacOBE9dMjlcLg0rV/TvlVXtystzZHcFvwlpMBQVdXFqqpLUOx2r46MRNvbf24nXbzfbWWF\nycnNfreeHtraZPUXEgBC3K/bgooKdfp0oLFxxmabMZuv2Gz/UFlZBQegCd4OH4flnJzrBw40Hzhw\nAzInJvzd3TzxxOZvkH43IQEgxANpZiaxUhsM6vTpyMGDazU1a5rWY7F8o76+0GisTXbS/TUo6K+u\nvlBdfRkK19fXh4ejubnEYonbPbu7WV9nfZ0vf1lWfyEBIMSDIxbbXLXNZnXqlL+hYcpqnTKbL9rt\nOWVl1cmq6vfC78JCXt4Lhw83Q0tz8/LoKJ2dBALS7yYkAIR4wI2Pb3bSnTkTOXBgtaZmVdO6NO0r\nDQ3xTrqTcAp+Fd6XlrZ886b0uwkJACEeLls76ex2deKEv6Fh3GIZ17Rmuz23uLhsdHS0t5dQiMlJ\nfvQjWf2FBIAQD6Ph4cT6bjKpc+fC+/cvHz++Mjur9/QQDsvqLyQAhHgEhMM68OyzSl4KsfMM8hII\nIYQEgBBCi
EeIfAUkRMJPlQpDCIIQhAA8rct38UICQIiHV5tSEciGKgCiEAQfuOAZpWZgHf7m1SZB\ns1JhCEME/BCAAHxCckVIAAhxz7UrVaxUtlKpBoMBYroe1vWArvt0PQsyIRNm4Q+V+n9fyao9oNQS\n5MFuMEAMwhAAD6zDvyg1CX8hMSAkAIS4J24rla6ULSUl32QiNRWDASAaJRwORCKp0WiKrht0HdAh\nBn+g1Ah895dYta8rtQscBkOGwWBSSoeIrgdjMb+uZ+l6JqRDOvwPpfrhnyQGhASAEDvpplJ5SlWk\npWVkZZGTQ2YmKSlEIgQCeL3pfr8xGCQSicZiEV0PQQD84IW3KfXtX7hkdytVbTQWmUwpqamYTBgM\nRKNEItnhsC8cNsVixlgsHipRiMJHlPqcZICQABBiZ/QpladUeVpaRkEBZWWUlJCXh8GA38/aGsvL\nrKyYIFPXg7qerusZkAW5UASLL/eby1JSijIzycsjJ4f0dIBgEJ8PjyfT7ycYjEEkFgsnrzNXwbuU\nescX5G0REgBCbD8dck2mzLw8qqqw2zGb2bULXWdtjZkZjMb4Z/a0SCQ1FjPpeqqup0E6ZEMhvEmp\nH77UZ/ZupQpTUopycigpoaKC4mIyM4lEcLlYXmZxEaUydT0UDAa25EoelMlbIiQAhNgBV5UqNxgK\nMjIoKsJiobERu534k9fn5hL7AI8Hj8cYCBjDYSMYwAgmSIUMyH6pX/u8UuUGQ0lGBsXF2GzU1FBR\nQWYmgQDz80xOEosRiRAOp0YiplgsRddTk78wR94VIQEgxA7IhBSjMSUjg8JCKirQNGw28vPx+9F1\nVlcTX92YTBgMSimUUrquQIEBTJAOb1bqey/eBORCZkqKMTeXigpqatizh+pq0tNZXycjg1AIlwuX\nC4/HaDQaIxGDUgZdT4EUSJV3RUgACLEDDJBqNJKeTlYWubnk5pKdTWYmsRgmE0YjSm38RJWKQgxi\nEF/vFZgg48W/88dKVSuVmZpKTg7FxVRWYjZTXY3JREYGHg+5uWRkkJpKSopSSlebzT8KjPKuCAkA\nIbbbc0pVKKUMBgwGlCIWIxTC6wVwu+Of0AkECIeJRiOxWFjXIxD/id+0E0t+HXTHrgKDIdVkSuRK\ndjZZWaSnYzBgNCZuME0u+lGlYslQ2cgVISQAhNguP1QqfuNNr67f9PmejEYJBFhbS3zvn56O18v0\nNIuLrK3h9RIK+aPRoK6HdD0E8YqI+IFeIxjhjUr9eMu3QNH4Er8RKh4Pa2sAq6usr2/NlXAstnE2\neCNXhJAAEOIuu6BULqRBNUQhBD5wwxfGx2fGx13NzX/16U/jcpGais/H0hIzMywv4/FEgkFvNOqP\nxQLJ/oZ4QZBK/mydOfGlPBiLpYVCuN0sLJCdjc+HrrOywvQ0S0u4XAQCsUjEH4sFdT2UTJT4/woh\nASDEXdOq1C6oMxjSlTKCDmFdD+q6T9dzIAuyYA7+5H/+z//nAx8gJYVgMHG/5sqK7vGsBYOeaNSn\n6z5d98PGz0sKQkDXvZFIbjxFJieJRpmbQ9dxuZifZ36etTV8Pnco5IvF/Loe0PWtuSKEBIAQd0eP\nUprRuCslxbRxXTcaJRIJRiLp0ahJ1426Hm9C1+FPnnmmH75z4kT8BtCwx7Pm96+Hw65o1KPrXtj4\nCSdrIXSIbPnrfGDSdVckkuvxZC0toRRuN5mZ6HricNnKCuvrHr/fFQ4ncmVLqETkDRMSAELcFQNK\nVZpM+ZmZiYuxJhOxWPw4bprPlxIMEonEkh0P8eO4XnjdCy/8oLzcFwp5gkFPOOyJRLyxmFvXPRD/\ncW+5chuFrRcAPGDQ9fVIJM3vVysrmfHDX2lp6DqhED4fXu+6z7ceDMZDZWuueOQ6sJAAEOKu6Feq\nxGTKz82lpISSEgoKMJk2v95ZXTW63ZmBQCgcDup6JmRBDhRBMUy73cFoNBC
J+GMxXyzm1fX4uu+C\ndQgnL9hGIPTiv3QdgPRo1BAMRnU9LxLJ8njSUlKASDQaCIW8oZA3HPZGIp54ACQTxQ0hiMnbJiQA\nhHiNbilVaDTuys6mvBybDU2jqIiUFNxu5uZITUXXiUTSI5HUaNSk6yZdj5/FzYIC+JTH8xmjMaTr\nAV3367ovuUyvgyf52T8C4f9wMWAJAKOu69FoOBj0RqMZgUCKwaDi/dLRaDAa9cdi/ljMp+teXd8I\nlfXkfkIICQAhXpNMpfLT0ti1i+pq6uqoq6OkBGBpibS0xC3/Xi+BQEo4bIzFjFvO4sZ3A38RjX4i\n+b1QPABcyQ/4seR9RAF47sXHgP+zrv+X+N39sVhI172xWJrBkKKUAl3XI7oev/4c0PV4pWg8ANaS\nBwvkGoCQABDiNWlVqthozElPp6CAigosFmw2SkqIREhNxe1mfj5xHNdoVIbNp2HHOx5SIA3yYHnL\nhYF4AJC89huBIHhe6m+fT17ODeh6pq6n6XoKGJK1z3d0SscfCxPc8muFkAAQ4jUNZYPRSFoaWVnk\n5ZGfT14e2dmEQqSlJWoe4geA45/WlYrfz7O14yENFiAE/uQOQCUDIAph8MKPXqoK9O90/Uml4ucM\nsiA9GQAkl/hQ8kmT8QAIvvjXCiEBIMSrp4NBKYxGjEZg4+kuBAK43Yl/CIeJRonFwhBJ1jxsdDyk\ngAmeh8rkHfoGUFs+pwd+zuofNwYF4IUcyIDU5B//jwGw8aX/xq8VQgJAiFfpa0rtjn+XEr/j0+Vi\ncZHMTNbXCQSYnWVhIdHxEAwGotFALBY/grvxE0nWPqfBNBiSPyTv+g9Cyy98dNfPdP2IUmuwC7Ih\nDYzJ/IgknwnsT37jtPFrQ+CW909IAAjxqkXjX9zHYv5QKCNexhDv4s/IIBhkeTnR8+NyEQx6IxF/\nLLb1LG4IIsml2QB/nCxpCCavB/wjXP8lHtzYpuu7lSqAfMhMbgL05NVjwPjiXUUIPNCq688+q+RN\nFBIAQrwa8a/svbHYWjCYsb7O7CyxGKurpKZuPpNreRm3ez0QcEcid5zFjd/W+RSEIBtSQCU/9XvB\nBb8Df6OUG/7by8XApK4rpXKgADKSEywVTJCS3BMAMQjDEgzLA4GFBIAQr4ULciBN11dDoVS3u9Bg\nSHzwN5mIRjce8rXm9a6FQq5o1BOLeXXdm/xS3gsfhEKlspVKU0opFdP1sK4HdN2n69mQBZkwC7+v\n1P96uSVb13VAKRV/imQ6pCUDwJDcr/hgVJZ+IQEgxGv3+7r+L0oZYrHUSET5/RFdzw0EMtLSMBjQ\n9Vg47AsGPcGgJxTyRCKeWMyzpePhzZCuVInRWGAyJW4W0vV4d5A/EkmLRlNisY2bRnX4PaX+9y+x\ndutb/p00pdLAAGuy6AsJACHuuvi5qpRYTA+FgrGYKxRKT0lJUUqHSDQajEYD0ag/GvXFYt5YbOM0\n1nnIVqoiNTUjO5ucHDIySElJ3EHk82X4fMZgUI9EorFYJHkKzA9PKvXPr2QpD8q6LyQAhNg+i/E7\neXQ9EosFwmFPNJoaDsefthj/PicUv/C7pePhTZCuVHl6ekZ+PqWllJSQl5d4LvzqKsvLGI3xQ8Kh\nSCQQi8VPC+dCMbxVqe/Isi4kAIS4H/yNrv+pUhEI67pf1zN0PTUaNSpFvI8h/jyAZMdDfAegINdk\nyszLY/dubDbMZgoKEo+Gn5khJYVYjHA4PRo1RaOpSpl0PS3ZHVQIv6LUjyQDhASAEPeDGfBCCHIh\nQ9dTwajraqOPYUvJjxfeDukGw66MDIqK0DQaG7HbKSggHGZuDqORQACPB6/XGAikGAyGWCz+MMh4\naUQm5MgrLiQAhLhPPKPrv66UHwogK3nvzcZN9+Hkl/jxr4AywWQ0GjMzKSyksjLRHZSfj88HsLpK\ndjbp6aSkJB4poxS6Hn8epDHZG/FrSj0
nmwAhASDE/aADNFiPbwKSR7F4cQB4kweyUlNSSE8nO5u8\nPPLyyMkhM5NYDJOJlJSN4iCSxUGx5OPA9GR3UIa84kICQIj7xKiuFyhVCYXJPoaUFxfyxMAIn4SY\nUkqpxCqffGQYuo7bfUd3UFjXw/+hOGijO0gICQAh7herug7kK1UA2Vv6GOLf2xiTe4J4/0/imNja\nGvPzxMtEPR6mp1lYYH0dn49wOBCNBnU9pOsh2GgQiiZLpF+v1E/lWyAhASDE/WNN15VSpmQ3Zzqk\nQkpyQ5C4oz8WiwWDhnh1RGoqLhcmEz4fi4vMzLC8jMcTf8KXPxYLJKuB4j8q+SObACEBIMR9Z+Ms\nrlIqC9LBBAaIxvt/dN0Tja76/YXxyqBwmIUFUlIIBllfZ3mZlRXd41kPBt3RqE/Xfbp+R3eQEBIA\nQjwwSbDhH5VKg4xodDkYNK2v5wJ+PxkZGI2Ew/HuoLDHs+b3r4fD7mjUo+veZGuQF8LJS8ExeZyL\nkAAQ4sGyEm/o1HVTOIzPF4zFcvz+dJMJg4FYLLTRHRQOeyIRbyzm3tId5E5eBI4/JVguAAgJACEe\nJH+q6/9bKXTdEI1GQ6FALLYeCqUZjQaldF0P39EdpOueZHfQevIKcDT5mBchJACEeMX6+1VbG+9/\n/735BL0Uf0iLroejUX8slh6JpCplUAqI6no4FgvqekDX/Vu6g9bBk/zsH1/95WKAkAAQ4uVdvKjO\nnycQ0OfneeqpfrgFzXV11yBnbs47MKA/9tiOJsEUlIIOIV336Xq6rqfGnyoMsS3dQRtPh49//Cf5\nzU/8PiI5BiwkAIR4GTduqMpKfD5mZxka4nWv+3togk9DJoyVlV0uK7sA5X7/ktMZaW3lwx/e9oX1\nc7r+YaX8EIAcSE92B7HlKb7BLdUR68k/uFEr5JH3VUgACPGL3b6tCgpwu5meZnCQoSE+8pH/9Rd/\n8TlNq4C9cBrOw2+DOyOjvaGhuaGhFbJnZnz9/fr589uYBP+i6+9WyrflEb4pye6gaDIANnYA8Uc5\nbvxfXpAqUCEBIMQv0tOjMjNZXWVqiv5+Jibw++np4W//1l1bO6Bpg5r23draXZmZdjgGTfBnkAoj\nFRWXKiouQqnXuzI6Grlxg4985O4vuENQCIXxTUDyoMDW9lA/hJJnvkje9xmQ1V9IAAjxiw0MKJOJ\n5WXGx+nrY24Or5dnntlcOouL1ZkzwT17Zm22WbP5eZvtn3bvroR90ARPwFOwmpXVtndv89691yFr\nasrf26u/8Y13bfG9peuVSu2CwmSBqHHLQh8Ew5YnuevJVGiR1V9IAAjx8/T1qZQUdJ3FRZxOenpY\nXcXl4ktfetHSubi4eVj3xInooUPrDse6xdKnad+qqytMTa2B49AEfwkpMFRVdaGq6hIUu92rIyPR\n9naefPK1rsXTug7kKVUY74hObgJMW7qDNr4XCsB1Wf2FBIAQP8/zz6uSEkIh5udxOunqwutldZWv\nfe3nLp1bD+tWVqrTpwONjdNW67TZfMVu/4eKiio4AE3wTvgELOfktB440HzgQBtkjo/7e3p44onX\ntC6v67pSKg2yIQPSkpcEjMk8iECnLP1CAkCIX+DaNVVSgt/P3BzDw3R1EQ6zuMi3vvXLrp7T04l/\n02BQp09HDh5cczjWNK1H075RX19kNNbCCTgNfwMK+s3mFrP5MhSura2PjERv3uTpp1/NSn1Hd1Ba\nsugtmGwYFUICQIif6+ZNVVyMx8PMDENDDAwQDDI7y/e//2oW0Fhs809pmjp50t/QMGm1TmraBbs9\np7S0Gg5CE/wm/B4s5Oe/cPhw8+HD7ZDhdAY6O3nb215TEgghASDEL6W7W+Xmsr7O1BQDAzidBAI4\nnVy4cBfW07GxxC9JSVFnzkQOHFitqVnVtC6L5av19cVQByfhNDwBUeixWC5YLFegYGXFNTwc6+jg\nYx+
TZV0ICQCxDfr6VGoqy8tMTtLXx8wMXi9tbfT23uVlNxLZ/IU1NerECV99/bjVOm42N9fU5BYV\nmeEQnIEPwadhdteu548daz52rCMaTXc6g7du8a53SRIIIQEg7pLBQWUwsLjI+Di9vSwt4Xbz7LPb\nvs4ODSX+itRU9dhj4f37l2tqls3mWxbLl2prS6ABTsFJeDuEjMYuu73Fbr8K+UtL7qGhWHs7n/qU\nhIGQABDiVWlvV7m5RKMsLOB00t2N283aGl/5yo4urKHQ5l/X0KCOHfPW1zstljGz+ccOR15BgQWO\nQBN8HP4EpoqKrhYVtZw8eTscTnM6Q+3tvO99kgRCAkCIX9qlS6qigkCA+XlGRujsJBhkZYVvfONe\nLqYbXzqlpanz58P79i3Z7Utmc7vV+ozdXgp7ktuC94DfZLrtcDQ7HNcgd37eMzionzkjSSD+//bu\nM77N6zAX+PNy702CS8T7YoNDpChRg6Smkzhx7AxnXydOE484TdN0JG1v0t7+2v7aX++Hfr5NV5bl\nxLEdAtEkOAAAIABJREFUx3acYSfgkCiKewAgBkmAC9x7YHDhfgAgMKqTxrEIkuLz/6jxQXjPOQ+P\nDs7zMgCIfquODqGwEG43JicxNISBgeAX/1977bAsoD5f+GudFRW7NTXrOt26JDlE8WcaTUZamgKo\nAeqBrwDfAEZlshsyWSNQ6PXOO51b7e347GcZBsQAIPp1gX631dVgv9vQEHw+jI/jzTcP44q592ud\nKSnC1aubFRWzSuWsKHYoFN+WpAKgHKgDLgGfBtYTErr1eoNefxtInZrasFr9V68yCYgBQAQMDAT7\n3cbHYbVifBxuN+x2tLYegVVyfT28Laiu3jlzZk2rXRPFQVF8XavNTE5Whjrp/hJIABwFBc0FBU1A\nvtu9EOiki0BVNREDgA4jm02IicH8PMbGMDCAmZm7+92Oir3bgqws4fJlX3n5tFI5LZffVij+Uy4v\nDHXSvQf4HLCalNRZXm4oL28Dkl0uj8Xif9e7mATEAKDjwWwW4uLg92N2FiMjv7Hf7ShaXAxvC86e\n3Tl9elWjWRVFmyS9otVmJySoQ9uCvwFigcGiouaioiYgb319MdBJtx9V1UQMADoUWloEmQw+H2Zn\n4XDAaITb/T/0ux1Fe7cF+flCfb2vrGxSqZyUy28qlf9WXFwMVAL1wAeAp4HFlJT2ykpDZWUHkDQ2\n5h0Y8L/3vUwCYgDQfaS1VZDJ3lG/21E0PR3eFtTWBquqRXFAFH+k1+fExGhCnXT/AEQD1pKSppKS\nZiBnZWU50En35JMMA2IA0FHW0xPud7PbYbfD58PkJH72s+Oyuu3dFpw4IdTVeUtLJxSKCVFsVipT\nCwpOhKqqPwZ8CZhLT2+rrjZUV3cCiSMjXqMRjzzCJCAGAB01JpOQmnp3v5vDgaamY7qijY8H/+HR\n0Xs76UyS9IJenysIutC24P8CfmBAFBtF8QaQtbS0Euike/pphgExAOjQs1qD/W5jY7Bag/1u3/0u\n1y8A2NkJfw5KpXD+vKe0dEySxkTRoFKl5eUFOunqgc8AfwJMZ2a21tQYamp6dncTnE5fXx8efZSf\nJDEA6FCy2wVBCPa7mc1YWIhQv9tRNDwc/FhiY4XLl7crKxdVqkVR7JOk7+t0eYAeqAVqgUeA7ago\nk1LZoFS2ABkLC2uDg7s9PUhL46dIDAA6BDo7hYyMg+93O4q2tsIfkVYrnD/v1ulGFIoRufyXanV6\ndrYInAbqgaeArwGu7OyW7OyG8+d7r18f56dHDAA6YDduCAUFd/e7LSzgRz/i6v/22GzhTrqrV+90\n0nUrFM+q1TKgLNRJ9xHAB2j2/t2WFqGujh84MQAosj/7FxRgYwNTUxgchMWCzU1MT+MnP+Fi9Pvb\n20lXVrZ79uyGTueQJKco/kKtTs/IUABn7vordXXvA4p9vlmnc6uzE
5/+ND9/YgDQfurvFzIy7u53\nGxvDL3/J1efe2Pt10sRE4dq1zYqKOZVqThQ7/tuf/Sbgjo/v1ekMOl0rkDo9vWGz+S9f5rMgBgDd\nawMDQmLi3f1uViva2rji7AuPJ7wtqKra/epX7/r9T4aqqv8cSARG8vNv5Oc3AgUez7zTud3Wxk46\nYgDQvWCzCdHRmJ/H6CgsFszMYH0dzz7L9SVy24Lr14W9v/itb92Sy9sUiv8SxUAnXR1wDXgcWEtM\n7CotNZSWtgEpk5Nui8X/wAN8UsQAoLfPZBLi4+H3Y24OIyMwmbC8fJ/0ux1pn/88zpzZOX16Tau1\niaJdFF/V6bISE1WhTrqvA3HAcGFhc2FhEyDb2FgMVFWzk44YAPQ7uXlTyM8P97v198PjuQ/73Y7u\ntiAgN1e4dMlXVjalVE7J5beUyv84caIIqATqgIeAJ4Gl5OSOigpDRUUHkDwx4TGb/Q8+yIdIDAD6\nDW7fFvLz4fFgagpDQzCZsLmJuTm88goXjsNlbi58WnDhws6pUysazYokWUTxZZ0uOy5OA5wD6oC/\nA6IBe3FxU3FxM5C7trYUqKp+4gk+U2IAUEhPj5CTg/V1uFwYHITNhs1NuFz4+c+5UhyNbUFRkVBX\n5y0rcykULrn8hkr1r4WFgU66OuBR4IvAfGpqe1WVoaqqE0gaHfWYTHj/+/l8iQFwvJlMQkoKlpeD\n/W4jI/B44HCguZmrw5HhcgUfVlSUUF+/XVW1rNEsi6JJFF/U63Oio7WhTrp/AgTAIpc3yuU3gOzl\n5ZVAVfVTT/FxEwPgmAn0uy0uYmwMFgumptjvdrTt7oafnSgKtbUevX5coRgXxUaVKlUmKwl10v0v\n4I+BmYyM26dPG06f7vb7E0ZGfP39+OAH+fQZAHQM3Ol3GxnBwAD73e43IyPBRxkTI1y6tF1ZGaiq\nNkrS83p9LqAHLgC1wPuBHUEwS1KDJN0EMhcXVwNV1V/4AgcDA4DuOx0dQmYmtrcxNweHAyYT1tfZ\n73bf2t4OP1a1Wjh/3q3XjyoUo3L5r9TqtJwceaiT7vPAnwOTWVm3zp5tOHu2Z2cn3unc7O3FRz/K\ngcEAoPtC4OueXi+mp4P9bpub7Hc7LgYHg085Lk64cmXr5MkFtXpBLu9VKJ7TaPKA0lAn3YeBzeho\no0rVoFK1ABnz82t2+25XF778ZY4TBgAdTZ2dQn4++90Im5vhJ15aKpw9u6HXOyVpRC5/Q6PJyMwU\ngTNAPfAM8BfARE5OS05OQ21t39ZWvMOx2d2NT32KY4YBQEfH3n43mw3Dw/B6MT7OfrfjbmAgXFX9\nwANboU66Lkn6nkqVv6eq+uOAJza2T6s1aLW3gLSZmXW73X/xIscPA4AO+yQXEhOxuBjsd5uYgNsN\niwXt7Zy9FLS3qvrkyd2amnWtdkiShkXxZxpNRlqaItRJ9yfAN4BRmeyGTNYIFHq9807nVns7PvtZ\nDicGAB0ye/vdBgYwO8t+N/pt9t4yS00VrlzZrKiYVSpnRbFDofi2JBUAFUAdcAn4NLCekNCt1xv0\n+ttA6tTUhtXqv3qVo4sBQAetv19ITMTuLubn4XTCbGa/G709a2vhbUF19c6ZM2ta7ZooDoriT7Ta\nzORkVWhb8JdAAuAoKGguKGgC8t3uhUAnHauqGQB0AAIvdPT5MDMDhwNGIzweLC7ixRc5IekdbQuy\ns4VLl3zl5dNK5bRc3qpU/mdJSVGoqvpB4HPASlJSV3m5oby8DUh2uTwDA/53v5sDjwFAEdHWJhQU\nwO3G9DT73egeW1gIbwvOnduprl7VaFYlySqKr2i1WQkJ6lAn3d8AscBgUVFTUVEzkLe+vhjopGNV\nNQOA9ktvr5CdjbU1TE7CbofdDp8Pk5Psd6N93Bbk5wsXL3rLyiYVikm5vEWp/GZxcXGok+6DwBeA\nxZSU9spKQ2VlB5A0NuY1m/3ve
x/HJAOA7h2TSUhOZr8bRdr0dHhbUFe3HaiqFsUBUXxJr8+JidGE\nOun+AYgCbCUljSUlN4CclZXlQCfdk09yiDIA6B24q99tchJuN/vd6MC2BSUlQm2tt7R0QqGYEMVm\npTK1oOAEcAqoBz4B/BEwl55+u7raUF3dBSSOjHiNRjzyCEcsA4DejuFhYWcHQLDfzWzG4iJWV/Hc\nc5xLdGDGxoLDLzpauHhxu6oq0ElnkqQf6vW5gqALbQveC/iBAVFsFMUbQNbS0kqgk+7ppzmAGQD0\nW7W1CdnZ2N7G7CycThiNwX63H/6Qk4cOhZ2d8FBUKoXz5z2lpWMKxZhc3qBSpeblyUNV1Z8B/gSY\nzsxsrakx1NR07+4mOJ2+vj48+igHMwOA/pu7+t36+rC1xX43Osy71eDIjI0VLl/eqqxcVKsX5fI+\nSfq+ThfopAtUVT8CbEdFmZTKBqXyJpCxsLA2OLjb04MvfpFjmwFAQFdXsN9tchKDg7Ba4fNhehqv\nv84ZQofd1lZ4lOp0wrlzbp1uRKEYkcvfVKvTs7PFUFX1U8DXAFd2dkt2dsP5873b2/FO52Z3Nz7x\nCY5zBsBx1d8vpKdjdRUTE7Dbg/1uY2P41a84K+iIsVrDnXRXr26dPDmvUs2LYrckPatW5++pqv4o\n4I2J6VerDWr1LSB9dnbdbt+tr+eYZwAcJ2/Z72Y2o7OTM4GOsL2ddOXluzU1GzrdsCQ5RPEXanV6\nRoYiVFX9R8D/Bsby8m7m5TUCRT7fnNO51dGBz3yGU4ABcF8L9LvNzWFsjP1udH/a+3XSpCTh2rXN\nioo5pXJOFDsUiu8oFAVAOVAH1AGfAtzx8T06nUGnuw2kTk9v2Gz+y5c5IxgA95e+PiEpCbu7wa97\nmkxYWcHKCn7wA451um+53eFtQVXV7pkz6zrdoCgOieLrWm1mSooSqAEuAl8FEoGR/Pzm/PxGoMDj\nmXc4ttvb2UnHADj6mpuFwsJwv1t/P7xe9rvRMd0WZGQIly9vlpfPqFQzcnmbQvEtUSwIddI9AHwW\nWEtM7CorM5SV3QZSJifdFov/gQc4WRgAR1Bbm1BYCLcbU1MYGoLZjM1NzM7i1Vc5oOk4Wl4Obwtq\nanaqq1e12lVJsoviq1ptVmKiCjgH1ANfB+KA4cLCpsLCJkC2sbEYqKpmJx0D4Gh4y343lwu/+AVH\nMHFbEJ4FeXnCxYu+srIppXJKLr+lVP77iRNFQCVQB7wfeApYSk7uqKgwVFS0A8nj456BAf+DD3Ie\nMQAOK7M53O9mtWJ0FB4Phodx4wZHLdGvmZ0NbwsuXNiprg500llE8WWdLjsuThPaFvwdEA3YT5xo\nOnGiGchdXV1yOHa6upCQwACgQ8NqFWJjsbAQ7HebmsLGBr73PS79RL/rtqCoSKir85aVuRQKlyje\nUCr/tbDwBFAF1AOPAl8E5tPS2quqDFVVHdevDzMA6OANDgqBMTw7i9FR9rsR/Z5cruCUiYoS6uu3\nT51aVquXRdEkSS/q9TlRUdpQJ90/AQIgMQDogN2+LeTkYGuL/W5E98zubnj6SJJw4YKntHRcoRiX\nyxtVqjSZrKSvr/fcOayt+RMTYTYzAOggtLQIMlmw321oCP392NrC/DxefpmrP9G94XQGZ1NMjHDp\n0nZV1eITTywmJWF5OXi7PiqKAUAR19UlyGTsdyOKkO1tPwCrVYiKCh62DQxgevrYHbYxAA6e0Sik\np2NlBS4XbDY4HPB6MToKg4GrP9G+sFqF6GgAmJ0Nv0xpbe3YHbYxAA6YxSIkJAT73SwWuFxwu2E0\norubqz/RvmhtFXJzg3cqHQ4YjdjYOKaHbQyAg2SzCVFRmJvD6CgGBjA3x343ov1165aQlwePJ/gy\npWN+2MYAOBi9vUJycrDfzemE2cx+N6J9190t5OVhfT142GazwefD1BR++tNjOu8YAAegqUkoKgr
2\nuw0Pw2hkvxvRvjMahbQ0rKwEX6YUOGwbGUFDw/GddwyASGtvF4qK2O9GFFF3DtvGxmC1wuXCxgb6\n+tDXd6znHQMgonp7hawsrK3B5YLdjsFB9rsR7Tu7PXzYZjZjfh5ra7h+nZOOARBBd/rdxsdhswX7\n3QYH0dLCgUi0L7q7hdRU7OwED9tMJqyu8rCNARBxe/vdjueVE6IIu3PYNj0dfJmSz4eFBbz0Eucd\nAyBSAt/1xLG/ckIUSR0d4cO2wUEMDGBzEzMzeO01zjsGQKTwyglR5PX1CZmZdx+2TUzgjTc47xgA\nkcIrJ0SRZzYLSUlYWgq+TGlsDB4P7HbcusV5xwCIlDtXTgI70EC/23G+ckIUATZb8LBtdBQWCw/b\nGAAH4c6Vk739bsf8ygnRvrJYhJgY+P2Ym8PICEwmLC1hdRXf/z4nHQMgsgPxv1856e9Hby8HItG+\nCPx3a+CYN/AypY0NLC3hhRc46RgAEbT3ykmg341XToj2VWtr+LBtaAhGI7a2MDeHH/+Y844BECk9\nPUJKCq+cEEVUd7eQm8t+NwbAgeKVE6LIM5nC/W42G5xOeL1wOtHYyHnHAIgU9rsRRZ7FIsTFYWEh\n+DKlyUlsbKCrC2Yz5x0DIFL6+t6i341XToj2VeCwbX4eIyMYGGC/GwPgIPDKCVGEdXUJaWnY2cHs\nbPCwbW0Ny8t4/nlOOgZABPHKCVGENTcLhYXweoMvUwocti0u8rCNARBBd105Yb8bUQR0dAiFhXC7\nMTmJoSH2uzEADsKdKyd7+9145YRoXwX63VZXg4dtQ0Pw+TA+jjff5LxjAER29b+r341XToj21cBA\n8LBtfBxWK8bH4XbDbkdrK+cdAyBS7vS78coJUcTYbEJMDObngy9TmpnhYRsDIOLu9LvtvXLCfjei\n/WM2C3Fx8PvDL1NivxsD4ABYLEJ8/N1XTnp6YDRyIBLti5YWQSaDzxc+bHO7edjGAIg4XjkhirDW\nVkEmY78bA+BA3blyEuh3Mxp55YRo3/X0hPvd7HbY7fD5MDmJn/2M844BECm8ckIUeSaTkJp692Gb\nw4GmJs47BkCk8MoJUeTd6XcLvEwpcNj23e9y0jEAIihw5STQ72az8coJUSTsfZmS2YyFBR62MQAi\nbu+VE5sNY2O8ckK0vzo7hYwM9rsxAA4ar5wQRdiNG0JBwd2HbQsL+NGPOO8YABH8wT82lldOiCL9\ns39BATY2MDWFwUFYLNjcxPQ0fvITzjsGQKQErpwEjnl55YQoMvr7hYyMu/vdxsbwy19y3jEAIoVX\nTogOZM+dmHh3v5vVirY2zjsGQKTsvXJyp9+NV06I9pXNJkRHY34++DKlmRmsr+PZZznpGAAR9JZX\nTpxONDZyIBLt16SLjw+/TMlkwvIyD9sYAAchcOVkb78br5wQ7Z+bN4X8/HC/W38/PB4etjEADgiv\nnBBFzO3bQn4+PB5MTWFoCCYTNjcxN4dXXuG8YwAchMFBXjkhioSeHiEnB+vrcLmCh22bm3C58POf\nc94dpKjj/I9/6CHhQx/C3BxXf6J9ZDIJKSlYXg7+p7/NBo8HQ0Nc/bkDOCD/8i/jwBhwIy+vsb6+\n3+eLczq3Ojrwmc9wRBLdS1arEBeHxUWMjcFiwdQUD9sYAAdkZwcAnnxSDXwFqAXqgceAjfj4Hp3O\noNO1AqnT0xtWq//KFQ5QonfKbhcEIfiFn4EBHrYxAA7UF7/of/ppwekclKQhufx1rTYjJUUF1AD1\nwFeBRMCZn38jP78RKPB45h2O7fZ2fO5zHK9Eb09Hh5CZie1tzM3B4YDJhPV1HrYdOoLffxyfR2am\ncPkyysuhVEIuj1Yqk+XyQqACqAfOAUXAKtAFGIA2YMzlclut/gce4NilfXH9uhAXJ0xN+c1meL34\nzneO9kgLfN3T68X0dLDfbXOT/W7cARwaS0vBgSgIQk3Nzun
TqxrNqiTZRPFVrTYrMVENnAXqgW8A\nccBQUVFzUVETIFtfX3Q4tjs78fnPcygTvYXOTiE/n/1uDICjYO8GSCYT6ut95eVTCsWUKLYolf9e\nXFwEVAL1wMPAU8BSSkr7yZOGkyc7gKTxce/AgP/BBzmsiYL29rvZbBgehteL8XH2uzEADr2ZmfC2\n4MKFnerqFY1mRRQtoviyXp8dG6sBzgN1wN8D0YDtxImmEyeagdzV1aXh4Z3ubjzxBEc5HV93+t0C\nL3ScmIDbDYsF7e2cF4fUMT0D+N0VFwt1dSgtDZwWxCiVKYWFJ4AqoB6oAfKAeaANMACdgGt01Gs0\n4uGH+anS23AfnAHYbAKAxUWMjmJgALOz7HfjDuDom5gIjuDoaKG+fruqalmtXhZFkyS9qNfnREXp\nQtuCfwb8gEUub5TLbwDZy8vLQ0O7PT146inOAbqf9fcLiYnY3cX8PJxOmM3sd2MA3Hd2dsKjWZKE\nCxc8paXjCsW4XN6gUqXJZCXAKeAi8GngK8BMRkbrmTOGM2e6/f4Ep9PX348PfYjzge43gRc6+nzh\nlyl5PFhcxIsvcrQzAO5TTmdwcMfECJcvb1dWLqrVi6LYL4rP6/V5gA64ANQBDwM7gmBSKBoUihYg\nc2FhNbAteOYZTg868trahIICuN3Blymx340BcLxsb4cHukYjnD/v1utHJGlEFH+lUqXl5IjAaaAe\neAL4KjCZnd2Snd1w7lzv9nb8yMhmTw8+9jFOFTqSenuF7GysrWFyEnY77Pbgy5TY8MMAOI7s9uC4\nj4sTrl7dOnlyQaVaEMUeSbqu0ciAUqAWuAA8CmzGxPSrVA0qVQuQPje3Pji4W1vLaUNHhskkJCdj\neTn4MqWREXg8cDjQ3MxhzAA43jY3w18nLS3dPXt2Q6dzSJJTFN9Qq9MzMyXgDFAP/CHwl8B4bm5L\nbm4DULy5Oet0bnV24rHHOIvo8Lqr321yEm43+90YAPTr9n7FNiFBuHZt8+TJOZVqTi7vVCi+q1Tm\nA+WhbcEnAE9cXK9Wa9BqW4G0mZl1m81/6RInFR0iDoewvQ0g2O9mNmNxEaureO45DlQGAP1mXm94\nW1BZuXvmzLpONyRJw3L5T7XazNRURaiT7k+BvwZGZLIbMlkjUOj1zjscW21t7KSjA9beLmRlYXsb\ns7NwOsMvU/rhDzkyGQD09rcFaWnClSubFRUzSuWMXN6uUHxLkgKddHXAFeAzwFpCQndpqaG09DaQ\nMjnptlr9165xvlGksd+NAUD32OpqeFtw+vTO6dNrWq1Nkuyi+JpWm5mUpAp10v0VkAAMFxY2FxY2\nAfkbGwsOx3ZHBzvpKBK6uoL9bpOTGBqCxQKfD9PTeP11Dj8GAN3TbUFOjnDpkq+sbFqpnBbFVoXi\nP0pKAp10dcB7gSeA5eTkzooKQ0VFG5A8MeEZGPC/5z2cirQv+vuF9HSsrmJiAnZ7sN9tbAy/+hWH\nHAOA7rX5+fC24Ny5YFW1KFpF8cc6XVZ8vAY4B9QB/weIAQaLi5uKi5uBvLW1RYdjp7OTnXR0zwT6\n3RYXMT4e7nczm9HZyTHGAKBIbQsKC4W6Om9Z2aRSOSmX31Qqv1lUVAxUAXXAh4BngIXU1PbKSkNl\nZQeQNDbmMZnw0EOcpfT7s9mE6GjMzWFsjP1uDAA6OJOTwVkXFSXU1W2fOhXopDNL0kt6fXZ0tDbU\nSfePgABYS0oaS0puADkrK8uBquonn+S8pd9VX5+QlITd3eDXPU0mrKxgZQU/+AFHEQOADs7ubngG\nyuVCba2ntHRCoZiQy5tUqtT8/EAnXT3wSeDLwGx6+u3q6obq6i4g0en0Go34wAc4h+m3aW4WCgvD\n/W79/fB62e/GAKBDZnQ03El38eJ2VdWSWr0kikZRfL60NBfQh7YF7wP8gFmSGiXpBpC1uLgyNLTb\n24unn+aUpl/T1iYUFt7
d7zY7i1df5VBhANChtLeTTqUSzp/36PVjCsWYXG5Qq9Nyc+VANVAPfBb4\nU2AqK6v17FnD2bM9u7sJTqevtxcf+QinN711v5vLhV/8gsODAUBHwdBQcK7GxgpXrmxVVgY66Xol\n6TmtVgbogVqgFvggsBUVZVQqG5TKFiBjfn5tcHC3uxtf+hJn+3FkNof73axWjI7C48HwMG7c4Hhg\nANBRs7UVnrd6vXDunFunc0qSUxTfVKvTs7KkUFX1F4CvAa6cnJacnIYLF3q3tuKdzs3ubnzyk5z5\nx4XVKsTGYmEh2O82NYWNDXzvexwADAA6+iyW4EyOjxeuXduqqJhXqeZFsUuh+J5KlQ+UhTrpPgZ4\nY2P7NBqDRnMLSJudXbfb/fX1XAjuW4ODQuD7xrOzGB1lvxsDgO5fPl/4lllFxW5NzYZONyxJDrn8\n5xpNRnq6IlRV/cfA14HRvLybeXmNQJHPN+dwbHV04PHHuS7cP27fFnJysLUV7HczGrG+zn43BgDd\n7/beMktOFq5e3ayomFWpZuXyDoXiOwpFfqiT7iLwGLAeH9+j1xv0+ttA6vT0htXqv3KFa8TR1tIi\nyGTBfrehIfT3Y2sL8/N4+WU+WQYAHRsbG+FtwalTO2fOrGm1a5I0JIqvazSZKSnKUFX114BEwJmf\n35yf3wQUuN3zTud2ezurqo+eri5BJgv2uw0OwmplvxsDgLgtCMnMFC5f9pWXBzrpbisU/yWXFwIn\ngTrg3cAfAKtJSV1lZYaysjYgxeVyWyz+d72Ly8cRYDQK6elYWYHLBZsNDge8XoyOwmDg42MAEAFL\nS+FtQU3NzunTq1rtqijaRPEVnS4rIUEd6qT7BhAHDBUVNRUVNQN56+tLDsd2Zyerqg8pi0VISAj2\nu1kscLngdsNoRHc3nxcDgOg3bwtkMuHiRV9Z2ZRCMSWKLUrlvxUXF4eqqh8BngYWU1I6Tp40nDzZ\nASSNj3vNZv9738uV5bCw2YSoKMzNYXQUAwOYm2O/GwOA6HczMxPeFtTW7pw6taLRrIjigCT9SKfL\njo3VhrYFfw9EA7YTJ5pOnGgGclZXg510rKo+KL29QnJysN/N6YTZzH43YgDQO94WFBcHqqpdCoVL\nLm9Wqf5fQcGJUCfdR4E/BObT0tpOnTKcOtUJJI6MeE0mPPww153IaWoSioqC/W7DwzAa2e9GDAC6\nFyYmgotIdLRQX79dVbWs0SyLokkUX9Drc6Oi7lRV/zPgByyi2CiKN4Ds5eXloaHdnh489RSXoX3U\n3i4UFcHtxtQUhoZgNrPfjRgAdK/t7IQXFIVCuHDhTiddg1qdlpdXEuqk+zTwFWAmI6P1zBnDmTPd\nfn+C0+nr68OHP8wl6R7r7RWysrC2BpcLdjsGB9nvRgwA2mcOR7iT7tKl7crKRbV6URT7JekHOl0e\noAcuALXAw8COIJgUigaF4iaQubCwGtgWPPMMV6h36k6/2/g4bLZgv9vQEG7e5GdLDADaf3s76bRa\n4dw5t14/IkkjovhLtTotO1sMddI9AXwVcGVn38rObjh3rmd7O97p3Ozpwcc/ztXq98F+N2IA0CFi\nswVXn7g44erVrZMnA1XVPZJ0XaORAaWhTrpHgc2YmH612qBW3wLS5+bW7fbdujouXr/j5yxERQHA\n7CxGRoL9bmtr7HcjBgAdApub4a+Tlpbunj27odM5FAqnXP6GRpOekSGFOum+BPwVMJ6bezM3txEH\ne4zEAAAHjUlEQVQo3tycdTq3Ojvx2GNcy95aa6uQmxs85nU4YDRiY4P9bsQAoMNn79dJExKEBx7Y\nrKiYU6nm5PJOheK7SmU+UA7UAReATwKeuLgerdag1bYCaTMz6zab/9Ilrmtht24JeXnweDA9jeFh\n9rsRA4COCK83vC2orNytqVnXaockaVgUf6rRZKamKoAa4CLwZ0ASMCKT3ZDJGoFCr3fe4
dhqazvu\nnXTd3UJeHtbXMTUV7nebmsJPf8rVnxgAdAS3BenpwuXLmxUVM0rljFzerlR+SxQLQ1XVV4HHgbWE\nhO7SUkNp6W0gZXLSbbX6r107dkue0Sikpd3d7zYygoYGrv7EAKCjaWUlvC04fTpQVW0TRbsovqbV\nZiUlqYCzQD3wV0ACMFxY2FxY2AjINjYWHY7tjo5j0Ul3p99tbAxWK1wubGygvx+9vVz9iQFA99e2\nICdHuHTJV14e6KS7pVD8e0lJUaiT7n3AE8BycnJHRYWhoqIdSJ6Y8AwM+N/znvtzNbTb7+53W1vD\n9etc+okBQPej+fnwtuD8+Z3q6lWNZlUUraL4Y50uOz4+UFVdD/wtEAMMFhc3FRc3A7lra0vDwztd\nXfdJJ11Pj5CSgp2dYL+byYTVVfa7EQOAjt+2oLBQqK/3lpa6lEqXXH5TqfxmUVExUAXUAx8GngEW\nUlPbq6oMVVUdQNLYmMdkwkMPHdW18k6/2/Q0HA7098Pnw8ICXnqJqz8xAOiYmZwMLnxRUUJd3fap\nU8tq9bIkmUXxJb0+JzpaE+qk+0dAAKwlJY0lJTeA7JWVlaGhnZ4ePPnkkVk62e9GDACit7C7G14E\n5XKhttZTWjquUIzL5U0qVWp+fkmoqvqTwJeB2fT026dPG06f7gISnU6v0YgPfOBQL6N9fW/R7zYx\ngTfe4OpPDACikNHR4JoYEyNcvLhdVbWkVi+JolEUf1hamgvoQp107wN2gQFJapCkm0DW4uJKoJPu\nC184XKuq2SwkJWFpCRMTsFoxNgaPB3Y7bt3i6k8MAKK3sr0dXh9VKuH8eXdp6agkjYqiQaVKy82V\nh6qq/wD4M2AqK+vW2bMNZ8/27OwkjIz4envxkY8c/AprswX73UZHYbFgepr9bsQAIHo7hobCVdVX\nrmxVVgY66Xol6TmtNtBJF9gWfAjYio42KpUGpbIFyJifXxsc3O3uxpe+FOk112IRYmLg92Nujv1u\nxAAgesf2VlXr9cK5c26dzhnopFOr07OypFBV9TPAXwCunJybOTmNFy70bm3FO52bXV341Kcisf4G\nGn7u6ndbWsILL3D1JwYA0T34ETu4mMbHC9eubZ08Oa9SzcvlXQrFsyqVDCgD6oDzwMcBb2xsr0bT\noNHcAtJmZ9dtNv/Fi/u1Fr9lv9vcHH78Y67+xAAguqd8vvAts4qK3ZqadZ1uXZIccvnPNZqM9HQF\ncAa4CPwx8HVgNC/vZl5eA1Dk8805HFsdHXj88Xu2NN/pd5ucxOAgbDb2uxEDgGj/7b1llpIiXL26\nWV4+q1LNyuUdCsV3FIqCUFX1ReAxYD0+vkevN+j1t4HUqakNm81/5co7Wqbv9LtNTMBmg9PJfjdi\nABBF3Pp6eFtw6lSgk25NkgZF8XWtNjM5WRnqpPsakAg4CwqaCwoagXy3e8Hp3G5vf9tV1RaLEB+P\nhQWMj8NiweQkNjbQ0wOjkas/MQCIDnpbkJUV6KSbViqnRfG2QvGfcnkhcBKoA94N/AGwmpTUWVZm\nKCtrB5JdLo/F4n/Xu/7nFTzQ7zY/j5ERDAxgfp79bsQAIDpMFhfD24KzZ3eqq1e12lVRtIniKzpd\ndkKCKtRJ99dAHDBUVNRUVNQE5K2vLzkc252dv7Gq+k6/m9GItTUsL+P557n6070n7P2JhojeIZlM\nuHgRZWVQKiGXRyuVKcXFxUAlUA/UAAXAItABGIAOYHx83Gs2+xcWEBcnTE35zWZ4vXj8cQwPw2iE\n14vFRfa7EQOA6GhNLUGorcWpU9BoIIqCJMXrdDmxsZrQtkAHRAM2oBFovn69ZW8A1NRgYACbm5iZ\nwWuvcYYSA4DoyDpxQqitRVkZFArI5TEqVWpBwYlQVfUZIO/69eK9AZCUBJ8P4+N4801OT9pHPAMg\n2nfj48F1PDpaqK/frqpa0mgCnXQvlJbmCoL2rj/v8
cBmQ2srV39iABDdL3Z2wmu6QiFcuODR68cU\nijFJinG5du781re/zaWfIoH/BUR0wGJjhUuXUFqK1VW43Wz4IQYAERHtsyh+BEREDAAiImIAEBER\nA4CIiBgARETEACAiIgYAERExAIiIiAFAREQMACIiYgAQEREDgIiIGABERMQAICIiBgARETEAiIiI\nAUBERAwAIiJiABAREQOAiIgYAERExAAgIiIGABERMQCIiBgARETEACAiIgYAERExAIiIiAFAREQM\nACIiYgAQEREDgIiIGABERMQAICIiBgARETEAiIiIAUBERAwAIiJiABAREQOAiIgYAERExAAgIiIG\nABERMQCIiIgBQEREDAAiImIAEBExAIiIiAFAREQMACIiYgAQEREDgIiIGABERMQAICIiBgARER0V\n/x+AnPCD2jXp5gAAAABJRU5ErkJggg==\n", - "text/plain": [ - "" - ] - }, - "execution_count": 11, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "L.image(zoom=1.0)" ] }, { "cell_type": "code", - "execution_count": 12, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "(1.0000000000000009, 1.0000000000000009, 0.0)" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "L.atoms[3].position" ] }, { "cell_type": "code", - "execution_count": 13, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "L.atoms[3].position = (1.0, 0.0, 1.0)" @@ -218,55 +139,26 @@ }, { "cell_type": "code", - "execution_count": 14, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAgAAAAIACAIAAAB7GkOtAAAAG3RFWHRTb2Z0d2FyZQBMQU1NUFMg\nMTMgQXVnIDIwMTZFN+maAAAgAElEQVR42uzdZ3Sc133v++8e9EqC6IWYZwoGlb0TIEUysWMrdlxi\nO3ZiR7YlucWpN8UnyU1OSXJyzjrr3HXXOlm5cRw7sYqrHDe5C+yiQAIEid4Hvdfp9Xnui5kBIEa2\nJVkECfL/WXihZUsjamY/+4c9+9m/RxmGgRBCiIePSd4CIYSQABBCCCEBIIQQQgJACCGEBIAQQggJ\nACGEEBIAQgghJACEEEJIAAghhJAAEEIIIQEghBBCAkAIIYQEgBBCCAkAIYQQEgBCCCEkAIQQQkgA\nCCGEkAAQQgghASCEEEICQAghhASAEEIICQAhhBASAEIIISQAhBBCAkAIIYQEgBBCCAkAIYQQEgBC\nCCEkAIQQQkgACCGEkAAQQgghASCEEEICQAghhASAEEIICQAhhBASAEIIISQAhBBCSAAIIYSQABBC\nCCEBIIQQQgJACCGEBIAQQggJACGEEBIAQgghJACEEEJIAAghhJAAEEIICQAhhBASAEIIISQAhBBC\nSAAIIYSQABBCCCEBIIQQQgJACCGEBIAQP1tPjxocVMFgam+v+uIXlbwhQtxDyjAMeRfE1ujvV8Dy\nMidOPAvN8BKMzsx4+/uNM2dkHAqx1ZLlLRBboKNDZWSg6ywu4nRy4kQa/AlkgLO09FJp6QUo8fmW\nnM7I9et85CMSBkLICkA8EC5dUmVlBIPMzTEyQmcnfj+f/WwN7IVGOAbl4IJWaIbrMDY15e/tNX75\nl2VwCiEBILatlhaVn4/Px+wsQ0N0dREKsbDA2bNomtK01Jqa/PR0OxyDJqiDVBiCi3ARejyelZGR\nSGsrH/2oDFQhJADE9nHrlsrOxu1mepqBAQYGCAaZnqa+HpeLb3+bU6eor8dmw2xOstmyKyoqYB80\nwREohWW4Ac1wAyYmJgLd3cZb3iIjVggJAHF/6+5WqamsrTE5SV8fY2P4/QwP89hj9PbicvHP/xwf\ne0qpkyc5cACHA01TFktaTU1BSoojsSyogSTohwtwCfpcrtXh4WhbG088IaNXCAkAcZ/p61MmE8vL\njI/T28vMDF4vp05RUKCWl407AmCz3bvVyZPU12O1YjYn2+05paW7YT80wWEoggVogWZog6nR0UBX\nF297mwxjISQAxL02NKR0HV1naYnRUbq7WV7G5eLZZ43Pflb93ABYl5SkmprYvz++LNC09Lq6QqWq\n4QSchGowoBfOwxXoX1lZGx7W29t58kkZ0kK8KnIbqHgjxbZ8IxHm53E66ezE42F1la9+9TVPytHo\nxj9itaoTJ/y1teNW67imnbfbc4uKKuEgNMGH4A9gNi/v2uHD5w8fvmkY6U5n8PZt3vUuSQIhJADE\nlrh6VRUXEwgwO8vwMLdvEw6zuMi///svOhGPjMRfISVFnT4d2bdvuapqWdM6LJYv19QUQW1iWfB2\niCjVbbWet1qvQN7SkmtwUL91i098QsJACAkAcXe0taniYrxepqcZHKSvj2CQ2Vmef/6NnHnD4Y1X\nq65Wx475amtHLZZRTftJVdWO/HwzHIImeAL+GKby81/Mzz9//Hh7JJLmdIba23nf+yQJhJAAEG+c\nzk61YwcuF5OTDAwwPEwgwPg4L7xwF2fb/v74i6emqrNnw3v3LlZVLZrN7RbLMw5HMdQnlgXvhlBy\n8u2qqvNVVS/CjoUFz8CA3tgoSSAkAIT4xfT2qvR0lpeZmKCvj8lJfD66umhr26IZNhTauJ20vl4/\ncsRbWztisTjN5h86HDt27rTAYTgFvwOfgYnCwiuFheehIhSadzrDra381m9JGAgJACFe86/hymRi\nYYGxMXp6WFjA4+Hpp+/NfLr5lraMDHXuXGjPngW7fcFsbrVav
2izlUADNMIJeD/4UlNvVVc3V1df\ng9zZWc/AgHH6tCSBkAAQ4ue5fVtlZqLrLCwwOkpXF2trrK3x5S/fF3Oo37+xLNi3Tz9yxFNdPWSx\nDGva9xyOvJwcGxyBJvgjyITRkpLLJSUXoMzvX3Q6wy0t0kknJACEeCUXL6ry8o1+t44OAgGWl/n6\n1++7SXPzsmDHDnXmTKihYc5mm9O0Fqv185pWBnugEc7Bb4M7I+NmXV1zXd1LkD097evrM86dkyQQ\nEgBCANDSosrL8fmYmWFoiO5uQiHm5/n2t+/3iXJtbWNZcPhw9NAhd3V1v6YNaNp3qqt3ZWba4Sg0\nwWcgDUbKyi6WlV2EYq93eWQkcuOGdNIJCQDxELt1S+Xn43YzNcXgYLzfbWqKH/5wO82Mm5cFhYXq\n1KlgQ8OMzTZjNr9os31u9+5y2AtN8Cg8AStZWa179jTv2XMdsiYn/T09xpvfLEkgJADEw6S7W2Vl\nsbrKxAT9/fF+t6EhrlzZxrPhwsLGsuD48ejBg2sOx5qm9Vos36ypyU9NrUp00v01JMNgRcWFiopL\nUOh2r8Q66R5/XMJASACIB1pfn0pJYWnpZf1uTz314Mx9m5cFZWWqqSlQVzdls02ZzVfs9n8qK6tI\ndNK9Gz4JSzk5Lfv3N+/f3wqZY2P+7m4efVSSQEgAiAfLwED86e3z8xv9bm43zz77wM5309Px/zST\nSTU2Rg4cWHU4VjWtW9Oeq60tSEqqhuPQCH8HCvrM5gtm8yXIX11dGx6O3rwpnXRCAkBsfy+9pAoK\n4tu8IyN0db3+frftSNc3/jM1TZ044a+rm7BaJ8zmC3Z7TklJJRyAJvgA/C7M79z50qFDzYcOtUGG\n0xno6OAd75AkEBIAYhuK9bv5/czNMTRER8cb1u+2HY2Oxv+rk5PVqVOR/ftXqqpWNK3TYvlqbW0h\n1CTKJx6FKPRYLOctliuQt7zsGhrS29v5+MclDIQEgNgOtqbfbTuKRDbegaoqdfy4r7Z2zGIZ07Rm\nuz23sNAMB+EUfBj+CGZ27Xrx6NHzR4+2R6PpTmfw1i3e8x5JAiEBIO5XsX63tTWmpujvZ2SEQICx\nMZqbZeZ6mcHBjU66Rx4J79u3ZLcvadoti+VL1dVFUJdYFrwTQklJnXb7ebv9KuxcXHQPDuptbXz6\n0/KWCgkAcd/Y3O/W28vUFD4fHR20t8tU9VOtd9IBtbXq2DFvTY3TanWazT+qqtqxa5cFDsEp+AT8\nKUwWFFwtKDh/4sTtcDjN6Qy1tfGBD8jbKyQAxD01MHAf9btt2wSNv11paercufDevYt2+6LZ3Ga1\nPm23F0MDnIQT8D7wp6TcdjiaHY5rkDs/7+nvN06dkndbSACIrdXerrKziUZZWMDppKsLl+s+6nfb\njoLBjVNme/boR454amo8FsuIpn3f4diZm2tNdNL9PvwFjBUVXS4qugDlgcCC0xm+fp3HHpM3X0gA\niLvswgVVURHvdxsepqODYJClJZ57TiagN8DmU2bZ2ers2dCePfM227ym3bBa/9ViKU1UVZ+GD4In\nPb29tra5tvYlyJmZ8fb3G2fOyAchJADEXXD9uqqo2Jb9btuRx7OxLDh4MHrokLumxq1pg5r2fHV1\nXlaWLdFJ9yeQASOlpZdLSy9Aic+3FOukk6pqIQEg3hi3b6tdu+L9bgMDDA4SDDI5yY9+JLPM1i0L\ndu1SjzwSrK+ftdtnzeaXrNZ/MZvLEp10b4IPgyszs7WhobmhoQWypqb8vb3GL/+yfEZCAkC8Xt3d\nKjMz3u/W18f4OH4/g4NcvSozy5ZaXt5YFhw9Gj10yOVwuDStX9O+VVOTn55elVgW/CWkwlB5+cXy\n8otQ5PEsj4xEW1ulqlpIAIjXYnO/W08Ps7MPWr/bdl8WlJSopqZgff20zTZtNl+x2T5bUVEB+6AJ\n3g4fg+Xs7Bt7976wd+8Ny
BwfD/T0GG95i3yCQgJA/MypPykJHqZ+t+1odnZjWXDyZPTgwbWqqjWL\npUfTvlFTU5CS4kh00v03SIL+ysoLlZWXoMDlWo1VVT/xhHygQgJAbHLtmios3Oh36+zE632I+t22\n+7Jg927V2Bioq5u0WifN5kt2e05p6e5EVfV74XdgITe35cCB5gMHWiFjdDTQ2cnb3y4frpAAeOi9\n+KIqKsLvZ3Y2frvnw9zvth1NTMQ/qaQk1dQUOXAg1knXpWlfq6srVKomsSz4H2BAr6ad17TLsGtl\nZS3WSfexj8lnLSQAHj43b6qiIjyeeL9bfz/BIDMzfO97MiNsP9HoxqdmtcaqqsctlvFYJ11RkRkO\nwCn4EPwBzOblXTty5PyRIzd1PX10NHj7Nu96l3zuEgDi4dDZqXJzWVtjcpKBgXi/2+go58/LLLDt\njYzEP8SUFHX6dGT//mW7fVnTblssX66pKYLaRCfd2yFiMnVZreet1quQt7TkGhzU29v55CdlGEgA\niAfUer/b+Dh9fUxN4fVy+za3b8tl/0AJhzc+0OpqdeyYr7Z21GodNZt/UlW1Iz9fg0PQBE/Cn8BU\nfv7V/Pzzx4/fikTSnM5Qezvve58MCQkA8QDZ3O/W3c3iIm43zzwj1/kDrr9/o6r67Nnwvn2xTrqb\nVuvTVVXFUJ/opPt1CCYnd1RVna+qugo7FhY8AwN6Y6OMEAkAsZ3dvKlycqTf7WG3XlWtlKqv148e\n9dbUjFgsTrP5hw7Hjp07rXAYmuB34DMwUVh4pbDwPFSEQvMjI+HWVj74QRkwEgBiW7l4UZWXxx/j\nNTIi/W7iZbeTZmSoc+dCe/Ys2O0LmnbDYvk3m610U1X1+8GXmnqrpqa5puYa5MzOevv7jUcekcEj\nASDuezduqPLyeL/b4CA9PYRCzM3xne/IBSwA/P6NZcH+/frhw57q6kGLZUjTnnc48nJybImq6v8L\nMmC0pORySckFKPX7F53OSEuLdNJJAIj70u3bKi9P+t3Ea14W7NihzpwJNTTM2WxzmtZitX5e08pg\nDzTBOfhtcGdktNXVNdfVtUD29LSvr884d07GlQSAuD/E+t1WVpic3Oh3GxjgxRflKhU/x9raxrLg\n8OHooUPu6up+TRvQtO/U1OzKyLAnOun+HFJhuKzsUlnZRSj2epdjVdXSSScBIO6Z/v54v9vYGL29\n0u8m3oBlQWGhOn06WF8/Y7PNmM0v2myf2727PFFV/Sg8AStZWa179jTv2XMdsiYn/T09xpvfLKNO\nAkBsld5elZyMYbCwwOgoXV2srOBy8aUvyXUofiELCxvLguPHowcPrjkcaxZLr6Z9s6YmPzW1KlE+\n8deQDAMVFRcrKi5Bodu9Euuke/xxGYQSAOKuiTX8xLZ5nc54v9vKCl/7mlx44q4sC8rLVWNjoL5+\nymqdMpuv2O3/X1lZRaKT7t3wSVjKyWnZv795//5WyBwb83d18au/KgNSAkC8oa5d2+h3Gxqis5Nw\nmIUFvvlNudjE3TI1FR9dJpNqaors37/qcKxqWremPVdbW5CUVJ1YFvwdKOg1my+YzZchf3V1bXg4\nevMmTz4p41MCQPxibt5UhYXS7ybuGV3fGGmapk6e9NfWTlitE5p2wW7PKS6uTHTS/Sb8Hszv3Hnt\n0KHzhw61QbrTGezo4B3vkLEqASBeu66ujX63/n6cTgIBnE4uXJArStwDo6PxgZecrE6fjuzbF6uq\n7rRYvlpbWwg1cAIa4VchCt0WywWL5QrkLS+7YlXVH/+4DF0JAPEq9Paq1FSWlpiYoLeX6Wm8Xtra\n6O6WS0jcY5HIxiCsqlLHj/tqa8es1jGzubmqKregwJzopPsI/BFM79p17ejR5qNH26PRNKczdOsW\n73mPDGMJAPFTxPrdFhcZHaWnR/rdxP1rcHCjk+7MmfDevUtVVUtm8y2L5dnq6iKoS5RPvBN
CSUmd\ndvt5u/0q7FxcdA8M6G1t/O7vysCWABAAtLWp3FyiUebn4/1ubjerq3zlK3KRiPvaeicdUFenjh71\n1tY6LZZRs/lHDsfOvDwt0Un3CfhTmCwouFpQcP7kydvhcJrTGWpr4wMfkEEuAfAQu3RJlZURCDA3\nF3+gYzDI8rL0u4ltpqcnPmLT0tS5c+G9e2OddG0Wy1N2e8mmqur3gT8l5bbD0exwXIPcuTnPwIBx\n6pQMeAmAh8yNG6qsDJ+P6WmGhqTfTTwIgsGNU2Z79+qHD3tqaoYslmFN+77DsTM315ropPt9+AsY\nKy6+XFx8AcoCgUWnM3z9Oo89JuNfAuBBF+t3c7ni/W5DQwSDTEzw4x/L6BcPgs2nzHJy1JkzoT17\n5m22eU27YbX+q8VSCnugEU7DB8GTnn6ztra5tvYlyJmZ8fb1GWfPyrUgAfBgrpfj/W4TE/T1MTGB\nz8fAANeuyYgXDyC3e2NZcPBg9PBhd3W1W9MGNe271dV5WVn2xLLgzyAdRkpLL5WWXoQSn28p1kkn\nVdUSAA+I/n6VnMziIuPj9PQwNyf9buJhXBbk56vTp4MNDbM226zZfM1m+5fKylgnXSO8GT4CrszM\n1oaG5oaGFsiamvL39BhvepNcKRIA21N3t0pNxTCYn2d0lO5u6XcTD6+lpY1lwdGj0UOHXA6Hy2Lp\n07RvVlfnp6dXwTFohP8bUmCwvPxiefklKPJ4lmOddFJVLQGwbVy9qoqLCQaZn2dkhM5OfD7pdxPi\nZcuCkhLV1BSsr5+22abN5is22z9VVFTAPmiCd8DHYTk7+/q+fc379t2AzPHxQHe38da3ykUkAXAf\nu3ZNFRdLv5sQP8fs7MayoLExeuDAmsOxpmk9mvaN2tqC5GRHopPuv4EJ+isrL1ZWXoKCtbXVWCfd\nE088vNeUBMD9qL19o99tYICBAYJBpqf5/vdl9hfi5y8LKivVyZOBurpJq3VS0y7ZbDmlpbvhADTB\n++B3YGHHjpaDB5sPHmyFjNHRQGcnb3/7Q3d9SQDcd7q6VE7Onf1uIyNcvCizvxCvyvh4/GJJSlKn\nTkX274910nXFOumUqkksC/4HGNCjaRfC4f/V16eGhx+uJxZIANxf1vvdxsfp64v3u33xizL1C/F6\nRKMb147Npo4f99fVjVut42Zzs92eW1RkhoOXL/9LaSmhEPPzzM7y2GPq3/7tYbniJADuI7F+t4UF\nxsbo7mZpSfrdhHjDDA/HL6WUFPXII5F9+5arqpb3728vLcXvZ2aGoSG6ukhJeYjeEwmA+0Jrq9q5\nU/rdhNgK4XD8smpvVzk5eDxMTcUfphQKEYlIAIgtdPmyKi29s99taYlvfENmfyHulq4ulZ3N6mp8\ns210FL+fkREsFgkAsYW/+5eW4vUyM8PgIL29hELMzvLd78rsL8Td0tenUlNZXmZ8PP4wJZ+Pw4dJ\nSyMQkAAQW6KjQ+3ceWe/2/g4P/mJzP5C3C0DAwpYWIg/TGlpCZeLd71LzczIbaBiq/T0qIyMO/vd\n+vpoaZHZX4i74vp1tWsXkQgLC4yM0NWFx8PqKr/2aw/pGyIBcG/096ukJBYXGRujt5e5OTwenn5a\npn4h7pYrV1RJCYEAs7PxzbZQKL7Z9swzSgJAbIWuLpWWhmHEV6BdXayuSr+bEHdXa6sqKXnZZlsw\nyNzcw77ZJgFwD34HWe936+jA75d+NyHurs2bbf39DA8TCDA+zgsvPOzXnQTA1nnpJVVS8rIjJ6EQ\nCwt861sy+wtxt8Q225aX45ttk5P4fPT0cOOGXHcSAFulvV0VFNx55GRqih/8QEahEHfL5s22nh7m\n52WzTQJgy73ikROnU/rdhLhbOjpURga6zuJi/HT92hpra3z5y3LRSQBsoTuOnMzMSL+bEHfXpUuq\nrCy+zRt7mJLfz/IyX/+6XHcSAFtoYEAp9bIjJ9LvJsR
d1dKiysrw+eIPU5LNNgmAe+DGDZWX9wpH\nTqTfTYi759YtlZ+P233nw5Rks00CYOv8jCMn8uYIcZd0d6usrPhmW18fY2P4/QwPc/myXHcSAFvl\nPx45kX43Ie62vj6VkhJ/mNL6ZttTT8lFJwGwhV7xyMnEhPS7CXG3DA0pXYdEv1t3N8vLuFw8+6xc\ndBIAW2i93y32QMfYkZPeXq5fl4EoxF3R0qLy84lE4g9T6uyMb7Z99aty0UkAbCE5ciLEFrt6VRUX\nb2y23b5NOMziIv/+73LdSQBslTuOnHR3S7+bEHddW5sqLsbrZXqawUH6+ggGmZ3l+eflupMA2Cqx\nBzrKkRMhtlJnp9qxA5eLyUkGBqTfTQLgXmhpUaWlcuREiC3V26vS0+/sd+vqoq1NrjsJgK0iR06E\n2Hr9/cpkYmEhvtm2sCCbbRIAW66ra+PIyXq/28gIly7JQBTirrh9W2VmousbD1OSfjcJgHvgjn63\n6Wl8Pul3E+IuunhRlZdvbLZ1dBAIyGabBMDWGhlRkQjIkRMhtlBLiyovx+eLP0ypu5tQiPl5vv1t\nue4kALbK9etq166NIyddXbjdcuREiLvrFTfbpqb44Q/lupMA2CrS7ybE1lvvd5uYoL8/3u82NMSV\nK3LdSQBslba2eL/b9DRDQ/T2ypETIe466XeTALj3OjrkyIkQW2pgQMX+Yn5+Y7PN7ZbNNgmArRXr\nd7vjyEl3N62tMhCFuCteekkVFMS3eTc/TEk22yQAtlSs321hgfFx6XcTYivE+t38fubmGBqio0P6\n3SQAtpwcORFi60m/mwTAvXfpkiorkyMnQmypWL/b2lr8YUojIwQCjI3R3CzXnQTAVmlpUWVld/a7\nyZETIe6qzf1uvb1MTeHz0dFBe7tcdxIAW0WOnAix9QYGpN9NAuBeWz9yMjlJX1/8yMnwMJcvy0AU\n4q5ob1fZ2USjLCzET9e7XLLZJgGw5eTIiRBb7MIFVVER32yLna4PBlla4rnn5LqTANgqg4PKMADm\n5xkbk343IbbC9euqokL63SQA7qnYkZNwON7v1tkpR06EuOtu31a7duF2MzXFwACDgwSDTE7yox/J\ndScBsFViR05i/W5y5ESIrdHdrTIz4/1ufX2Mj+P3MzjI1aty3UkAbBU5ciLE1tu82dbTw+ysbLZJ\nAGw5OXIixNZP/UlJIP1uEgD31iseOens5OZNGYhC3BXXrqnCwo1+t85OvF7ZbJMAuAf219buBefc\nnCcQMH78YzlyIsTd9eKLqqgIv3/jYUqy2SYBcA8MDaXa7VehFEaLiy8XF184fbozEFjs7VUtLXz4\nwzIchXiD3bypiorweOKbbf39BIPMzPC978nlJgGwtbq6wnb7B2EPNMIZ+BB40tNv1tY219a+BNkz\nM76+PuPsWRmaQrwBOjtVbi5ra/GHKcU220ZHOX9eLjEJgC03P883vjGo1KDZ/N3q6rysLDschSb4\nM0iH4dLSy6WlF6DE51saGYlcv85HPyojVYjXY32zbXycvj6mpvB6uX2b27flmpIAuHeWlowvfjHY\n0DBrs82azddsts9VVpbDXmiCX4GPwFpmZmtDQ3NDw3XImpz09/Yab3qTjFohXq3N/W7d3Swu4nbz\nzDNyEUkA3AesVmZn+Zu/4dix6MGDrupql6b1adq3amp2paVVwTFogr+CFBisqLhYUXERitzu5ZGR\naGsrjz8u41iIV3bzpsrJkX43CYD7nmFsjMjSUtXUFKivn7ZapzXtqs322fLyCtgHTfAO+Dgs5eTc\n2LfvhX37WiFzfNzf3c1b3ypjWogNFy+q8vL4mcrYw5Sk300CYBuYmYkPUKVUY2PkwIFVh2NV07o1\n7bna2oLk5Go4Do3wt6Cgv7LyQmXlJShYW1sdHo7evMkTT8gQFw+1GzdUeXm8321wkJ4eQiHm5vjO\nd+TSkADYhsuCykp
18mSgvn7Sap00my/a7TklJbvhADTBb8CnYWHHjpcOHmw+eLANMkZHAx0d/Nqv\nyXAXD53bt1VenvS7SQA8QMbH42M3KUmdOhXZv3+lqmpF07oslq/W1RVCDZyAk/AWMKBH085r2hXY\ntbKyNjSkt7fzsY/J6BcPvli/28pK/GFKsX63gQFefFHGvwTA9heNboxjm02dOOGvrR23WsfN5vNV\nVTmFhWY4CE3wGPwhzOTlXTty5PyRIzd1Pd3pDN6+zbvfLVeCeDD198f73cbG6O2VfjcJgAfa8HB8\nZKekqEceCe/bt1xVtaxpty2WL1VXF0FdYlnwDgibTJ0223mb7SrsXFx0Dw3pN2/yqU/JtSEeBL29\nKjkZw2BhgdFRurpYWcHl4ktfkhEuAfCgC4c3RnlNjTp2zFdbO2qxjGraj6uqduzapcEhOAUfgz+F\nyYKCqwUF548fvx0OpzmdoZs3ef/75ToR21Ws4Se2zRt7mJLXy8oKX/uajGoJgIdMX1980KelqbNn\nw3v3Ltrti5p202J5uqqqBOrhJJyA90IgJeW2w3He4XgRdszPuwcGjKYmuWbEdnLt2ka/29AQnZ2E\nwyws8M1vykiWAHiIBYMbt5M2NOhHjnhraoYtlhFN+4HDsXPHDgscgSb4XfhPMF5UdKWo6DyUB4ML\nTmf4xg0+9CG5hMR97eZNVVgo/W4SAOKn23w7aVaWOns2tGfPvN0+bza3Wq3/arWWQgM0QiP8JnjT\n0tprappraq5Bzuyst7/feOQRuZzEfaera6Pfrb8fp5NAAKeTCxdkuEoAiFfi9W4sCw4ciB4+7Kmu\nHrRYhszm56ur87KzbYlOuj+GDHCWlFwuKbkApX7/YqyT7iMfkatL3Hu9vSo1laWl+MOUpqfxemlr\no7tbxqcEgHgty4K8PPXII6GGhjmbbc5sbrHZPm82l8EeaIJfgsfAlZHRVl/fXF/fAtlTU76+PuOX\nfkmuNHFvxPrdFhcZHaWnR/rdJADEL2BlZWNZcORI9NAhl8Phslj6Ne3b1dW7MjKqEsuCv4BUGCov\nv1RefhGKPZ7lkZFIa6tUVYst0tamcnOJRpmfj/e7ud2srvKVr8gIlAAQb9yyoKhInToVbGiYsVpn\nNO2q1frPu3eXJzrp3gZPwkp29o29e5v37r0OmRMTgZ4e41d+Ra5DcbdcuqTKyggEmJuLP9AxGGR5\nWfrdJADEG21+fmNZcOJE9ODBNYdjTdN6Ne3fa2vzU1IciU66/wJJMLB794Xduy9Bocu1Euukk6pq\n8Qa6cUOVlZmFRaEAACAASURBVOHzMT3N0JD0u0kAiC1fFlRUqMbGQF3dlM02ZTZfttn+saxsN+yH\nJngPfAoWc3NbDhxoPnCgFTLGxgKdnbztbXKJil9IrN/N5Yr3uw0NEQwyMcGPfyxDSwJAbJXJyfj1\nZjKppqbIgQOrVVWrmtZlsXy9trbAZKpJLAv+OwB9ZvN5s/ky5K+urg0NRdvbefJJuWLFa9PTE+93\nm5igr4+JCXw+Bga4dk3GkgSAuBd0fePas1jUiRP+uroJq3XCbD5vt+cWF1cmqqp/C34f5nbuvHb4\n8PnDh9sMI93pDHZ08M53ytUrfr7+fpWczOIi4+P09DA3J/1uEgDifuJ0xq/G5GR1+nRk//5YJ12H\npn2ltrYQahOddG+DqFJdVut5q/Uq5C0vuwYH9fZ2PvEJuZ7Fnbq7VWoqhsH8PKOjdHdLv5sEgLiP\nRSIbV6bDoY4f99XWjlksY2bzC1VVuQUFGhyEU/A4/DFM79r14rFjzceO3YpG05zOUHs7732vXNsC\n4OpVVVwc3+YdGaGzE59P+t0kAMQ2MTAQv1BTU9WZM+F9+5bs9iVNa7dYnnU4iqEu0Un3LgglJXXY\n7eft9quwY2HBMzionzwp1/nD69o1VVws/W4SAGL7C4U2bietq9OPHvXW1IxYLE5N+
2FV1c68PAsc\nhib4FPwZTBQWXi0sPA8VodC80xlua+M3f1Mu+4dIe/tGv9vAAAMDBINMT/P978swkAAQ29bm20nT\n09W5c+G9exfs9gWzudVq/aLNVgINiWXBb4A/NfVWdXVzdfU1yJ2b8/T3G6dPyxTwgOvqUjk5d/a7\njYxw8aJ89BIA4kERCGwsC/bt0w8f9tTUDFksw2bz96qr83JyrImq6j+Ev4TR4uLLxcUXoCwQWBwZ\nCV+/zoc/LDPCg2a93218nL6+eL/bF78oH7QEgHgIlgW5uerMmVBDw5zdPmc2X7dav2CxlCWqqs/A\nh8Cdnn6zrq65rq4FsmdmfH19xtmzMkE8CGL9bgsLjI3R3c3SkvS7SQCIh4nLtbEsOHQoeuiQu7q6\n32IZ0LTvVlfnZWbaE510n4F0GC4tvVRaehFKvN4lpzNy/bp00m1Lra1q507pd5MAEOI/LAsKCtTp\n08H6+lmbbVbTrlmtn6usLIe90ARvgcdhNSurtaGhuaHhOmRNTvp7e403vUnmju3h8mVVWnpnv9vS\nEt/4hnyCEgDiobe4uLEsOHYsevCgq7rapWl9mvatmppdaWkOOAaN8FeQAgMVFRcrKi5Bkdu9PDIS\nbW2VTrr7+nf/0lK8XmZmGBykt5dQiNlZvvtd+cgkAIT4KcuC0lLV1BSor5+2Wqc17YrN9k/l5RWJ\nqup3widgKSfn+r59zfv2tULm+Li/q4tHH5Vp5T7S0aF27ryz3218nJ/8RD4mCQAhfrqZmY1lQVNT\nZP/+VYdjVdO6LZbnamoKkpOrE510fwsK+iorL1ZWXoKCtbXVWFW1uLd6elRGxp39bn19tLTI7C8B\nIMRrXxaYzerkyUBd3aTVOmk2X7Tbc0pKKmE/nILfgE/D/I4dLQcPNh882AbTTmegs5PeXnkXt9p6\nv9vYGL29zM3h8fD00zL1SwAI8XqNjcVnkKQkdfp0ZN++FYdjxWzutFi+WlcX66SLLQveAgZ0WywX\nLJbLv/ZrgysrazduqPZ2PvYxmYPurq4ulZa20e/W1cXqqvS7CQkA8caJRjdmE7tdHT/ur60dt1rH\nzebmqqrcwsJKOARN8Bj8Iczk5V07cqT5yJF2XU93OoO3bvHrvy7z0RvvyhVVUkIwyPw8IyN0dOD3\nS7+bkAAQd83QUHxySUlRZ86E9+5dqqpa0rTbFsuz1dXFUJson3gHhE2mTpvtvM12FXYuLrpjVdWf\n+pRMT2+Al15SJSX4/czMMDREVxehEAsLfOtb8vYKCQBxl4XDGxNNba06etRXW+u0WJya9uOqqh27\ndmmJTrqPw5/AVEHB1YKC8ydO3AqH05zO0M2bvP/9MlW9Tu3tqqAAj4epKQYH6e8nFJJ+NyEBIO6F\n3l7js59VBQXqAx8wzp4N7927aLcvatpNi+WpqqoSqE8sC94LgZSU2w5Hs8PxIuTOz3sGBoymJpm2\nXoOuLpWdzepqvN9tdBS/H6dT+t2EBIC4p/7hH+jtZWWFj3+choZYVfWwxTJiNv/A4di5Y4c1sSz4\nPfhzGC8qulxUdAHKg8EFpzN8/Tq//dsyi/0sfX0qNZXlZcbH6e1lZkb63YQEgLjPbL6dNCtLnT0b\n2rNn3m6fN5tvWK3/ZrWWwB44mXjKsTct7WZNTXNNzUuQMzvr7eszzpyRSe1OAwNKKRYWGB2lp0f6\n3YQEgLjveb0bp8wOHIgePuyurnZbLEOa9rzDkZedbUtUVf8JZICzpORSSclFKPX7F0dGItev85GP\nPOxz3I0bKi+PSISFBZxOOjvxeKTfTUgAiO25LMjLU488EmxomLXZZs3ml2y2z5vNZbAXGuFN8GFw\nZWS01dc319e3QPbUlK+31/jlX34Y57vY7Z6BALOz8X63UEj63YQEgNi2VlY2lgVHjkQPHYp10vVr\n2reqq3dlZFQlOun+AlJhqLz8Unn5RSj2eJZHR
iKtrQ9LVXVrqyopkX43IQEgHvRlQXGxOnUqWF8/\nY7XOaNpVm+2zFRUViarqt8GTsJKdfX3v3ua9e29A5sREoLvbeMtbHtipcHO/W38/w8MEAkxMSL+b\nkAAQD5y5uY1lwcmT0QMH1hyONU3r0bR/r63NT0lxJMon/iskQf/u3Rd3774EhS7XSqyT7kGqql7v\nd4s90HFyEp+P3l6uX5fZX0gAiIdjWVBRoRobA3V1UzbblNl82W7/x9LS3XAAmuA98ClYzM1tOXCg\n+cCBVsgYHQ10dfG2t23vWbK/XyUlxfvdenqYn5d+NyEBIB4+k5MbnXTrVdVmc5fF8rXa2gKTqSax\nLPh7MKBX0y5o2mXIX11dHRrS29t58sntNG92dKiMDHSdxUWcTrq7pd9NSACIh97mTjqLRZ044a+r\nm7BaJ8zm83Z7bnFxJRyEJvgg/D7M7dx57fDh5sOHbxpGutMZ7Ojgne+83+fQ2AMdg0Hm5hgZobMT\nv5/lZb7+dZn9hQSAEAA4nfEJMTlZPfJIZN++5aqqZU3rsFi+XFNTBDVwEk7C2yCqVJfVet5qvQJ5\nS0uu2LLgE5+476bUlhZVWorPx+ys9LsJCQAhfp5IZGNydDjU8eO+2tpRi2VU016w23MLCrREVfXj\n8McwnZ9/NT///LFjtyKRNKczdOsW733vfTG93rql8vNxu5meZmCAgQGCQaan+cEPZPYXEgBC/DwD\nA/G5MjVVnT0b3rt3yW5f0rR2i+UZh6MY6hKddO+GUHJyR1VVc1XVi7BjYcEzMKA3Nt6zqbarS2Vl\n3dnvNjLCpUsy+wsJACFei1Bo43bSurpYJ92IxeLUtB85HDt27rQkOul+Bz4DE4WFVwoLL0BFKDTv\ndIZbW/mt39q6mfeOfrfpaXw+6XcTEgBC/GI2306anq5+6ZdCe/Ys2O0LZnOr1fpFm60EGhLLgveD\nPzW1vbr6fHX1Ncidm/P09xunT9/FiXhkREUiQLzfrbub5WVcLp59VmZ/IQEgxBsnENhYFuzbpx8+\n7KmpGbJYhs3m71VX5+XkWBOddH8IfwmjxcWXi4svQFkgsDgyEm5peYM76a5fV7t2EYkwP4/TSVcX\nbjerq3z1qzL7CwkAIe7+siA3V505E9qzZ85mmzObr9tsX9C0MtgDjXAGPgTu9PSbdXXNdXUvQfb0\ntK+vzzh37hedo6XfTUgACHGPuVwby4JDh6KHD7sdjn6LZUDTvlNdvSsz05bopPsMpMNwWdmlsrKL\nUOL1Lo2MRG7ceD2ddG1t8X636WmGhujtJRhkdpbnn5fZX0gACHFPlwUFBer06WB9/YzNNqNp16zW\nf66sLId90AhvgcdhNSvrxp49zXv2XIesyUl/T4/x5je/qum7o0Pt2IHLxeQkAwPxfrfxcV54QWZ/\nIQEgxL22uLixLDh+PHrwoMvhcGlan6Z9s6ZmV1qaI7Es+GtIhsGKiosVFZeg0O1eGR6OtrX91E66\nWL/b8jITExv9bt3dtLbK7C8kAIS4X5cFZWWqsTFQXz9ts02bzVdstn8qL6+A/dAE74RPwFJOzvX9\n+5v3778BmePj/q4uHn104xWk301IAAixLU1Px2dqk0k1NkYOHFitqlrVtG6L5bna2vykpOpEJ93f\ngoK+ysoLlZWXIX9tbW1oKJqbi67Hb/fs6mJtjbU1vvxlmf2FBIAQ24eub8zaZrM6edJfVzdptU6a\nzRft9pySkspEVfX74XdhfseOlw4daobzzc1LIyN0dBAISL+bkAAQYpsbG9vopDt1KrJ//0pV1Yqm\ndWraV+rqYp10J+AkvBU+kJa2dPOm9LsJCQAhXqMrSkUgDRREIAgeWIYV+CPj3k+mmzvp7HZ1/Li/\nrm7MYhnTtGa7PbewsGRkZKSnh1CIiQl++EOZ/YUEgBCvQotSmVChVBqYQIewYQTAB3mwAp9Vagz+\n1rhfZtWho
fifJCVFnTkT3rdv6dix5ZkZo7ubcFhmfyEBIMSrc1upCpNpZ1JSZlKSMpkwDEPXg7ru\ni0a9hpFqGCmQBMnwn5Xqgy8b99H0Gg4bwDPPKPkchQSAEK9Nv1KVycl56elkZJCejslENKpCofRg\nMC0YTI5EVDRqGEYUIhCCILxVqe8b8iu2EBIAYjsbUKo4NXVnTg67drFrF1lZKEUggNvN2ppyu3P8\n/ohhhHU9aBjZkAv5YIHTSl2SDBASAPIWiG2qW6n8lJSdubmUlmI2U1ZGTg66ztoa8/PMzABK19N1\n3R8OpxlGGmRAFuyEwld6wZ8oFU6sEoIQgCclJIQEgBD3m6tKlZlMBRkZFBRgtVJXh6aRk0MoxNwc\naWlEIgQCBAKpoVByNJpkGMmGkQKpkAG58DalvmsYQKtSEciGCgCiEAQfuOAppaZhDf7u9SZBs1Jh\nCEME/BCAAHxSckVIAAjxumVDanJyclYWxcVYLFRXY7ORnY3XS2oqXi9LS2RmkpKSlJRkUsoECkyJ\n3eB0yIQ3K/XfoVCpbKVSTSYT6IYRNoyAYfgMIwsyIRNm4A+V+n9ey6zdr9Qi7IDd63clQQA8sAZf\nUGoC/kpiQEgACPF6Bq5S6cnJZGWxcyeFhZSUUFxMRgYpKSwvk5lJWhrJySQloZQBOhgQm3EVJEEa\npIEtOXlnSgqpqZhMANEo4XAgEkmNRpMNw2QYQOwf/wOlhuE7r2LWvq7ULnCYTBkmU4pSBkQMI6jr\nfsPIMoxMSId0+F9K9cHnJAaEBIAQr94LShWbTMlJScTm7pQUTCZ0nUiEcDj+E4kQjaLrsa9foomf\n9SSIrQN27thBTg6ZmSQnx7818nrT/f6kYJBIJKrrEcMIQQD84IV3KPWtnzlldylVmZRUkJKSvP4H\ni0aJRLLDYV84nKLrSboeC5XYn+ejSn1eMkBIAAjx6sXmdHSdcBiPh+Vl0tNJTmZ1lfl5VlbweAgG\niUSCuh4yjPUv4mM/JL4LorKSoiJ27MBkwu9ndZWlJZaXUyDTMIKGkW4Ysa3jXCiAhZ/5p+pVqiQ5\nuSAzk1iupKcDBIP4fHg8mX4/waAOEV0PJ/aZK+A9Sr3raflIhQSAEK9CbPYM6jqhEGtrzM2RmYnH\nQ1ISbjfT08zOsrqKz6eHw75oNGAYwU2394QS+wEmoKYGs5lduzAMVleZniYpKfY7e1okkqrrKYaR\nahhpkA7ZkA9vUeoHr/Q7e5dS+cnJBTk5FBVRVkZhIZmZRCK4XCwtsbCAUpmGEQoGA5tyZQeUyCcq\nJACEeJUC4DcMTzQa8vlSY09OiUSYm8NkwudjeZm5OZaX8XrXgkFvNOo3DL9hrN+EE0i8jgLq67Hb\niT15fXY2vg7wePB4kgKBpHA4KbF1vH4HUfYr/ZFeVKrUZCrKyKCwEJuNqirKysjMJBBgbo6JifVv\nqFIjkRRdTzaM1MQL5sgnKiQAhHiVfJBqGK5IZMnvL11eBvD5yMxEKUKh2CkwXK41n28tHPZEo17D\n8MH6TzjxOgagadhs7NyJ349hsLIS/+omJQWTSSmFUsowVGLRkALpm24hXZcLmcnJSbm5lJVRVUVD\nA5WVpKeztkZGBqEQLhcuFx5PUlJSUiRiUspkGMmQDKnyiQoJACFeJRckQbqupwaDuN350Whq7O5P\nkym2kRsMBNx+vysUckcibsPwGIYH1n/0xE8UyM0lO5vMTHSdlJTYXUPrP1Gloom/ef0OohTIePmf\n50dKVSqVmZpKTg6FhZSXYzZTWUlKChkZeDzk5pKRQWoqyclKKUNtNP/EbkkSQgJAiFdlKbaFq+um\nSCTq9/sjkaxAIC0pyaRUVNdDkUggEvGFw95o1KfrHl33gAvc4IbQptk/AoRCeL0AbnfsN3QCAcJh\notGIrocNY33feP0motjXQZtlAiZTakoK6elkZZGdTVZWvJgoKSl+g2li0o8
qpW8KIbkBSEgACPEa\n/IVh/E+ldCAaDRuGX9fTw+EUk8kEBkR0PaTrQV3367rfMLzgATeswVri/ssIhMEP8e/909Pxepma\nYmGB1VW8XkIhfzQaNIyQYYQgVhERu48oCZLgzUr9aNO3QNHYFK8Usa1pj4fVVYCVFdbWNudKWNc3\n35IUyxUhJACEeLUWIAgGhHTdZxjpSiUrZYrNxYYRv3nfMPzgg9gKYBUisYRI3Ed0ABgZweUiNRWf\nj8VFpqdZWsLjicR2j3V9fd84dgeRSvxsvnJiU3lQ19NiOxDz82Rn4/NhGCwvMzXF4iIuF4GAHon4\ndT1oGKFEooQ27UkIIQEgxM83DBUQgQBkGUa6YaSASSkMQ09M8bGJ25sIgNg8qydmfy+Uw5/+7//9\nPz/0IZKTCQbj92suLxsez2ow6IlGfYbhMww/rP+8oiAEDMMbieTGUmRigmiU2VkMA5eLuTnm5mK3\npbpDIZ+u+w0jYBibc0UICQAhXq1vGMavK+UBP+RABqRAkmEo0Df9jh9bAbgTX7PEvv8Jgw/+EyyC\nAX/61FN98O3jx2M3gIY9nlW/fy0cdkWjHsPwwvpPOFELYSROk8X4IMUwXJFIrseTtbiIUrjdZGZi\nGPHDZcvLrK15/H5XOBzPlU2hEpGPU0gACPGaPGcYb1bKA3mQBamQDOrlX/LE+hvW77nRE88KXoLs\nxNGw2Crhl1566fulpb5QyBMMesJhTyTi1XX3ptuH3C+/fWjzBoAHTIaxFomk+f1qeTkzdvgrLQ3D\nIBTC58PrXfP51oLBWKhszhWP7AMLCQAhXocfGcZBpXZBbmIRYNr0a34QwpCUCIDY/x6CVfiWybSq\n65mQBTlQAIUw5XYHo9FAJOLXdZ+uew0jNu+7YA3CiQ3b2JPFNlsDID0aNQWDUcPYEYlkeTxpyclA\nJBoNhELeUMgbDnsjEU8sABKJsn5XkhASAEK8ZjcNo0SpXZADaYkBHfslPRli53hjARCb/ZehKytr\nORhMMYwUw0hNPCUmDz7t8XwmKSlkGAHD8BuGb9PtQ+unBzZuH9pkEYAkwzCi0XAw6I1GMwKBZJNJ\nxfqlo9FgNOrXdb+u+wzDaxjrobK2fhxBCAkAIV6HWcNQSqVADsTKltMgZVMAkHjMy63GRqanWVlJ\nDoeTdD1p01nc2Grgr6LRTya+F1q/fWhtU6jEmkGff/kx4P9sGP81dne/rocMw6vraSZTslIKDMOI\nGEbYMIKxXAFvYlWxmjhYIHsAQgJAiNfPMIwWpY6DB7IhPREAsdk/BKP/5//Q08PAAKmpJCWp2OEs\nINHxkAxpsAOWNm0MxAKAxN5vbP/A80p/gLnEdm7AMDINI80wkjd9H3VHp3TssTDBTS8rhASAEL/Q\nUHampmr5+TgcHD7MkSPU1pKfTyjE5CS9vfFDuUoR+21dqTueEpMCaTAPocTtQ56X7x+EwQs/fKUq\n0H8wjMeVCoEPsiA9EQAkpvhQ4kmTsQAIvvxlhZAAEOIXWATEzgEkJZGUBKw/3YVAALc7/hfh8MZT\nYhI1D+sdD8mQAi9CeeIOfVPitiI9cebghz/94S2jkAfexG2pqYl//D8GwPqX/usvK4QEgBCv09eU\n2h37LkXX40e6FhbIzGRtjUCAmRnm5+MdD8FgIBoN6HrsCO76TyRR+5wGU4mnBZgS0RL78uf8z3x0\n1wuGcVipVdgF2ZCWuAHJ2HQwzZ/4xmn9ZUPgls9PSAAI8brF9ngDuu4PhTJiZQyxLv6MDIJBlpbi\nPT8uF8GgNxLx6/rms7ghiGx6SownsX+gNs3+11/FgxtbDWO3UnmwEzITi4D120/ZdFdSbFURAg+0\nGMYzzyj5EIUEgBCvR+wre6+urwaDGWtrzMyg66yskJq68UyupSXc7rVAwB2J3HEW947bOt0v/+W9\n87U8s3fCMJRSOZAHGYkLLHXTXUmxmT7
WSLEIQ/JAYCEBIMQvwhU7B2AYK6FQqtudbzLFf/FPSSEa\nXX/I16rXuxoKuaJRj657DcOb+FLem9gNjt3sPwsK5l7v1GwYBqCUSk/ckpT28ruSouCDEZn6hQSA\nEL+43zeMLyhl0vXUSET5/RHDyA0EMtLSMJkwDD0c9gWDnmDQEwp5IhGPrt/xiBhjU81DCObfiKnZ\n2PQiaUqlgQlWZdIXEgBCvOFi56qSdd0IhYK67gqF0pOTk5UyIBKNBqPRQDTqj0Z9uu7Vde+mjgfv\ny0/5/uAuzNFBmfeFBIAQd89C7E4ew4joeiAc9kSjqeFw7GmLumGEDSMU2/h9ecfD6qbZPyLNzEIC\nQIjt6O8M48+UikDYMPyGkWEYqdFoklLE+hggbBjrHQ+xFcD6I2LWy+Nc8j4KCQAhtqNp8EIoVg5q\nGKmJJwSsPwZycwC4ErP/esmPF16Q72qEBIAQ29FThvHrSvkTTwiI3XuzftN9OFHlFvsKKJr4v2LZ\n4IcpeQeFBIAQ21c7aLCWeEJA6qZCnvUA8G66zX/9nNcqtMuv/0ICQIjta8Qw8pQqh/xEH0Pyywt5\n9MTsrzad0V2DGzL7CwkAIba7FcMAdiqVB9mb+hiSYs8NTvQ9rJcxvChTv5AAEOJBsrrpKTEZkJ54\naHDypgeE+aFfZn8hASDEg2f9LK5SKivxlBgTRGFW5n0hJADEQ5UEQojNTPIWCCGEBIAQQggJACHu\nvr4+9fTT8iAUIe4Z2QMQW+TiRXXuHIGAMTfHE0/0wS1orqm5Bjmzs97+fuORR+SbeiFkBSAeODdu\nqPJyfD6mpujtBf4RkuCP4Hn4XknJ3zzyyJug1O9P6elRX/iCLAuEkBWAeCDcvq3y8nC7mZpiYIDB\nQT760f/3r/7q85pWBnugEc7Bb4M7I6Otrq65rq4FsqenfX19xrlzsiwQQgJAbE/d3Sozk5UVJifp\n62N8HL+f7m7+/u/d1dX9mjagad+prt6VmWmHo9AEfw6pMFxWdqms7CIUe73LIyORGzf46EclDISQ\nABDbRH+/SklhaYmxMXp7mZ3F6+Wppzbm8cJCdepUsKFhxmabMZtftNk+t3t3OeyFJngUnoCVrKzW\nPXua9+y5DlmTk/6eHuPNb5YkEEICQNyventVcjKGwcICTifd3ays4HLxpS+9bO5eWNg4rHv8ePTg\nwTWHY81i6dW0b9bU5KemVsExaIK/hmQYrKi4UFFxCQrd7pXh4WhbG48/LmEghASAuG+8+KIqKiIU\nYm4Op5POTrxeVlb42td+6mS9+bBueblqbAzU109ZrVNm8xW7/Z/KyipgPzTBu+GTsJST07J/f/P+\n/a2QOTbm7+7m0UclCYSQABD31LVrqqgIv5/ZWYaG6OwkHGZhgW9+89VO0FNT8b/TZFKNjZEDB1Yd\njlVN69a052prC5KSquE4NMLfgYI+s/m82XwZ8ldX14aHozdv8uSTEgZCSACIrXXzpiosxONheprB\nQfr7CQaZmeF733s9M7Kub/xTmqZOnPDX1U1YrROadsFuzykuroQD0AS/Cb8H8zt3vnToUPOhQ22Q\n4XQGOjp4xzskCYSQABB3X1eXys1lbY3JSfr7cToJBHA6uXDhDZiFR0fjL5KcrE6diuzfv1JVtaJp\nnRbLV2trC6EGTkAjPApR6LZYLlgsVyBvedk1NKS3t/Pxj0sYCCEBIO6C3l6VmsrSEhMT9PYyPY3X\nS2srPT1v8LQbiWy8YFWVOn7cV1s7ZrWOmc3NVVW5BQVmOAin4MPwRzCza9eLR482Hz3aHo2mO53B\nW7d4z3skCYSQABBvkIEBZTKxsMDYGD09LC7idvPMM3d9nh0cjP8rUlPVI4+E9+1bqqpaMptvWSxf\nqq4ugjo4CSfgnRBKSuq028/b7Vdh5+Kie3BQb2vj05+WMBASAEK8Lm1tKjeXaJT5eZxOurpwu1ld\n5St
f2dKJNRTa+NfV1amjR721tU6LZdRs/pHDsSMvzwKHoQk+AX8KkwUFVwsKzp84cTscTnM6Q21t\nfOADkgRCAkCIV+3SJVVWRiDA3BzDw3R0EAyyvMxzz93LyXT9S6e0NHXuXHjv3kW7fdFsbrNan7Lb\ni6EhsSx4H/hTUm47HM0OxzXInZvzDAwYp05JEggJACF+phs3VFkZPh/T0wwN0dMTv/H/O9+5XybQ\nYHDjlNmePfqRI56aGo/FMqJp33c4dubmWuEINMHvw1/AWHHx5eLiC1AWCCw6neHr13nsMQkDIQEg\nxMvF+t1crni/29AQwSATE/z4x/fjjLn5lFl2tjp7NrRnz7zNNq9pN6zWf7VYSqEBGuE0fBA86ek3\na2uba2tfgpyZGW9fn3H2rCSBkAAQAnp64v1uExP09TExgc/HwADXrm2DWdLj2VgWHDwYPXzYXV3t\n1rRBTXu+ujovK8uW6KT7M0iHkdLSS6WlF6HE51uKddJ95CMSBkICQDyU+vtVcjKLi4yP09Pz/7d3\nn/Ft09UwoQAAIABJREFUXoe5wJ/DvfcmRbwvNjhEihI1SGo6iRPHznD2dew08YjTNJ1J25u29/7a\n/tpf74d+vk1XmsRy4tiOYyfOcBIuiZK4BwBikAS4wL0XAC7cDwAEVnV641gESfH5f9T4ILznnIdH\nB+d5MT19b7/bUbF3W5CRIS5f9paVTalUUwrFHaXy3xSKgmAn3fuAzwMrCQkdZWX1ZWWtQKLL5bZY\nfO95D5OAGAB0PJjNIiYGPh9mZjA8/Gv73Y6ihYXQtuDs2Z3Tp1e02hVJssny6zpdZlycJrgt+Csg\nGhgoLGwuLGwCctbWFvyddKyqJgYAPbBaWkRuLrxezMzA4YDRiI2N/0+/21G0d1uQlyfq6rylpRMq\n1YRCcVOl+ueioiKgAqgDPgQ8BywkJbVVVNRXVLQDCaOjnv5+3/vfzyQgBgA9QG7fFrm576rf7Sia\nmgptC2pqAlXVktQvST8wGLKiorTBTrq/BSIBa3FxU3FxM5C1vLzk76R75hmGATEA6Cjr7g71u9nt\nsNvh9WJiAj/96XFZ3fZuC06cELW1npKScaVyXJKaVark/PwTwarqTwBfBmZTU1urquqrqjqA+OFh\nj9GIxx5jEhADgI4ak0kkJ9/b7+ZwoKnpmK5oY2OBf3hk5N5OOpMsv2wwZAuhD24L/g/gA/olqVGS\nbgAZi4vL/k66555jGBADgA49qzXQ7zY6Cqs10O/27W9z/QKAnZ3Q56BSifPn3SUlo7I8Kkn1anVK\nTo6/k64OeBL4Q2AqPf12dXV9dXX37m6c0+nt7cXjj/OTJAYAHUp2uxAi0O9mNmN+Pkz9bkfR0FDg\nY4mOFpcvb1dULKjVC5LUK8vf1etzAANQA9QAjwHbEREmlapBpWoB0ubnVwcGdru7kZLCT5EYAHQI\ndHSItLSD73c7ira2Qh+RTifOn9/Q64eVymGF4pcaTWpmpgScBuqAZ4GvAa7MzJbMzIbz53uuXx/j\np0cMADpgN26I/Px7+93m5/GDH3D1f2dstlAn3dWrdzvpupTKFzSaXKA02En3McALaPf+3ZYWUVvL\nD5wYABTen/3z87G+jslJDAzAYsHmJqam8OMfczH67e3tpCst3T17dl2vd8iyU5J+rtGkpqUpgTP3\n/JXa2g8ARV7vjNO51dGBz36Wnz8xAGg/9fWJtLR7+91GR/HLX3L1uT/2fp00Pl5cu7ZZXj6rVs9K\nUvt/+bPfADZiY3v0+nq9/jaQPDW1brP5Ll/msyAGAN1v/f0iPv7efjerFa2tXHH2hdsd2hZUVu5+\n9av3/P6ng1XVfwLEA8N5eTfy8hqBfLd7zuncbm1lJx0xAOh+sNlEZCTm5jAyAosF09NYW8MLL3B9\nCd+24Pp1sfcXv/nNWwpFq1L575Lk76SrBa4BTwGr8fGdJSX1JSWtQ
NLExIbF4nvoIT4pYgDQO2cy\nidhY+HyYncXwMEwmLC09IP1uR9oXvoAzZ3ZOn17V6WySZJekN/T6jPh4dbCT7utADDBUUNBcUNAE\n5K6vL/irqtlJRwwA+o3cvCny8kL9bn19cLsfwH63o7st8MvOFpcueUtLJ1WqSYXilkr1rydOFAIV\nQC3wCPAMsJiY2F5eXl9e3g4kjo+7zWbfww/zIRIDgH6NO3dEXh7cbkxOYnAQJhM2NzE7i9df58Jx\nuMzOhk4LLlzYOXVqWatdlmWLJL2m12fGxGiBc0At8NdAJGAvKmoqKmoGsldXF/1V1U8/zWdKDAAK\n6u4WWVlYW4PLhYEB2GzY3ITLhZ/9jCvF0dgWFBaK2lpPaalLqXQpFDfU6n8qKPB30tUCjwNfAuaS\nk9sqK+srKzuAhJERt8mED36Qz5cYAMebySSSkrC0FOh3Gx6G2w2HA83NXB2ODJcr8LAiIkRd3XZl\n5ZJWuyRJJkl6xWDIiozUBTvp/h4QgEWhaFQobgCZS0vL/qrqZ5/l4yYGwDHj73dbWMDoKCwWTE6y\n3+1o290NPTtJEjU1boNhTKkck6RGtTo5N7c42En3P4DfB6bT0u6cPl1/+nSXzxc3POzt68OHP8yn\nzwCgY+Buv9vwMPr72e/2oBkeDjzKqChx6dJ2RYW/qtooyy8ZDNmAAbgA1AAfBHaEMMtygyzfBNIX\nFlb8VdVf/CIHAwOAHjjt7SI9HdvbmJ2FwwGTCWtr7Hd7YG1vhx6rRiPOn98wGEaUyhGF4lcaTUpW\nliLYSfcF4E+AiYyMW2fPNpw9272zE+t0bvb04OMf58BgANADwf91T48HU1OBfrfNTfa7HRcDA4Gn\nHBMjrlzZOnlyXqOZVyh6lMoXtdocoCTYSfdRYDMy0qhWN6jVLUDa3Nyq3b7b2YmvfIXjhAFAR1NH\nh8jLY78bYXMz9MRLSsTZs+sGg1OWhxWKt7TatPR0CTgD1AHPA38KjGdltWRlNdTU9G5txTocm11d\n+MxnOGYYAHR07O13s9kwNASPB2Nj7Hc77vr7Q1XVDz20Feyk65Tl76jVeXuqqj8JuKOje3W6ep3u\nFpAyPb1mt/suXuT4YQDQYZ/kIj4eCwuBfrfxcWxswGJBWxtnLwXsrao+eXK3unpNpxuU5SFJ+qlW\nm5aSogx20v0h8BfASG7ujdzcRqDA45lzOrfa2vC5z3E4MQDokNnb79bfj5kZ9rvRf2fvLbPkZHHl\nymZ5+YxKNSNJ7Urlf8hyPlAO1AKXgM8Ca3FxXQZDvcFwB0ienFy3Wn1Xr3J0MQDooPX1ifh47O5i\nbg5OJ8xm9rvRO7O6GtoWVFXtnDmzqtOtStKAJP1Yp0tPTFQHtwV/BsQBjvz85vz8JiBvY2Pe30nH\nqmoGAB0A/wsdvV5MT8PhgNEItxsLC3jlFU5IelfbgsxMcemSt6xsSqWaUihuq1T/VlxcGKyqfhj4\nPLCckNBZVlZfVtYKJLpc7v5+33vfy4HHAKCwaG0V+fnY2MDUFPvd6D6bnw9tC86d26mqWtFqV2TZ\nKkmv63QZcXGaYCfdXwHRwEBhYVNhYTOQs7a24O+kY1U1A4D2S0+PyMzE6iomJmC3w26H14uJCfa7\n0T5uC/LyxMWLntLSCaVyQqFoUam+UVRUFOyk+zDwRWAhKamtoqK+oqIdSBgd9ZjNvg98gGOSAUD3\nj8kkEhPZ70bhNjUV2hbU1m77q6olqV+SXjUYsqKitMFOur8FIgBbcXFjcfENIGt5ecnfSffMMxyi\nDAB6F+7pd5uYwMYG+93owLYFxcWipsZTUjKuVI5LUrNKlZyffwI4BdQBnwJ+D5hNTb1TVVVfVdUJ\nxA8Pe4xGPPYYRywDgN6JoSGxswMg0O9mNmNhASsrePFFziU6MKOjgeEXGSkuXtyurPR30plk+fsG\nQ7YQ+uC24P2AD+iXpEZJugFkL
C4u+zvpnnuOA5gBQP+t1laRmYntbczMwOmE0Rjod/v+9zl56FDY\n2QkNRZVKnD/vLikZVSpHFYoGtTo5J0cRrKp+EvhDYCo9/XZ1dX11ddfubpzT6e3txeOPczAzAOi/\nuKffrbcXW1vsd6PDvFsNjMzoaHH58lZFxYJGs6BQ9Mryd/V6fyedv6r6MWA7IsKkUjWoVDeBtPn5\n1YGB3e5ufOlLHNsMAAI6OwP9bhMTGBiA1QqvF1NTePNNzhA67La2QqNUrxfnzm3o9cNK5bBC8QuN\nJjUzUwpWVT8LfA1wZWa2ZGY2nD/fs70d63RudnXhU5/iOGcAHFd9fSI1FSsrGB+H3R7odxsdxa9+\nxVlBR4zVGuqku3p16+TJObV6TpK6ZPkFjSZvT1X1xwFPVFSfRlOv0dwCUmdm1uz23bo6jnkGwHHy\ntv1uZjM6OjgT6Ajb20lXVrZbXb2u1w/JskOSfq7RpKalKYNV1b8H/E9gNCfnZk5OI1Do9c46nVvt\n7XjySU4BBsADzd/vNjuL0VH2u9GDae/XSRMSxLVrm+XlsyrVrCS1K5XfUirzgTKgFqgFPgNsxMZ2\n6/X1ev0dIHlqat1m812+zBnBAHiw9PaKhATs7ga+7mkyYXkZy8v43vc41umBtbER2hZUVu6eObOm\n1w9I0qAkvanTpSclqYBq4CLwVSAeGM7La87LawTy3e45h2O7rY2ddAyAo6+5WRQUhPrd+vrg8bDf\njY7ptiAtTVy+vFlWNq1WTysUrUrlNyUpP9hJ9xDwOWA1Pr6ztLS+tPQOkDQxsWGx+B56iJOFAXAE\ntbaKggJsbGByEoODMJuxuYmZGbzxBgc0HUdLS6FtQXX1TlXVik63Ist2SXpDp8uIj1cD54A64OtA\nDDBUUNBUUNAE5K6vL/irqtlJxwA4Gt62383lws9/zhFM3BaEZkFOjrh40VtaOqlSTSoUt1Sqfzlx\nohCoAGqBDwLPAouJie3l5fXl5W1A4tiYu7/f9/DDnEcMgMPKbA71u1mtGBmB242hIdy4wVFL9J/M\nzIS2BRcu7FRV+TvpLJL0ml6fGROjDW4L/hqIBOwnTjSdONEMZK+sLDocO52diItjANChYbWK6GjM\nzwf63SYnsb6O73yHSz/Rb7otKCwUtbWe0lKXUumSpBsq1T8VFJwAKoE64HHgS8BcSkpbZWV9ZWX7\n9etDDAA6eAMDwj+GZ2YwMsJ+N6LfkssVmDIREaKubvvUqSWNZkmSTLL8isGQFRGhC3bS/T0gAJkB\nQAfszh2RlYWtLfa7Ed03u7uh6SPL4sIFd0nJmFI5plA0qtUpubnFvb09585hddUXHw+zmQFAB6Gl\nReTmBvrdBgfR14etLczN4bXXuPoT3R9OZ2A2RUWJS5e2KysXnn56ISEBS0uB2/UREQwACrvOTpGb\ny343ojDZ3vYBsFpFRETgsK2/H1NTx+6wjQFw8IxGkZqK5WW4XLDZ4HDA48HICOrrufoT7QurVURG\nAsDMTOhlSqurx+6wjQFwwCwWERcX6HezWOByYWMDRiO6urj6E+2L27dFdnbgTqXDAaMR6+vH9LCN\nAXCQbDYREYHZWYyMoL8fs7PsdyPaX7duiZwcuN2Blykd88M2BsDB6OkRiYmBfjenE2Yz+92I9l1X\nl8jJwdpa4LDNZoPXi8lJ/OQnx3TeMQAOQFOTKCwM9LsNDcFoZL8b0b4zGkVKCpaXAy9T8h+2DQ+j\noeH4zjsGQLi1tYnCQva7EYXV3cO20VFYrXC5sL6O3l709h7reccACKueHpGRgdVVuFyw2zEwwH43\non1nt4cO28xmzM1hdRXXr3PSMQDC6G6/29gYbLZAv9vAAFpaOBCJ9kVXl0hOxs5O4LDNZMLKCg/b\nGABht7ff7XheOSEKs7uHbVNTgZcpeb2Yn8err3LeMQDCxf9dTxz7KydE4dTeHjpsGxhAfz82NzE
9\njR/9iPOOARAuvHJCFH69vSI9/d7DtvFxvPUW5x0DIFx45YQo/MxmkZCAxcXAy5RGR+F2w27HrVuc\ndwyAcLl75cS/A/X3ux3nKydEYWCzBQ7bRkZgsfCwjQFwEO5eOdnb73bMr5wQ7SuLRURFwefD7CyG\nh2EyYXERKyv47nc56RgA4R2I//XKSV8feno4EIn2hf+/W/3HvP6XKa2vY3ERL7/MSccACKO9V078\n/W68ckK0r27fDh22DQ7CaMTWFmZn8cMfct4xAMKlu1skJfHKCVFYdXWJ7Gz2uzEADhSvnBCFn8kU\n6nez2eB0wuOB04nGRs47BkC4sN+NKPwsFhETg/n5wMuUJiawvo7OTpjNnHcMgHDp7X2bfjdeOSHa\nV/7Dtrk5DA+jv5/9bgyAg8ArJ0Rh1tkpUlKws4OZmcBh2+oqlpbw0kucdAyAMOKVE6Iwa24WBQXw\neAIvU/Ifti0s8LCNARBG91w5Yb8bURi0t4uCAmxsYGICg4Psd2MAHIS7V0729rvxygnRvvL3u62s\nBA7bBgfh9WJsDL/4BecdAyC8q/89/W68ckK0r/r7A4dtY2OwWjE2ho0N2O24fZvzjgEQLnf73Xjl\nhChsbDYRFYW5ucDLlKanedjGAAi7u/1ue6+csN+NaP+YzSImBj5f6GVK7HdjABwAi0XExt575aS7\nG0YjByLRvmhpEbm58HpDh20bGzxsYwCEHa+cEIXZ7dsiN5f9bgyAA3X3yom/381o5JUTon3X3R3q\nd7PbYbfD68XEBH76U847BkC48MoJUfiZTCI5+d7DNocDTU2cdwyAcOGVE6Lwu9vv5n+Zkv+w7dvf\n5qRjAISR/8qJv9/NZuOVE6Jw2PsyJbMZ8/M8bGMAhN3eKyc2G0ZHeeWEaH91dIi0NPa7MQAOGq+c\nEIXZjRsiP//ew7b5efzgB5x3DIAw/uAfHc0rJ0Th/tk/Px/r65icxMAALBZsbmJqCj/+MecdAyBc\n/FdO/Me8vHJCFB59fSIt7d5+t9FR/PKXnHcMgHDhlROiA9lzx8ff2+9mtaK1lfOOARAue6+c3O13\n45UTon1ls4nISMzNBV6mND2NtTW88AInHQMgjN72yonTicZGDkSi/Zp0sbGhlymZTFha4mEbA+Ag\n+K+c7O1345UTov1z86bIywv1u/X1we3mYRsD4IDwyglR2Ny5I/Ly4HZjchKDgzCZsLmJ2Vm8/jrn\nHQPgIAwM8MoJUTh0d4usLKytweUKHLZtbsLlws9+xnl3kCKO8z/+kUfERz6C2Vmu/kT7yGQSSUlY\nWgr8p7/NBrcbg4Nc/bkDOCD/+I9jwChwIyensa6uz+uNcTq32tvx5JMckUT3k9UqYmKwsIDRUVgs\nmJzkYRsD4IDs7ADAM89ogD8AaoA64AlgPTa2W6+v1+tvA8lTU+tWq+/KFQ5QonfLbhdCBL7w09/P\nwzYGwIH60pd8zz0nnM4BWR5UKN7U6dKSktRANVAHfBWIB5x5eTfy8hqBfLd7zuHYbmvD5z/P8Ur0\nzrS3i/R0bG9jdhYOB0wmrK3xsO3QET7fcXwe6eni8mWUlUGlgkIRqVIlKhQFQDlQB5wDCoEVoBOo\nB1qBUZdrw2r1PfQQxy7ti+vXRUyMmJz0mc3wePCtbx3tkeb/uqfHg6mpQL/b5ib73bgDODQWFwMD\nUQhRXb1z+vSKVrsiyzZJekOny4iP1wBngTrgL4AYYLCwsLmwsAnIXVtbcDi2OzrwhS9wKBO9jY4O\nkZfHfjcGwFGwdwOUmyvq6rxlZZNK5aQktahU/1JUVAhUAHXAo8CzwGJSUtvJk/UnT7YDCWNjnv5+\n38MPc1gTBeztd7PZMDQEjwdjY+x3YwAcetPToW3BhQs7VVXLWu2yJFkk6TWDITM6WgucB2qBvwEi\nAduJE00nTjQD2Ssri0NDO11dePppjnI6vu72u/lf6Dg+jo0
NWCxoa+O8OKSO6RnAb66oSNTWoqTE\nf1oQpVIlFRScACqBOqAayAHmgFagHugAXCMjHqMRjz7KT5XegQfgDMBmEwAWFjAygv5+zMyw3407\ngKNvfDwwgiMjRV3ddmXlkkazJEkmWX7FYMiKiNAHtwX/APgAi0LRqFDcADKXlpYGB3e7u/Hss5wD\n9CDr6xPx8djdxdwcnE6Yzex3YwA8cHZ2QqNZlsWFC+6SkjGlckyhaFCrU3Jzi4FTwEXgs8AfANNp\nabfPnKk/c6bL54tzOr19ffjIRzgf6EHjf6Gj1xt6mZLbjYUFvPIKRzsD4AHldAYGd1SUuHx5u6Ji\nQaNZkKQ+SXrJYMgB9MAFoBZ4FNgRwqRUNiiVLUD6/PyKf1vw/POcHnTktbaK/HxsbARepsR+NwbA\n8bK9HRroWq04f37DYBiW5WFJ+pVanZKVJQGngTrgaeCrwERmZktmZsO5cz3b27HDw5vd3fjEJzhV\n6Ejq6RGZmVhdxcQE7HbY7YGXKbHhhwFwHNntgXEfEyOuXt06eXJerZ6XpG5Zvq7V5gIlQA1wAXgc\n2IyK6lOrG9TqFiB1dnZtYGC3pobTho4Mk0kkJmJpKfAypeFhuN1wONDczGHMADjeNjdDXyctKdk9\ne3Zdr3fIslOS3tJoUtPTZeAMUAf8LvBnwFh2dkt2dgNQtLk543RudXTgiSc4i+jwuqffbWICGxvs\nd2MA0H+29yu2cXHi2rXNkydn1epZhaJDqfy2SpUHlAW3BZ8C3DExPTpdvU53G0iZnl6z2XyXLnFS\n0SHicIjtbQCBfjezGQsLWFnBiy9yoDIA6NfzeELbgoqK3TNn1vT6QVkeUih+otOlJycrg510fwT8\nJTCcm3sjN7cRKPB45hyOrdZWdtLRAWtrExkZ2N7GzAycztDLlL7/fY5MBgC9821BSoq4cmWzvHxa\npZpWKNqUym/Ksr+Trha4AjwJrMbFdZWU1JeU3AGSJiY2rFbftWucbxRu7HdjANB9trIS2hacPr1z\n+vSqTmeTZbsk/UinS09IUAc76f4ciAOGCgqaCwqagLz19XmHY7u9nZ10FA6dnYF+t4kJDA7CYoHX\ni6kpvPkmhx8DgO7rtiArS1y65C0tnVKppiTptlL5r8XF/k66WuD9wNPAUmJiR3l5fXl5K5A4Pu7u\n7/e9732cirQv+vpEaipWVjA+Drs90O82Oopf/YpDjgFA99vcXGhbcO5coKpakqyS9EO9PiM2Vguc\nA2qB/wVEAQNFRU1FRc1AzurqgsOx09HBTjq6b/z9bgsLGBsL9buZzejo4BhjAFC4tgUFBaK21lNa\nOqFSTSgUN1WqbxQWFgGVQC3wEeB5YD45ua2ior6ioh1IGB11m0x45BHOUvrt2WwiMhKzsxgdZb8b\nA4AOzsREYNZFRIja2u1Tp/yddGZZftVgyIyM1AU76f4OEIC1uLixuPgGkLW8vOSvqn7mGc5b+k31\n9oqEBOzuBr7uaTJheRnLy/je9ziKGAB0cHZ3QzNQoRA1Ne6SknGlclyhaFKrk/Py/J10dcCnga8A\nM6mpd6qqGqqqOoF4p9NjNOJDH+Icpv9Oc7MoKAj1u/X1weNhvxsDgA6ZkZFQJ93Fi9uVlYsazaIk\nGSXppZKSbMAQ3BZ8APABZllulOUbQMbCwvLg4G5PD557jlOa/pPWVlFQcG+/28wM3niDQ4UBQIfS\n3k46tVqcP+82GEaVylGFol6jScnOVgBVQB3wOeCPgMmMjNtnz9afPdu9uxvndHp7evCxj3F609v3\nu7lc+PnPOTwYAHQUDA4G5mp0tLhyZauiwt9J1yPLL+p0uYABqAFqgA8DWxERRpWqQaVqAdLm5lYH\nBna7uvDlL3O2H0dmc6jfzWrFyAjcbgwN4cYNjgcGAB01W1uheWswiHPnNvR6pyw7JekXGk1qRoYc\nrKr+IvA1wJWV1ZKV1XD
hQs/WVqzTudnVhU9/mjP/uLBaRXQ05ucD/W6Tk1hfx3e+wwHAAKCjz2IJ\nzOTYWHHt2lZ5+ZxaPSdJnUrld9TqPKA02En3CcATHd2r1dZrtbeAlJmZNbvdV1fHheCBNTAg/N83\nnpnByAj73RgA9ODyekO3zMrLd6ur1/X6IVl2KBQ/02rTUlOVwarq3we+Dozk5NzMyWkECr3eWYdj\nq70dTz3FdeHBceeOyMrC1lag381oxNoa+90YAPSg23vLLDFRXL26WV4+o1bPKBTtSuW3lMq8YCfd\nReAJYC02tttgqDcY7gDJU1PrVqvvyhWuEUdbS4vIzQ30uw0Ooq8PW1uYm8Nrr/HJMgDo2FhfD20L\nTp3aOXNmVadbleVBSXpTq01PSlIFq6q/BsQDzry85ry8JiB/Y2PO6dxua2NV9dHT2SlycwP9bgMD\nsFrZ78YAIG4LgtLTxeXL3rIyfyfdHaXy3xWKAuAkUAu8F/gdYCUhobO0tL60tBVIcrk2LBbfe97D\n5eMIMBpFaiqWl+FywWaDwwGPByMjqK/n42MAEAGLi6FtQXX1zunTKzrdiiTZJOl1vT4jLk4T7KT7\nCyAGGCwsbCosbAZy1tYWHY7tjg5WVR9SFouIiwv0u1kscLmwsQGjEV1dfF4MAKJfvy3IzRUXL3pL\nSyeVyklJalGp/rmoqChYVf0Y8BywkJTUfvJk/cmT7UDC2JjHbPa9//1cWQ4Lm01ERGB2FiMj6O/H\n7Cz73RgARL+Z6enQtqCmZufUqWWtdlmS+mX5B3p9ZnS0Lrgt+BsgErCdONF04kQzkLWyEuikY1X1\nQenpEYmJgX43pxNmM/vdiAFA73pbUFTkr6p2KZUuhaJZrf6/+fkngp10Hwd+F5hLSWk9dar+1KkO\nIH542GMy4dFHue6ET1OTKCwM9LsNDcFoZL8bMQDofhgfDywikZGirm67snJJq12SJJMkvWwwZEdE\n3K2q/gfAB1gkqVGSbgCZS0tLg4O73d149lkuQ/uorU0UFmJjA5OTGByE2cx+N2IA0P22sxNaUJRK\nceHC3U66Bo0mJSenONhJ91ngD4DptLTbZ87UnznT5fPFOZ3e3l589KNcku6znh6RkYHVVbhcsNsx\nMMB+N2IA0D5zOEKddJcubVdULGg0C5LUJ8vf0+tzAANwAagBHgV2hDAplQ1K5U0gfX5+xb8teP55\nrlDv1t1+t7Ex2GyBfrfBQdy8yc+WGAC0//Z20ul04ty5DYNhWJaHJemXGk1KZqYU7KR7Gvgq4MrM\nvJWZ2XDuXPf2dqzTudndjU9+kqvVb4P9bsQAoEPEZgusPjEx4urVrZMn/VXV3bJ8XavNBUqCnXSP\nA5tRUX0aTb1GcwtInZ1ds9t3a2u5eP2Gn7OIiACAmRkMDwf63VZX2e9GDAA6BDY3Q18nLSnZPXt2\nXa93KJVOheItrTY1LU0OdtJ9GfhzYCw7+2Z2diNQtLk543RudXTgiSe4lr2927dFdnbgmNfhgNGI\n9XX2uxEDgA6fvV8njYsTDz20WV4+q1bPKhQdSuW3Vao8oAyoBS4AnwbcMTHdOl29TncbSJmeXrPZ\nfJcucV0LuXVL5OTA7cbUFIaG2O9GDAA6Ijye0LagomK3unpNpxuU5SFJ+olWm56crASqgYvAHwMJ\nwHBu7o3c3EagwOOZczi2WluPeyddV5fIycHaGiYnQ/1uk5P4yU+4+hMDgI7gtiA1VVy+vFlePq1P\nx26sAAAGyElEQVRSTSsUbSrVNyWpIFhVfRV4CliNi+sqKakvKbkDJE1MbFitvmvXjt2SZzSKlJR7\n+92Gh9HQwNWfGAB0NC0vh7YFp0/7q6ptkmSXpB/pdBkJCWrgLFAH/DkQBwwVFDQXFDQCuevrCw7H\ndnv7seiku9vvNjoKqxUuF9bX0deHnh6u/sQAoAdrW5CVJS5d8paV+TvpbimV/1JcXBjsp
PsA8DSw\nlJjYXl5eX17eBiSOj7v7+33ve9+DuRra7ff2u62u4vp1Lv3EAKAH0dxcaFtw/vxOVdWKVrsiSVZJ\n+qFenxkb66+qrgP+NxAFDBQVNRUVNQPZq6uLQ0M7nZ0PSCddd7dISsLOTqDfzWTCygr73YgBQMdv\nW1BQIOrqPCUlLpXKpVDcVKm+UVhYBFQCdcBHgeeB+eTktsrK+srKdiBhdNRtMuGRR47qWnm3321q\nCg4H+vrg9WJ+Hq++ytWfGAB0zExMBBa+iAhRW7t96tSSRrMky2ZJetVgyIqM1AY76f4OEIC1uLix\nuPgGkLm8vDw4uNPdjWeeOTJLJ/vdiAFA9DZ2d0OLoEIhamrcJSVjSuWYQtGkVifn5RUHq6o/DXwF\nmElNvXP6dP3p051AvNPpMRrxoQ8d6mW0t/dt+t3Gx/HWW1z9iQFAFDQyElgTo6LExYvblZWLGs2i\nJBkl6fslJdmAPthJ9wFgF+iX5QZZvglkLCws+zvpvvjFw7Wqms0iIQGLixgfh9WK0VG43bDbcesW\nV39iABC9ne3t0PqoVovz5zdKSkZkeUSS6tXqlOxsRbCq+neAPwYmMzJunT3bcPZs985O3PCwt6cH\nH/vYwa+wNlug321kBBYLpqbY70YMAKJ3YnAwVFV95cpWRYW/k65Hll/U6fyddP5twUeArchIo0pV\nr1K1AGlzc6sDA7tdXfjyl8O95losIioKPh9mZ9nvRgwAondtb1W1wSDOndvQ653+TjqNJjUjQw5W\nVT8P/Cngysq6mZXVeOFCz9ZWrNO52dmJz3wmHOuvv+Hnnn63xUW8/DJXf2IAEN2HH7EDi2lsrLh2\nbevkyTm1ek6h6FQqX1Crc4FSoBY4D3wS8ERH92i1DVrtLSBlZmbNZvNdvLhfa/Hb9rvNzuKHP+Tq\nTwwAovvK6w3dMisv362uXtPr12TZoVD8TKtNS01VAmeAi8DvA18HRnJybubkNACFXu+sw7HV3o6n\nnrpvS/PdfreJCQwMwGZjvxsxAIj2395bZklJ4urVzbKyGbV6RqFoVyq/pVTmB6uqLwJPAGuxsd0G\nQ73BcAdInpxct9l8V668q2X6br/b+DhsNjid7HcjBgBR2K2thbYFp075O+lWZXlAkt7U6dITE1XB\nTrqvAfGAMz+/OT+/Ecjb2Jh3Orfb2t5xVbXFImJjMT+PsTFYLJiYwPo6urthNHL1JwYA0UFvCzIy\n/J10UyrVlCTdUSr/TaEoAE4CtcB7gd8BVhISOkpL60tL24BEl8ttsfje857//wru73ebm8PwMPr7\nMTfHfjdiABAdJgsLoW3B2bM7VVUrOt2KJNkk6XW9PjMuTh3spPtLIAYYLCxsKixsAnLW1hYdju2O\njl9bVX23381oxOoqlpbw0ktc/en+E3t/oiGidyk3V1y8iNJSqFRQKCJVqqSioiKgAqgDqoF8YAFo\nB+qBdmBsbMxjNvvm5xETIyYnfWYzPB489RSGhmA0wuPBwgL73YgBQHS0ppYQNTU4dQpaLSRJyHKs\nXp8VHa0Nbgv0QCRgAxqB5uvXW/YGQHU1+vuxuYnpafzoR5yhxAAgOrJOnBA1NSgthVIJhSJKrU7O\nzz8RrKo+A+Rcv160NwASEuD1YmwMv/gFpyftI54BEO27sbHAOh4ZKerqtisrF7VafyfdyyUl2ULo\n7vnzbjdsNty+zdWfGABED4qdndCarlSKCxfcBsOoUjkqy1Eu187d3/qP/+DST+HA/wIiOmDR0eLS\nJZSUYGUFGxts+CEGABER7bMIfgRERAwAIiJiABAREQOAiIgYAERExAAgIiIGABERMQCIiIgBQERE\nDAAiImIAEBERA4CIiBgARETEACAiIgYAERExAIiIiAFAREQMACIiYgAQEREDgIiIGABERMQAICIi\nBgARETEAiIgYAERExAAgIiIGABERMQCIiIgBQERED
AAiImIAEBERA4CIiBgARETEACAiIgYAEREx\nAIiIiAFAREQMACIiYgAQEREDgIiIGABERMQAICIiBgARETEAiIiIAUBERAwAIiJiABARMQCIiIgB\nQEREDAAiImIAEBERA4CIiBgARETEACAiIgYAEREdFf8PkjHA9hViIbwAAAAASUVORK5CYII=\n", - "text/plain": [ - "" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "L.image(zoom=1.0)" ] }, { "cell_type": "code", - "execution_count": 15, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "text/plain": [ - "160.0" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "L.eval(\"pe\")" ] }, { "cell_type": "code", - "execution_count": 16, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "L.atoms[3].position = (1.0, 0.0, -1.0)" @@ -274,10 +166,8 @@ }, { "cell_type": "code", - "execution_count": 17, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "L.run(0);" @@ -285,10 +175,8 @@ }, { "cell_type": "code", - "execution_count": 18, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "phi = [d * math.pi / 180 for d in range(360)]" @@ -296,10 +184,8 @@ }, { "cell_type": "code", - "execution_count": 19, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "pos = [(1.0, math.cos(p), math.sin(p)) for p in phi]" @@ -307,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": { "collapsed": true }, @@ -321,10 +207,8 @@ }, { "cell_type": "code", - "execution_count": 21, - "metadata": { - "collapsed": false - }, + "execution_count": null, + "metadata": {}, "outputs": [], "source": [ "pe = []\n", @@ -336,798 +220,9 @@ }, { "cell_type": 
"code", - "execution_count": 22, - "metadata": { - "collapsed": false - }, - "outputs": [ - { - "data": { - "application/javascript": [ - "/* Put everything inside the global mpl namespace */\n", - "window.mpl = {};\n", - "\n", - "mpl.get_websocket_type = function() {\n", - " if (typeof(WebSocket) !== 'undefined') {\n", - " return WebSocket;\n", - " } else if (typeof(MozWebSocket) !== 'undefined') {\n", - " return MozWebSocket;\n", - " } else {\n", - " alert('Your browser does not have WebSocket support.' +\n", - " 'Please try Chrome, Safari or Firefox ≥ 6. ' +\n", - " 'Firefox 4 and 5 are also supported but you ' +\n", - " 'have to enable WebSockets in about:config.');\n", - " };\n", - "}\n", - "\n", - "mpl.figure = function(figure_id, websocket, ondownload, parent_element) {\n", - " this.id = figure_id;\n", - "\n", - " this.ws = websocket;\n", - "\n", - " this.supports_binary = (this.ws.binaryType != undefined);\n", - "\n", - " if (!this.supports_binary) {\n", - " var warnings = document.getElementById(\"mpl-warnings\");\n", - " if (warnings) {\n", - " warnings.style.display = 'block';\n", - " warnings.textContent = (\n", - " \"This browser does not support binary websocket messages. \" +\n", - " \"Performance may be slow.\");\n", - " }\n", - " }\n", - "\n", - " this.imageObj = new Image();\n", - "\n", - " this.context = undefined;\n", - " this.message = undefined;\n", - " this.canvas = undefined;\n", - " this.rubberband_canvas = undefined;\n", - " this.rubberband_context = undefined;\n", - " this.format_dropdown = undefined;\n", - "\n", - " this.image_mode = 'full';\n", - "\n", - " this.root = $('
');\n", - " this._root_extra_style(this.root)\n", - " this.root.attr('style', 'display: inline-block');\n", - "\n", - " $(parent_element).append(this.root);\n", - "\n", - " this._init_header(this);\n", - " this._init_canvas(this);\n", - " this._init_toolbar(this);\n", - "\n", - " var fig = this;\n", - "\n", - " this.waiting = false;\n", - "\n", - " this.ws.onopen = function () {\n", - " fig.send_message(\"supports_binary\", {value: fig.supports_binary});\n", - " fig.send_message(\"send_image_mode\", {});\n", - " fig.send_message(\"refresh\", {});\n", - " }\n", - "\n", - " this.imageObj.onload = function() {\n", - " if (fig.image_mode == 'full') {\n", - " // Full images could contain transparency (where diff images\n", - " // almost always do), so we need to clear the canvas so that\n", - " // there is no ghosting.\n", - " fig.context.clearRect(0, 0, fig.canvas.width, fig.canvas.height);\n", - " }\n", - " fig.context.drawImage(fig.imageObj, 0, 0);\n", - " };\n", - "\n", - " this.imageObj.onunload = function() {\n", - " this.ws.close();\n", - " }\n", - "\n", - " this.ws.onmessage = this._make_on_message_function(this);\n", - "\n", - " this.ondownload = ondownload;\n", - "}\n", - "\n", - "mpl.figure.prototype._init_header = function() {\n", - " var titlebar = $(\n", - " '
');\n", - " var titletext = $(\n", - " '
');\n", - " titlebar.append(titletext)\n", - " this.root.append(titlebar);\n", - " this.header = titletext[0];\n", - "}\n", - "\n", - "\n", - "\n", - "mpl.figure.prototype._canvas_extra_style = function(canvas_div) {\n", - "\n", - "}\n", - "\n", - "\n", - "mpl.figure.prototype._root_extra_style = function(canvas_div) {\n", - "\n", - "}\n", - "\n", - "mpl.figure.prototype._init_canvas = function() {\n", - " var fig = this;\n", - "\n", - " var canvas_div = $('
');\n", - "\n", - " canvas_div.attr('style', 'position: relative; clear: both; outline: 0');\n", - "\n", - " function canvas_keyboard_event(event) {\n", - " return fig.key_event(event, event['data']);\n", - " }\n", - "\n", - " canvas_div.keydown('key_press', canvas_keyboard_event);\n", - " canvas_div.keyup('key_release', canvas_keyboard_event);\n", - " this.canvas_div = canvas_div\n", - " this._canvas_extra_style(canvas_div)\n", - " this.root.append(canvas_div);\n", - "\n", - " var canvas = $('');\n", - " canvas.addClass('mpl-canvas');\n", - " canvas.attr('style', \"left: 0; top: 0; z-index: 0; outline: 0\")\n", - "\n", - " this.canvas = canvas[0];\n", - " this.context = canvas[0].getContext(\"2d\");\n", - "\n", - " var rubberband = $('');\n", - " rubberband.attr('style', \"position: absolute; left: 0; top: 0; z-index: 1;\")\n", - "\n", - " var pass_mouse_events = true;\n", - "\n", - " canvas_div.resizable({\n", - " start: function(event, ui) {\n", - " pass_mouse_events = false;\n", - " },\n", - " resize: function(event, ui) {\n", - " fig.request_resize(ui.size.width, ui.size.height);\n", - " },\n", - " stop: function(event, ui) {\n", - " pass_mouse_events = true;\n", - " fig.request_resize(ui.size.width, ui.size.height);\n", - " },\n", - " });\n", - "\n", - " function mouse_event_fn(event) {\n", - " if (pass_mouse_events)\n", - " return fig.mouse_event(event, event['data']);\n", - " }\n", - "\n", - " rubberband.mousedown('button_press', mouse_event_fn);\n", - " rubberband.mouseup('button_release', mouse_event_fn);\n", - " // Throttle sequential mouse events to 1 every 20ms.\n", - " rubberband.mousemove('motion_notify', mouse_event_fn);\n", - "\n", - " rubberband.mouseenter('figure_enter', mouse_event_fn);\n", - " rubberband.mouseleave('figure_leave', mouse_event_fn);\n", - "\n", - " canvas_div.on(\"wheel\", function (event) {\n", - " event = event.originalEvent;\n", - " event['data'] = 'scroll'\n", - " if (event.deltaY < 0) {\n", - " event.step = 1;\n", - " } else 
{\n", - " event.step = -1;\n", - " }\n", - " mouse_event_fn(event);\n", - " });\n", - "\n", - " canvas_div.append(canvas);\n", - " canvas_div.append(rubberband);\n", - "\n", - " this.rubberband = rubberband;\n", - " this.rubberband_canvas = rubberband[0];\n", - " this.rubberband_context = rubberband[0].getContext(\"2d\");\n", - " this.rubberband_context.strokeStyle = \"#000000\";\n", - "\n", - " this._resize_canvas = function(width, height) {\n", - " // Keep the size of the canvas, canvas container, and rubber band\n", - " // canvas in synch.\n", - " canvas_div.css('width', width)\n", - " canvas_div.css('height', height)\n", - "\n", - " canvas.attr('width', width);\n", - " canvas.attr('height', height);\n", - "\n", - " rubberband.attr('width', width);\n", - " rubberband.attr('height', height);\n", - " }\n", - "\n", - " // Set the figure to an initial 600x600px, this will subsequently be updated\n", - " // upon first draw.\n", - " this._resize_canvas(600, 600);\n", - "\n", - " // Disable right mouse context menu.\n", - " $(this.rubberband_canvas).bind(\"contextmenu\",function(e){\n", - " return false;\n", - " });\n", - "\n", - " function set_focus () {\n", - " canvas.focus();\n", - " canvas_div.focus();\n", - " }\n", - "\n", - " window.setTimeout(set_focus, 100);\n", - "}\n", - "\n", - "mpl.figure.prototype._init_toolbar = function() {\n", - " var fig = this;\n", - "\n", - " var nav_element = $('
')\n", - " nav_element.attr('style', 'width: 100%');\n", - " this.root.append(nav_element);\n", - "\n", - " // Define a callback function for later on.\n", - " function toolbar_event(event) {\n", - " return fig.toolbar_button_onclick(event['data']);\n", - " }\n", - " function toolbar_mouse_event(event) {\n", - " return fig.toolbar_button_onmouseover(event['data']);\n", - " }\n", - "\n", - " for(var toolbar_ind in mpl.toolbar_items) {\n", - " var name = mpl.toolbar_items[toolbar_ind][0];\n", - " var tooltip = mpl.toolbar_items[toolbar_ind][1];\n", - " var image = mpl.toolbar_items[toolbar_ind][2];\n", - " var method_name = mpl.toolbar_items[toolbar_ind][3];\n", - "\n", - " if (!name) {\n", - " // put a spacer in here.\n", - " continue;\n", - " }\n", - " var button = $('