diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 911b6f0f15..40d3de0e7b 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -10,6 +10,9 @@ get_filename_component(LAMMPS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../src ABSO get_filename_component(LAMMPS_LIB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../lib ABSOLUTE) get_filename_component(LAMMPS_LIB_BINARY_DIR ${CMAKE_BINARY_DIR}/lib ABSOLUTE) get_filename_component(LAMMPS_DOC_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../doc ABSOLUTE) +get_filename_component(LAMMPS_TOOLS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tools ABSOLUTE) +get_filename_component(LAMMPS_PYTHON_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../python ABSOLUTE) +get_filename_component(LAMMPS_POTENTIALS_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../potentials ABSOLUTE) find_package(Git) @@ -23,57 +26,13 @@ file(GLOB LIB_SOURCES ${LAMMPS_SOURCE_DIR}/[^.]*.cpp) file(GLOB LMP_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp) list(REMOVE_ITEM LIB_SOURCES ${LMP_SOURCES}) -# Utility functions -function(list_to_bulletpoints result) - list(REMOVE_AT ARGV 0) - set(temp "") - foreach(item ${ARGV}) - set(temp "${temp}* ${item}\n") - endforeach() - set(${result} "${temp}" PARENT_SCOPE) -endfunction(list_to_bulletpoints) +# Cmake modules/macros are in a subdirectory to keep this file cleaner +set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules) -function(validate_option name values) - string(TOLOWER ${${name}} needle_lower) - string(TOUPPER ${${name}} needle_upper) - list(FIND ${values} ${needle_lower} IDX_LOWER) - list(FIND ${values} ${needle_upper} IDX_UPPER) - if(${IDX_LOWER} LESS 0 AND ${IDX_UPPER} LESS 0) - list_to_bulletpoints(POSSIBLE_VALUE_LIST ${${values}}) - message(FATAL_ERROR "\n########################################################################\n" - "Invalid value '${${name}}' for option ${name}\n" - "\n" - "Possible values are:\n" - "${POSSIBLE_VALUE_LIST}" - "########################################################################") - endif() 
-endfunction(validate_option) - -function(get_lammps_version version_header variable) - file(READ ${version_header} line) - set(MONTHS x Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec) - string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\1" day "${line}") - string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\2" month "${line}") - string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\3" year "${line}") - string(STRIP ${day} day) - string(STRIP ${month} month) - string(STRIP ${year} year) - list(FIND MONTHS "${month}" month) - string(LENGTH ${day} day_length) - string(LENGTH ${month} month_length) - if(day_length EQUAL 1) - set(day "0${day}") - endif() - if(month_length EQUAL 1) - set(month "0${month}") - endif() - set(${variable} "${year}${month}${day}" PARENT_SCOPE) -endfunction() +include(LAMMPSUtils) get_lammps_version(${LAMMPS_SOURCE_DIR}/version.h LAMMPS_VERSION) -# Cmake modules/macros are in a subdirectory to keep this file cleaner -set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules) include(PreventInSourceBuilds) @@ -85,22 +44,7 @@ string(TOUPPER "${CMAKE_BUILD_TYPE}" BTYPE) # check for files auto-generated by make-based buildsystem # this is fast, so check for it all the time -message(STATUS "Running check for auto-generated files from make-based build system") -file(GLOB SRC_AUTOGEN_FILES ${LAMMPS_SOURCE_DIR}/style_*.h) -file(GLOB SRC_AUTOGEN_PACKAGES ${LAMMPS_SOURCE_DIR}/packages_*.h) -list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${LAMMPS_SOURCE_DIR}/lmpinstalledpkgs.h ${LAMMPS_SOURCE_DIR}/lmpgitversion.h) -foreach(_SRC ${SRC_AUTOGEN_FILES}) - get_filename_component(FILENAME "${_SRC}" NAME) - if(EXISTS ${LAMMPS_SOURCE_DIR}/${FILENAME}) - message(FATAL_ERROR "\n########################################################################\n" - "Found header file(s) generated by the make-based build system\n" - "\n" - "Please run\n" - "make -C 
${LAMMPS_SOURCE_DIR} purge\n" - "to remove\n" - "########################################################################") - endif() -endforeach() +check_for_autogen_files(${LAMMPS_SOURCE_DIR}) ###################################################################### # compiler tests @@ -190,14 +134,6 @@ endforeach() ###################################################### # packages with special compiler needs or external libs ###################################################### -if(PKG_USER-QUIP OR PKG_USER-QMMM OR PKG_LATTE OR PKG_USER-SCAFACOS) - enable_language(Fortran) -endif() - -if(PKG_USER-H5MD OR PKG_USER-QMMM OR PKG_USER-SCAFACOS) - enable_language(C) -endif() - include_directories(${LAMMPS_SOURCE_DIR}) if(PKG_USER-ADIOS) @@ -252,59 +188,7 @@ endif() option(CMAKE_VERBOSE_MAKEFILE "Verbose makefile" OFF) -option(ENABLE_TESTING "Enable testing" OFF) -if(ENABLE_TESTING AND BUILD_EXE) - enable_testing() - option(LAMMPS_TESTING_SOURCE_DIR "Location of lammps-testing source directory" "") - option(LAMMPS_TESTING_GIT_TAG "Git tag of lammps-testing" "master") - mark_as_advanced(LAMMPS_TESTING_SOURCE_DIR LAMMPS_TESTING_GIT_TAG) - if (CMAKE_VERSION VERSION_GREATER "3.10.3" AND NOT LAMMPS_TESTING_SOURCE_DIR) - include(FetchContent) - - FetchContent_Declare(lammps-testing - GIT_REPOSITORY https://github.com/lammps/lammps-testing.git - GIT_TAG ${LAMMPS_TESTING_GIT_TAG} - ) - - FetchContent_GetProperties(lammps-testing) - if(NOT lammps-testing_POPULATED) - message(STATUS "Downloading tests...") - FetchContent_Populate(lammps-testing) - endif() - - set(LAMMPS_TESTING_SOURCE_DIR ${lammps-testing_SOURCE_DIR}) - elseif(NOT LAMMPS_TESTING_SOURCE_DIR) - message(WARNING "Full test-suite requires CMake >= 3.11 or copy of\n" - "https://github.com/lammps/lammps-testing in LAMMPS_TESTING_SOURCE_DIR") - endif() - - if(EXISTS ${LAMMPS_TESTING_SOURCE_DIR}) - message(STATUS "Running test discovery...") - - file(GLOB_RECURSE TEST_SCRIPTS ${LAMMPS_TESTING_SOURCE_DIR}/tests/core/*/in.*) 
- foreach(script_path ${TEST_SCRIPTS}) - get_filename_component(TEST_NAME ${script_path} EXT) - get_filename_component(SCRIPT_NAME ${script_path} NAME) - get_filename_component(PARENT_DIR ${script_path} DIRECTORY) - string(SUBSTRING ${TEST_NAME} 1 -1 TEST_NAME) - string(REPLACE "-" "_" TEST_NAME ${TEST_NAME}) - string(REPLACE "+" "_" TEST_NAME ${TEST_NAME}) - set(TEST_NAME "test_core_${TEST_NAME}_serial") - add_test(${TEST_NAME} ${CMAKE_BINARY_DIR}/${LAMMPS_BINARY} -in ${SCRIPT_NAME}) - set_tests_properties(${TEST_NAME} PROPERTIES WORKING_DIRECTORY ${PARENT_DIR}) - endforeach() - list(LENGTH TEST_SCRIPTS NUM_TESTS) - - message(STATUS "Found ${NUM_TESTS} tests.") - endif() -endif() - -macro(pkg_depends PKG1 PKG2) - if(PKG_${PKG1} AND NOT (PKG_${PKG2} OR BUILD_${PKG2})) - message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}") - endif() -endmacro() # "hard" dependencies between packages resulting # in an error instead of skipping over files @@ -338,44 +222,6 @@ if(BUILD_OMP) set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") endif() -if(PKG_KSPACE) - option(FFT_SINGLE "Use single precision FFT instead of double" OFF) - set(FFTW "FFTW3") - if(FFT_SINGLE) - set(FFTW "FFTW3F") - add_definitions(-DFFT_SINGLE) - endif() - find_package(${FFTW} QUIET) - if(${FFTW}_FOUND) - set(FFT "${FFTW}" CACHE STRING "FFT library for KSPACE package") - else() - set(FFT "KISS" CACHE STRING "FFT library for KSPACE package") - endif() - set(FFT_VALUES KISS ${FFTW} MKL) - set_property(CACHE FFT PROPERTY STRINGS ${FFT_VALUES}) - validate_option(FFT FFT_VALUES) - string(TOUPPER ${FFT} FFT) - if(NOT FFT STREQUAL "KISS") - find_package(${FFT} REQUIRED) - if(NOT FFT STREQUAL "FFTW3F") - add_definitions(-DFFT_FFTW) - else() - add_definitions(-DFFT_${FFT}) - endif() - include_directories(${${FFT}_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${${FFT}_LIBRARIES}) - else() - add_definitions(-DFFT_KISS) - endif() - set(FFT_PACK "array" CACHE STRING "Optimization for 
FFT") - set(FFT_PACK_VALUES array pointer memcpy) - set_property(CACHE FFT_PACK PROPERTY STRINGS ${FFT_PACK_VALUES}) - validate_option(FFT_PACK FFT_PACK_VALUES) - if(NOT FFT_PACK STREQUAL "array") - string(TOUPPER ${FFT_PACK} FFT_PACK) - add_definitions(-DFFT_PACK_${FFT_PACK}) - endif() -endif() if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-PLUMED OR PKG_USER-QUIP OR PKG_LATTE) find_package(LAPACK) @@ -390,12 +236,6 @@ if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-PLUMED OR PKG_USER-QUI endif() endif() -if(PKG_PYTHON) - find_package(PythonLibs REQUIRED) - add_definitions(-DLMP_PYTHON) - include_directories(${PYTHON_INCLUDE_DIR}) - list(APPEND LAMMPS_LINK_LIBS ${PYTHON_LIBRARY}) -endif() find_package(JPEG QUIET) option(WITH_JPEG "Enable JPEG support" ${JPEG_FOUND}) @@ -451,398 +291,21 @@ else() set(CUDA_REQUEST_PIC) endif() -if(PKG_VORONOI) - find_package(VORO) - if(VORO_FOUND) - set(DOWNLOAD_VORO_DEFAULT OFF) - else() - set(DOWNLOAD_VORO_DEFAULT ON) - endif() - option(DOWNLOAD_VORO "Download and compile the Voro++ library instead of using an already installed one" ${DOWNLOAD_VORO_DEFAULT}) - if(DOWNLOAD_VORO) - message(STATUS "Voro++ download requested - we will build our own") - include(ExternalProject) - - if(BUILD_SHARED_LIBS) - set(VORO_BUILD_CFLAGS "${CMAKE_SHARED_LIBRARY_CXX_FLAGS} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}") - else() - set(VORO_BUILD_CFLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}") - endif() - string(APPEND VORO_BUILD_CFLAGS ${CMAKE_CXX_FLAGS}) - set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS}) - - ExternalProject_Add(voro_build - URL https://download.lammps.org/thirdparty/voro++-0.4.6.tar.gz - URL_MD5 2338b824c3b7b25590e18e8df5d68af9 - CONFIGURE_COMMAND "" BUILD_COMMAND make ${VORO_BUILD_OPTIONS} BUILD_IN_SOURCE 1 INSTALL_COMMAND "" - ) - ExternalProject_get_property(voro_build SOURCE_DIR) - set(VORO_LIBRARIES ${SOURCE_DIR}/src/libvoro++.a) - set(VORO_INCLUDE_DIRS 
${SOURCE_DIR}/src) - list(APPEND LAMMPS_DEPS voro_build) - else() - find_package(VORO) - if(NOT VORO_FOUND) - message(FATAL_ERROR "Voro++ library not found. Help CMake to find it by setting VORO_LIBRARY and VORO_INCLUDE_DIR, or set DOWNLOAD_VORO=ON to download it") - endif() - endif() - include_directories(${VORO_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES}) -endif() - -if(PKG_LATTE) - find_package(LATTE) - if(LATTE_FOUND) - set(DOWNLOAD_LATTE_DEFAULT OFF) - else() - set(DOWNLOAD_LATTE_DEFAULT ON) - endif() - option(DOWNLOAD_LATTE "Download the LATTE library instead of using an already installed one" ${DOWNLOAD_LATTE_DEFAULT}) - if(DOWNLOAD_LATTE) - if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR - message(FATAL_ERROR "For downlading LATTE you need at least cmake-3.7") - endif() - message(STATUS "LATTE download requested - we will build our own") - include(ExternalProject) - ExternalProject_Add(latte_build - URL https://github.com/lanl/LATTE/archive/v1.2.1.tar.gz - URL_MD5 85ac414fdada2d04619c8f936344df14 - SOURCE_SUBDIR cmake - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= ${CMAKE_REQUEST_PIC} - -DBLAS_LIBRARIES=${BLAS_LIBRARIES} -DLAPACK_LIBRARIES=${LAPACK_LIBRARIES} - -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -DCMAKE_Fortran_FLAGS=${CMAKE_Fortran_FLAGS} - -DCMAKE_Fortran_FLAGS_${BTYPE}=${CMAKE_Fortran_FLAGS_${BTYPE}} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - ) - ExternalProject_get_property(latte_build INSTALL_DIR) - set(LATTE_LIBRARIES ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/liblatte.a) - list(APPEND LAMMPS_DEPS latte_build) - else() - find_package(LATTE) - if(NOT LATTE_FOUND) - message(FATAL_ERROR "LATTE library not found, help CMake to find it by setting LATTE_LIBRARY, or set DOWNLOAD_LATTE=ON to download it") - endif() - endif() - list(APPEND LAMMPS_LINK_LIBS ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES}) -endif() - -if(PKG_USER-SCAFACOS) - find_package(GSL REQUIRED) - find_package(PkgConfig QUIET) - set(DOWNLOAD_SCAFACOS_DEFAULT ON) - 
if(PKG_CONFIG_FOUND) - pkg_check_modules(SCAFACOS QUIET scafacos) - if(SCAFACOS_FOUND) - set(DOWNLOAD_SCAFACOS_DEFAULT OFF) - endif() - endif() - option(DOWNLOAD_SCAFACOS "Download ScaFaCoS library instead of using an already installed one" ${DOWNLOAD_SCAFACOS_DEFAULT}) - if(DOWNLOAD_SCAFACOS) - message(STATUS "ScaFaCoS download requested - we will build our own") - include(ExternalProject) - ExternalProject_Add(scafacos_build - URL https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz - URL_MD5 bd46d74e3296bd8a444d731bb10c1738 - CONFIGURE_COMMAND /configure --prefix= --disable-doc - --enable-fcs-solvers=fmm,p2nfft,direct,ewald,p3m - --with-internal-fftw --with-internal-pfft - --with-internal-pnfft ${CONFIGURE_REQUEST_PIC} - FC=${CMAKE_MPI_Fortran_COMPILER} - CXX=${CMAKE_MPI_CXX_COMPILER} - CC=${CMAKE_MPI_C_COMPILER} - F77= - ) - ExternalProject_get_property(scafacos_build INSTALL_DIR) - set(SCAFACOS_BUILD_DIR ${INSTALL_DIR}) - set(SCAFACOS_INCLUDE_DIRS ${SCAFACOS_BUILD_DIR}/include) - list(APPEND LAMMPS_DEPS scafacos_build) - # list and order from pkg_config file of ScaFaCoS build - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_direct.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_ewald.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fmm.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p2nfft.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p3m.a) - list(APPEND LAMMPS_LINK_LIBS ${GSL_LIBRARIES}) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_near.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_gridsort.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_resort.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_redist.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_common.a) - list(APPEND 
LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pnfft.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pfft.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3_mpi.a) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3.a) - list(APPEND LAMMPS_LINK_LIBS ${MPI_Fortran_LIBRARIES}) - list(APPEND LAMMPS_LINK_LIBS ${MPI_C_LIBRARIES}) - else() - find_package(PkgConfig REQUIRED) - pkg_check_modules(SCAFACOS REQUIRED scafacos) - list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_LDFLAGS}) - endif() - include_directories(${SCAFACOS_INCLUDE_DIRS}) -endif() - -if(PKG_USER-PLUMED) - find_package(GSL REQUIRED) - set(PLUMED_MODE "static" CACHE STRING "Linkage mode for Plumed2 library") - set(PLUMED_MODE_VALUES static shared runtime) - set_property(CACHE PLUMED_MODE PROPERTY STRINGS ${PLUMED_MODE_VALUES}) - validate_option(PLUMED_MODE PLUMED_MODE_VALUES) - string(TOUPPER ${PLUMED_MODE} PLUMED_MODE) - - find_package(PkgConfig QUIET) - set(DOWNLOAD_PLUMED_DEFAULT ON) - if(PKG_CONFIG_FOUND) - pkg_check_modules(PLUMED QUIET plumed) - if(PLUMED_FOUND) - set(DOWNLOAD_PLUMED_DEFAULT OFF) - endif() - endif() - - option(DOWNLOAD_PLUMED "Download Plumed package instead of using an already installed one" ${DOWNLOAD_PLUMED_DEFAULT}) - if(DOWNLOAD_PLUMED) - if(BUILD_MPI) - set(PLUMED_CONFIG_MPI "--enable-mpi") - set(PLUMED_CONFIG_CC ${CMAKE_MPI_C_COMPILER}) - set(PLUMED_CONFIG_CXX ${CMAKE_MPI_CXX_COMPILER}) - else() - set(PLUMED_CONFIG_MPI "--disable-mpi") - set(PLUMED_CONFIG_CC ${CMAKE_C_COMPILER}) - set(PLUMED_CONFIG_CXX ${CMAKE_CXX_COMPILER}) - endif() - if(BUILD_OMP) - set(PLUMED_CONFIG_OMP "--enable-openmp") - else() - set(PLUMED_CONFIG_OMP "--disable-openmp") - endif() - message(STATUS "PLUMED download requested - we will build our own") - include(ExternalProject) - ExternalProject_Add(plumed_build - URL https://github.com/plumed/plumed2/releases/download/v2.5.1/plumed-src-2.5.1.tgz - URL_MD5 c2a7b519e32197a120cdf47e0f194f81 - 
BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND /configure --prefix= - ${CONFIGURE_REQUEST_PIC} - --enable-modules=all - ${PLUMED_CONFIG_MPI} - ${PLUMED_CONFIG_OMP} - CXX=${PLUMED_CONFIG_CXX} - CC=${PLUMED_CONFIG_CC} - ) - ExternalProject_get_property(plumed_build INSTALL_DIR) - set(PLUMED_INSTALL_DIR ${INSTALL_DIR}) - list(APPEND LAMMPS_DEPS plumed_build) - if(PLUMED_MODE STREQUAL "STATIC") - add_definitions(-D__PLUMED_WRAPPER_CXX=1) - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${GSL_LIBRARIES} ${LAPACK_LIBRARIES} ${CMAKE_DL_LIBS}) - elseif(PLUMED_MODE STREQUAL "SHARED") - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.so ${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so ${CMAKE_DL_LIBS}) - elseif(PLUMED_MODE STREQUAL "RUNTIME") - add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so) - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumedWrapper.a -rdynamic ${CMAKE_DL_LIBS}) - endif() - set(PLUMED_INCLUDE_DIRS "${PLUMED_INSTALL_DIR}/include") - else() - find_package(PkgConfig REQUIRED) - pkg_check_modules(PLUMED REQUIRED plumed) - if(PLUMED_MODE STREQUAL "STATIC") - add_definitions(-D__PLUMED_WRAPPER_CXX=1) - include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.static) - elseif(PLUMED_MODE STREQUAL "SHARED") - include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.shared) - elseif(PLUMED_MODE STREQUAL "RUNTIME") - add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel.so) - include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.runtime) - endif() - list(APPEND LAMMPS_LINK_LIBS ${PLUMED_LOAD}) - endif() - include_directories(${PLUMED_INCLUDE_DIRS}) -endif() - -if(PKG_USER-MOLFILE) - set(MOLFILE_INCLUDE_DIRS "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE STRING "Path to VMD molfile plugin headers") - add_library(molfile INTERFACE) - target_include_directories(molfile INTERFACE ${MOLFILE_INCLUDE_DIRS}) - # no need to link with -ldl on 
windows - if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") - target_link_libraries(molfile INTERFACE ${CMAKE_DL_LIBS}) - endif() - list(APPEND LAMMPS_LINK_LIBS molfile) -endif() - -if(PKG_USER-NETCDF) - find_package(NetCDF REQUIRED) - include_directories(${NETCDF_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARIES}) - add_definitions(-DLMP_HAS_NETCDF -DNC_64BIT_DATA=0x0020) -endif() - -if(PKG_USER-SMD) - find_package(Eigen3 NO_MODULE) - if(EIGEN3_FOUND) - set(DOWNLOAD_EIGEN3_DEFAULT OFF) - else() - set(DOWNLOAD_EIGEN3_DEFAULT ON) - endif() - option(DOWNLOAD_EIGEN3 "Download Eigen3 instead of using an already installed one)" ${DOWNLOAD_EIGEN3_DEFAULT}) - if(DOWNLOAD_EIGEN3) - message(STATUS "Eigen3 download requested - we will build our own") - include(ExternalProject) - ExternalProject_Add(Eigen3_build - URL http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz - URL_MD5 f2a417d083fe8ca4b8ed2bc613d20f07 - CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" - ) - ExternalProject_get_property(Eigen3_build SOURCE_DIR) - set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR}) - list(APPEND LAMMPS_DEPS Eigen3_build) - else() - find_package(Eigen3 NO_MODULE) - mark_as_advanced(Eigen3_DIR) - if(NOT EIGEN3_FOUND) - message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_EIGEN3=ON to download it") - endif() - endif() - include_directories(${EIGEN3_INCLUDE_DIR}) -endif() - -if(PKG_USER-QUIP) - find_package(QUIP REQUIRED) - list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${LAPACK_LIBRARIES}) -endif() - -if(PKG_USER-QMMM) - message(WARNING "Building QMMM with CMake is still experimental") - find_package(QE REQUIRED) - include_directories(${QE_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${QE_LIBRARIES}) -endif() - -if(PKG_USER-VTK) - find_package(VTK REQUIRED NO_MODULE) - include(${VTK_USE_FILE}) - add_definitions(-DLAMMPS_VTK) - list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES}) -endif() - -if(PKG_KIM) - find_package(CURL) - 
if(CURL_FOUND) - include_directories(${CURL_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${CURL_LIBRARIES}) - add_definitions(-DLMP_KIM_CURL) - endif() - find_package(KIM-API QUIET) - if(KIM-API_FOUND) - set(DOWNLOAD_KIM_DEFAULT OFF) - else() - set(DOWNLOAD_KIM_DEFAULT ON) - endif() - option(DOWNLOAD_KIM "Download KIM-API from OpenKIM instead of using an already installed one" ${DOWNLOAD_KIM_DEFAULT}) - if(DOWNLOAD_KIM) - message(STATUS "KIM-API download requested - we will build our own") - enable_language(C) - enable_language(Fortran) - include(ExternalProject) - ExternalProject_Add(kim_build - URL https://s3.openkim.org/kim-api/kim-api-2.0.2.txz - URL_MD5 537d9c0abd30f85b875ebb584f9143fa - BINARY_DIR build - CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} - -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} - -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} - -DCMAKE_INSTALL_PREFIX= - -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} - ) - ExternalProject_get_property(kim_build INSTALL_DIR) - set(KIM-API_INCLUDE_DIRS ${INSTALL_DIR}/include/kim-api) - set(KIM-API_LDFLAGS ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libkim-api${CMAKE_SHARED_LIBRARY_SUFFIX}) - list(APPEND LAMMPS_DEPS kim_build) - else() - find_package(KIM-API REQUIRED) - endif() - list(APPEND LAMMPS_LINK_LIBS "${KIM-API_LDFLAGS}") - include_directories(${KIM-API_INCLUDE_DIRS}) -endif() - -if(PKG_MESSAGE) - option(MESSAGE_ZMQ "Use ZeroMQ in MESSAGE package" OFF) - file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.F - ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.c - ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.cpp) - - add_library(cslib STATIC ${cslib_SOURCES}) - if(BUILD_MPI) - target_compile_definitions(cslib PRIVATE -DMPI_YES) - set_target_properties(cslib PROPERTIES OUTPUT_NAME "csmpi") - else() - target_compile_definitions(cslib PRIVATE -DMPI_NO) - target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_MPI) - set_target_properties(cslib PROPERTIES 
OUTPUT_NAME "csnompi") - endif() - - if(MESSAGE_ZMQ) - target_compile_definitions(cslib PRIVATE -DZMQ_YES) - find_package(ZMQ REQUIRED) - target_include_directories(cslib PRIVATE ${ZMQ_INCLUDE_DIRS}) - target_link_libraries(cslib PUBLIC ${ZMQ_LIBRARIES}) - else() - target_compile_definitions(cslib PRIVATE -DZMQ_NO) - target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_ZMQ) - endif() - - list(APPEND LAMMPS_LINK_LIBS cslib) - include_directories(${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src) -endif() - -if(PKG_MSCG) - find_package(GSL REQUIRED) - find_package(MSCG QUIET) - if(MSGC_FOUND) - set(DOWNLOAD_MSCG_DEFAULT OFF) - else() - set(DOWNLOAD_MSCG_DEFAULT ON) - endif() - option(DOWNLOAD_MSCG "Download MSCG library instead of using an already installed one)" ${DOWNLOAD_MSCG_DEFAULT}) - if(DOWNLOAD_MSCG) - if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR - message(FATAL_ERROR "For downlading MSCG you need at least cmake-3.7") - endif() - include(ExternalProject) - if(NOT LAPACK_FOUND) - set(EXTRA_MSCG_OPTS "-DLAPACK_LIBRARIES=${CMAKE_CURRENT_BINARY_DIR}/liblinalg.a") - endif() - ExternalProject_Add(mscg_build - URL https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz - URL_MD5 8c45e269ee13f60b303edd7823866a91 - SOURCE_SUBDIR src/CMake - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= ${CMAKE_REQUEST_PIC} ${EXTRA_MSCG_OPTS} - BUILD_COMMAND make mscg INSTALL_COMMAND "" - ) - ExternalProject_get_property(mscg_build BINARY_DIR) - set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a) - ExternalProject_get_property(mscg_build SOURCE_DIR) - set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src) - list(APPEND LAMMPS_DEPS mscg_build) - if(NOT LAPACK_FOUND) - file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS}) - add_dependencies(mscg_build linalg) - endif() - else() - find_package(MSCG) - if(NOT MSCG_FOUND) - message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it") - 
endif() - endif() - list(APPEND LAMMPS_LINK_LIBS ${MSCG_LIBRARIES} ${GSL_LIBRARIES} ${LAPACK_LIBRARIES}) - include_directories(${MSCG_INCLUDE_DIRS}) -endif() - -if(PKG_COMPRESS) - find_package(ZLIB REQUIRED) - include_directories(${ZLIB_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS ${ZLIB_LIBRARIES}) -endif() +include(Packages/KSPACE) +include(Packages/PYTHON) +include(Packages/VORONOI) +include(Packages/USER-SCAFACOS) +include(Packages/USER-PLUMED) +include(Packages/USER-MOLFILE) +include(Packages/USER-NETCDF) +include(Packages/USER-SMD) +include(Packages/USER-QUIP) +include(Packages/USER-QMMM) +include(Packages/USER-VTK) +include(Packages/KIM) +include(Packages/MESSAGE) +include(Packages/MSCG) +include(Packages/COMPRESS) # the windows version of LAMMPS requires a couple extra libraries if(${CMAKE_SYSTEM_NAME} STREQUAL "Windows") @@ -962,491 +425,20 @@ if(PKG_USER-ATC) target_link_libraries(atc ${LAPACK_LIBRARIES}) endif() -if(PKG_USER-H5MD) - find_package(HDF5 REQUIRED) - target_link_libraries(h5md ${HDF5_LIBRARIES}) - target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS}) - include_directories(${HDF5_INCLUDE_DIRS}) -endif() +include(Packages/USER-H5MD) ###################################################################### # packages which selectively include variants based on enabled styles # e.g. accelerator packages ###################################################################### -if(PKG_CORESHELL) - set(CORESHELL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/CORESHELL) - set(CORESHELL_SOURCES) - set_property(GLOBAL PROPERTY "CORESHELL_SOURCES" "${CORESHELL_SOURCES}") - - # detects styles which have a CORESHELL version - RegisterStylesExt(${CORESHELL_SOURCES_DIR} cs CORESHELL_SOURCES) - - get_property(CORESHELL_SOURCES GLOBAL PROPERTY CORESHELL_SOURCES) - - list(APPEND LIB_SOURCES ${CORESHELL_SOURCES}) - include_directories(${CORESHELL_SOURCES_DIR}) -endif() - -# Fix qeq/fire requires MANYBODY (i.e. 
COMB and COMB3) to be installed -if(PKG_QEQ) - set(QEQ_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/QEQ) - file(GLOB QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix*.h) - file(GLOB QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix*.cpp) - - if(NOT PKG_MANYBODY) - list(REMOVE_ITEM QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix_qeq_fire.h) - list(REMOVE_ITEM QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix_qeq_fire.cpp) - endif() - set_property(GLOBAL PROPERTY "QEQ_SOURCES" "${QEQ_SOURCES}") - - foreach(MY_HEADER ${QEQ_HEADERS}) - AddStyleHeader(${MY_HEADER} FIX) - endforeach() - - get_property(QEQ_SOURCES GLOBAL PROPERTY QEQ_SOURCES) - list(APPEND LIB_SOURCES ${QEQ_SOURCES}) - include_directories(${QEQ_SOURCES_DIR}) -endif() - -if(PKG_USER-OMP) - set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP) - set(USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/thr_data.cpp - ${USER-OMP_SOURCES_DIR}/thr_omp.cpp - ${USER-OMP_SOURCES_DIR}/fix_omp.cpp - ${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp - ${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp - ${USER-OMP_SOURCES_DIR}/domain_omp.cpp) - add_definitions(-DLMP_USER_OMP) - set_property(GLOBAL PROPERTY "OMP_SOURCES" "${USER-OMP_SOURCES}") - - # detects styles which have USER-OMP version - RegisterStylesExt(${USER-OMP_SOURCES_DIR} omp OMP_SOURCES) - RegisterFixStyle(${USER-OMP_SOURCES_DIR}/fix_omp.h) - - get_property(USER-OMP_SOURCES GLOBAL PROPERTY OMP_SOURCES) - - # manually add package dependent source files from USER-OMP that do not provide styles - - if(PKG_ASPHERE) - list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_nh_asphere_omp.cpp) - endif() - - if(PKG_RIGID) - list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_rigid_nh_omp.cpp) - endif() - - if(PKG_USER-REAXC) - list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/reaxc_bond_orders_omp.cpp - ${USER-OMP_SOURCES_DIR}/reaxc_hydrogen_bonds_omp.cpp - ${USER-OMP_SOURCES_DIR}/reaxc_nonbonded_omp.cpp - ${USER-OMP_SOURCES_DIR}/reaxc_bonds_omp.cpp - ${USER-OMP_SOURCES_DIR}/reaxc_init_md_omp.cpp - 
${USER-OMP_SOURCES_DIR}/reaxc_torsion_angles_omp.cpp - ${USER-OMP_SOURCES_DIR}/reaxc_forces_omp.cpp - ${USER-OMP_SOURCES_DIR}/reaxc_multi_body_omp.cpp - ${USER-OMP_SOURCES_DIR}/reaxc_valence_angles_omp.cpp) - endif() - - list(APPEND LIB_SOURCES ${USER-OMP_SOURCES}) - include_directories(${USER-OMP_SOURCES_DIR}) -endif() - -# Fix rigid/meso requires RIGID to be installed -if(PKG_USER-SDPD) - set(USER-SDPD_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-SDPD) - - get_property(hlist GLOBAL PROPERTY FIX) - if(NOT PKG_RIGID) - list(REMOVE_ITEM hlist ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.h) - list(REMOVE_ITEM LIB_SOURCES ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.cpp) - endif() - set_property(GLOBAL PROPERTY FIX "${hlist}") - - include_directories(${USER-SDPD_SOURCES_DIR}) -endif() - -if(PKG_KOKKOS) - set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) - set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos) - add_definitions(-DLMP_KOKKOS) - add_subdirectory(${LAMMPS_LIB_KOKKOS_SRC_DIR} ${LAMMPS_LIB_KOKKOS_BIN_DIR}) - - set(Kokkos_INCLUDE_DIRS ${LAMMPS_LIB_KOKKOS_SRC_DIR}/core/src - ${LAMMPS_LIB_KOKKOS_SRC_DIR}/containers/src - ${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src - ${LAMMPS_LIB_KOKKOS_BIN_DIR}) - include_directories(${Kokkos_INCLUDE_DIRS}) - list(APPEND LAMMPS_LINK_LIBS kokkos) - - set(KOKKOS_PKG_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/KOKKOS) - set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/fix_nh_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp - ${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp) - - 
if(PKG_KSPACE) - list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp) - endif() - - set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}") - - # detects styles which have KOKKOS version - RegisterStylesExt(${KOKKOS_PKG_SOURCES_DIR} kokkos KOKKOS_PKG_SOURCES) - - # register kokkos-only styles - RegisterNBinStyle(${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.h) - RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.h) - - if(PKG_USER-DPD) - get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES) - list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.cpp) - RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.h) - set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}") - endif() - - get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES) - - list(APPEND LIB_SOURCES ${KOKKOS_PKG_SOURCES}) - include_directories(${KOKKOS_PKG_SOURCES_DIR}) -endif() - -if(PKG_OPT) - set(OPT_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/OPT) - set(OPT_SOURCES) - set_property(GLOBAL PROPERTY "OPT_SOURCES" "${OPT_SOURCES}") - - # detects styles which have OPT version - RegisterStylesExt(${OPT_SOURCES_DIR} opt OPT_SOURCES) - - get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES) - - list(APPEND LIB_SOURCES ${OPT_SOURCES}) - include_directories(${OPT_SOURCES_DIR}) -endif() - -if(PKG_USER-INTEL) - include(CheckIncludeFile) - check_include_file(immintrin.h FOUND_IMMINTRIN) - if(NOT FOUND_IMMINTRIN) - message(FATAL_ERROR "immintrin.h header not found, Intel package won't work without it") - endif() - - add_definitions(-DLMP_USER_INTEL) - - set(INTEL_ARCH "cpu" CACHE STRING "Architectures used by USER-INTEL (cpu or knl)") - set(INTEL_ARCH_VALUES cpu knl) - set_property(CACHE INTEL_ARCH PROPERTY STRINGS ${INTEL_ARCH_VALUES}) - validate_option(INTEL_ARCH INTEL_ARCH_VALUES) - string(TOUPPER ${INTEL_ARCH} INTEL_ARCH) - - find_package(Threads QUIET) - if(Threads_FOUND) - set(INTEL_LRT_MODE "threads" 
CACHE STRING "Long-range threads mode (none, threads, or c++11)") - else() - set(INTEL_LRT_MODE "none" CACHE STRING "Long-range threads mode (none, threads, or c++11)") - endif() - set(INTEL_LRT_VALUES none threads c++11) - set_property(CACHE INTEL_LRT_MODE PROPERTY STRINGS ${INTEL_LRT_VALUES}) - validate_option(INTEL_LRT_MODE INTEL_LRT_VALUES) - string(TOUPPER ${INTEL_LRT_MODE} INTEL_LRT_MODE) - if(INTEL_LRT_MODE STREQUAL "THREADS") - if(Threads_FOUND) - add_definitions(-DLMP_INTEL_USELRT) - list(APPEND LAMMPS_LINK_LIBS ${CMAKE_THREAD_LIBS_INIT}) - else() - message(FATAL_ERROR "Must have working threads library for Long-range thread support") - endif() - endif() - if(INTEL_LRT_MODE STREQUAL "C++11") - add_definitions(-DLMP_INTEL_USERLRT -DLMP_INTEL_LRT11) - endif() - - if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) - message(FATAL_ERROR "USER-INTEL needs at least a 2016 Intel compiler, found ${CMAKE_CXX_COMPILER_VERSION}") - endif() - else() - message(WARNING "USER-INTEL gives best performance with Intel compilers") - endif() - - find_package(TBB QUIET) - if(TBB_FOUND) - list(APPEND LAMMPS_LINK_LIBS ${TBB_MALLOC_LIBRARIES}) - else() - add_definitions(-DLMP_INTEL_NO_TBB) - if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - message(WARNING "USER-INTEL with Intel compilers should use TBB malloc libraries") - endif() - endif() - - find_package(MKL QUIET) - if(MKL_FOUND) - add_definitions(-DLMP_USE_MKL_RNG) - list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) - else() - message(STATUS "Pair style dpd/intel will be faster with MKL libraries") - endif() - - if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "64") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "128") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "256")) - message(FATAL_ERROR "USER-INTEL only supports memory alignment of 64, 128 or 256 on this platform") - endif() - - if(INTEL_ARCH STREQUAL "KNL") - if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - 
message(FATAL_ERROR "Must use Intel compiler with USER-INTEL for KNL architecture") - endif() - set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -xHost -qopenmp -qoffload") - set(MIC_OPTIONS "-qoffload-option,mic,compiler,\"-fp-model fast=2 -mGLOB_default_function_attrs=\\\"gather_scatter_loop_unroll=4\\\"\"") - add_compile_options(-xMIC-AVX512 -qoffload -fno-alias -ansi-alias -restrict -qoverride-limits ${MIC_OPTIONS}) - add_definitions(-DLMP_INTEL_OFFLOAD) - else() - if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") - if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xCOMMON-AVX512") - else() - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost") - endif() - include(CheckCXXCompilerFlag) - foreach(_FLAG -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -qopt-zmm-usage=high -qno-offload -fno-alias -ansi-alias -restrict) - check_cxx_compiler_flag("${__FLAG}" COMPILER_SUPPORTS${_FLAG}) - if(COMPILER_SUPPORTS${_FLAG}) - add_compile_options(${_FLAG}) - endif() - endforeach() - else() - add_compile_options(-O3 -ffast-math) - endif() - endif() - - # collect sources - set(USER-INTEL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-INTEL) - set(USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/fix_intel.cpp - ${USER-INTEL_SOURCES_DIR}/fix_nh_intel.cpp - ${USER-INTEL_SOURCES_DIR}/intel_buffers.cpp - ${USER-INTEL_SOURCES_DIR}/nbin_intel.cpp - ${USER-INTEL_SOURCES_DIR}/npair_intel.cpp) - - set_property(GLOBAL PROPERTY "USER-INTEL_SOURCES" "${USER-INTEL_SOURCES}") - - # detect styles which have a USER-INTEL version - RegisterStylesExt(${USER-INTEL_SOURCES_DIR} intel USER-INTEL_SOURCES) - RegisterNBinStyle(${USER-INTEL_SOURCES_DIR}/nbin_intel.h) - RegisterNPairStyle(${USER-INTEL_SOURCES_DIR}/npair_intel.h) - RegisterFixStyle(${USER-INTEL_SOURCES_DIR}/fix_intel.h) - - get_property(USER-INTEL_SOURCES GLOBAL PROPERTY USER-INTEL_SOURCES) - if(PKG_KSPACE) - list(APPEND USER-INTEL_SOURCES 
${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.cpp) - RegisterIntegrateStyle(${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.h) - endif() - - list(APPEND LIB_SOURCES ${USER-INTEL_SOURCES}) - include_directories(${USER-INTEL_SOURCES_DIR}) -endif() - -if(PKG_GPU) - if (CMAKE_VERSION VERSION_LESS "3.1") - message(FATAL_ERROR "For the GPU package you need at least cmake-3.1") - endif() - set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU) - set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h - ${GPU_SOURCES_DIR}/fix_gpu.h - ${GPU_SOURCES_DIR}/fix_gpu.cpp) - - set(GPU_API "opencl" CACHE STRING "API used by GPU package") - set(GPU_API_VALUES opencl cuda) - set_property(CACHE GPU_API PROPERTY STRINGS ${GPU_API_VALUES}) - validate_option(GPU_API GPU_API_VALUES) - string(TOUPPER ${GPU_API} GPU_API) - - set(GPU_PREC "mixed" CACHE STRING "LAMMPS GPU precision") - set(GPU_PREC_VALUES double mixed single) - set_property(CACHE GPU_PREC PROPERTY STRINGS ${GPU_PREC_VALUES}) - validate_option(GPU_PREC GPU_PREC_VALUES) - string(TOUPPER ${GPU_PREC} GPU_PREC) - - if(GPU_PREC STREQUAL "DOUBLE") - set(GPU_PREC_SETTING "DOUBLE_DOUBLE") - elseif(GPU_PREC STREQUAL "MIXED") - set(GPU_PREC_SETTING "SINGLE_DOUBLE") - elseif(GPU_PREC STREQUAL "SINGLE") - set(GPU_PREC_SETTING "SINGLE_SINGLE") - endif() - - file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp) - file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu) - - if(GPU_API STREQUAL "CUDA") - find_package(CUDA REQUIRED) - find_program(BIN2C bin2c) - if(NOT BIN2C) - message(FATAL_ERROR "Could not find bin2c, use -DBIN2C=/path/to/bin2c to help cmake finding it.") - endif() - option(CUDPP_OPT "Enable CUDPP_OPT" ON) - option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF) - if(CUDA_MPS_SUPPORT) - set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY") - endif() - - set(GPU_ARCH "sm_30" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. 
sm_60)") - - file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu) - list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu) - - cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu) - - if(CUDPP_OPT) - cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini) - file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cpp) - file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cu) - endif() - - # build arch/gencode commands for nvcc based on CUDA toolkit version and use choice - # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture - set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH} ") - # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0 - if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0")) - string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_20,code=[sm_20,compute_20] ") - endif() - # Kepler (GPU Arch 3.x) is supported by CUDA 5 and later - if(CUDA_VERSION VERSION_GREATER "4.9") - string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ") - endif() - # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later - if(CUDA_VERSION VERSION_GREATER "5.9") - string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] ") - endif() - # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later - if(CUDA_VERSION VERSION_GREATER "7.9") - string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] ") - endif() - # Volta (GPU Arch 7.0) is supported by CUDA 9 and later - if(CUDA_VERSION VERSION_GREATER "8.9") - string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_70,code=[sm_70,compute_70] ") - endif() - # Turing (GPU Arch 7.5) is supported by CUDA 10 
and later - if(CUDA_VERSION VERSION_GREATER "9.9") - string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_75,code=[sm_75,compute_75] ") - endif() - - cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS - -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING}) - - cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC} - -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING}) - - foreach(CU_OBJ ${GPU_GEN_OBJS}) - get_filename_component(CU_NAME ${CU_OBJ} NAME_WE) - string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}") - add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h - COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h - DEPENDS ${CU_OBJ} - COMMENT "Generating ${CU_NAME}_cubin.h") - list(APPEND GPU_LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h) - endforeach() - set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h") - - - add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS}) - target_link_libraries(gpu ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) - target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS}) - target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT ${GPU_CUDA_MPS_FLAGS}) - if(CUDPP_OPT) - target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini) - target_compile_definitions(gpu PRIVATE -DUSE_CUDPP) - endif() - - list(APPEND LAMMPS_LINK_LIBS gpu) - - add_executable(nvc_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) - target_compile_definitions(nvc_get_devices PRIVATE -DUCL_CUDADR) - target_link_libraries(nvc_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) - target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS}) - - - 
elseif(GPU_API STREQUAL "OPENCL") - find_package(OpenCL REQUIRED) - set(OCL_TUNE "generic" CACHE STRING "OpenCL Device Tuning") - set(OCL_TUNE_VALUES intel fermi kepler cypress generic) - set_property(CACHE OCL_TUNE PROPERTY STRINGS ${OCL_TUNE_VALUES}) - validate_option(OCL_TUNE OCL_TUNE_VALUES) - string(TOUPPER ${OCL_TUNE} OCL_TUNE) - - include(OpenCLUtils) - set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h) - - file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu) - list(REMOVE_ITEM GPU_LIB_CU - ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu - ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu - ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu - ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu - ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu - ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu - ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu - ) - - foreach(GPU_KERNEL ${GPU_LIB_CU}) - get_filename_component(basename ${GPU_KERNEL} NAME_WE) - string(SUBSTRING ${basename} 4 -1 KERNEL_NAME) - GenerateOpenCLHeader(${KERNEL_NAME} ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h ${OCL_COMMON_HEADERS} ${GPU_KERNEL}) - list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h) - endforeach() - - GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu) - GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu) - GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu) - GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h 
${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu) - GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu) - GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu) - GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu) - - list(APPEND GPU_LIB_SOURCES - ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h - ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h - ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h - ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h - ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h - ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h - ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h - ) - - add_library(gpu STATIC ${GPU_LIB_SOURCES}) - target_link_libraries(gpu ${OpenCL_LIBRARIES}) - target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS}) - target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT) - target_compile_definitions(gpu PRIVATE -DUSE_OPENCL) - - list(APPEND LAMMPS_LINK_LIBS gpu) - - add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) - target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL) - target_link_libraries(ocl_get_devices PRIVATE ${OpenCL_LIBRARIES}) - target_include_directories(ocl_get_devices PRIVATE ${OpenCL_INCLUDE_DIRS}) - endif() - - # GPU package - FindStyleHeaders(${GPU_SOURCES_DIR} FIX_CLASS fix_ FIX) - - set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}") - - # detects styles which 
have GPU version - RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES) - - get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES) - - list(APPEND LIB_SOURCES ${GPU_SOURCES}) - include_directories(${GPU_SOURCES_DIR}) -endif() +include(Packages/CORESHELL) +include(Packages/QEQ) +include(Packages/USER-OMP) +include(Packages/USER-SDPD) +include(Packages/KOKKOS) +include(Packages/OPT) +include(Packages/USER-INTEL) +include(Packages/GPU) ###################################################### # Generate style headers based on global list of @@ -1530,12 +522,9 @@ if(BUILD_EXE) set_target_properties(lmp PROPERTIES OUTPUT_NAME ${LAMMPS_BINARY}) install(TARGETS lmp DESTINATION ${CMAKE_INSTALL_BINDIR}) install(FILES ${LAMMPS_DOC_DIR}/lammps.1 DESTINATION ${CMAKE_INSTALL_MANDIR}/man1 RENAME ${LAMMPS_BINARY}.1) - if(ENABLE_TESTING) - add_test(ShowHelp ${LAMMPS_BINARY} -help) - endif() enable_language(C) - get_filename_component(MSI2LMP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../tools/msi2lmp/src ABSOLUTE) + get_filename_component(MSI2LMP_SOURCE_DIR ${LAMMPS_TOOLS_DIR}/msi2lmp/src ABSOLUTE) file(GLOB MSI2LMP_SOURCES ${MSI2LMP_SOURCE_DIR}/[^.]*.c) add_executable(msi2lmp ${MSI2LMP_SOURCES}) target_link_libraries(msi2lmp m) @@ -1544,74 +533,16 @@ if(BUILD_EXE) endif() -############################################################################### -# Build documentation -############################################################################### -option(BUILD_DOC "Build LAMMPS documentation" OFF) -if(BUILD_DOC) - include(ProcessorCount) - ProcessorCount(NPROCS) - find_package(PythonInterp 3 REQUIRED) - - set(VIRTUALENV ${PYTHON_EXECUTABLE} -m virtualenv) - - file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.txt) - file(GLOB PDF_EXTRA_SOURCES ${LAMMPS_DOC_DIR}/src/lammps_commands*.txt ${LAMMPS_DOC_DIR}/src/lammps_support.txt ${LAMMPS_DOC_DIR}/src/lammps_tutorials.txt) - list(REMOVE_ITEM DOC_SOURCES ${PDF_EXTRA_SOURCES}) - - add_custom_command( - OUTPUT docenv - COMMAND 
${VIRTUALENV} docenv - ) - - set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin) - - add_custom_command( - OUTPUT requirements.txt - DEPENDS docenv - COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt - COMMAND ${DOCENV_BINARY_DIR}/pip install -r requirements.txt --upgrade - COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters - ) - - set(RST_FILES "") - set(RST_DIR ${CMAKE_BINARY_DIR}/rst) - file(MAKE_DIRECTORY ${RST_DIR}) - foreach(TXT_FILE ${DOC_SOURCES}) - get_filename_component(FILENAME ${TXT_FILE} NAME_WE) - set(RST_FILE ${RST_DIR}/${FILENAME}.rst) - list(APPEND RST_FILES ${RST_FILE}) - add_custom_command( - OUTPUT ${RST_FILE} - DEPENDS requirements.txt docenv ${TXT_FILE} - COMMAND ${DOCENV_BINARY_DIR}/txt2rst -o ${RST_DIR} ${TXT_FILE} - ) - endforeach() - - add_custom_command( - OUTPUT html - DEPENDS ${RST_FILES} - COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src ${RST_DIR} - COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -j ${NPROCS} -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${RST_DIR} html - ) - - add_custom_target( - doc ALL - DEPENDS html - SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES} - ) - - install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR}) -endif() +include(Documentation) ############################################################################### # Install potential and force field files in data directory ############################################################################### -set(LAMMPS_POTENTIALS_DIR ${CMAKE_INSTALL_FULL_DATADIR}/lammps/potentials) -install(DIRECTORY ${LAMMPS_SOURCE_DIR}/../potentials/ DESTINATION ${LAMMPS_POTENTIALS_DIR}) +set(LAMMPS_INSTALL_POTENTIALS_DIR ${CMAKE_INSTALL_FULL_DATADIR}/lammps/potentials) +install(DIRECTORY ${LAMMPS_POTENTIALS_DIR}/ DESTINATION ${LAMMPS_INSTALL_POTENTIALS_DIR}) -set(LAMMPS_FRC_FILES_DIR
${CMAKE_INSTALL_FULL_DATADIR}/lammps/frc_files) -install(DIRECTORY ${LAMMPS_SOURCE_DIR}/../tools/msi2lmp/frc_files/ DESTINATION ${LAMMPS_FRC_FILES_DIR}) +set(LAMMPS_INSTALL_FRC_FILES_DIR ${CMAKE_INSTALL_FULL_DATADIR}/lammps/frc_files) +install(DIRECTORY ${LAMMPS_TOOLS_DIR}/msi2lmp/frc_files/ DESTINATION ${LAMMPS_INSTALL_FRC_FILES_DIR}) configure_file(etc/profile.d/lammps.sh.in ${CMAKE_BINARY_DIR}/etc/profile.d/lammps.sh @ONLY) configure_file(etc/profile.d/lammps.csh.in ${CMAKE_BINARY_DIR}/etc/profile.d/lammps.csh @ONLY) @@ -1633,9 +564,9 @@ if(BUILD_LIB AND BUILD_SHARED_LIBS) add_custom_target( install-python ${PYTHON_EXECUTABLE} install.py -v ${LAMMPS_SOURCE_DIR}/version.h - -m ${CMAKE_CURRENT_SOURCE_DIR}/../python/lammps.py + -m ${LAMMPS_PYTHON_DIR}/lammps.py -l ${CMAKE_BINARY_DIR}/liblammps${CMAKE_SHARED_LIBRARY_SUFFIX} - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/../python + WORKING_DIRECTORY ${LAMMPS_PYTHON_DIR} COMMENT "Installing LAMMPS Python module") else() add_custom_target( @@ -1660,38 +591,12 @@ if((BUILD_LIB AND BUILD_SHARED_LIBS) OR (PKG_PYTHON)) -c "import distutils.sysconfig as cg; print(cg.get_python_lib(1,0,prefix='${CMAKE_INSTALL_PREFIX}'))" OUTPUT_VARIABLE PYTHON_DEFAULT_INSTDIR OUTPUT_STRIP_TRAILING_WHITESPACE) set(PYTHON_INSTDIR ${PYTHON_DEFAULT_INSTDIR} CACHE PATH "Installation folder for LAMMPS Python module") - install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/../python/lammps.py DESTINATION ${PYTHON_INSTDIR}) + install(FILES ${LAMMPS_PYTHON_DIR}/lammps.py DESTINATION ${PYTHON_INSTDIR}) endif() endif() -############################################################################### -# Testing -# -# Requires latest gcovr (for GCC 8.1 support):# -# pip install git+https://github.com/gcovr/gcovr.git -############################################################################### -if(ENABLE_COVERAGE) - find_program(GCOVR_BINARY gcovr) - find_package_handle_standard_args(GCOVR DEFAULT_MSG GCOVR_BINARY) - - if(GCOVR_FOUND) - 
get_filename_component(ABSOLUTE_LAMMPS_SOURCE_DIR ${LAMMPS_SOURCE_DIR} ABSOLUTE) - - add_custom_target( - gen_coverage_xml - COMMAND ${GCOVR_BINARY} -s -x -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.xml - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - COMMENT "Generating XML Coverage Report..." - ) - - add_custom_target( - gen_coverage_html - COMMAND ${GCOVR_BINARY} -s --html --html-details -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.html - WORKING_DIRECTORY ${CMAKE_BINARY_DIR} - COMMENT "Generating HTML Coverage Report..." - ) - endif() -endif() +include(Testing) +include(CodeCoverage) ############################################################################### # Print package summary diff --git a/cmake/Modules/CodeCoverage.cmake b/cmake/Modules/CodeCoverage.cmake new file mode 100644 index 0000000000..d018db43d9 --- /dev/null +++ b/cmake/Modules/CodeCoverage.cmake @@ -0,0 +1,28 @@ +############################################################################### +# Coverage +# +# Requires latest gcovr (for GCC 8.1 support):# +# pip install git+https://github.com/gcovr/gcovr.git +############################################################################### +if(ENABLE_COVERAGE) + find_program(GCOVR_BINARY gcovr) + find_package_handle_standard_args(GCOVR DEFAULT_MSG GCOVR_BINARY) + + if(GCOVR_FOUND) + get_filename_component(ABSOLUTE_LAMMPS_SOURCE_DIR ${LAMMPS_SOURCE_DIR} ABSOLUTE) + + add_custom_target( + gen_coverage_xml + COMMAND ${GCOVR_BINARY} -s -x -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.xml + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Generating XML Coverage Report..." 
+ ) + + add_custom_target( + gen_coverage_html + COMMAND ${GCOVR_BINARY} -s --html --html-details -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.html + WORKING_DIRECTORY ${CMAKE_BINARY_DIR} + COMMENT "Generating HTML Coverage Report..." + ) + endif() +endif() diff --git a/cmake/Modules/Documentation.cmake b/cmake/Modules/Documentation.cmake new file mode 100644 index 0000000000..99f570820a --- /dev/null +++ b/cmake/Modules/Documentation.cmake @@ -0,0 +1,59 @@ +############################################################################### +# Build documentation +############################################################################### +option(BUILD_DOC "Build LAMMPS documentation" OFF) +if(BUILD_DOC) + include(ProcessorCount) + ProcessorCount(NPROCS) + find_package(PythonInterp 3 REQUIRED) + + set(VIRTUALENV ${PYTHON_EXECUTABLE} -m virtualenv) + + file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.txt) + file(GLOB PDF_EXTRA_SOURCES ${LAMMPS_DOC_DIR}/src/lammps_commands*.txt ${LAMMPS_DOC_DIR}/src/lammps_support.txt ${LAMMPS_DOC_DIR}/src/lammps_tutorials.txt) + list(REMOVE_ITEM DOC_SOURCES ${PDF_EXTRA_SOURCES}) + + add_custom_command( + OUTPUT docenv + COMMAND ${VIRTUALENV} docenv + ) + + set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin) + + add_custom_command( + OUTPUT requirements.txt + DEPENDS docenv + COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt + COMMAND ${DOCENV_BINARY_DIR}/pip install -r requirements.txt --upgrade + COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters + ) + + set(RST_FILES "") + set(RST_DIR ${CMAKE_BINARY_DIR}/rst) + file(MAKE_DIRECTORY ${RST_DIR}) + foreach(TXT_FILE ${DOC_SOURCES}) + get_filename_component(FILENAME ${TXT_FILE} NAME_WE) + set(RST_FILE ${RST_DIR}/${FILENAME}.rst) + list(APPEND RST_FILES ${RST_FILE}) + add_custom_command( + OUTPUT ${RST_FILE} + DEPENDS requirements.txt docenv ${TXT_FILE} + COMMAND 
${DOCENV_BINARY_DIR}/txt2rst -o ${RST_DIR} ${TXT_FILE} + ) + endforeach() + + add_custom_command( + OUTPUT html + DEPENDS ${RST_FILES} + COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src ${RST_DIR} + COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -j ${NPROCS} -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${RST_DIR} html + ) + + add_custom_target( + doc ALL + DEPENDS html + SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES} + ) + + install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR}) +endif() diff --git a/cmake/Modules/LAMMPSUtils.cmake b/cmake/Modules/LAMMPSUtils.cmake new file mode 100644 index 0000000000..3ea2b3cb7e --- /dev/null +++ b/cmake/Modules/LAMMPSUtils.cmake @@ -0,0 +1,71 @@ +# Utility functions +function(list_to_bulletpoints result) + list(REMOVE_AT ARGV 0) + set(temp "") + foreach(item ${ARGV}) + set(temp "${temp}* ${item}\n") + endforeach() + set(${result} "${temp}" PARENT_SCOPE) +endfunction(list_to_bulletpoints) + +function(validate_option name values) + string(TOLOWER ${${name}} needle_lower) + string(TOUPPER ${${name}} needle_upper) + list(FIND ${values} ${needle_lower} IDX_LOWER) + list(FIND ${values} ${needle_upper} IDX_UPPER) + if(${IDX_LOWER} LESS 0 AND ${IDX_UPPER} LESS 0) + list_to_bulletpoints(POSSIBLE_VALUE_LIST ${${values}}) + message(FATAL_ERROR "\n########################################################################\n" + "Invalid value '${${name}}' for option ${name}\n" + "\n" + "Possible values are:\n" + "${POSSIBLE_VALUE_LIST}" + "########################################################################") + endif() +endfunction(validate_option) + +function(get_lammps_version version_header variable) + file(READ ${version_header} line) + set(MONTHS x Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec) + string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\1" day "${line}") + string(REGEX REPLACE "#define LAMMPS_VERSION 
\"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\2" month "${line}") + string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\3" year "${line}") + string(STRIP ${day} day) + string(STRIP ${month} month) + string(STRIP ${year} year) + list(FIND MONTHS "${month}" month) + string(LENGTH ${day} day_length) + string(LENGTH ${month} month_length) + if(day_length EQUAL 1) + set(day "0${day}") + endif() + if(month_length EQUAL 1) + set(month "0${month}") + endif() + set(${variable} "${year}${month}${day}" PARENT_SCOPE) +endfunction() + +function(check_for_autogen_files source_dir) + message(STATUS "Running check for auto-generated files from make-based build system") + file(GLOB SRC_AUTOGEN_FILES ${source_dir}/style_*.h) + file(GLOB SRC_AUTOGEN_PACKAGES ${source_dir}/packages_*.h) + list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h) + foreach(_SRC ${SRC_AUTOGEN_FILES}) + get_filename_component(FILENAME "${_SRC}" NAME) + if(EXISTS ${source_dir}/${FILENAME}) + message(FATAL_ERROR "\n########################################################################\n" + "Found header file(s) generated by the make-based build system\n" + "\n" + "Please run\n" + "make -C ${source_dir} purge\n" + "to remove\n" + "########################################################################") + endif() + endforeach() +endfunction() + +macro(pkg_depends PKG1 PKG2) + if(PKG_${PKG1} AND NOT (PKG_${PKG2} OR BUILD_${PKG2})) + message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}") + endif() +endmacro() diff --git a/cmake/Modules/Packages/COMPRESS.cmake b/cmake/Modules/Packages/COMPRESS.cmake new file mode 100644 index 0000000000..864b868865 --- /dev/null +++ b/cmake/Modules/Packages/COMPRESS.cmake @@ -0,0 +1,5 @@ +if(PKG_COMPRESS) + find_package(ZLIB REQUIRED) + include_directories(${ZLIB_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${ZLIB_LIBRARIES}) +endif() diff --git 
a/cmake/Modules/Packages/CORESHELL.cmake b/cmake/Modules/Packages/CORESHELL.cmake new file mode 100644 index 0000000000..591477c899 --- /dev/null +++ b/cmake/Modules/Packages/CORESHELL.cmake @@ -0,0 +1,13 @@ +if(PKG_CORESHELL) + set(CORESHELL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/CORESHELL) + set(CORESHELL_SOURCES) + set_property(GLOBAL PROPERTY "CORESHELL_SOURCES" "${CORESHELL_SOURCES}") + + # detects styles which have a CORESHELL version + RegisterStylesExt(${CORESHELL_SOURCES_DIR} cs CORESHELL_SOURCES) + + get_property(CORESHELL_SOURCES GLOBAL PROPERTY CORESHELL_SOURCES) + + list(APPEND LIB_SOURCES ${CORESHELL_SOURCES}) + include_directories(${CORESHELL_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake new file mode 100644 index 0000000000..dab9d51a3f --- /dev/null +++ b/cmake/Modules/Packages/GPU.cmake @@ -0,0 +1,194 @@ +if(PKG_GPU) + if (CMAKE_VERSION VERSION_LESS "3.1") + message(FATAL_ERROR "For the GPU package you need at least cmake-3.1") + endif() + set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU) + set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h + ${GPU_SOURCES_DIR}/fix_gpu.h + ${GPU_SOURCES_DIR}/fix_gpu.cpp) + + set(GPU_API "opencl" CACHE STRING "API used by GPU package") + set(GPU_API_VALUES opencl cuda) + set_property(CACHE GPU_API PROPERTY STRINGS ${GPU_API_VALUES}) + validate_option(GPU_API GPU_API_VALUES) + string(TOUPPER ${GPU_API} GPU_API) + + set(GPU_PREC "mixed" CACHE STRING "LAMMPS GPU precision") + set(GPU_PREC_VALUES double mixed single) + set_property(CACHE GPU_PREC PROPERTY STRINGS ${GPU_PREC_VALUES}) + validate_option(GPU_PREC GPU_PREC_VALUES) + string(TOUPPER ${GPU_PREC} GPU_PREC) + + if(GPU_PREC STREQUAL "DOUBLE") + set(GPU_PREC_SETTING "DOUBLE_DOUBLE") + elseif(GPU_PREC STREQUAL "MIXED") + set(GPU_PREC_SETTING "SINGLE_DOUBLE") + elseif(GPU_PREC STREQUAL "SINGLE") + set(GPU_PREC_SETTING "SINGLE_SINGLE") + endif() + + file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp) + 
file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu) + + if(GPU_API STREQUAL "CUDA") + find_package(CUDA REQUIRED) + find_program(BIN2C bin2c) + if(NOT BIN2C) + message(FATAL_ERROR "Could not find bin2c, use -DBIN2C=/path/to/bin2c to help cmake finding it.") + endif() + option(CUDPP_OPT "Enable CUDPP_OPT" ON) + option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF) + if(CUDA_MPS_SUPPORT) + set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY") + endif() + + set(GPU_ARCH "sm_30" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)") + + file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu) + list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu) + + cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu) + + if(CUDPP_OPT) + cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini) + file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cpp) + file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cu) + endif() + + # build arch/gencode commands for nvcc based on CUDA toolkit version and use choice + # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture + set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH} ") + # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0 + if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0")) + string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_20,code=[sm_20,compute_20] ") + endif() + # Kepler (GPU Arch 3.x) is supported by CUDA 5 and later + if(CUDA_VERSION VERSION_GREATER "4.9") + string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ") + endif() + # Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later + if(CUDA_VERSION VERSION_GREATER "5.9") + string(APPEND GPU_CUDA_GENCODE "-gencode 
arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] ") + endif() + # Pascal (GPU Arch 6.x) is supported by CUDA 8 and later + if(CUDA_VERSION VERSION_GREATER "7.9") + string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] ") + endif() + # Volta (GPU Arch 7.0) is supported by CUDA 9 and later + if(CUDA_VERSION VERSION_GREATER "8.9") + string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_70,code=[sm_70,compute_70] ") + endif() + # Turing (GPU Arch 7.5) is supported by CUDA 10 and later + if(CUDA_VERSION VERSION_GREATER "9.9") + string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_75,code=[sm_75,compute_75] ") + endif() + + cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS + -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING}) + + cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC} + -DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING}) + + foreach(CU_OBJ ${GPU_GEN_OBJS}) + get_filename_component(CU_NAME ${CU_OBJ} NAME_WE) + string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}") + add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h + COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h + DEPENDS ${CU_OBJ} + COMMENT "Generating ${CU_NAME}_cubin.h") + list(APPEND GPU_LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h) + endforeach() + set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h") + + + add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS}) + target_link_libraries(gpu ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) + target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS}) + target_compile_definitions(gpu PRIVATE 
-D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT ${GPU_CUDA_MPS_FLAGS}) + if(CUDPP_OPT) + target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini) + target_compile_definitions(gpu PRIVATE -DUSE_CUDPP) + endif() + + list(APPEND LAMMPS_LINK_LIBS gpu) + + add_executable(nvc_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) + target_compile_definitions(nvc_get_devices PRIVATE -DUCL_CUDADR) + target_link_libraries(nvc_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY}) + target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS}) + + + elseif(GPU_API STREQUAL "OPENCL") + find_package(OpenCL REQUIRED) + set(OCL_TUNE "generic" CACHE STRING "OpenCL Device Tuning") + set(OCL_TUNE_VALUES intel fermi kepler cypress generic) + set_property(CACHE OCL_TUNE PROPERTY STRINGS ${OCL_TUNE_VALUES}) + validate_option(OCL_TUNE OCL_TUNE_VALUES) + string(TOUPPER ${OCL_TUNE} OCL_TUNE) + + include(OpenCLUtils) + set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h) + + file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu) + list(REMOVE_ITEM GPU_LIB_CU + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu + ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu + ) + + foreach(GPU_KERNEL ${GPU_LIB_CU}) + get_filename_component(basename ${GPU_KERNEL} NAME_WE) + string(SUBSTRING ${basename} 4 -1 KERNEL_NAME) + GenerateOpenCLHeader(${KERNEL_NAME} ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h ${OCL_COMMON_HEADERS} ${GPU_KERNEL}) + list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h) + endforeach() + + GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} 
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu) + GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu) + GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu) + GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu) + GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu) + GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu) + GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu) + + list(APPEND GPU_LIB_SOURCES + ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h + ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h + ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h + ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h + ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h + ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h + ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h + ) + + add_library(gpu STATIC ${GPU_LIB_SOURCES}) + target_link_libraries(gpu ${OpenCL_LIBRARIES}) + target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS}) + target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL 
-DMPI_GERYON -DUCL_NO_EXIT) + target_compile_definitions(gpu PRIVATE -DUSE_OPENCL) + + list(APPEND LAMMPS_LINK_LIBS gpu) + + add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) + target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL) + target_link_libraries(ocl_get_devices PRIVATE ${OpenCL_LIBRARIES}) + target_include_directories(ocl_get_devices PRIVATE ${OpenCL_INCLUDE_DIRS}) + endif() + + # GPU package + FindStyleHeaders(${GPU_SOURCES_DIR} FIX_CLASS fix_ FIX) + + set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}") + + # detects styles which have GPU version + RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES) + + get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES) + + list(APPEND LIB_SOURCES ${GPU_SOURCES}) + include_directories(${GPU_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/KIM.cmake b/cmake/Modules/Packages/KIM.cmake new file mode 100644 index 0000000000..21ebd0f8e0 --- /dev/null +++ b/cmake/Modules/Packages/KIM.cmake @@ -0,0 +1,39 @@ +if(PKG_KIM) + find_package(CURL) + if(CURL_FOUND) + include_directories(${CURL_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${CURL_LIBRARIES}) + add_definitions(-DLMP_KIM_CURL) + endif() + find_package(KIM-API QUIET) + if(KIM-API_FOUND) + set(DOWNLOAD_KIM_DEFAULT OFF) + else() + set(DOWNLOAD_KIM_DEFAULT ON) + endif() + option(DOWNLOAD_KIM "Download KIM-API from OpenKIM instead of using an already installed one" ${DOWNLOAD_KIM_DEFAULT}) + if(DOWNLOAD_KIM) + message(STATUS "KIM-API download requested - we will build our own") + enable_language(C) + enable_language(Fortran) + include(ExternalProject) # provides the <INSTALL_DIR> placeholder used in CMAKE_ARGS below + ExternalProject_Add(kim_build + URL https://s3.openkim.org/kim-api/kim-api-2.0.2.txz + URL_MD5 537d9c0abd30f85b875ebb584f9143fa + BINARY_DIR build + CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} + -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} + -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} + -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> +
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + ) + ExternalProject_get_property(kim_build INSTALL_DIR) + set(KIM-API_INCLUDE_DIRS ${INSTALL_DIR}/include/kim-api) + set(KIM-API_LDFLAGS ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libkim-api${CMAKE_SHARED_LIBRARY_SUFFIX}) + list(APPEND LAMMPS_DEPS kim_build) + else() + find_package(KIM-API REQUIRED) + endif() + list(APPEND LAMMPS_LINK_LIBS "${KIM-API_LDFLAGS}") + include_directories(${KIM-API_INCLUDE_DIRS}) +endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake new file mode 100644 index 0000000000..2f7d401802 --- /dev/null +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -0,0 +1,53 @@ +if(PKG_KOKKOS) + set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos) + set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos) + add_definitions(-DLMP_KOKKOS) + add_subdirectory(${LAMMPS_LIB_KOKKOS_SRC_DIR} ${LAMMPS_LIB_KOKKOS_BIN_DIR}) + + set(Kokkos_INCLUDE_DIRS ${LAMMPS_LIB_KOKKOS_SRC_DIR}/core/src + ${LAMMPS_LIB_KOKKOS_SRC_DIR}/containers/src + ${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src + ${LAMMPS_LIB_KOKKOS_BIN_DIR}) + include_directories(${Kokkos_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS kokkos) + + set(KOKKOS_PKG_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/KOKKOS) + set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/fix_nh_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp + ${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp) + + if(PKG_KSPACE) + list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp) + endif() + + 
set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}") + + # detects styles which have KOKKOS version + RegisterStylesExt(${KOKKOS_PKG_SOURCES_DIR} kokkos KOKKOS_PKG_SOURCES) + + # register kokkos-only styles + RegisterNBinStyle(${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.h) + RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.h) + + if(PKG_USER-DPD) + get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES) + list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.cpp) + RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.h) + set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}") + endif() + + get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES) + + list(APPEND LIB_SOURCES ${KOKKOS_PKG_SOURCES}) + include_directories(${KOKKOS_PKG_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake new file mode 100644 index 0000000000..6938a93a36 --- /dev/null +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -0,0 +1,38 @@ +if(PKG_KSPACE) + option(FFT_SINGLE "Use single precision FFT instead of double" OFF) + set(FFTW "FFTW3") + if(FFT_SINGLE) + set(FFTW "FFTW3F") + add_definitions(-DFFT_SINGLE) + endif() + find_package(${FFTW} QUIET) + if(${FFTW}_FOUND) + set(FFT "${FFTW}" CACHE STRING "FFT library for KSPACE package") + else() + set(FFT "KISS" CACHE STRING "FFT library for KSPACE package") + endif() + set(FFT_VALUES KISS ${FFTW} MKL) + set_property(CACHE FFT PROPERTY STRINGS ${FFT_VALUES}) + validate_option(FFT FFT_VALUES) + string(TOUPPER ${FFT} FFT) + if(NOT FFT STREQUAL "KISS") + find_package(${FFT} REQUIRED) + if(FFT STREQUAL "FFTW3F") # FFTW3F is only selectable with FFT_SINGLE, which already adds -DFFT_SINGLE + add_definitions(-DFFT_FFTW) + else() # FFTW3 or MKL: the define carries the library name + add_definitions(-DFFT_${FFT}) + endif() + include_directories(${${FFT}_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${${FFT}_LIBRARIES}) + else() + add_definitions(-DFFT_KISS) + endif() + set(FFT_PACK "array" CACHE STRING "Optimization for FFT") +
set(FFT_PACK_VALUES array pointer memcpy) + set_property(CACHE FFT_PACK PROPERTY STRINGS ${FFT_PACK_VALUES}) + validate_option(FFT_PACK FFT_PACK_VALUES) + if(NOT FFT_PACK STREQUAL "array") + string(TOUPPER ${FFT_PACK} FFT_PACK) + add_definitions(-DFFT_PACK_${FFT_PACK}) + endif() +endif() diff --git a/cmake/Modules/Packages/LATTE.cmake b/cmake/Modules/Packages/LATTE.cmake new file mode 100644 index 0000000000..a709561562 --- /dev/null +++ b/cmake/Modules/Packages/LATTE.cmake @@ -0,0 +1,35 @@ +if(PKG_LATTE) + enable_language(Fortran) + find_package(LATTE) + if(LATTE_FOUND) + set(DOWNLOAD_LATTE_DEFAULT OFF) + else() + set(DOWNLOAD_LATTE_DEFAULT ON) + endif() + option(DOWNLOAD_LATTE "Download the LATTE library instead of using an already installed one" ${DOWNLOAD_LATTE_DEFAULT}) + if(DOWNLOAD_LATTE) + if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR + message(FATAL_ERROR "For downloading LATTE you need at least cmake-3.7") + endif() + message(STATUS "LATTE download requested - we will build our own") + include(ExternalProject) # provides the <INSTALL_DIR> placeholder; liblatte.a is picked up from that prefix below + ExternalProject_Add(latte_build + URL https://github.com/lanl/LATTE/archive/v1.2.1.tar.gz + URL_MD5 85ac414fdada2d04619c8f936344df14 + SOURCE_SUBDIR cmake + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC} + -DBLAS_LIBRARIES=${BLAS_LIBRARIES} -DLAPACK_LIBRARIES=${LAPACK_LIBRARIES} + -DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -DCMAKE_Fortran_FLAGS=${CMAKE_Fortran_FLAGS} + -DCMAKE_Fortran_FLAGS_${BTYPE}=${CMAKE_Fortran_FLAGS_${BTYPE}} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE} + ) + ExternalProject_get_property(latte_build INSTALL_DIR) + set(LATTE_LIBRARIES ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/liblatte.a) + list(APPEND LAMMPS_DEPS latte_build) + else() + find_package(LATTE) + if(NOT LATTE_FOUND) + message(FATAL_ERROR "LATTE library not found, help CMake to find it by setting LATTE_LIBRARY, or set DOWNLOAD_LATTE=ON to download it") + endif() + endif() + list(APPEND LAMMPS_LINK_LIBS ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES})
+endif() diff --git a/cmake/Modules/Packages/MESSAGE.cmake b/cmake/Modules/Packages/MESSAGE.cmake new file mode 100644 index 0000000000..3c1bdde855 --- /dev/null +++ b/cmake/Modules/Packages/MESSAGE.cmake @@ -0,0 +1,29 @@ +if(PKG_MESSAGE) + option(MESSAGE_ZMQ "Use ZeroMQ in MESSAGE package" OFF) + file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.F + ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.c + ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.cpp) + + add_library(cslib STATIC ${cslib_SOURCES}) + if(BUILD_MPI) + target_compile_definitions(cslib PRIVATE -DMPI_YES) + set_target_properties(cslib PROPERTIES OUTPUT_NAME "csmpi") + else() + target_compile_definitions(cslib PRIVATE -DMPI_NO) + target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_MPI) + set_target_properties(cslib PROPERTIES OUTPUT_NAME "csnompi") + endif() + + if(MESSAGE_ZMQ) + target_compile_definitions(cslib PRIVATE -DZMQ_YES) + find_package(ZMQ REQUIRED) + target_include_directories(cslib PRIVATE ${ZMQ_INCLUDE_DIRS}) + target_link_libraries(cslib PUBLIC ${ZMQ_LIBRARIES}) + else() + target_compile_definitions(cslib PRIVATE -DZMQ_NO) + target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_ZMQ) + endif() + + list(APPEND LAMMPS_LINK_LIBS cslib) + include_directories(${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src) +endif() diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake new file mode 100644 index 0000000000..e8744bc192 --- /dev/null +++ b/cmake/Modules/Packages/MSCG.cmake @@ -0,0 +1,42 @@ +if(PKG_MSCG) + find_package(GSL REQUIRED) + find_package(MSCG QUIET) + if(MSCG_FOUND) # an installed MSCG turns the download default off + set(DOWNLOAD_MSCG_DEFAULT OFF) + else() + set(DOWNLOAD_MSCG_DEFAULT ON) + endif() + option(DOWNLOAD_MSCG "Download MSCG library instead of using an already installed one)" ${DOWNLOAD_MSCG_DEFAULT}) + if(DOWNLOAD_MSCG) + if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR + message(FATAL_ERROR "For
downloading MSCG you need at least cmake-3.7") + endif() + include(ExternalProject) # provides the <INSTALL_DIR> placeholder used in CMAKE_ARGS below + if(NOT LAPACK_FOUND) + set(EXTRA_MSCG_OPTS "-DLAPACK_LIBRARIES=${CMAKE_CURRENT_BINARY_DIR}/liblinalg.a") + endif() + ExternalProject_Add(mscg_build + URL https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz + URL_MD5 8c45e269ee13f60b303edd7823866a91 + SOURCE_SUBDIR src/CMake + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC} ${EXTRA_MSCG_OPTS} + BUILD_COMMAND make mscg INSTALL_COMMAND "" + ) + ExternalProject_get_property(mscg_build BINARY_DIR) + set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a) + ExternalProject_get_property(mscg_build SOURCE_DIR) + set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src) + list(APPEND LAMMPS_DEPS mscg_build) + if(NOT LAPACK_FOUND) + file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS}) + add_dependencies(mscg_build linalg) + endif() + else() + find_package(MSCG) + if(NOT MSCG_FOUND) + message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it") + endif() + endif() + list(APPEND LAMMPS_LINK_LIBS ${MSCG_LIBRARIES} ${GSL_LIBRARIES} ${LAPACK_LIBRARIES}) + include_directories(${MSCG_INCLUDE_DIRS}) +endif() diff --git a/cmake/Modules/Packages/OPT.cmake b/cmake/Modules/Packages/OPT.cmake new file mode 100644 index 0000000000..f2802c757b --- /dev/null +++ b/cmake/Modules/Packages/OPT.cmake @@ -0,0 +1,13 @@ +if(PKG_OPT) + set(OPT_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/OPT) + set(OPT_SOURCES) + set_property(GLOBAL PROPERTY "OPT_SOURCES" "${OPT_SOURCES}") + + # detects styles which have OPT version + RegisterStylesExt(${OPT_SOURCES_DIR} opt OPT_SOURCES) + + get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES) + + list(APPEND LIB_SOURCES ${OPT_SOURCES}) + include_directories(${OPT_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/PYTHON.cmake b/cmake/Modules/Packages/PYTHON.cmake new file mode 100644 index 0000000000..4f8959ae38 --- /dev/null +++
b/cmake/Modules/Packages/PYTHON.cmake @@ -0,0 +1,6 @@ +if(PKG_PYTHON) + find_package(PythonLibs REQUIRED) + add_definitions(-DLMP_PYTHON) + include_directories(${PYTHON_INCLUDE_DIR}) + list(APPEND LAMMPS_LINK_LIBS ${PYTHON_LIBRARY}) +endif() diff --git a/cmake/Modules/Packages/QEQ.cmake b/cmake/Modules/Packages/QEQ.cmake new file mode 100644 index 0000000000..94cca30540 --- /dev/null +++ b/cmake/Modules/Packages/QEQ.cmake @@ -0,0 +1,20 @@ +# Fix qeq/fire requires MANYBODY (i.e. COMB and COMB3) to be installed +if(PKG_QEQ) + set(QEQ_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/QEQ) + file(GLOB QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix*.h) + file(GLOB QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix*.cpp) + + if(NOT PKG_MANYBODY) + list(REMOVE_ITEM QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix_qeq_fire.h) + list(REMOVE_ITEM QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix_qeq_fire.cpp) + endif() + set_property(GLOBAL PROPERTY "QEQ_SOURCES" "${QEQ_SOURCES}") + + foreach(MY_HEADER ${QEQ_HEADERS}) + AddStyleHeader(${MY_HEADER} FIX) + endforeach() + + get_property(QEQ_SOURCES GLOBAL PROPERTY QEQ_SOURCES) + list(APPEND LIB_SOURCES ${QEQ_SOURCES}) + include_directories(${QEQ_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/USER-H5MD.cmake b/cmake/Modules/Packages/USER-H5MD.cmake new file mode 100644 index 0000000000..40ea7b7444 --- /dev/null +++ b/cmake/Modules/Packages/USER-H5MD.cmake @@ -0,0 +1,8 @@ +if(PKG_USER-H5MD) + enable_language(C) + + find_package(HDF5 REQUIRED) + target_link_libraries(h5md ${HDF5_LIBRARIES}) + target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS}) + include_directories(${HDF5_INCLUDE_DIRS}) +endif() diff --git a/cmake/Modules/Packages/USER-INTEL.cmake b/cmake/Modules/Packages/USER-INTEL.cmake new file mode 100644 index 0000000000..f61b8f1630 --- /dev/null +++ b/cmake/Modules/Packages/USER-INTEL.cmake @@ -0,0 +1,119 @@ +if(PKG_USER-INTEL) + include(CheckIncludeFile) + check_include_file_cxx(immintrin.h FOUND_IMMINTRIN) + if(NOT FOUND_IMMINTRIN) + message(FATAL_ERROR "immintrin.h header 
not found, Intel package won't work without it") + endif() + + add_definitions(-DLMP_USER_INTEL) + + set(INTEL_ARCH "cpu" CACHE STRING "Architectures used by USER-INTEL (cpu or knl)") + set(INTEL_ARCH_VALUES cpu knl) + set_property(CACHE INTEL_ARCH PROPERTY STRINGS ${INTEL_ARCH_VALUES}) + validate_option(INTEL_ARCH INTEL_ARCH_VALUES) + string(TOUPPER ${INTEL_ARCH} INTEL_ARCH) + + find_package(Threads QUIET) + if(Threads_FOUND) + set(INTEL_LRT_MODE "threads" CACHE STRING "Long-range threads mode (none, threads, or c++11)") + else() + set(INTEL_LRT_MODE "none" CACHE STRING "Long-range threads mode (none, threads, or c++11)") + endif() + set(INTEL_LRT_VALUES none threads c++11) + set_property(CACHE INTEL_LRT_MODE PROPERTY STRINGS ${INTEL_LRT_VALUES}) + validate_option(INTEL_LRT_MODE INTEL_LRT_VALUES) + string(TOUPPER ${INTEL_LRT_MODE} INTEL_LRT_MODE) + if(INTEL_LRT_MODE STREQUAL "THREADS") + if(Threads_FOUND) + add_definitions(-DLMP_INTEL_USELRT) + list(APPEND LAMMPS_LINK_LIBS ${CMAKE_THREAD_LIBS_INIT}) + else() + message(FATAL_ERROR "Must have working threads library for Long-range thread support") + endif() + endif() + if(INTEL_LRT_MODE STREQUAL "C++11") + add_definitions(-DLMP_INTEL_USELRT -DLMP_INTEL_LRT11) # same LRT switch as the "threads" mode, plus the C++11 selector + endif() + + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16) + message(FATAL_ERROR "USER-INTEL needs at least a 2016 Intel compiler, found ${CMAKE_CXX_COMPILER_VERSION}") + endif() + else() + message(WARNING "USER-INTEL gives best performance with Intel compilers") + endif() + + find_package(TBB QUIET) + if(TBB_FOUND) + list(APPEND LAMMPS_LINK_LIBS ${TBB_MALLOC_LIBRARIES}) + else() + add_definitions(-DLMP_INTEL_NO_TBB) + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + message(WARNING "USER-INTEL with Intel compilers should use TBB malloc libraries") + endif() + endif() + + find_package(MKL QUIET) + if(MKL_FOUND) + add_definitions(-DLMP_USE_MKL_RNG) + list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) + else() +
message(STATUS "Pair style dpd/intel will be faster with MKL libraries") + endif() + + if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "64") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "128") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "256")) + message(FATAL_ERROR "USER-INTEL only supports memory alignment of 64, 128 or 256 on this platform") + endif() + + if(INTEL_ARCH STREQUAL "KNL") + if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + message(FATAL_ERROR "Must use Intel compiler with USER-INTEL for KNL architecture") + endif() + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -xHost -qopenmp -qoffload") + set(MIC_OPTIONS "-qoffload-option,mic,compiler,\"-fp-model fast=2 -mGLOB_default_function_attrs=\\\"gather_scatter_loop_unroll=4\\\"\"") + add_compile_options(-xMIC-AVX512 -qoffload -fno-alias -ansi-alias -restrict -qoverride-limits ${MIC_OPTIONS}) + add_definitions(-DLMP_INTEL_OFFLOAD) + else() + if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") + if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4) + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xCOMMON-AVX512") + else() + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost") + endif() + include(CheckCXXCompilerFlag) + foreach(_FLAG -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -qopt-zmm-usage=high -qno-offload -fno-alias -ansi-alias -restrict) + check_cxx_compiler_flag("${_FLAG}" COMPILER_SUPPORTS${_FLAG}) # probe the loop's current flag; the result is cached per flag + if(COMPILER_SUPPORTS${_FLAG}) + add_compile_options(${_FLAG}) + endif() + endforeach() + else() + add_compile_options(-O3 -ffast-math) + endif() + endif() + + # collect sources + set(USER-INTEL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-INTEL) + set(USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/fix_intel.cpp + ${USER-INTEL_SOURCES_DIR}/fix_nh_intel.cpp + ${USER-INTEL_SOURCES_DIR}/intel_buffers.cpp + ${USER-INTEL_SOURCES_DIR}/nbin_intel.cpp + ${USER-INTEL_SOURCES_DIR}/npair_intel.cpp) + + set_property(GLOBAL PROPERTY "USER-INTEL_SOURCES"
"${USER-INTEL_SOURCES}") + + # detect styles which have a USER-INTEL version + RegisterStylesExt(${USER-INTEL_SOURCES_DIR} intel USER-INTEL_SOURCES) + RegisterNBinStyle(${USER-INTEL_SOURCES_DIR}/nbin_intel.h) + RegisterNPairStyle(${USER-INTEL_SOURCES_DIR}/npair_intel.h) + RegisterFixStyle(${USER-INTEL_SOURCES_DIR}/fix_intel.h) + + get_property(USER-INTEL_SOURCES GLOBAL PROPERTY USER-INTEL_SOURCES) + if(PKG_KSPACE) + list(APPEND USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.cpp) + RegisterIntegrateStyle(${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.h) + endif() + + list(APPEND LIB_SOURCES ${USER-INTEL_SOURCES}) + include_directories(${USER-INTEL_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/USER-MOLFILE.cmake b/cmake/Modules/Packages/USER-MOLFILE.cmake new file mode 100644 index 0000000000..16ffc34994 --- /dev/null +++ b/cmake/Modules/Packages/USER-MOLFILE.cmake @@ -0,0 +1,10 @@ +if(PKG_USER-MOLFILE) + set(MOLFILE_INCLUDE_DIRS "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE STRING "Path to VMD molfile plugin headers") + add_library(molfile INTERFACE) + target_include_directories(molfile INTERFACE ${MOLFILE_INCLUDE_DIRS}) + # no need to link with -ldl on windows + if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") + target_link_libraries(molfile INTERFACE ${CMAKE_DL_LIBS}) + endif() + list(APPEND LAMMPS_LINK_LIBS molfile) +endif() diff --git a/cmake/Modules/Packages/USER-NETCDF.cmake b/cmake/Modules/Packages/USER-NETCDF.cmake new file mode 100644 index 0000000000..a90725bbbc --- /dev/null +++ b/cmake/Modules/Packages/USER-NETCDF.cmake @@ -0,0 +1,6 @@ +if(PKG_USER-NETCDF) + find_package(NetCDF REQUIRED) + include_directories(${NETCDF_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARIES}) + add_definitions(-DLMP_HAS_NETCDF -DNC_64BIT_DATA=0x0020) +endif() diff --git a/cmake/Modules/Packages/USER-OMP.cmake b/cmake/Modules/Packages/USER-OMP.cmake new file mode 100644 index 0000000000..668f42f10a --- /dev/null +++ 
b/cmake/Modules/Packages/USER-OMP.cmake @@ -0,0 +1,42 @@ +if(PKG_USER-OMP) + set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP) + set(USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/thr_data.cpp + ${USER-OMP_SOURCES_DIR}/thr_omp.cpp + ${USER-OMP_SOURCES_DIR}/fix_omp.cpp + ${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp + ${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp + ${USER-OMP_SOURCES_DIR}/domain_omp.cpp) + add_definitions(-DLMP_USER_OMP) + set_property(GLOBAL PROPERTY "OMP_SOURCES" "${USER-OMP_SOURCES}") + + # detects styles which have USER-OMP version + RegisterStylesExt(${USER-OMP_SOURCES_DIR} omp OMP_SOURCES) + RegisterFixStyle(${USER-OMP_SOURCES_DIR}/fix_omp.h) + + get_property(USER-OMP_SOURCES GLOBAL PROPERTY OMP_SOURCES) + + # manually add package dependent source files from USER-OMP that do not provide styles + + if(PKG_ASPHERE) + list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_nh_asphere_omp.cpp) + endif() + + if(PKG_RIGID) + list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_rigid_nh_omp.cpp) + endif() + + if(PKG_USER-REAXC) + list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/reaxc_bond_orders_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_hydrogen_bonds_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_nonbonded_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_bonds_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_init_md_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_torsion_angles_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_forces_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_multi_body_omp.cpp + ${USER-OMP_SOURCES_DIR}/reaxc_valence_angles_omp.cpp) + endif() + + list(APPEND LIB_SOURCES ${USER-OMP_SOURCES}) + include_directories(${USER-OMP_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/USER-PLUMED.cmake b/cmake/Modules/Packages/USER-PLUMED.cmake new file mode 100644 index 0000000000..422527dd06 --- /dev/null +++ b/cmake/Modules/Packages/USER-PLUMED.cmake @@ -0,0 +1,76 @@ +if(PKG_USER-PLUMED) + find_package(GSL REQUIRED) + set(PLUMED_MODE "static" CACHE STRING "Linkage mode for 
Plumed2 library") + set(PLUMED_MODE_VALUES static shared runtime) + set_property(CACHE PLUMED_MODE PROPERTY STRINGS ${PLUMED_MODE_VALUES}) + validate_option(PLUMED_MODE PLUMED_MODE_VALUES) + string(TOUPPER ${PLUMED_MODE} PLUMED_MODE) + + find_package(PkgConfig QUIET) + set(DOWNLOAD_PLUMED_DEFAULT ON) + if(PKG_CONFIG_FOUND) + pkg_check_modules(PLUMED QUIET plumed) + if(PLUMED_FOUND) + set(DOWNLOAD_PLUMED_DEFAULT OFF) + endif() + endif() + + option(DOWNLOAD_PLUMED "Download Plumed package instead of using an already installed one" ${DOWNLOAD_PLUMED_DEFAULT}) + if(DOWNLOAD_PLUMED) + if(BUILD_MPI) + set(PLUMED_CONFIG_MPI "--enable-mpi") + set(PLUMED_CONFIG_CC ${CMAKE_MPI_C_COMPILER}) + set(PLUMED_CONFIG_CXX ${CMAKE_MPI_CXX_COMPILER}) + else() + set(PLUMED_CONFIG_MPI "--disable-mpi") + set(PLUMED_CONFIG_CC ${CMAKE_C_COMPILER}) + set(PLUMED_CONFIG_CXX ${CMAKE_CXX_COMPILER}) + endif() + if(BUILD_OMP) + set(PLUMED_CONFIG_OMP "--enable-openmp") + else() + set(PLUMED_CONFIG_OMP "--disable-openmp") + endif() + message(STATUS "PLUMED download requested - we will build our own") + include(ExternalProject) # provides the <SOURCE_DIR>/<INSTALL_DIR> placeholders; the libraries are linked from that install prefix below + ExternalProject_Add(plumed_build + URL https://github.com/plumed/plumed2/releases/download/v2.5.1/plumed-src-2.5.1.tgz + URL_MD5 c2a7b519e32197a120cdf47e0f194f81 + BUILD_IN_SOURCE 1 + CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR> + ${CONFIGURE_REQUEST_PIC} + --enable-modules=all + ${PLUMED_CONFIG_MPI} + ${PLUMED_CONFIG_OMP} + CXX=${PLUMED_CONFIG_CXX} + CC=${PLUMED_CONFIG_CC} + ) + ExternalProject_get_property(plumed_build INSTALL_DIR) + set(PLUMED_INSTALL_DIR ${INSTALL_DIR}) + list(APPEND LAMMPS_DEPS plumed_build) + if(PLUMED_MODE STREQUAL "STATIC") + add_definitions(-D__PLUMED_WRAPPER_CXX=1) + list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${GSL_LIBRARIES} ${LAPACK_LIBRARIES} ${CMAKE_DL_LIBS}) + elseif(PLUMED_MODE STREQUAL "SHARED") + list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.so ${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so
${CMAKE_DL_LIBS}) + elseif(PLUMED_MODE STREQUAL "RUNTIME") + add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so) + list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumedWrapper.a -rdynamic ${CMAKE_DL_LIBS}) + endif() + set(PLUMED_INCLUDE_DIRS "${PLUMED_INSTALL_DIR}/include") + else() + find_package(PkgConfig REQUIRED) + pkg_check_modules(PLUMED REQUIRED plumed) + if(PLUMED_MODE STREQUAL "STATIC") + add_definitions(-D__PLUMED_WRAPPER_CXX=1) + include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.static) + elseif(PLUMED_MODE STREQUAL "SHARED") + include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.shared) + elseif(PLUMED_MODE STREQUAL "RUNTIME") + add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel.so) + include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.runtime) + endif() + list(APPEND LAMMPS_LINK_LIBS ${PLUMED_LOAD}) + endif() + include_directories(${PLUMED_INCLUDE_DIRS}) +endif() diff --git a/cmake/Modules/Packages/USER-QMMM.cmake b/cmake/Modules/Packages/USER-QMMM.cmake new file mode 100644 index 0000000000..e0ae1a46dc --- /dev/null +++ b/cmake/Modules/Packages/USER-QMMM.cmake @@ -0,0 +1,9 @@ +if(PKG_USER-QMMM) + enable_language(Fortran) + enable_language(C) + + message(WARNING "Building QMMM with CMake is still experimental") + find_package(QE REQUIRED) + include_directories(${QE_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${QE_LIBRARIES}) +endif() diff --git a/cmake/Modules/Packages/USER-QUIP.cmake b/cmake/Modules/Packages/USER-QUIP.cmake new file mode 100644 index 0000000000..93096a2f54 --- /dev/null +++ b/cmake/Modules/Packages/USER-QUIP.cmake @@ -0,0 +1,5 @@ +if(PKG_USER-QUIP) + enable_language(Fortran) + find_package(QUIP REQUIRED) + list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${LAPACK_LIBRARIES}) +endif() diff --git a/cmake/Modules/Packages/USER-SCAFACOS.cmake b/cmake/Modules/Packages/USER-SCAFACOS.cmake new file mode 100644 index 
0000000000..adb002081f --- /dev/null +++ b/cmake/Modules/Packages/USER-SCAFACOS.cmake @@ -0,0 +1,59 @@ +if(PKG_USER-SCAFACOS) + enable_language(Fortran) + enable_language(C) + + find_package(GSL REQUIRED) + find_package(PkgConfig QUIET) + set(DOWNLOAD_SCAFACOS_DEFAULT ON) + if(PKG_CONFIG_FOUND) + pkg_check_modules(SCAFACOS QUIET scafacos) + if(SCAFACOS_FOUND) + set(DOWNLOAD_SCAFACOS_DEFAULT OFF) + endif() + endif() + option(DOWNLOAD_SCAFACOS "Download ScaFaCoS library instead of using an already installed one" ${DOWNLOAD_SCAFACOS_DEFAULT}) + if(DOWNLOAD_SCAFACOS) + message(STATUS "ScaFaCoS download requested - we will build our own") + include(ExternalProject) # provides the <SOURCE_DIR>/<INSTALL_DIR> placeholders; the static libs are linked from that install prefix below + ExternalProject_Add(scafacos_build + URL https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz + URL_MD5 bd46d74e3296bd8a444d731bb10c1738 + CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR> --disable-doc + --enable-fcs-solvers=fmm,p2nfft,direct,ewald,p3m + --with-internal-fftw --with-internal-pfft + --with-internal-pnfft ${CONFIGURE_REQUEST_PIC} + FC=${CMAKE_MPI_Fortran_COMPILER} + CXX=${CMAKE_MPI_CXX_COMPILER} + CC=${CMAKE_MPI_C_COMPILER} + F77= + ) + ExternalProject_get_property(scafacos_build INSTALL_DIR) + set(SCAFACOS_BUILD_DIR ${INSTALL_DIR}) + set(SCAFACOS_INCLUDE_DIRS ${SCAFACOS_BUILD_DIR}/include) + list(APPEND LAMMPS_DEPS scafacos_build) + # list and order from pkg_config file of ScaFaCoS build + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_direct.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_ewald.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fmm.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p2nfft.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p3m.a) + list(APPEND LAMMPS_LINK_LIBS ${GSL_LIBRARIES}) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_near.a) + list(APPEND LAMMPS_LINK_LIBS
${SCAFACOS_BUILD_DIR}/lib/libfcs_gridsort.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_resort.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_redist.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_common.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pnfft.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pfft.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3_mpi.a) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3.a) + list(APPEND LAMMPS_LINK_LIBS ${MPI_Fortran_LIBRARIES}) + list(APPEND LAMMPS_LINK_LIBS ${MPI_C_LIBRARIES}) + else() + find_package(PkgConfig REQUIRED) + pkg_check_modules(SCAFACOS REQUIRED scafacos) + list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_LDFLAGS}) + endif() + include_directories(${SCAFACOS_INCLUDE_DIRS}) +endif() diff --git a/cmake/Modules/Packages/USER-SDPD.cmake b/cmake/Modules/Packages/USER-SDPD.cmake new file mode 100644 index 0000000000..530dcf2bd9 --- /dev/null +++ b/cmake/Modules/Packages/USER-SDPD.cmake @@ -0,0 +1,13 @@ +# Fix rigid/meso requires RIGID to be installed +if(PKG_USER-SDPD) + set(USER-SDPD_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-SDPD) + + get_property(hlist GLOBAL PROPERTY FIX) + if(NOT PKG_RIGID) + list(REMOVE_ITEM hlist ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.h) + list(REMOVE_ITEM LIB_SOURCES ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.cpp) + endif() + set_property(GLOBAL PROPERTY FIX "${hlist}") + + include_directories(${USER-SDPD_SOURCES_DIR}) +endif() diff --git a/cmake/Modules/Packages/USER-SMD.cmake b/cmake/Modules/Packages/USER-SMD.cmake new file mode 100644 index 0000000000..a868918e37 --- /dev/null +++ b/cmake/Modules/Packages/USER-SMD.cmake @@ -0,0 +1,28 @@ +if(PKG_USER-SMD) + find_package(Eigen3 NO_MODULE) + if(EIGEN3_FOUND) + set(DOWNLOAD_EIGEN3_DEFAULT OFF) + else() + set(DOWNLOAD_EIGEN3_DEFAULT ON) + endif() + option(DOWNLOAD_EIGEN3 "Download Eigen3 instead of using an already 
installed one)" ${DOWNLOAD_EIGEN3_DEFAULT}) + if(DOWNLOAD_EIGEN3) + message(STATUS "Eigen3 download requested - we will build our own") + include(ExternalProject) + ExternalProject_Add(Eigen3_build + URL http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz + URL_MD5 f2a417d083fe8ca4b8ed2bc613d20f07 + CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" + ) + ExternalProject_get_property(Eigen3_build SOURCE_DIR) + set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR}) + list(APPEND LAMMPS_DEPS Eigen3_build) + else() + find_package(Eigen3 NO_MODULE) + mark_as_advanced(Eigen3_DIR) + if(NOT EIGEN3_FOUND) + message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_EIGEN3=ON to download it") + endif() + endif() + include_directories(${EIGEN3_INCLUDE_DIR}) +endif() diff --git a/cmake/Modules/Packages/USER-VTK.cmake b/cmake/Modules/Packages/USER-VTK.cmake new file mode 100644 index 0000000000..d264577ca2 --- /dev/null +++ b/cmake/Modules/Packages/USER-VTK.cmake @@ -0,0 +1,6 @@ +if(PKG_USER-VTK) + find_package(VTK REQUIRED NO_MODULE) + include(${VTK_USE_FILE}) + add_definitions(-DLAMMPS_VTK) + list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES}) +endif() diff --git a/cmake/Modules/Packages/VORONOI.cmake b/cmake/Modules/Packages/VORONOI.cmake new file mode 100644 index 0000000000..df4551b6e7 --- /dev/null +++ b/cmake/Modules/Packages/VORONOI.cmake @@ -0,0 +1,38 @@ +if(PKG_VORONOI) + find_package(VORO) + if(VORO_FOUND) + set(DOWNLOAD_VORO_DEFAULT OFF) + else() + set(DOWNLOAD_VORO_DEFAULT ON) + endif() + option(DOWNLOAD_VORO "Download and compile the Voro++ library instead of using an already installed one" ${DOWNLOAD_VORO_DEFAULT}) + if(DOWNLOAD_VORO) + message(STATUS "Voro++ download requested - we will build our own") + include(ExternalProject) + + if(BUILD_SHARED_LIBS) + set(VORO_BUILD_CFLAGS "${CMAKE_SHARED_LIBRARY_CXX_FLAGS} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}") + else() + set(VORO_BUILD_CFLAGS "${CMAKE_CXX_FLAGS} 
${CMAKE_CXX_FLAGS_${BTYPE}}") + endif() + # CMAKE_CXX_FLAGS is already part of VORO_BUILD_CFLAGS (set above); do not append it again: string(APPEND) adds no separator and would fuse it onto the last flag + set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS}) + + ExternalProject_Add(voro_build + URL https://download.lammps.org/thirdparty/voro++-0.4.6.tar.gz + URL_MD5 2338b824c3b7b25590e18e8df5d68af9 + CONFIGURE_COMMAND "" BUILD_COMMAND make ${VORO_BUILD_OPTIONS} BUILD_IN_SOURCE 1 INSTALL_COMMAND "" + ) + ExternalProject_get_property(voro_build SOURCE_DIR) + set(VORO_LIBRARIES ${SOURCE_DIR}/src/libvoro++.a) + set(VORO_INCLUDE_DIRS ${SOURCE_DIR}/src) + list(APPEND LAMMPS_DEPS voro_build) + else() + find_package(VORO) + if(NOT VORO_FOUND) + message(FATAL_ERROR "Voro++ library not found. Help CMake to find it by setting VORO_LIBRARY and VORO_INCLUDE_DIR, or set DOWNLOAD_VORO=ON to download it") + endif() + endif() + include_directories(${VORO_INCLUDE_DIRS}) + list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES}) +endif() diff --git a/cmake/Modules/Testing.cmake b/cmake/Modules/Testing.cmake new file mode 100644 index 0000000000..0eeef00fe9 --- /dev/null +++ b/cmake/Modules/Testing.cmake @@ -0,0 +1,53 @@ +############################################################################### +# Testing +############################################################################### +option(ENABLE_TESTING "Enable testing" OFF) +if(ENABLE_TESTING AND BUILD_EXE) + enable_testing() + # option() only creates BOOL cache entries, so the source path and the git tag are declared as typed cache variables + set(LAMMPS_TESTING_SOURCE_DIR "" CACHE PATH "Location of lammps-testing source directory") + set(LAMMPS_TESTING_GIT_TAG "master" CACHE STRING "Git tag of lammps-testing") + mark_as_advanced(LAMMPS_TESTING_SOURCE_DIR LAMMPS_TESTING_GIT_TAG) + + if (CMAKE_VERSION VERSION_GREATER "3.10.3" AND NOT LAMMPS_TESTING_SOURCE_DIR) + include(FetchContent) + + FetchContent_Declare(lammps-testing + GIT_REPOSITORY https://github.com/lammps/lammps-testing.git + GIT_TAG ${LAMMPS_TESTING_GIT_TAG} + ) + + FetchContent_GetProperties(lammps-testing) + if(NOT lammps-testing_POPULATED) + message(STATUS "Downloading tests...") +
FetchContent_Populate(lammps-testing) + endif() + + set(LAMMPS_TESTING_SOURCE_DIR ${lammps-testing_SOURCE_DIR}) + elseif(NOT LAMMPS_TESTING_SOURCE_DIR) + message(WARNING "Full test-suite requires CMake >= 3.11 or copy of\n" + "https://github.com/lammps/lammps-testing in LAMMPS_TESTING_SOURCE_DIR") + endif() + + add_test(ShowHelp ${CMAKE_BINARY_DIR}/${LAMMPS_BINARY} -help) + + if(EXISTS ${LAMMPS_TESTING_SOURCE_DIR}) + message(STATUS "Running test discovery...") + + file(GLOB_RECURSE TEST_SCRIPTS ${LAMMPS_TESTING_SOURCE_DIR}/tests/core/*/in.*) + foreach(script_path ${TEST_SCRIPTS}) + get_filename_component(TEST_NAME ${script_path} EXT) + get_filename_component(SCRIPT_NAME ${script_path} NAME) + get_filename_component(PARENT_DIR ${script_path} DIRECTORY) + string(SUBSTRING ${TEST_NAME} 1 -1 TEST_NAME) + string(REPLACE "-" "_" TEST_NAME ${TEST_NAME}) + string(REPLACE "+" "_" TEST_NAME ${TEST_NAME}) + set(TEST_NAME "test_core_${TEST_NAME}_serial") + add_test(${TEST_NAME} ${CMAKE_BINARY_DIR}/${LAMMPS_BINARY} -in ${SCRIPT_NAME}) + set_tests_properties(${TEST_NAME} PROPERTIES WORKING_DIRECTORY ${PARENT_DIR}) + endforeach() + list(LENGTH TEST_SCRIPTS NUM_TESTS) + + message(STATUS "Found ${NUM_TESTS} tests.") + endif() +endif() diff --git a/doc/src/Speed_kokkos.txt b/doc/src/Speed_kokkos.txt index 23155cd540..fd33491253 100644 --- a/doc/src/Speed_kokkos.txt +++ b/doc/src/Speed_kokkos.txt @@ -46,7 +46,7 @@ software version 7.5 or later must be installed on your system. See the discussion for the "GPU package"_Speed_gpu.html for details of how to check and do this. -NOTE: Kokkos with CUDA currently implicitly assumes, that the MPI +NOTE: Kokkos with CUDA currently implicitly assumes that the MPI library is CUDA-aware and has support for GPU-direct. This is not always the case, especially when using pre-compiled MPI libraries provided by a Linux distribution. This is not a problem when using @@ -207,19 +207,21 @@ supports. 
[Running on GPUs:] -Use the "-k" "command-line switch"_Run_options.html to -specify the number of GPUs per node. Typically the -np setting of the -mpirun command should set the number of MPI tasks/node to be equal to -the number of physical GPUs on the node. You can assign multiple MPI -tasks to the same GPU with the KOKKOS package, but this is usually -only faster if significant portions of the input script have not -been ported to use Kokkos. Using CUDA MPS is recommended in this -scenario. Using a CUDA-aware MPI library with support for GPU-direct -is highly recommended. GPU-direct use can be avoided by using -"-pk kokkos gpu/direct no"_package.html. -As above for multi-core CPUs (and no GPU), if N is the number of -physical cores/node, then the number of MPI tasks/node should not -exceed N. +Use the "-k" "command-line switch"_Run_options.html to specify the +number of GPUs per node. Typically the -np setting of the mpirun command +should set the number of MPI tasks/node to be equal to the number of +physical GPUs on the node. You can assign multiple MPI tasks to the same +GPU with the KOKKOS package, but this is usually only faster if some +portions of the input script have not been ported to use Kokkos. In this +case, packing/unpacking communication buffers on the host may also give a +speedup (see the KOKKOS "package"_package.html command). Using CUDA MPS +is recommended in this scenario. + +Using a CUDA-aware MPI library with +support for GPU-direct is highly recommended. GPU-direct use can be +avoided by using "-pk kokkos gpu/direct no"_package.html. As above for +multi-core CPUs (and no GPU), if N is the number of physical cores/node, +then the number of MPI tasks/node should not exceed N. 
-k on g Ng :pre diff --git a/doc/src/package.txt b/doc/src/package.txt index 9e32afae0a..dfdecab0a5 100644 --- a/doc/src/package.txt +++ b/doc/src/package.txt @@ -64,13 +64,16 @@ args = arguments specific to the style :l {no_affinity} values = none {kokkos} args = keyword value ... zero or more keyword/value pairs may be appended - keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct} + keywords = {neigh} or {neigh/qeq} or {neigh/thread} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct} {neigh} value = {full} or {half} full = full neighbor list half = half neighbor list built in thread-safe manner {neigh/qeq} value = {full} or {half} full = full neighbor list half = half neighbor list built in thread-safe manner + {neigh/thread} value = {off} or {on} + off = thread only over atoms + on = thread over both atoms and neighbors {newton} = {off} or {on} off = set Newton pairwise and bonded flags off on = set Newton pairwise and bonded flags on @@ -442,7 +445,19 @@ running on CPUs, a {half} neighbor list is the default because it are often faster, just as it is for non-accelerated pair styles. Similarly, the {neigh/qeq} keyword determines how neighbor lists are built for "fix qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of -{neigh/qeq} will match {neigh}. +{neigh/qeq} will match {neigh}. + +If the {neigh/thread} keyword is set to {off}, then the KOKKOS package +threads only over atoms. However, for small systems, this may not expose +enough parallelism to keep a GPU busy. When this keyword is set to {on}, +the KOKKOS package threads over both atoms and neighbors of atoms. When +using {neigh/thread} {on}, a full neighbor list must also be used. 
Using +{neigh/thread} {on} may be slower for large systems, so this option +is turned on by default only when there are 16K atoms or less owned by +an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled +potentials support this keyword yet; those that do not support it thread only over atoms. Many +simple pair-wise potentials such as Lennard-Jones do support threading +over both atoms and neighbors. The {newton} keyword sets the Newton flags for pairwise and bonded interactions to {off} or {on}, the same as the "newton"_newton.html @@ -475,10 +490,10 @@ are rebuilt. The data is only for atoms that migrate to new processors. "Forward" communication happens every timestep. "Reverse" communication happens every timestep if the {newton} option is on. The data is for atom coordinates and any other atom properties that needs to be updated -for ghost atoms owned by each processor. +for ghost atoms owned by each processor. The {comm} keyword is simply a short-cut to set the same value for both -the {comm/exchange} and {comm/forward} and {comm/reverse} keywords. +the {comm/exchange} and {comm/forward} and {comm/reverse} keywords. The value options for all 3 keywords are {no} or {host} or {device}. A value of {no} means to use the standard non-KOKKOS method of @@ -486,26 +501,26 @@ packing/unpacking data for the communication. A value of {host} means to use the host, typically a multi-core CPU, and perform the packing/unpacking in parallel with threads. A value of {device} means to use the device, typically a GPU, to perform the packing/unpacking -operation. +operation. The optimal choice for these keywords depends on the input script and the hardware used. The {no} value is useful for verifying that the Kokkos-based {host} and {device} values are working correctly. It is the -default when running on CPUs since it is usually the fastest. +default when running on CPUs since it is usually the fastest. 
When running on CPUs or Xeon Phi, the {host} and {device} values work identically. When using GPUs, the {device} value is the default since it will typically be optimal if all of your styles used in your input script are supported by the KOKKOS package. In this case data can stay on the GPU for many timesteps without being moved between the host and -GPU, if you use the {device} value. This requires that your MPI is able -to access GPU memory directly. Currently that is true for OpenMPI 1.8 -(or later versions), Mvapich2 1.9 (or later), and CrayMPI. If your -script uses styles (e.g. fixes) which are not yet supported by the -KOKKOS package, then data has to be move between the host and device -anyway, so it is typically faster to let the host handle communication, -by using the {host} value. Using {host} instead of {no} will enable use -of multiple threads to pack/unpack communicated data. +GPU, if you use the {device} value. If your script uses styles (e.g. +fixes) which are not yet supported by the KOKKOS package, then data has +to be moved between the host and device anyway, so it is typically faster +to let the host handle communication, by using the {host} value. Using +{host} instead of {no} will enable use of multiple threads to +pack/unpack communicated data. When running small systems on a GPU, +performing the exchange pack/unpack on the host CPU can give a speedup +since it reduces the number of CUDA kernel launches. The {gpu/direct} keyword chooses whether GPU-direct will be used. When this keyword is set to {on}, buffers in GPU memory are passed directly @@ -518,7 +533,8 @@ the {gpu/direct} keyword is automatically set to {off} by default. When the {gpu/direct} keyword is set to {off} while any of the {comm} keywords are set to {device}, the value for these {comm} keywords will be automatically changed to {host}. This setting has no effect if not -running on GPUs. +running on GPUs. 
GPU-direct is available for OpenMPI 1.8 (or later +versions), Mvapich2 1.9 (or later), and CrayMPI. :line @@ -630,11 +646,12 @@ neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default value, comm = device, gpu/direct = on. When LAMMPS can safely detect that GPU-direct is not available, the default value of gpu/direct becomes "off". For CPUs or Xeon Phis, the option defaults are neigh = -half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. These -settings are made automatically by the required "-k on" "command-line -switch"_Run_options.html. You can change them by using the package -kokkos command in your input script or via the "-pk kokkos command-line -switch"_Run_options.html. +half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The +option neigh/thread = on when there are 16K atoms or less on an MPI +rank, otherwise it is "off". These settings are made automatically by +the required "-k on" "command-line switch"_Run_options.html. You can +change them by using the package kokkos command in your input script or +via the "-pk kokkos command-line switch"_Run_options.html. For the OMP package, the default is Nthreads = 0 and the option defaults are neigh = yes. 
These settings are made automatically if diff --git a/src/KOKKOS/atom_kokkos.cpp b/src/KOKKOS/atom_kokkos.cpp index b54719e852..813c5ddbf2 100644 --- a/src/KOKKOS/atom_kokkos.cpp +++ b/src/KOKKOS/atom_kokkos.cpp @@ -22,6 +22,7 @@ #include "memory_kokkos.h" #include "error.h" #include "kokkos.h" +#include "atom_masks.h" using namespace LAMMPS_NS; @@ -270,8 +271,10 @@ int AtomKokkos::add_custom(const char *name, int flag) int n = strlen(name) + 1; dname[index] = new char[n]; strcpy(dname[index],name); + this->sync(Device,DVECTOR_MASK); memoryKK->grow_kokkos(k_dvector,dvector,ndvector,nmax, "atom:dvector"); + this->modified(Device,DVECTOR_MASK); } return index; diff --git a/src/KOKKOS/atom_vec_angle_kokkos.cpp b/src/KOKKOS/atom_vec_angle_kokkos.cpp index 352fec57fb..e4f27e733a 100644 --- a/src/KOKKOS/atom_vec_angle_kokkos.cpp +++ b/src/KOKKOS/atom_vec_angle_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -59,14 +59,15 @@ AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecAngleKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -98,7 +99,7 @@ void AtomVecAngleKokkos::grow(int n) "atom:angle_atom3"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -282,7 +283,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel 
if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackComm f(atomKK->k_x,buf,list,iswap, @@ -309,7 +310,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n, } } } else { - sync(Device,X_MASK); + atomKK->sync(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackComm f(atomKK->k_x,buf,list,iswap, @@ -397,8 +398,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + atomKK->modified(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackCommSelf @@ -429,8 +430,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li } } } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecAngleKokkos_PackCommSelf @@ -493,13 +494,13 @@ struct AtomVecAngleKokkos_UnpackComm { void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + atomKK->modified(Host,X_MASK); struct AtomVecAngleKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); struct AtomVecAngleKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } @@ -642,7 +643,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int m = 0; const int last 
= first + n; @@ -659,7 +660,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf) void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf) { if(n > 0) - modified(Host,F_MASK); + atomKK->modified(Host,F_MASK); int m = 0; for (int i = 0; i < n; i++) { @@ -960,9 +961,9 @@ struct AtomVecAngleKokkos_UnpackBorder { void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); if(space==Host) { struct AtomVecAngleKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); @@ -984,7 +985,7 @@ void AtomVecAngleKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1010,7 +1011,7 @@ void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1412,7 +1413,7 @@ int AtomVecAngleKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK 
| SPECIAL_MASK); @@ -1487,7 +1488,7 @@ int AtomVecAngleKokkos::size_restart() int AtomVecAngleKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | SPECIAL_MASK); @@ -1541,7 +1542,7 @@ int AtomVecAngleKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | SPECIAL_MASK); diff --git a/src/KOKKOS/atom_vec_atomic_kokkos.cpp b/src/KOKKOS/atom_vec_atomic_kokkos.cpp index 6aba49e5f3..95e4ddd72b 100644 --- a/src/KOKKOS/atom_vec_atomic_kokkos.cpp +++ b/src/KOKKOS/atom_vec_atomic_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -55,14 +55,15 @@ AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecAtomicKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -74,7 +75,7 @@ void AtomVecAtomicKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -393,9 +394,9 @@ struct 
AtomVecAtomicKokkos_UnpackBorder { void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); if(space==Host) { struct AtomVecAtomicKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,first); Kokkos::parallel_for(n,f); @@ -415,7 +416,7 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -440,7 +441,7 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -668,7 +669,7 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK); int m = 1; @@ -720,7 +721,7 @@ int AtomVecAtomicKokkos::size_restart() int AtomVecAtomicKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK ); int m = 1; @@ -755,7 +756,7 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); 
} - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK ); int m = 1; diff --git a/src/KOKKOS/atom_vec_bond_kokkos.cpp b/src/KOKKOS/atom_vec_bond_kokkos.cpp index 2945f45ab3..92311d5d09 100644 --- a/src/KOKKOS/atom_vec_bond_kokkos.cpp +++ b/src/KOKKOS/atom_vec_bond_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -58,14 +58,15 @@ AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecBondKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -84,7 +85,7 @@ void AtomVecBondKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atomKK->nextra_grow; iextra++) @@ -468,9 +469,9 @@ struct AtomVecBondKokkos_UnpackBorder { void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); if(space==Host) { struct 
AtomVecBondKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); @@ -492,7 +493,7 @@ void AtomVecBondKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -518,7 +519,7 @@ void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -866,7 +867,7 @@ int AtomVecBondKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK); int k; @@ -934,7 +935,7 @@ int AtomVecBondKokkos::size_restart() int AtomVecBondKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK); int m = 1; buf[m++] = h_x(i,0); @@ -978,7 +979,7 @@ int AtomVecBondKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK); int m = 1; h_x(nlocal,0) = buf[m++]; diff --git a/src/KOKKOS/atom_vec_charge_kokkos.cpp b/src/KOKKOS/atom_vec_charge_kokkos.cpp index 
11e46d1274..31a690f521 100644 --- a/src/KOKKOS/atom_vec_charge_kokkos.cpp +++ b/src/KOKKOS/atom_vec_charge_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -58,14 +58,15 @@ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecChargeKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -79,7 +80,7 @@ void AtomVecChargeKokkos::grow(int n) memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -494,7 +495,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first, f(buf.view(),d_x,d_tag,d_type,d_mask,d_q,first); Kokkos::parallel_for(n,f); } - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); } /* ---------------------------------------------------------------------- */ @@ -510,7 +511,7 @@ void AtomVecChargeKokkos::unpack_border(int n, int first, double *buf) if (i == nmax) { grow(0); } - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -536,7 +537,7 @@ void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) 
{ if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -797,7 +798,7 @@ int AtomVecChargeKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK); int m = 1; @@ -850,7 +851,7 @@ int AtomVecChargeKokkos::size_restart() int AtomVecChargeKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK); int m = 1; @@ -888,7 +889,7 @@ int AtomVecChargeKokkos::unpack_restart(double *buf) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK); int m = 1; diff --git a/src/KOKKOS/atom_vec_dpd_kokkos.cpp b/src/KOKKOS/atom_vec_dpd_kokkos.cpp index 30db76e723..4034efee9e 100644 --- a/src/KOKKOS/atom_vec_dpd_kokkos.cpp +++ b/src/KOKKOS/atom_vec_dpd_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -60,14 +60,15 @@ AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecDPDKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); 
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -93,7 +94,7 @@ void AtomVecDPDKokkos::grow(int n) modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); } /* ---------------------------------------------------------------------- @@ -158,7 +159,7 @@ void AtomVecDPDKokkos::grow_reset() void AtomVecDPDKokkos::copy(int i, int j, int delflag) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); @@ -184,7 +185,7 @@ void AtomVecDPDKokkos::copy(int i, int j, int delflag) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); @@ -268,7 +269,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, @@ -303,7 +304,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n, } } } else { - sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackComm f(atomKK->k_x, @@ -410,8 +411,8 @@ struct AtomVecDPDKokkos_PackCommSelf { int AtomVecDPDKokkos::pack_comm_self(const int &n, const 
DAT::tdual_int_2d &list, const int & iswap, const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, @@ -446,8 +447,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list } } } else { - sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecDPDKokkos_PackCommSelf f(atomKK->k_x, @@ -528,15 +529,15 @@ struct AtomVecDPDKokkos_UnpackComm { void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); Kokkos::parallel_for(n,f); } else { - sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); - modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + 
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); struct AtomVecDPDKokkos_UnpackComm f(atomKK->k_x, atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem, buf,first); @@ -552,7 +553,7 @@ int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf, int i,j,m; double dx,dy,dz; - sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); m = 0; if (pbc_flag == 0) { @@ -598,7 +599,7 @@ int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; - sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); m = 0; if (pbc_flag == 0) { @@ -685,7 +686,7 @@ void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf) h_uChem[i] = buf[m++]; } - modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); } /* ---------------------------------------------------------------------- */ @@ -709,7 +710,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) h_uChem[i] = buf[m++]; } - modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); + atomKK->modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK); } /* ---------------------------------------------------------------------- */ @@ -717,7 +718,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int m = 0; const int last = first + n; @@ -734,8 +735,8 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf) void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf) { if(n > 0) { - sync(Host,F_MASK); - modified(Host,F_MASK); + 
atomKK->sync(Host,F_MASK); + atomKK->modified(Host,F_MASK); } int m = 0; @@ -819,7 +820,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA { X_FLOAT dx,dy,dz; - sync(space,ALL_MASK); + atomKK->sync(space,ALL_MASK); if (pbc_flag != 0) { if (domain->triclinic == 0) { @@ -876,7 +877,7 @@ int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf, int i,j,m; double dx,dy,dz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -937,7 +938,7 @@ int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -1032,7 +1033,7 @@ int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf) { int i,j,m; - sync(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK); m = 0; @@ -1052,7 +1053,7 @@ int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf) { int i,j,m; - sync(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); m = 0; @@ -1127,11 +1128,11 @@ struct AtomVecDPDKokkos_UnpackBorder { void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); if(space==Host) { @@ -1179,7 +1180,7 @@ void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - 
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); } @@ -1217,7 +1218,7 @@ void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK| UCG_MASK|UCGNEW_MASK|DVECTOR_MASK); } @@ -1237,7 +1238,7 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf) h_uChem(i) = buf[m++]; } - modified(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK ); return m; @@ -1260,7 +1261,7 @@ int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf) h_uCGnew(i) = buf[m++]; } - modified(Host,DPDTHETA_MASK | UCOND_MASK | + atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK); return m; @@ -1384,7 +1385,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d int newsize = nsend*17/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } - sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -1402,7 +1403,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d int AtomVecDPDKokkos::pack_exchange(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -1518,7 +1519,7 @@ int 
AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre k_count.sync(); } - modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -1556,7 +1557,7 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf) m += modify->fix[atom->extra_grow[iextra]]-> unpack_exchange(nlocal,&buf[m]); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK | DVECTOR_MASK); @@ -1593,7 +1594,7 @@ int AtomVecDPDKokkos::size_restart() int AtomVecDPDKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); @@ -1658,7 +1659,7 @@ int AtomVecDPDKokkos::unpack_restart(double *buf) for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | DPDTHETA_MASK | UCG_MASK | UCGNEW_MASK | UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK); diff --git a/src/KOKKOS/atom_vec_full_kokkos.cpp b/src/KOKKOS/atom_vec_full_kokkos.cpp index a6ae1e0ccc..034da88f73 100644 --- a/src/KOKKOS/atom_vec_full_kokkos.cpp +++ b/src/KOKKOS/atom_vec_full_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -58,14 +58,15 @@ AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecFullKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n 
== 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -123,7 +124,7 @@ void AtomVecFullKokkos::grow(int n) atomKK->improper_per_atom,"atom:improper_atom4"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -608,9 +609,9 @@ struct AtomVecFullKokkos_UnpackBorder { void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); if(space==Host) { struct AtomVecFullKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first); @@ -632,7 +633,7 @@ void AtomVecFullKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -659,7 +660,7 @@ void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); + 
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1204,7 +1205,7 @@ int AtomVecFullKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1297,7 +1298,7 @@ int AtomVecFullKokkos::size_restart() int AtomVecFullKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1370,10 +1371,10 @@ int AtomVecFullKokkos::unpack_restart(double *buf) if (atom->nextra_store) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); diff --git a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp index ce36f59053..03cbe1ee5e 100644 --- a/src/KOKKOS/atom_vec_hybrid_kokkos.cpp +++ b/src/KOKKOS/atom_vec_hybrid_kokkos.cpp @@ -307,7 +307,7 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); int i,j,k,m; 
double dx,dy,dz; @@ -351,7 +351,7 @@ int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf, int AtomVecHybridKokkos::pack_comm_vel(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); int i,j,k,m; double dx,dy,dz,dvx,dvy,dvz; @@ -463,7 +463,7 @@ void AtomVecHybridKokkos::unpack_comm(int n, int first, double *buf) h_x(i,2) = buf[m++]; } - modified(Host,X_MASK); + atomKK->modified(Host,X_MASK); // unpack sub-style contributions as contiguous chunks @@ -500,7 +500,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf) } } - modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); // unpack sub-style contributions as contiguous chunks @@ -512,7 +512,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecHybridKokkos::pack_reverse(int n, int first, double *buf) { - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int i,k,m,last; @@ -546,7 +546,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf) h_f(j,2) += buf[m++]; } - modified(Host,F_MASK); + atomKK->modified(Host,F_MASK); // unpack sub-style contributions as contiguous chunks @@ -559,7 +559,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf) int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); int i,j,k,m; double dx,dy,dz; @@ -613,7 +613,7 @@ int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf, int AtomVecHybridKokkos::pack_border_vel(int n, int *list, double *buf, int pbc_flag, int *pbc) { - sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + 
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); int i,j,k,m; double dx,dy,dz,dvx,dvy,dvz; int omega_flag = atom->omega_flag; @@ -741,7 +741,7 @@ void AtomVecHybridKokkos::unpack_border(int n, int first, double *buf) h_mask[i] = (int) ubuf(buf[m++]).i; } - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK); // unpack sub-style contributions as contiguous chunks @@ -787,7 +787,7 @@ void AtomVecHybridKokkos::unpack_border_vel(int n, int first, double *buf) } } - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); // unpack sub-style contributions as contiguous chunks @@ -969,7 +969,7 @@ void AtomVecHybridKokkos::create_atom(int itype, double *coord) void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **values) { - sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); @@ -1000,7 +1000,7 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val h_angmom(nlocal,2) = 0.0; } - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/); // each sub-style parses sub-style specific values @@ -1017,13 +1017,13 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val void AtomVecHybridKokkos::data_vel(int m, char **values) { - sync(Host,V_MASK); + atomKK->sync(Host,V_MASK); h_v(m,0) = atof(values[0]); h_v(m,1) = atof(values[1]); h_v(m,2) = atof(values[2]); - modified(Host,V_MASK); + atomKK->modified(Host,V_MASK); // each sub-style parses 
sub-style specific values @@ -1038,7 +1038,7 @@ void AtomVecHybridKokkos::data_vel(int m, char **values) void AtomVecHybridKokkos::pack_data(double **buf) { - sync(Host,TAG_MASK|TYPE_MASK|X_MASK); + atomKK->sync(Host,TAG_MASK|TYPE_MASK|X_MASK); int k,m; @@ -1089,7 +1089,7 @@ void AtomVecHybridKokkos::write_data(FILE *fp, int n, double **buf) void AtomVecHybridKokkos::pack_vel(double **buf) { - sync(Host,V_MASK); + atomKK->sync(Host,V_MASK); int k,m; diff --git a/src/KOKKOS/atom_vec_kokkos.cpp b/src/KOKKOS/atom_vec_kokkos.cpp index 83af437eba..7d5df17544 100644 --- a/src/KOKKOS/atom_vec_kokkos.cpp +++ b/src/KOKKOS/atom_vec_kokkos.cpp @@ -267,6 +267,114 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c return n*3; } + +/* ---------------------------------------------------------------------- */ + +template +struct AtomVecKokkos_PackCommSelfFused { + typedef DeviceType device_type; + + typename ArrayTypes::t_x_array_randomread _x; + typename ArrayTypes::t_x_array _xw; + typename ArrayTypes::t_int_2d_const _list; + typename ArrayTypes::t_int_2d_const _pbc; + typename ArrayTypes::t_int_1d_const _pbc_flag; + typename ArrayTypes::t_int_1d_const _firstrecv; + typename ArrayTypes::t_int_1d_const _sendnum_scan; + typename ArrayTypes::t_int_1d_const _g2l; + X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz; + + AtomVecKokkos_PackCommSelfFused( + const typename DAT::tdual_x_array &x, + const typename DAT::tdual_int_2d &list, + const typename DAT::tdual_int_2d &pbc, + const typename DAT::tdual_int_1d &pbc_flag, + const typename DAT::tdual_int_1d &firstrecv, + const typename DAT::tdual_int_1d &sendnum_scan, + const typename DAT::tdual_int_1d &g2l, + const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd, + const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz): + _x(x.view()),_xw(x.view()), + _list(list.view()), + _pbc(pbc.view()), + _pbc_flag(pbc_flag.view()), + _firstrecv(firstrecv.view()), + _sendnum_scan(sendnum_scan.view()), + 
_g2l(g2l.view()), + _xprd(xprd),_yprd(yprd),_zprd(zprd), + _xy(xy),_xz(xz),_yz(yz) {}; + + KOKKOS_INLINE_FUNCTION + void operator() (const int& ii) const { + + int iswap = 0; + while (ii >= _sendnum_scan[iswap]) iswap++; + int i = ii; + if (iswap > 0) + i = ii - _sendnum_scan[iswap-1]; + + const int _nfirst = _firstrecv[iswap]; + const int nlocal = _firstrecv[0]; + + int j = _list(iswap,i); + if (j >= nlocal) + j = _g2l(j-nlocal); + + if (_pbc_flag(ii) == 0) { + _xw(i+_nfirst,0) = _x(j,0); + _xw(i+_nfirst,1) = _x(j,1); + _xw(i+_nfirst,2) = _x(j,2); + } else { + if (TRICLINIC == 0) { + _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + } else { + _xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz; + _xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz; + _xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd; + } + } + } +}; + +/* ---------------------------------------------------------------------- */ + +int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l) { + if(commKK->forward_comm_on_host) { + sync(Host,X_MASK); + modified(Host,X_MASK); + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } + } else { + sync(Device,X_MASK); + modified(Device,X_MASK); + if(domain->triclinic) { + struct AtomVecKokkos_PackCommSelfFused 
f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } else { + struct AtomVecKokkos_PackCommSelfFused f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l, + domain->xprd,domain->yprd,domain->zprd, + domain->xy,domain->xz,domain->yz); + Kokkos::parallel_for(n,f); + } + } + return n*3; +} + /* ---------------------------------------------------------------------- */ template diff --git a/src/KOKKOS/atom_vec_kokkos.h b/src/KOKKOS/atom_vec_kokkos.h index 58f21b9cd4..0474a2380a 100644 --- a/src/KOKKOS/atom_vec_kokkos.h +++ b/src/KOKKOS/atom_vec_kokkos.h @@ -51,6 +51,14 @@ class AtomVecKokkos : public AtomVec { const int & iswap, const int nfirst, const int &pbc_flag, const int pbc[]); + virtual int + pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, + const DAT::tdual_int_1d &sendnum_scan, + const DAT::tdual_int_1d &firstrecv, + const DAT::tdual_int_1d &pbc_flag, + const DAT::tdual_int_2d &pbc, + const DAT::tdual_int_1d &g2l); + virtual int pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list, const int & iswap, const DAT::tdual_xfloat_2d &buf, diff --git a/src/KOKKOS/atom_vec_molecular_kokkos.cpp b/src/KOKKOS/atom_vec_molecular_kokkos.cpp index 9537320976..9ac8ecd264 100644 --- a/src/KOKKOS/atom_vec_molecular_kokkos.cpp +++ b/src/KOKKOS/atom_vec_molecular_kokkos.cpp @@ -24,7 +24,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 /* ---------------------------------------------------------------------- */ @@ -58,14 +58,15 @@ AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp) void AtomVecMolecularKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atomKK->nmax = nmax; if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - 
modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -121,7 +122,7 @@ void AtomVecMolecularKokkos::grow(int n) atomKK->improper_per_atom,"atom:improper_atom4"); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); if (atom->nextra_grow) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) @@ -361,7 +362,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n, // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackComm @@ -388,7 +389,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n, } } } else { - sync(Device,X_MASK); + atomKK->sync(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackComm @@ -477,8 +478,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d const int nfirst, const int &pbc_flag, const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + atomKK->modified(Host,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackCommSelf @@ -505,8 +506,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d } } } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecMolecularKokkos_PackCommSelf @@ -565,13 +566,13 @@ struct AtomVecMolecularKokkos_UnpackComm { void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK); - modified(Host,X_MASK); + atomKK->sync(Host,X_MASK); + 
atomKK->modified(Host,X_MASK); struct AtomVecMolecularKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } else { - sync(Device,X_MASK); - modified(Device,X_MASK); + atomKK->sync(Device,X_MASK); + atomKK->modified(Device,X_MASK); struct AtomVecMolecularKokkos_UnpackComm f(atomKK->k_x,buf,first); Kokkos::parallel_for(n,f); } @@ -714,7 +715,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf) int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK); + atomKK->sync(Host,F_MASK); int m = 0; const int last = first + n; @@ -731,7 +732,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf) void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf) { if(n > 0) - modified(Host,F_MASK); + atomKK->modified(Host,F_MASK); int m = 0; for (int i = 0; i < n; i++) { @@ -1032,9 +1033,9 @@ struct AtomVecMolecularKokkos_UnpackBorder { void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first, const DAT::tdual_xfloat_2d &buf, ExecutionSpace space) { - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); while (first+n >= nmax) grow(0); - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); if(space==Host) { struct AtomVecMolecularKokkos_UnpackBorder f(buf.view(),h_x,h_tag,h_type,h_mask,h_molecule,first); @@ -1056,7 +1057,7 @@ void AtomVecMolecularKokkos::unpack_border(int n, int first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1082,7 +1083,7 @@ void AtomVecMolecularKokkos::unpack_border_vel(int n, int 
first, double *buf) last = first + n; for (i = first; i < last; i++) { if (i == nmax) grow(0); - modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); + atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK); h_x(i,0) = buf[m++]; h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; @@ -1615,7 +1616,7 @@ int AtomVecMolecularKokkos::unpack_exchange(double *buf) { int nlocal = atom->nlocal; if (nlocal == nmax) grow(0); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1707,7 +1708,7 @@ int AtomVecMolecularKokkos::size_restart() int AtomVecMolecularKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); @@ -1780,7 +1781,7 @@ int AtomVecMolecularKokkos::unpack_restart(double *buf) memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra"); } - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK); diff --git a/src/KOKKOS/atom_vec_sphere_kokkos.cpp b/src/KOKKOS/atom_vec_sphere_kokkos.cpp index f05e8d09df..9e8388488f 100644 --- a/src/KOKKOS/atom_vec_sphere_kokkos.cpp +++ b/src/KOKKOS/atom_vec_sphere_kokkos.cpp @@ -30,7 +30,7 @@ using namespace LAMMPS_NS; -#define DELTA 10000 +#define DELTA 10 static const double MY_PI = 3.14159265358979323846; // pi @@ -93,14 +93,15 @@ void AtomVecSphereKokkos::init() void AtomVecSphereKokkos::grow(int n) { - if (n == 0) nmax += DELTA; + int step = MAX(DELTA,nmax*0.01); + if (n == 0) nmax += step; else nmax = n; atom->nmax = nmax; 
if (nmax < 0 || nmax > MAXSMALLINT) error->one(FLERR,"Per-processor system is too big"); - sync(Device,ALL_MASK); - modified(Device,ALL_MASK); + atomKK->sync(Device,ALL_MASK); + atomKK->modified(Device,ALL_MASK); memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag"); memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type"); @@ -120,7 +121,7 @@ void AtomVecSphereKokkos::grow(int n) modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax); grow_reset(); - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); } /* ---------------------------------------------------------------------- @@ -172,7 +173,7 @@ void AtomVecSphereKokkos::grow_reset() void AtomVecSphereKokkos::copy(int i, int j, int delflag) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -197,7 +198,7 @@ void AtomVecSphereKokkos::copy(int i, int j, int delflag) for (int iextra = 0; iextra < atom->nextra_grow; iextra++) modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); } @@ -277,7 +278,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos( // Check whether to always run forward communication on the host // Choose correct forward PackComm kernel if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecSphereKokkos_PackComm f( @@ -316,7 +317,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos( } } } else { - sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecSphereKokkos_PackComm f( @@ -464,7 +465,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos( 
const int* const pbc) { if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if(pbc_flag) { if(deform_vremap) { if(domain->triclinic) { @@ -595,7 +596,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos( } } } else { - sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if(pbc_flag) { if(deform_vremap) { if(domain->triclinic) { @@ -795,8 +796,8 @@ int AtomVecSphereKokkos::pack_comm_self( if (radvary == 0) return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc); if(commKK->forward_comm_on_host) { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecSphereKokkos_PackCommSelf f( @@ -835,8 +836,8 @@ int AtomVecSphereKokkos::pack_comm_self( } } } else { - sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); - modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); if(pbc_flag) { if(domain->triclinic) { struct AtomVecSphereKokkos_PackCommSelf f( @@ -926,14 +927,14 @@ void AtomVecSphereKokkos::unpack_comm_kokkos( return; } if(commKK->forward_comm_on_host) { - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); struct AtomVecSphereKokkos_UnpackComm f( atomKK->k_x, atomKK->k_radius,atomKK->k_rmass, buf,first); Kokkos::parallel_for(n,f); } else { - modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK); struct AtomVecSphereKokkos_UnpackComm f( atomKK->k_x, atomKK->k_radius,atomKK->k_rmass, @@ -998,7 +999,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos( const int &n, const int 
&first, const DAT::tdual_xfloat_2d &buf ) { if(commKK->forward_comm_on_host) { - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if (radvary == 0) { struct AtomVecSphereKokkos_UnpackCommVel f( atomKK->k_x, @@ -1015,7 +1016,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos( Kokkos::parallel_for(n,f); } } else { - modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); if (radvary == 0) { struct AtomVecSphereKokkos_UnpackCommVel f( atomKK->k_x, @@ -1044,7 +1045,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf, if (radvary == 0) { // Not sure if we need to call sync for X here - sync(Host,X_MASK); + atomKK->sync(Host,X_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1071,7 +1072,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf, } } } else { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1115,7 +1116,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf, double dx,dy,dz,dvx,dvy,dvz; if (radvary == 0) { - sync(Host,X_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1178,7 +1179,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf, } } } else { - sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); m = 0; if (pbc_flag == 0) { for (i = 0; i < n; i++) { @@ -1257,7 +1258,7 @@ int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf) { if (radvary == 0) return 0; - sync(Host,RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,RADIUS_MASK|RMASS_MASK); int m = 0; for (int i = 0; i < n; i++) { @@ -1280,7 +1281,7 @@ void 
AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf) h_x(i,1) = buf[m++]; h_x(i,2) = buf[m++]; } - modified(Host,X_MASK); + atomKK->modified(Host,X_MASK); } else { int m = 0; const int last = first + n; @@ -1291,7 +1292,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf) h_radius[i] = buf[m++]; h_rmass[i] = buf[m++]; } - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK); } } @@ -1313,7 +1314,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf) h_omega(i,1) = buf[m++]; h_omega(i,2) = buf[m++]; } - modified(Host,X_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK); } else { int m = 0; const int last = first + n; @@ -1330,7 +1331,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf) h_omega(i,1) = buf[m++]; h_omega(i,2) = buf[m++]; } - modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); } } @@ -1346,7 +1347,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf) h_radius[i] = buf[m++]; h_rmass[i] = buf[m++]; } - modified(Host,RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,RADIUS_MASK|RMASS_MASK); return m; } @@ -1355,7 +1356,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf) int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf) { if(n > 0) - sync(Host,F_MASK|TORQUE_MASK); + atomKK->sync(Host,F_MASK|TORQUE_MASK); int m = 0; const int last = first + n; @@ -1375,7 +1376,7 @@ int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf) int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf) { if(n > 0) - sync(Host,TORQUE_MASK); + atomKK->sync(Host,TORQUE_MASK); int m = 0; const int last = first + n; @@ -1392,7 +1393,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf) void 
AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf) { if(n > 0) { - modified(Host,F_MASK|TORQUE_MASK); + atomKK->modified(Host,F_MASK|TORQUE_MASK); } int m = 0; @@ -1412,7 +1413,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf) int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf) { if(n > 0) { - modified(Host,TORQUE_MASK); + atomKK->modified(Host,TORQUE_MASK); } int m = 0; @@ -1492,7 +1493,7 @@ int AtomVecSphereKokkos::pack_border_kokkos( X_FLOAT dx,dy,dz; // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - sync(space,ALL_MASK); + atomKK->sync(space,ALL_MASK); if (pbc_flag != 0) { if (domain->triclinic == 0) { @@ -1549,7 +1550,7 @@ int AtomVecSphereKokkos::pack_border( int i,j,m; double dx,dy,dz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -1686,7 +1687,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos( X_FLOAT dvx=0,dvy=0,dvz=0; // This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec - sync(space,ALL_MASK); + atomKK->sync(space,ALL_MASK); if (pbc_flag != 0) { if (domain->triclinic == 0) { @@ -1776,7 +1777,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf, int i,j,m; double dx,dy,dz,dvx,dvy,dvz; - sync(Host,ALL_MASK); + atomKK->sync(Host,ALL_MASK); m = 0; if (pbc_flag == 0) { @@ -1866,7 +1867,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf, int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf) { - sync(Host,RADIUS_MASK|RMASS_MASK); + atomKK->sync(Host,RADIUS_MASK|RMASS_MASK); int m = 0; for (int i = 0; i < n; i++) { @@ -1942,7 +1943,7 @@ void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first, Kokkos::parallel_for(n,f); } - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| RADIUS_MASK|RMASS_MASK); } @@ -1969,7 +1970,7 @@ void 
AtomVecSphereKokkos::unpack_border(int n, int first, double *buf) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK); } @@ -2052,7 +2053,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos( Kokkos::parallel_for(n,f); } - modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| + atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK| RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); } @@ -2085,7 +2086,7 @@ void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf) m += modify->fix[atom->extra_border[iextra]]-> unpack_border(n,first,&buf[m]); - modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); + atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK); } /* ---------------------------------------------------------------------- */ @@ -2098,7 +2099,7 @@ int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf) h_radius[i] = buf[m++]; h_rmass[i] = buf[m++]; } - modified(Host,RADIUS_MASK|RMASS_MASK); + atomKK->modified(Host,RADIUS_MASK|RMASS_MASK); return m; } @@ -2218,7 +2219,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( int newsize = nsend*17/k_buf.view().extent(1)+1; k_buf.resize(newsize,k_buf.view().extent(1)); } - sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2239,7 +2240,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos( int AtomVecSphereKokkos::pack_exchange(int i, double *buf) { - sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2354,7 +2355,7 @@ int 
AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int k_count.sync(); } - modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2391,7 +2392,7 @@ int AtomVecSphereKokkos::unpack_exchange(double *buf) m += modify->fix[atom->extra_grow[iextra]]-> unpack_exchange(nlocal,&buf[m]); - modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2427,7 +2428,7 @@ int AtomVecSphereKokkos::size_restart() int AtomVecSphereKokkos::pack_restart(int i, double *buf) { - sync(Host,X_MASK | TAG_MASK | TYPE_MASK | + atomKK->sync(Host,X_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | V_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2494,7 +2495,7 @@ int AtomVecSphereKokkos::unpack_restart(double *buf) for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++]; } - modified(Host,X_MASK | TAG_MASK | TYPE_MASK | + atomKK->modified(Host,X_MASK | TAG_MASK | TYPE_MASK | MASK_MASK | IMAGE_MASK | V_MASK | RADIUS_MASK | RMASS_MASK | OMEGA_MASK); @@ -2616,14 +2617,14 @@ int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values) void AtomVecSphereKokkos::data_vel(int m, char **values) { - sync(Host,V_MASK|OMEGA_MASK); + atomKK->sync(Host,V_MASK|OMEGA_MASK); h_v(m,0) = atof(values[0]); h_v(m,1) = atof(values[1]); h_v(m,2) = atof(values[2]); h_omega(m,0) = atof(values[3]); h_omega(m,1) = atof(values[4]); h_omega(m,2) = atof(values[5]); - modified(Host,V_MASK|OMEGA_MASK); + atomKK->modified(Host,V_MASK|OMEGA_MASK); } /* ---------------------------------------------------------------------- @@ -2632,11 +2633,11 @@ void AtomVecSphereKokkos::data_vel(int m, char **values) int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values) { - sync(Host,OMEGA_MASK); + atomKK->sync(Host,OMEGA_MASK); 
omega[m][0] = atof(values[0]); omega[m][1] = atof(values[1]); omega[m][2] = atof(values[2]); - modified(Host,OMEGA_MASK); + atomKK->modified(Host,OMEGA_MASK); return 3; } @@ -2711,7 +2712,7 @@ int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf) void AtomVecSphereKokkos::pack_vel(double **buf) { - sync(Host,TAG_MASK|V_MASK|OMEGA_MASK); + atomKK->sync(Host,TAG_MASK|V_MASK|OMEGA_MASK); int nlocal = atom->nlocal; for (int i = 0; i < nlocal; i++) { @@ -2731,7 +2732,7 @@ void AtomVecSphereKokkos::pack_vel(double **buf) int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf) { - sync(Host,OMEGA_MASK); + atomKK->sync(Host,OMEGA_MASK); buf[0] = h_omega(i,0); buf[1] = h_omega(i,1); diff --git a/src/KOKKOS/comm_kokkos.cpp b/src/KOKKOS/comm_kokkos.cpp index 1d31c07180..87986a9ca9 100644 --- a/src/KOKKOS/comm_kokkos.cpp +++ b/src/KOKKOS/comm_kokkos.cpp @@ -57,10 +57,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp) memory->destroy(buf_recv); buf_recv = NULL; - k_exchange_sendlist = DAT:: - tdual_int_1d("comm:k_exchange_sendlist",100); - k_exchange_copylist = DAT:: - tdual_int_1d("comm:k_exchange_copylist",100); + k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100); + k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); + k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); k_count = DAT::tdual_int_scalar("comm:k_count"); k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100); @@ -187,71 +186,80 @@ void CommKokkos::forward_comm_device(int dummy) k_sendlist.sync(); atomKK->sync(ExecutionSpaceFromDevice::space,X_MASK); - for (int iswap = 0; iswap < nswap; iswap++) { - if (sendproc[iswap] != me) { - if (comm_x_only) { - if (size_forward_recv[iswap]) { - buf = atomKK->k_x.view().data() + - firstrecv[iswap]*atomKK->k_x.view().extent(1); - MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - } - n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, - 
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - if (n) { - MPI_Send(k_buf_send.view().data(), - n,MPI_DOUBLE,sendproc[iswap],0,world); - } + if (comm->nprocs == 1 && !ghost_velocity) { + k_swap.sync(); + k_swap2.sync(); + k_pbc.sync(); + n = avec->pack_comm_self_fused(totalsend,k_sendlist,k_sendnum_scan, + k_firstrecv,k_pbc_flag,k_pbc,k_g2l); + } else { - if (size_forward_recv[iswap]) { - MPI_Wait(&request,MPI_STATUS_IGNORE); - atomKK->modified(ExecutionSpaceFromDevice:: - space,X_MASK); + for (int iswap = 0; iswap < nswap; iswap++) { + if (sendproc[iswap] != me) { + if (comm_x_only) { + if (size_forward_recv[iswap]) { + buf = atomKK->k_x.view().data() + + firstrecv[iswap]*atomKK->k_x.view().extent(1); + MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + } + n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist, + iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + if (n) { + MPI_Send(k_buf_send.view().data(), + n,MPI_DOUBLE,sendproc[iswap],0,world); + } + + if (size_forward_recv[iswap]) { + MPI_Wait(&request,MPI_STATUS_IGNORE); + atomKK->modified(ExecutionSpaceFromDevice:: + space,X_MASK); + } + } else if (ghost_velocity) { + if (size_forward_recv[iswap]) { + MPI_Irecv(k_buf_recv.view().data(), + size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + } + n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + if (n) { + MPI_Send(k_buf_send.view().data(),n, + MPI_DOUBLE,sendproc[iswap],0,world); + } + if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); + DeviceType::fence(); + } else { + if (size_forward_recv[iswap]) + MPI_Irecv(k_buf_recv.view().data(), + size_forward_recv[iswap],MPI_DOUBLE, + recvproc[iswap],0,world,&request); + n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, + 
k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + if (n) + MPI_Send(k_buf_send.view().data(),n, + MPI_DOUBLE,sendproc[iswap],0,world); + if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); + avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); + DeviceType::fence(); } - } else if (ghost_velocity) { - if (size_forward_recv[iswap]) { - MPI_Irecv(k_buf_recv.view().data(), - size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - } - n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, - k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - if (n) { - MPI_Send(k_buf_send.view().data(),n, - MPI_DOUBLE,sendproc[iswap],0,world); - } - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType::fence(); } else { - if (size_forward_recv[iswap]) - MPI_Irecv(k_buf_recv.view().data(), - size_forward_recv[iswap],MPI_DOUBLE, - recvproc[iswap],0,world,&request); - n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap, - k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - if (n) - MPI_Send(k_buf_send.view().data(),n, - MPI_DOUBLE,sendproc[iswap],0,world); - if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE); - avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv); - DeviceType::fence(); - } - } else { - if (!ghost_velocity) { - if (sendnum[iswap]) - n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, - firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - } else { - n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, - k_buf_send,pbc_flag[iswap],pbc[iswap]); - DeviceType::fence(); - avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); - DeviceType::fence(); + if (!ghost_velocity) { + if (sendnum[iswap]) + n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap, + 
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + } else { + n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap, + k_buf_send,pbc_flag[iswap],pbc[iswap]); + DeviceType::fence(); + avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send); + DeviceType::fence(); + } } } } @@ -496,9 +504,8 @@ void CommKokkos::exchange() } atomKK->sync(Host,ALL_MASK); - atomKK->modified(Host,ALL_MASK); - CommBrick::exchange(); + atomKK->modified(Host,ALL_MASK); } /* ---------------------------------------------------------------------- */ @@ -565,146 +572,149 @@ void CommKokkos::exchange_device() atom->nghost = 0; atom->avec->clear_bonus(); - // subbox bounds for orthogonal or triclinic + if (comm->nprocs > 1) { // otherwise no-op - if (triclinic == 0) { - sublo = domain->sublo; - subhi = domain->subhi; - } else { - sublo = domain->sublo_lamda; - subhi = domain->subhi_lamda; - } + // subbox bounds for orthogonal or triclinic - atomKK->sync(ExecutionSpaceFromDevice::space,ALL_MASK); + if (triclinic == 0) { + sublo = domain->sublo; + subhi = domain->subhi; + } else { + sublo = domain->sublo_lamda; + subhi = domain->subhi_lamda; + } - // loop over dimensions - for (int dim = 0; dim < 3; dim++) { + atomKK->sync(ExecutionSpaceFromDevice::space,ALL_MASK); - // fill buffer with atoms leaving my box, using < and >= - // when atom is deleted, fill it in with last atom + // loop over dimensions + for (int dim = 0; dim < 3; dim++) { - x = atom->x; - lo = sublo[dim]; - hi = subhi[dim]; - nlocal = atom->nlocal; - i = nsend = 0; + // fill buffer with atoms leaving my box, using < and >= + // when atom is deleted, fill it in with last atom - if (true) { - if (k_sendflag.h_view.extent(0)(); - k_count.h_view() = k_exchange_sendlist.h_view.extent(0); - while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { - k_count.h_view() = 0; - k_count.modify(); - k_count.sync(); + x = atom->x; + lo = sublo[dim]; + hi = subhi[dim]; + nlocal = atom->nlocal; + 
i = nsend = 0; - BuildExchangeListFunctor - f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, - nlocal,dim,lo,hi); - Kokkos::parallel_for(nlocal,f); - k_exchange_sendlist.modify(); - k_sendflag.modify(); - k_count.modify(); + if (true) { + if (k_sendflag.h_view.extent(0)(); + k_count.h_view() = k_exchange_sendlist.h_view.extent(0); + while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { + k_count.h_view() = 0; + k_count.modify(); + k_count.sync(); - k_count.sync(); - if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { - k_exchange_sendlist.resize(k_count.h_view()*1.1); - k_exchange_copylist.resize(k_count.h_view()*1.1); - k_count.h_view()=k_exchange_sendlist.h_view.extent(0); + BuildExchangeListFunctor + f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag, + nlocal,dim,lo,hi); + Kokkos::parallel_for(nlocal,f); + k_exchange_sendlist.modify(); + k_sendflag.modify(); + k_count.modify(); + + k_count.sync(); + if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) { + k_exchange_lists.resize(2,k_count.h_view()*1.1); + k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL); + k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL); + k_count.h_view()=k_exchange_sendlist.h_view.extent(0); + } + } + + k_exchange_lists.sync(); + k_sendflag.sync(); + + int sendpos = nlocal-1; + nlocal -= k_count.h_view(); + for(int i = 0; i < k_count.h_view(); i++) { + if (k_exchange_sendlist.h_view(i)(); + k_exchange_copylist.sync(); + nsend = k_count.h_view(); + if (nsend > maxsend) grow_send_kokkos(nsend,1); + nsend = + avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, + k_exchange_sendlist,k_exchange_copylist, + ExecutionSpaceFromDevice::space, + dim,lo,hi); + DeviceType::fence(); + } else { + while (i < nlocal) { + if (x[i][dim] < lo || x[i][dim] >= hi) { + if (nsend > maxsend) grow_send_kokkos(nsend,1); + nsend += avec->pack_exchange(i,&buf_send[nsend]); + avec->copy(nlocal-1,i,1); + nlocal--; + } else i++; } } - 
k_exchange_copylist.sync(); - k_exchange_sendlist.sync(); - k_sendflag.sync(); + atom->nlocal = nlocal; - int sendpos = nlocal-1; - nlocal -= k_count.h_view(); - for(int i = 0; i < k_count.h_view(); i++) { - if (k_exchange_sendlist.h_view(i)(); - k_exchange_copylist.sync(); - nsend = k_count.h_view(); - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend = - avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send, - k_exchange_sendlist,k_exchange_copylist, - ExecutionSpaceFromDevice::space, - dim,lo,hi); - DeviceType::fence(); - } else { - while (i < nlocal) { - if (x[i][dim] < lo || x[i][dim] >= hi) { - if (nsend > maxsend) grow_send_kokkos(nsend,1); - nsend += avec->pack_exchange(i,&buf_send[nsend]); - avec->copy(nlocal-1,i,1); - nlocal--; - } else i++; - } - } - atom->nlocal = nlocal; + if (procgrid[dim] == 1) { + nrecv = nsend; + if (nrecv) { + atom->nlocal=avec-> + unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + DeviceType::fence(); + } + } else { + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, + &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); + nrecv = nrecv1; + if (procgrid[dim] > 2) { + MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, + &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE); + nrecv += nrecv2; + } + if (nrecv > maxrecv) grow_recv_kokkos(nrecv); - // send/recv atoms in both directions - // if 1 proc in dimension, no send/recv, set recv buf to send buf - // if 2 procs in dimension, single send/recv - // if more than 2 procs in dimension, send/recv to both neighbors - - if (procgrid[dim] == 1) { - nrecv = nsend; - if (nrecv) { - atom->nlocal=avec-> - unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - DeviceType::fence(); - } - } else { - MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0, - &nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE); - nrecv = nrecv1; - if (procgrid[dim] > 2) { - 
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0, - &nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE); - nrecv += nrecv2; - } - if (nrecv > maxrecv) grow_recv_kokkos(nrecv); - - MPI_Irecv(k_buf_recv.view().data(),nrecv1, - MPI_DOUBLE,procneigh[dim][1],0, - world,&request); - MPI_Send(k_buf_send.view().data(),nsend, - MPI_DOUBLE,procneigh[dim][0],0,world); - MPI_Wait(&request,MPI_STATUS_IGNORE); - - if (procgrid[dim] > 2) { - MPI_Irecv(k_buf_recv.view().data()+nrecv1, - nrecv2,MPI_DOUBLE,procneigh[dim][0],0, + MPI_Irecv(k_buf_recv.view().data(),nrecv1, + MPI_DOUBLE,procneigh[dim][1],0, world,&request); MPI_Send(k_buf_send.view().data(),nsend, - MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_DOUBLE,procneigh[dim][0],0,world); MPI_Wait(&request,MPI_STATUS_IGNORE); + + if (procgrid[dim] > 2) { + MPI_Irecv(k_buf_recv.view().data()+nrecv1, + nrecv2,MPI_DOUBLE,procneigh[dim][0],0, + world,&request); + MPI_Send(k_buf_send.view().data(),nsend, + MPI_DOUBLE,procneigh[dim][1],0,world); + MPI_Wait(&request,MPI_STATUS_IGNORE); + } + + if (nrecv) { + atom->nlocal = avec-> + unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, + ExecutionSpaceFromDevice::space); + DeviceType::fence(); + } } - if (nrecv) { - atom->nlocal = avec-> - unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi, - ExecutionSpaceFromDevice::space); - DeviceType::fence(); - } + // check incoming atoms to see if they are in my box + // if so, add to my list + } - - // check incoming atoms to see if they are in my box - // if so, add to my list - + atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); } - atomKK->modified(ExecutionSpaceFromDevice::space,ALL_MASK); - if (atom->firstgroupname) { /* this is not yet implemented with Kokkos */ atomKK->sync(Host,ALL_MASK); @@ -742,14 +752,15 @@ void CommKokkos::borders() if (!exchange_comm_classic) { if (exchange_comm_on_host) borders_device(); else borders_device(); - return; + } else { + atomKK->sync(Host,ALL_MASK); + 
CommBrick::borders(); + k_sendlist.modify(); + atomKK->modified(Host,ALL_MASK); } - atomKK->sync(Host,ALL_MASK); - k_sendlist.sync(); - CommBrick::borders(); - k_sendlist.modify(); - atomKK->modified(Host,ALL_MASK); + if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_classic) + copy_swap_info(); } /* ---------------------------------------------------------------------- */ @@ -817,7 +828,6 @@ void CommKokkos::borders_device() { AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec; ExecutionSpace exec_space = ExecutionSpaceFromDevice::space; - k_sendlist.sync(); atomKK->sync(exec_space,ALL_MASK); // do swaps over all 3 dimensions @@ -1037,6 +1047,69 @@ void CommKokkos::borders_device() { atom->map_set(); } } + +/* ---------------------------------------------------------------------- + copy swap info +------------------------------------------------------------------------- */ + +void CommKokkos::copy_swap_info() +{ + if (nswap > k_swap.extent(1)) { + k_swap = DAT::tdual_int_2d("comm:swap",2,nswap); + k_firstrecv = Kokkos::subview(k_swap,0,Kokkos::ALL); + k_sendnum_scan = Kokkos::subview(k_swap,1,Kokkos::ALL); + } + int scan = 0; + for (int iswap = 0; iswap < nswap; iswap++) { + scan += sendnum[iswap]; + k_sendnum_scan.h_view[iswap] = scan; + k_firstrecv.h_view[iswap] = firstrecv[iswap]; + } + totalsend = scan; + + // create map of ghost to local atom id + // store periodic boundary transform from local to ghost + + k_sendlist.sync(); + + if (totalsend > k_pbc.extent(0)) { + k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6); + k_swap2 = DAT::tdual_int_2d("comm:swap2",2,totalsend); + k_pbc_flag = Kokkos::subview(k_swap2,0,Kokkos::ALL); + k_g2l = Kokkos::subview(k_swap2,1,Kokkos::ALL); + } + + for (int iswap = 0; iswap < nswap; iswap++) { + for (int i = 0; i < sendnum[iswap]; i++) { + int source = sendlist[iswap][i] - atom->nlocal; + int dest = firstrecv[iswap] + i - atom->nlocal; + k_pbc_flag.h_view(dest) = pbc_flag[iswap]; + k_pbc.h_view(dest,0) = 
pbc[iswap][0]; + k_pbc.h_view(dest,1) = pbc[iswap][1]; + k_pbc.h_view(dest,2) = pbc[iswap][2]; + k_pbc.h_view(dest,3) = pbc[iswap][3]; + k_pbc.h_view(dest,4) = pbc[iswap][4]; + k_pbc.h_view(dest,5) = pbc[iswap][5]; + k_g2l.h_view(dest) = atom->nlocal + source; + + if (source >= 0) { + k_pbc_flag.h_view(dest) = k_pbc_flag.h_view(dest) || k_pbc_flag.h_view(source); + k_pbc.h_view(dest,0) += k_pbc.h_view(source,0); + k_pbc.h_view(dest,1) += k_pbc.h_view(source,1); + k_pbc.h_view(dest,2) += k_pbc.h_view(source,2); + k_pbc.h_view(dest,3) += k_pbc.h_view(source,3); + k_pbc.h_view(dest,4) += k_pbc.h_view(source,4); + k_pbc.h_view(dest,5) += k_pbc.h_view(source,5); + k_g2l.h_view(dest) = k_g2l.h_view(source); + } + } + } + + k_swap.modify(); + k_swap2.modify(); + k_pbc.modify(); +} + /* ---------------------------------------------------------------------- realloc the size of the send buffer as needed with BUFFACTOR and bufextra if flag = 1, realloc diff --git a/src/KOKKOS/comm_kokkos.h b/src/KOKKOS/comm_kokkos.h index f137655cb8..9d8766e309 100644 --- a/src/KOKKOS/comm_kokkos.h +++ b/src/KOKKOS/comm_kokkos.h @@ -58,11 +58,21 @@ class CommKokkos : public CommBrick { DAT::tdual_int_2d k_sendlist; DAT::tdual_int_scalar k_total_send; DAT::tdual_xfloat_2d k_buf_send,k_buf_recv; + DAT::tdual_int_2d k_exchange_lists; DAT::tdual_int_1d k_exchange_sendlist,k_exchange_copylist,k_sendflag; DAT::tdual_int_scalar k_count; //double *buf_send; // send buffer for all comm //double *buf_recv; // recv buffer for all comm + DAT::tdual_int_2d k_swap; + DAT::tdual_int_2d k_swap2; + DAT::tdual_int_2d k_pbc; + DAT::tdual_int_1d k_pbc_flag; + DAT::tdual_int_1d k_g2l; + DAT::tdual_int_1d k_firstrecv; + DAT::tdual_int_1d k_sendnum_scan; + int totalsend; + int max_buf_pair; DAT::tdual_xfloat_1d k_buf_send_pair; DAT::tdual_xfloat_1d k_buf_recv_pair; @@ -74,6 +84,7 @@ class CommKokkos : public CommBrick { void grow_recv_kokkos(int, ExecutionSpace space = Host); void grow_list(int, int); void 
grow_swap(int); + void copy_swap_info(); }; } diff --git a/src/KOKKOS/domain_kokkos.cpp b/src/KOKKOS/domain_kokkos.cpp index d9c1332778..4cf3e6ab52 100644 --- a/src/KOKKOS/domain_kokkos.cpp +++ b/src/KOKKOS/domain_kokkos.cpp @@ -17,6 +17,7 @@ #include "error.h" #include "force.h" #include "kspace.h" +#include "kokkos.h" using namespace LAMMPS_NS; @@ -339,6 +340,17 @@ struct DomainPBCFunctor { void DomainKokkos::pbc() { + + if (lmp->kokkos->exchange_comm_classic) { + + // reduce GPU data movement + + atomKK->sync(Host,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK); + Domain::pbc(); + atomKK->modified(Host,X_MASK|V_MASK|MASK_MASK|IMAGE_MASK); + return; + } + double *lo,*hi,*period; int nlocal = atomKK->nlocal; diff --git a/src/KOKKOS/fix_nve_kokkos.cpp b/src/KOKKOS/fix_nve_kokkos.cpp index 052bf411d6..6db8ff8c0f 100644 --- a/src/KOKKOS/fix_nve_kokkos.cpp +++ b/src/KOKKOS/fix_nve_kokkos.cpp @@ -113,8 +113,8 @@ void FixNVEKokkos::initial_integrate_rmass_item(int i) const template void FixNVEKokkos::final_integrate() { - atomKK->sync(execution_space,datamask_read); - atomKK->modified(execution_space,datamask_modify); + atomKK->sync(execution_space,V_MASK | F_MASK | MASK_MASK | RMASS_MASK | TYPE_MASK); + atomKK->modified(execution_space,V_MASK); v = atomKK->k_v.view(); f = atomKK->k_f.view(); diff --git a/src/KOKKOS/fix_property_atom_kokkos.cpp b/src/KOKKOS/fix_property_atom_kokkos.cpp index 12f27f9932..6860676911 100644 --- a/src/KOKKOS/fix_property_atom_kokkos.cpp +++ b/src/KOKKOS/fix_property_atom_kokkos.cpp @@ -19,6 +19,7 @@ #include "memory_kokkos.h" #include "error.h" #include "update.h" +#include "atom_masks.h" using namespace LAMMPS_NS; using namespace FixConst; @@ -61,8 +62,10 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax) size_t nbytes = (nmax-nmax_old) * sizeof(int); memset(&atom->ivector[index[m]][nmax_old],0,nbytes); } else if (style[m] == DOUBLE) { + atomKK->sync(Device,DVECTOR_MASK); 
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax, "atom:dvector"); + atomKK->modified(Device,DVECTOR_MASK); //memory->grow(atom->dvector[index[m]],nmax,"atom:dvector"); //size_t nbytes = (nmax-nmax_old) * sizeof(double); //memset(&atom->dvector[index[m]][nmax_old],0,nbytes); diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.cpp b/src/KOKKOS/fix_qeq_reax_kokkos.cpp index 9969ab7257..12369261b3 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.cpp +++ b/src/KOKKOS/fix_qeq_reax_kokkos.cpp @@ -12,7 +12,8 @@ ------------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- - Contributing author: Ray Shan (SNL), Stan Moore (SNL) + Contributing authors: Ray Shan (SNL), Stan Moore (SNL), + Kamesh Arumugam (NVIDIA) ------------------------------------------------------------------------- */ #include @@ -58,7 +59,7 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : atomKK = (AtomKokkos *) atom; execution_space = ExecutionSpaceFromDevice::space; - datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK; + datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK; datamask_modify = Q_MASK | X_MASK; nmax = nmax = m_cap = 0; @@ -68,6 +69,8 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) : memory->destroy(s_hist); memory->destroy(t_hist); grow_arrays(atom->nmax); + + d_mfill_offset = typename AT::t_int_scalar("qeq/kk:mfill_offset"); } /* ---------------------------------------------------------------------- */ @@ -164,6 +167,9 @@ void FixQEqReaxKokkos::init_shielding_k() template void FixQEqReaxKokkos::init_hist() { + k_s_hist.clear_sync_state(); + k_t_hist.clear_sync_state(); + Kokkos::deep_copy(d_s_hist,0.0); Kokkos::deep_copy(d_t_hist,0.0); @@ -189,7 +195,6 @@ void FixQEqReaxKokkos::pre_force(int vflag) if (update->ntimestep % nevery) return; atomKK->sync(execution_space,datamask_read); - 
atomKK->modified(execution_space,datamask_modify); x = atomKK->k_x.view(); v = atomKK->k_v.view(); @@ -215,17 +220,46 @@ void FixQEqReaxKokkos::pre_force(int vflag) copymode = 1; // allocate + allocate_array(); // get max number of neighbor + if (!allocated_flag || update->ntimestep == neighbor->lastcall) allocate_matrix(); // compute_H - FixQEqReaxKokkosComputeHFunctor computeH_functor(this); - Kokkos::parallel_scan(inum,computeH_functor); + + if (lmp->kokkos->ngpus == 0) { // CPU + if (neighflag == FULL) { + FixQEqReaxKokkosComputeHFunctor computeH_functor(this); + Kokkos::parallel_scan(inum,computeH_functor); + } else { // HALF and HALFTHREAD are the same + FixQEqReaxKokkosComputeHFunctor computeH_functor(this); + Kokkos::parallel_scan(inum,computeH_functor); + } + } else { // GPU, use teams + Kokkos::deep_copy(d_mfill_offset,0); + + int vector_length = 32; + int atoms_per_team = 4; + int num_teams = inum / atoms_per_team + (inum % atoms_per_team ? 1 : 0); + + Kokkos::TeamPolicy policy(num_teams, atoms_per_team, + vector_length); + if (neighflag == FULL) { + FixQEqReaxKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } else { // HALF and HALFTHREAD are the same + FixQEqReaxKokkosComputeHFunctor computeH_functor( + this, atoms_per_team, vector_length); + Kokkos::parallel_for(policy, computeH_functor); + } + } // init_matvec + k_s_hist.template sync(); k_t_hist.template sync(); FixQEqReaxKokkosMatVecFunctor matvec_functor(this); @@ -255,12 +289,15 @@ void FixQEqReaxKokkos::pre_force(int vflag) ndup_o = Kokkos::Experimental::create_scatter_view (d_o); // 1st cg solve over b_s, s + cg_solve1(); // 2nd cg solve over b_t, t + cg_solve2(); // calculate_Q(); + calculate_q(); k_s_hist.template modify(); k_t_hist.template modify(); @@ -271,8 +308,11 @@ void FixQEqReaxKokkos::pre_force(int vflag) allocated_flag = 1; // free duplicated memory + if (need_dup) dup_o = decltype(dup_o)(); + + 
atomKK->modified(execution_space,datamask_modify); } /* ---------------------------------------------------------------------- */ @@ -373,6 +413,7 @@ void FixQEqReaxKokkos::zero_item(int ii) const /* ---------------------------------------------------------------------- */ template +template KOKKOS_INLINE_FUNCTION void FixQEqReaxKokkos::compute_h_item(int ii, int &m_fill, const bool &final) const { @@ -399,7 +440,7 @@ void FixQEqReaxKokkos::compute_h_item(int ii, int &m_fill, const boo const X_FLOAT dely = x(j,1) - ytmp; const X_FLOAT delz = x(j,2) - ztmp; - if (neighflag != FULL) { + if (NEIGHFLAG != FULL) { // skip half of the interactions const tagint jtag = tag(j); if (j >= nlocal) { @@ -433,6 +474,217 @@ void FixQEqReaxKokkos::compute_h_item(int ii, int &m_fill, const boo /* ---------------------------------------------------------------------- */ +// Calculate Qeq matrix H where H is a sparse matrix and H[i][j] represents the electrostatic interaction coefficients on atom-i with atom-j +// d_val - contains the non-zero entries of sparse matrix H +// d_numnbrs - d_numnbrs[i] contains the # of non-zero entries in the i-th row of H (which also represents the # of neighbor atoms with electrostatic interaction coefficients with atom-i) +// d_firstnbr- d_firstnbr[i] contains the beginning index from where the H matrix entries corresponding to row-i is stored in d_val +// d_jlist - contains the column index corresponding to each entry in d_val + +template +template +void FixQEqReaxKokkos::compute_h_team( + const typename Kokkos::TeamPolicy::member_type &team, + int atoms_per_team, int vector_length) const { + + // scratch space setup + Kokkos::View, + Kokkos::MemoryTraits> + s_ilist(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_numnbrs(team.team_shmem(), atoms_per_team); + Kokkos::View, + Kokkos::MemoryTraits> + s_firstnbr(team.team_shmem(), atoms_per_team); + + Kokkos::View, + Kokkos::MemoryTraits> + s_jtype(team.team_shmem(), 
atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_jlist(team.team_shmem(), atoms_per_team, vector_length); + Kokkos::View, + Kokkos::MemoryTraits> + s_r(team.team_shmem(), atoms_per_team, vector_length); + + // team of threads work on atoms with index in [firstatom, lastatom) + int firstatom = team.league_rank() * atoms_per_team; + int lastatom = + (firstatom + atoms_per_team < inum) ? (firstatom + atoms_per_team) : inum; + + // kokkos-thread-0 is used to load info from global memory into scratch space + if (team.team_rank() == 0) { + + // copy atom indices from d_ilist[firstatom:lastatom] to scratch space s_ilist[0:atoms_per_team] + // copy # of neighbor atoms for all the atoms with indices in d_ilist[firstatom:lastatom] from d_numneigh to scratch space s_numneigh[0:atoms_per_team] + // calculate total number of neighbor atoms for all atoms assigned to the current team of threads (Note - Total # of neighbor atoms here provides the + // upper bound space requirement to store the H matrix values corresponding to the atoms with indices in d_ilist[firstatom:lastatom]) + + Kokkos::parallel_scan(Kokkos::ThreadVectorRange(team, atoms_per_team), + [&](const int &idx, int &totalnbrs, bool final) { + int ii = firstatom + idx; + + if (ii < inum) { + const int i = d_ilist[ii]; + int jnum = d_numneigh[i]; + + if (final) { + s_ilist[idx] = i; + s_numnbrs[idx] = jnum; + s_firstnbr[idx] = totalnbrs; + } + totalnbrs += jnum; + } else { + s_numnbrs[idx] = 0; + } + }); + } + + // barrier ensures that the data moved to scratch space is visible to all the + // threads of the corresponding team + team.team_barrier(); + + // calculate the global memory offset from where the H matrix values to be + // calculated by the current team will be stored in d_val + int team_firstnbr_idx = 0; + Kokkos::single(Kokkos::PerTeam(team), + [=](int &val) { + int totalnbrs = s_firstnbr[lastatom - firstatom - 1] + + s_numnbrs[lastatom - firstatom - 1]; + val = 
Kokkos::atomic_fetch_add(&d_mfill_offset(), totalnbrs); + }, + team_firstnbr_idx); + + // map the H matrix computation of each atom to kokkos-thread (one atom per + // kokkos-thread) neighbor computation for each atom is assigned to vector + // lanes of the corresponding thread + Kokkos::parallel_for( + Kokkos::TeamThreadRange(team, atoms_per_team), [&](const int &idx) { + int ii = firstatom + idx; + + if (ii < inum) { + const int i = s_ilist[idx]; + + if (mask[i] & groupbit) { + const X_FLOAT xtmp = x(i, 0); + const X_FLOAT ytmp = x(i, 1); + const X_FLOAT ztmp = x(i, 2); + const int itype = type(i); + const tagint itag = tag(i); + const int jnum = s_numnbrs[idx]; + + // calculate the write-offset for atom-i's first neighbor + int atomi_firstnbr_idx = team_firstnbr_idx + s_firstnbr[idx]; + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_firstnbr[i] = atomi_firstnbr_idx; }); + + // current # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i which represents the # of + // non-zero elements in row-i of H matrix + int atomi_nbrs_inH = 0; + + // calculate H matrix values corresponding to atom-i where neighbors + // are processed in batches and the batch size is vector_length + for (int jj_start = 0; jj_start < jnum; jj_start += vector_length) { + + int atomi_nbr_writeIdx = atomi_firstnbr_idx + atomi_nbrs_inH; + + // count the # of neighbor atoms with non-zero electrostatic + // interaction coefficients with atom-i in the current batch + int atomi_nbrs_curbatch = 0; + + // compute rsq, jtype, j and store in scratch space which is + // reused later + Kokkos::parallel_reduce( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill) { + const int jj = jj_start + idx; + + // initialize: -1 represents no interaction with atom-j + // where j = d_neighbors(i,jj) + s_jlist(team.team_rank(), idx) = -1; + + if (jj < jnum) { + int j = d_neighbors(i, jj); + j &= NEIGHMASK; + const int jtype = type(j); + + const 
X_FLOAT delx = x(j, 0) - xtmp; + const X_FLOAT dely = x(j, 1) - ytmp; + const X_FLOAT delz = x(j, 2) - ztmp; + + // valid nbr interaction + bool valid = true; + if (NEIGHFLAG != FULL) { + // skip half of the interactions + const tagint jtag = tag(j); + if (j >= nlocal) { + if (itag > jtag) { + if ((itag + jtag) % 2 == 0) + valid = false; + } else if (itag < jtag) { + if ((itag + jtag) % 2 == 1) + valid = false; + } else { + if (x(j, 2) < ztmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) < ytmp) + valid = false; + if (x(j, 2) == ztmp && x(j, 1) == ytmp && + x(j, 0) < xtmp) + valid = false; + } + } + } + + const F_FLOAT rsq = + delx * delx + dely * dely + delz * delz; + if (rsq > cutsq) + valid = false; + + if (valid) { + s_jlist(team.team_rank(), idx) = j; + s_jtype(team.team_rank(), idx) = jtype; + s_r(team.team_rank(), idx) = sqrt(rsq); + m_fill++; + } + } + }, + atomi_nbrs_curbatch); + + // write non-zero entries of H to global memory + Kokkos::parallel_scan( + Kokkos::ThreadVectorRange(team, vector_length), + [&](const int &idx, int &m_fill, bool final) { + int j = s_jlist(team.team_rank(), idx); + if (final) { + if (j != -1) { + const int jtype = s_jtype(team.team_rank(), idx); + const F_FLOAT r = s_r(team.team_rank(), idx); + const F_FLOAT shldij = d_shield(itype, jtype); + + d_jlist[atomi_nbr_writeIdx + m_fill] = j; + d_val[atomi_nbr_writeIdx + m_fill] = + calculate_H_k(r, shldij); + } + } + + if (j != -1) { + m_fill++; + } + }); + atomi_nbrs_inH += atomi_nbrs_curbatch; + } + + Kokkos::single(Kokkos::PerThread(team), + [&]() { d_numnbrs[i] = atomi_nbrs_inH; }); + } + } + }); +} + +/* ---------------------------------------------------------------------- */ + template KOKKOS_INLINE_FUNCTION double FixQEqReaxKokkos::calculate_H_k(const F_FLOAT &r, const F_FLOAT &shld) const @@ -1199,9 +1451,12 @@ double FixQEqReaxKokkos::memory_usage() template void FixQEqReaxKokkos::grow_arrays(int nmax) { - k_s_hist.template sync(); // force reallocation on host + 
k_s_hist.template sync(); k_t_hist.template sync(); + k_s_hist.template modify(); // force reallocation on host + k_t_hist.template modify(); + memoryKK->grow_kokkos(k_s_hist,s_hist,nmax,nprev,"qeq:s_hist"); memoryKK->grow_kokkos(k_t_hist,t_hist,nmax,nprev,"qeq:t_hist"); diff --git a/src/KOKKOS/fix_qeq_reax_kokkos.h b/src/KOKKOS/fix_qeq_reax_kokkos.h index 23bb4f32ee..cd69aa9283 100644 --- a/src/KOKKOS/fix_qeq_reax_kokkos.h +++ b/src/KOKKOS/fix_qeq_reax_kokkos.h @@ -53,9 +53,14 @@ class FixQEqReaxKokkos : public FixQEqReax { KOKKOS_INLINE_FUNCTION void zero_item(int) const; + template KOKKOS_INLINE_FUNCTION void compute_h_item(int, int &, const bool &) const; + template + KOKKOS_INLINE_FUNCTION + void compute_h_team(const typename Kokkos::TeamPolicy ::member_type &team, int, int) const; + KOKKOS_INLINE_FUNCTION void matvec_item(int) const; @@ -150,6 +155,8 @@ class FixQEqReaxKokkos : public FixQEqReax { int allocated_flag; int need_dup; + typename AT::t_int_scalar d_mfill_offset; + typedef Kokkos::DualView tdual_int_1d; Kokkos::DualView k_params; typename Kokkos::DualView::t_dev_const params; @@ -247,16 +254,51 @@ struct FixQEqReaxKokkosMatVecFunctor { } }; -template -struct FixQEqReaxKokkosComputeHFunctor { - typedef DeviceType device_type ; +template +struct FixQEqReaxKokkosComputeHFunctor { + int atoms_per_team, vector_length; + typedef int value_type; + typedef Kokkos::ScratchMemorySpace scratch_space; FixQEqReaxKokkos c; + FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos* c_ptr):c(*c_ptr) { c.cleanup_copy(); }; + + FixQEqReaxKokkosComputeHFunctor(FixQEqReaxKokkos *c_ptr, + int _atoms_per_team, int _vector_length) + : c(*c_ptr), atoms_per_team(_atoms_per_team), + vector_length(_vector_length) { + c.cleanup_copy(); + }; + KOKKOS_INLINE_FUNCTION void operator()(const int ii, int &m_fill, const bool &final) const { - c.compute_h_item(ii,m_fill,final); + c.template compute_h_item(ii,m_fill,final); + } + + KOKKOS_INLINE_FUNCTION + void operator()( + const typename 
Kokkos::TeamPolicy::member_type &team) const { + c.template compute_h_team(team, atoms_per_team, vector_length); + } + + size_t team_shmem_size(int team_size) const { + size_t shmem_size = + Kokkos::View::shmem_size( + atoms_per_team) + // s_ilist + Kokkos::View::shmem_size( + atoms_per_team) + // s_numnbrs + Kokkos::View::shmem_size( + atoms_per_team) + // s_firstnbr + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_jtype + Kokkos::View:: + shmem_size(atoms_per_team, vector_length) + // s_j + Kokkos::View::shmem_size(atoms_per_team, + vector_length); // s_r + return shmem_size; } }; diff --git a/src/KOKKOS/kokkos.cpp b/src/KOKKOS/kokkos.cpp index d6a67188bb..3fa84d98b2 100644 --- a/src/KOKKOS/kokkos.cpp +++ b/src/KOKKOS/kokkos.cpp @@ -78,9 +78,9 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // process any command-line args that invoke Kokkos settings - ngpu = 0; + ngpus = 0; int device = 0; - num_threads = 1; + nthreads = 1; numa = 1; int iarg = 0; @@ -96,7 +96,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) error->all(FLERR,"GPUs are requested but Kokkos has not been compiled for CUDA"); #endif if (iarg+2 > narg) error->all(FLERR,"Invalid Kokkos command-line args"); - ngpu = atoi(arg[iarg+1]); + ngpus = atoi(arg[iarg+1]); int skip_gpu = 9999; if (iarg+2 < narg && isdigit(arg[iarg+2][0])) { @@ -108,23 +108,23 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) char *str; if ((str = getenv("SLURM_LOCALID"))) { int local_rank = atoi(str); - device = local_rank % ngpu; + device = local_rank % ngpus; if (device >= skip_gpu) device++; } if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) { int local_rank = atoi(str); - device = local_rank % ngpu; + device = local_rank % ngpus; if (device >= skip_gpu) device++; } if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) { int local_rank = atoi(str); - device = local_rank % ngpu; + device = local_rank % ngpus; if (device >= skip_gpu) 
device++; } } else if (strcmp(arg[iarg],"t") == 0 || strcmp(arg[iarg],"threads") == 0) { - num_threads = atoi(arg[iarg+1]); + nthreads = atoi(arg[iarg+1]); iarg += 2; } else if (strcmp(arg[iarg],"n") == 0 || @@ -138,12 +138,12 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) // initialize Kokkos if (me == 0) { - if (screen) fprintf(screen," will use up to %d GPU(s) per node\n",ngpu); - if (logfile) fprintf(logfile," will use up to %d GPU(s) per node\n",ngpu); + if (screen) fprintf(screen," will use up to %d GPU(s) per node\n",ngpus); + if (logfile) fprintf(logfile," will use up to %d GPU(s) per node\n",ngpus); } #ifdef KOKKOS_ENABLE_CUDA - if (ngpu <= 0) + if (ngpus <= 0) error->all(FLERR,"Kokkos has been compiled for CUDA but no GPUs are requested"); // check and warn about GPU-direct availability when using multiple MPI tasks @@ -167,14 +167,14 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) #endif #ifndef KOKKOS_ENABLE_SERIAL - if (num_threads == 1) + if (nthreads == 1) error->warning(FLERR,"When using a single thread, the Kokkos Serial backend " "(i.e. 
Makefile.kokkos_mpi_only) gives better performance " "than the OpenMP backend"); #endif Kokkos::InitArguments args; - args.num_threads = num_threads; + args.num_threads = nthreads; args.num_numa = numa; args.device_id = device; @@ -184,22 +184,23 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp) binsize = 0.0; gpu_direct_flag = 1; - if (ngpu > 0) { + neigh_thread = 0; + neigh_thread_set = 0; + neighflag_qeq_set = 0; + if (ngpus > 0) { neighflag = FULL; neighflag_qeq = FULL; - neighflag_qeq_set = 0; newtonflag = 0; exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; } else { - if (num_threads > 1) { + if (nthreads > 1) { neighflag = HALFTHREAD; neighflag_qeq = HALFTHREAD; } else { neighflag = HALF; neighflag_qeq = HALF; } - neighflag_qeq_set = 0; newtonflag = 1; exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1; exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0; @@ -236,7 +237,7 @@ void KokkosLMP::accelerator(int narg, char **arg) if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"full") == 0) neighflag = FULL; else if (strcmp(arg[iarg+1],"half") == 0) { - if (num_threads > 1 || ngpu > 0) + if (nthreads > 1 || ngpus > 0) neighflag = HALFTHREAD; else neighflag = HALF; @@ -248,7 +249,7 @@ void KokkosLMP::accelerator(int narg, char **arg) if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); if (strcmp(arg[iarg+1],"full") == 0) neighflag_qeq = FULL; else if (strcmp(arg[iarg+1],"half") == 0) { - if (num_threads > 1 || ngpu > 0) + if (nthreads > 1 || ngpus > 0) neighflag_qeq = HALFTHREAD; else neighflag_qeq = HALF; @@ -318,6 +319,13 @@ void KokkosLMP::accelerator(int narg, char **arg) else if (strcmp(arg[iarg+1],"on") == 0) gpu_direct_flag = 1; else error->all(FLERR,"Illegal package kokkos command"); iarg += 2; + } else if 
(strcmp(arg[iarg],"neigh/thread") == 0) { + if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command"); + if (strcmp(arg[iarg+1],"off") == 0) neigh_thread = 0; + else if (strcmp(arg[iarg+1],"on") == 0) neigh_thread = 1; + else error->all(FLERR,"Illegal package kokkos command"); + neigh_thread_set = 1; + iarg += 2; } else error->all(FLERR,"Illegal package kokkos command"); } @@ -337,6 +345,9 @@ void KokkosLMP::accelerator(int narg, char **arg) force->newton = force->newton_pair = force->newton_bond = newtonflag; + if (neigh_thread && neighflag != FULL) + error->all(FLERR,"Must use KOKKOS package option 'neigh full' with 'neigh/thread on'"); + neighbor->binsize_user = binsize; if (binsize <= 0.0) neighbor->binsizeflag = 0; else neighbor->binsizeflag = 1; diff --git a/src/KOKKOS/kokkos.h b/src/KOKKOS/kokkos.h index 74a10883f6..46044799c4 100644 --- a/src/KOKKOS/kokkos.h +++ b/src/KOKKOS/kokkos.h @@ -32,10 +32,12 @@ class KokkosLMP : protected Pointers { int exchange_comm_on_host; int forward_comm_on_host; int reverse_comm_on_host; - int num_threads,ngpu; + int nthreads,ngpus; int numa; int auto_sync; int gpu_direct_flag; + int neigh_thread; + int neigh_thread_set; int newtonflag; double binsize; @@ -87,4 +89,8 @@ U: Must use Kokkos half/thread or full neighbor list with threads or GPUs Using Kokkos half-neighbor lists with threading is not allowed. 
+E: Must use KOKKOS package option 'neigh full' with 'neigh/thread on' + +The 'neigh/thread on' option requires a full neighbor list + */ diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index ff96684edc..942c2af241 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -448,6 +448,52 @@ struct s_EV_FLOAT_REAX { }; typedef struct s_EV_FLOAT_REAX EV_FLOAT_REAX; +struct s_FEV_FLOAT { + F_FLOAT f[3]; + E_FLOAT evdwl; + E_FLOAT ecoul; + E_FLOAT v[6]; + KOKKOS_INLINE_FUNCTION + s_FEV_FLOAT() { + f[0] = 0; f[1] = 0; f[2] = 0; + evdwl = 0; + ecoul = 0; + v[0] = 0; v[1] = 0; v[2] = 0; + v[3] = 0; v[4] = 0; v[5] = 0; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const s_FEV_FLOAT &rhs) { + f[0] += rhs.f[0]; + f[1] += rhs.f[1]; + f[2] += rhs.f[2]; + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; + } + + KOKKOS_INLINE_FUNCTION + void operator+=(const volatile s_FEV_FLOAT &rhs) volatile { + f[0] += rhs.f[0]; + f[1] += rhs.f[1]; + f[2] += rhs.f[2]; + evdwl += rhs.evdwl; + ecoul += rhs.ecoul; + v[0] += rhs.v[0]; + v[1] += rhs.v[1]; + v[2] += rhs.v[2]; + v[3] += rhs.v[3]; + v[4] += rhs.v[4]; + v[5] += rhs.v[5]; + } +}; +typedef struct s_FEV_FLOAT FEV_FLOAT; + #ifndef PREC_POS #define PREC_POS PRECISION #endif diff --git a/src/KOKKOS/neighbor_kokkos.cpp b/src/KOKKOS/neighbor_kokkos.cpp index 7aaeda4b37..e912956a3f 100644 --- a/src/KOKKOS/neighbor_kokkos.cpp +++ b/src/KOKKOS/neighbor_kokkos.cpp @@ -362,7 +362,7 @@ void NeighborKokkos::modify_mol_intra_grow_kokkos(){ /* ---------------------------------------------------------------------- */ void NeighborKokkos::set_binsize_kokkos() { - if (!binsizeflag && lmp->kokkos->ngpu > 0) { + if (!binsizeflag && lmp->kokkos->ngpus > 0) { binsize_user = cutneighmax; binsizeflag = 1; } diff --git a/src/KOKKOS/npair_kokkos.cpp b/src/KOKKOS/npair_kokkos.cpp index 5e1b7b0414..4daf4b84c5 100644 --- 
a/src/KOKKOS/npair_kokkos.cpp +++ b/src/KOKKOS/npair_kokkos.cpp @@ -15,6 +15,7 @@ #include "atom_kokkos.h" #include "atom_masks.h" #include "domain_kokkos.h" +#include "update.h" #include "neighbor_kokkos.h" #include "nbin_kokkos.h" #include "nstencil.h" @@ -27,6 +28,16 @@ namespace LAMMPS_NS { template NPairKokkos::NPairKokkos(LAMMPS *lmp) : NPair(lmp) { + // use 1D view for scalars to reduce GPU memory operations + + d_scalars = typename AT::t_int_1d("neighbor:scalars",2); + h_scalars = HAT::t_int_1d("neighbor:scalars_mirror",2); + + d_resize = Kokkos::subview(d_scalars,0); + d_new_maxneighs = Kokkos::subview(d_scalars,1); + + h_resize = Kokkos::subview(h_scalars,0); + h_new_maxneighs = Kokkos::subview(h_scalars,1); } /* ---------------------------------------------------------------------- @@ -84,27 +95,30 @@ template void NPairKokkos::copy_stencil_info() { NPair::copy_stencil_info(); - nstencil = ns->nstencil; - int maxstencil = ns->get_maxstencil(); + if (neighbor->last_setup_bins == update->ntimestep) { + // copy stencil to device as it may have changed - if (maxstencil > k_stencil.extent(0)) - k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); - for (int k = 0; k < maxstencil; k++) - k_stencil.h_view(k) = ns->stencil[k]; - k_stencil.modify(); - k_stencil.sync(); - if (GHOST) { - if (maxstencil > k_stencilxyz.extent(0)) - k_stencilxyz = DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); - for (int k = 0; k < maxstencil; k++) { - k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; - k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; - k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2]; + int maxstencil = ns->get_maxstencil(); + + if (maxstencil > k_stencil.extent(0)) + k_stencil = DAT::tdual_int_1d("neighlist:stencil",maxstencil); + for (int k = 0; k < maxstencil; k++) + k_stencil.h_view(k) = ns->stencil[k]; + k_stencil.modify(); + k_stencil.sync(); + if (GHOST) { + if (maxstencil > k_stencilxyz.extent(0)) + k_stencilxyz = 
DAT::tdual_int_1d_3("neighlist:stencilxyz",maxstencil); + for (int k = 0; k < maxstencil; k++) { + k_stencilxyz.h_view(k,0) = ns->stencilxyz[k][0]; + k_stencilxyz.h_view(k,1) = ns->stencilxyz[k][1]; + k_stencilxyz.h_view(k,2) = ns->stencilxyz[k][2]; + } + k_stencilxyz.modify(); + k_stencilxyz.sync(); } - k_stencilxyz.modify(); - k_stencilxyz.sync(); } } @@ -126,7 +140,7 @@ void NPairKokkos::build(NeighList *list_) k_bincount.view(), k_bins.view(), k_atom2bin.view(), - nstencil, + mbins,nstencil, k_stencil.view(), k_stencilxyz.view(), nlocal, @@ -157,7 +171,7 @@ void NPairKokkos::build(NeighList *list_) bboxhi,bboxlo, domain->xperiodic,domain->yperiodic,domain->zperiodic, domain->xprd_half,domain->yprd_half,domain->zprd_half, - skin); + skin,d_resize,h_resize,d_new_maxneighs,h_new_maxneighs); k_cutneighsq.sync(); k_ex1_type.sync(); @@ -173,7 +187,18 @@ void NPairKokkos::build(NeighList *list_) k_bincount.sync(); k_bins.sync(); k_atom2bin.sync(); - atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + + if (atom->molecular) { + if (exclude) + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK|TAG_MASK|SPECIAL_MASK); + else + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|TAG_MASK|SPECIAL_MASK); + } else { + if (exclude) + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK|MASK_MASK); + else + atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK); + } data.special_flag[0] = special_flag[0]; data.special_flag[1] = special_flag[1]; @@ -185,8 +210,7 @@ void NPairKokkos::build(NeighList *list_) data.h_new_maxneighs() = list->maxneighs; data.h_resize() = 0; - Kokkos::deep_copy(data.resize, data.h_resize); - Kokkos::deep_copy(data.new_maxneighs, data.h_new_maxneighs); + Kokkos::deep_copy(d_scalars, h_scalars); #ifdef KOKKOS_ENABLE_CUDA #define BINS_PER_BLOCK 2 const int factor = atoms_per_bin<64?2:1; @@ -245,10 +269,9 @@ void NPairKokkos::build(NeighList *list_) } } } - deep_copy(data.h_resize, data.resize); + 
Kokkos::deep_copy(h_scalars, d_scalars); if(data.h_resize()) { - deep_copy(data.h_new_maxneighs, data.new_maxneighs); list->maxneighs = data.h_new_maxneighs() * 1.2; list->d_neighbors = typename ArrayTypes::t_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs); data.neigh_list.d_neighbors = list->d_neighbors; @@ -488,7 +511,7 @@ void NeighborKokkosExecute::build_ItemCuda(typename Kokkos::TeamPoli const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; - if(ibin >=c_bincount.extent(0)) return; + if(ibin >= mbins) return; X_FLOAT* other_x = sharedmem; other_x = other_x + 5*atoms_per_bin*MY_BIN; @@ -924,7 +947,7 @@ void NeighborKokkosExecute::build_ItemSizeCuda(typename Kokkos::Team const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN; - if(ibin >=c_bincount.extent(0)) return; + if(ibin >= mbins) return; X_FLOAT* other_x = sharedmem; other_x = other_x + 6*atoms_per_bin*MY_BIN; diff --git a/src/KOKKOS/npair_kokkos.h b/src/KOKKOS/npair_kokkos.h index 62138bd524..2a3994f584 100644 --- a/src/KOKKOS/npair_kokkos.h +++ b/src/KOKKOS/npair_kokkos.h @@ -95,6 +95,8 @@ namespace LAMMPS_NS { template class NPairKokkos : public NPair { + typedef ArrayTypes AT; + public: NPairKokkos(class LAMMPS *); ~NPairKokkos() {} @@ -105,6 +107,12 @@ class NPairKokkos : public NPair { private: int newton_pair; + typename AT::t_int_1d d_scalars; + HAT::t_int_1d h_scalars; + typename AT::t_int_scalar d_resize; + typename AT::t_int_scalar d_new_maxneighs; + HAT::t_int_scalar h_resize; + HAT::t_int_scalar h_new_maxneighs; // data from Neighbor class @@ -165,6 +173,7 @@ class NeighborKokkosExecute // data from NBin class + const int mbins; const typename AT::t_int_1d bincount; const typename AT::t_int_1d_const c_bincount; typename AT::t_int_2d bins; @@ -218,7 +227,7 @@ class NeighborKokkosExecute const typename AT::t_int_1d &_bincount, const typename AT::t_int_2d &_bins, const typename AT::t_int_1d &_atom2bin, - const int _nstencil, + const int _mbins,const int _nstencil, 
const typename AT::t_int_1d &_d_stencil, const typename AT::t_int_1d_3 &_d_stencilxyz, const int _nlocal, @@ -251,8 +260,12 @@ class NeighborKokkosExecute const X_FLOAT *_bboxhi, const X_FLOAT* _bboxlo, const int & _xperiodic, const int & _yperiodic, const int & _zperiodic, const int & _xprd_half, const int & _yprd_half, const int & _zprd_half, - const X_FLOAT _skin): - neigh_list(_neigh_list), cutneighsq(_cutneighsq), + const X_FLOAT _skin, + const typename AT::t_int_scalar _resize, + const typename ArrayTypes::t_int_scalar _h_resize, + const typename AT::t_int_scalar _new_maxneighs, + const typename ArrayTypes::t_int_scalar _h_new_maxneighs): + neigh_list(_neigh_list), cutneighsq(_cutneighsq),mbins(_mbins), bincount(_bincount),c_bincount(_bincount),bins(_bins),c_bins(_bins), atom2bin(_atom2bin),c_atom2bin(_atom2bin), nstencil(_nstencil),d_stencil(_d_stencil),d_stencilxyz(_d_stencilxyz), @@ -272,7 +285,8 @@ class NeighborKokkosExecute ex_mol_intra(_ex_mol_intra), xperiodic(_xperiodic),yperiodic(_yperiodic),zperiodic(_zperiodic), xprd_half(_xprd_half),yprd_half(_yprd_half),zprd_half(_zprd_half), - skin(_skin) { + skin(_skin),resize(_resize),h_resize(_h_resize), + new_maxneighs(_new_maxneighs),h_new_maxneighs(_h_new_maxneighs) { if (molecular == 2) moltemplate = 1; else moltemplate = 0; @@ -280,20 +294,7 @@ class NeighborKokkosExecute bboxlo[0] = _bboxlo[0]; bboxlo[1] = _bboxlo[1]; bboxlo[2] = _bboxlo[2]; bboxhi[0] = _bboxhi[0]; bboxhi[1] = _bboxhi[1]; bboxhi[2] = _bboxhi[2]; - resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize"); -#ifndef KOKKOS_USE_CUDA_UVM - h_resize = Kokkos::create_mirror_view(resize); -#else - h_resize = resize; -#endif h_resize() = 1; - new_maxneighs = typename AT:: - t_int_scalar("NeighborKokkosFunctor::new_maxneighs"); -#ifndef KOKKOS_USE_CUDA_UVM - h_new_maxneighs = Kokkos::create_mirror_view(new_maxneighs); -#else - h_new_maxneighs = new_maxneighs; -#endif h_new_maxneighs() = neigh_list.maxneighs; }; diff --git 
a/src/KOKKOS/pair_exp6_rx_kokkos.cpp b/src/KOKKOS/pair_exp6_rx_kokkos.cpp index 0a6372fdf8..3a857a6485 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.cpp +++ b/src/KOKKOS/pair_exp6_rx_kokkos.cpp @@ -310,12 +310,12 @@ void PairExp6rxKokkos::compute(int eflag_in, int vflag_in) #else // No atomics - num_threads = lmp->kokkos->num_threads; + nthreads = lmp->kokkos->nthreads; int nmax = f.extent(0); if (nmax > t_f.extent(1)) { - t_f = t_f_array_thread("pair_exp6_rx:t_f",num_threads,nmax); - t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",num_threads,nmax); - t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",num_threads,nmax); + t_f = t_f_array_thread("pair_exp6_rx:t_f",nthreads,nmax); + t_uCG = t_efloat_1d_thread("pair_exp6_rx:t_uCG",nthreads,nmax); + t_uCGnew = t_efloat_1d_thread("pair_exp6_rx:t_UCGnew",nthreads,nmax); } Kokkos::parallel_for(Kokkos::RangePolicy(0,nmax),*this); @@ -1642,7 +1642,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxComputeNoAtomics KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxCollapseDupViews, const int &i) const { - for (int n = 0; n < num_threads; n++) { + for (int n = 0; n < nthreads; n++) { f(i,0) += t_f(n,i,0); f(i,1) += t_f(n,i,1); f(i,2) += t_f(n,i,2); @@ -1654,7 +1654,7 @@ void PairExp6rxKokkos::operator()(TagPairExp6rxCollapseDupViews, con template KOKKOS_INLINE_FUNCTION void PairExp6rxKokkos::operator()(TagPairExp6rxZeroDupViews, const int &i) const { - for (int n = 0; n < num_threads; n++) { + for (int n = 0; n < nthreads; n++) { t_f(n,i,0) = 0.0; t_f(n,i,1) = 0.0; t_f(n,i,2) = 0.0; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 5e44048ae2..f3801db631 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -145,7 +145,7 @@ class PairExp6rxKokkos : public PairExp6rx { int eflag,vflag; int nlocal,newton_pair,neighflag; double special_lj[4]; - int num_threads,ntypes; + int nthreads,ntypes; typename AT::t_x_array_randomread x; typename 
AT::t_f_array f; diff --git a/src/KOKKOS/pair_kokkos.h b/src/KOKKOS/pair_kokkos.h index ab616d2c07..9ca5d9578d 100644 --- a/src/KOKKOS/pair_kokkos.h +++ b/src/KOKKOS/pair_kokkos.h @@ -86,6 +86,7 @@ struct PairComputeFunctor { NeighListKokkos* list_ptr): c(*c_ptr),list(*list_ptr) { // allocate duplicated memory + f = c.f; dup_f = Kokkos::Experimental::create_scatter_view::value >(c.f); dup_eatom = Kokkos::Experimental::create_scatter_view::value >(c.d_eatom); dup_vatom = Kokkos::Experimental::create_scatter_view::value >(c.d_vatom); @@ -255,6 +256,328 @@ struct PairComputeFunctor { return ev; } + // Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial + // Loop over neighbors of one atom without coulomb interaction + // This function is called in parallel + KOKKOS_FUNCTION + void compute_item_team(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const NoCoulTag&) const { + + const int inum = team.league_size(); + const int atoms_per_team = team.team_size(); + const int firstatom = team.league_rank()*atoms_per_team; + const int lastatom = firstatom + atoms_per_team < inum ? 
firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + t_scalar3 fsum; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, t_scalar3& ftmp) { + + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + + ftmp.x += delx*fpair; + ftmp.y += dely*fpair; + ftmp.z += delz*fpair; + } + + },fsum); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fsum.x; + f(i,1) += fsum.y; + f(i,2) += fsum.z; + }); + + }); + } + + // Use TeamPolicy, assume Newton off, Full Neighborlist, and no energy/virial + // Loop over neighbors of one atom with coulomb interaction + // This function is called in parallel + KOKKOS_FUNCTION + void compute_item_team(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const CoulTag& ) const { + + const int inum = team.league_size(); + const int atoms_per_team = team.team_size(); + int firstatom = team.league_rank()*atoms_per_team; + int lastatom = firstatom + atoms_per_team < inum ? 
firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + const F_FLOAT qtmp = c.q(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + t_scalar3 fsum; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, t_scalar3& ftmp) { + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + const F_FLOAT factor_coul = c.special_coul[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + F_FLOAT fpair = F_FLOAT(); + + if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) + fpair+=factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) + fpair+=c.template compute_fcoul(rsq,i,j,itype,jtype,factor_coul,qtmp); + + ftmp.x += delx*fpair; + ftmp.y += dely*fpair; + ftmp.z += delz*fpair; + } + },fsum); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fsum.x; + f(i,1) += fsum.y; + f(i,2) += fsum.z; + }); + }); + } + + + // Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial + // Loop over neighbors of one atom without coulomb interaction + // This function is called in parallel + KOKKOS_FUNCTION + EV_FLOAT compute_item_team_ev(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const NoCoulTag&) const { + + EV_FLOAT ev; + + const int inum = team.league_size(); + const int atoms_per_team = team.team_size(); + 
const int firstatom = team.league_rank()*atoms_per_team; + const int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + FEV_FLOAT fev; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, FEV_FLOAT& fev_tmp) { + + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + const F_FLOAT fpair = factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + + fev_tmp.f[0] += delx*fpair; + fev_tmp.f[1] += dely*fpair; + fev_tmp.f[2] += delz*fpair; + + F_FLOAT evdwl = 0.0; + if (c.eflag) { + evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); + fev.evdwl += 0.5*evdwl; + } + if (c.vflag_either) { + fev.v[0] += 0.5*delx*delx*fpair; + fev.v[1] += 0.5*dely*dely*fpair; + fev.v[2] += 0.5*delz*delz*fpair; + fev.v[3] += 0.5*delx*dely*fpair; + fev.v[4] += 0.5*delx*delz*fpair; + fev.v[5] += 0.5*dely*delz*fpair; + } + } + },fev); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fev.f[0]; + f(i,1) += fev.f[1]; + f(i,2) += fev.f[2]; + + if (c.eflag_global) + ev.evdwl += fev.evdwl; + + if (c.eflag_atom) + d_eatom(i,0) += fev.evdwl; + + if (c.vflag_global) { + ev.v[0] += fev.v[0]; + ev.v[1] += fev.v[1]; + ev.v[2] += fev.v[2]; + ev.v[3] += fev.v[3]; + ev.v[4] += fev.v[4]; + ev.v[5] += fev.v[5]; + } + + if 
(c.vflag_atom) { + d_vatom(i,0) += fev.v[0]; + d_vatom(i,1) += fev.v[1]; + d_vatom(i,2) += fev.v[2]; + d_vatom(i,3) += fev.v[3]; + d_vatom(i,4) += fev.v[4]; + d_vatom(i,5) += fev.v[5]; + } + }); + }); + return ev; + } + + // Use TeamPolicy, assume Newton off, Full Neighborlist, and energy/virial + // Loop over neighbors of one atom with coulomb interaction + // This function is called in parallel + KOKKOS_FUNCTION + EV_FLOAT compute_item_team_ev(Kokkos::TeamPolicy<>::member_type team, + const NeighListKokkos &list, const CoulTag& ) const { + + EV_FLOAT ev; + + const int inum = team.league_size(); + const int atoms_per_team = team.team_size(); + int firstatom = team.league_rank()*atoms_per_team; + int lastatom = firstatom + atoms_per_team < inum ? firstatom + atoms_per_team : inum; + Kokkos::parallel_for(Kokkos::TeamThreadRange(team, firstatom, lastatom), [&] (const int &ii) { + + const int i = list.d_ilist[ii]; + const X_FLOAT xtmp = c.x(i,0); + const X_FLOAT ytmp = c.x(i,1); + const X_FLOAT ztmp = c.x(i,2); + const int itype = c.type(i); + const F_FLOAT qtmp = c.q(i); + + const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i); + const int jnum = list.d_numneigh[i]; + + FEV_FLOAT fev; + + Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(team,jnum), + [&] (const int jj, FEV_FLOAT& fev_tmp) { + int j = neighbors_i(jj); + const F_FLOAT factor_lj = c.special_lj[sbmask(j)]; + const F_FLOAT factor_coul = c.special_coul[sbmask(j)]; + j &= NEIGHMASK; + const X_FLOAT delx = xtmp - c.x(j,0); + const X_FLOAT dely = ytmp - c.x(j,1); + const X_FLOAT delz = ztmp - c.x(j,2); + const int jtype = c.type(j); + const F_FLOAT rsq = delx*delx + dely*dely + delz*delz; + + if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) { + + F_FLOAT fpair = F_FLOAT(); + + if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) + fpair+=factor_lj*c.template compute_fpair(rsq,i,j,itype,jtype); + if(rsq < 
(STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) + fpair+=c.template compute_fcoul(rsq,i,j,itype,jtype,factor_coul,qtmp); + + fev.f[0] += delx*fpair; + fev.f[1] += dely*fpair; + fev.f[2] += delz*fpair; + + F_FLOAT evdwl = 0.0; + F_FLOAT ecoul = 0.0; + if (c.eflag) { + if(rsq < (STACKPARAMS?c.m_cut_ljsq[itype][jtype]:c.d_cut_ljsq(itype,jtype))) { + evdwl = factor_lj * c.template compute_evdwl(rsq,i,j,itype,jtype); + ev.evdwl += 0.5*evdwl; + } + if(rsq < (STACKPARAMS?c.m_cut_coulsq[itype][jtype]:c.d_cut_coulsq(itype,jtype))) { + ecoul = c.template compute_ecoul(rsq,i,j,itype,jtype,factor_coul,qtmp); + ev.ecoul += 0.5*ecoul; + } + } + if (c.vflag) { + fev.v[0] += 0.5*delx*delx*fpair; + fev.v[1] += 0.5*dely*dely*fpair; + fev.v[2] += 0.5*delz*delz*fpair; + fev.v[3] += 0.5*delx*dely*fpair; + fev.v[4] += 0.5*delx*delz*fpair; + fev.v[5] += 0.5*dely*delz*fpair; + } + } + },fev); + + Kokkos::single(Kokkos::PerThread(team), [&] (){ + f(i,0) += fev.f[0]; + f(i,1) += fev.f[1]; + f(i,2) += fev.f[2]; + + if (c.eflag_global) { + ev.evdwl += fev.evdwl; + ev.ecoul += fev.ecoul; + } + + if (c.eflag_atom) + d_eatom(i,0) += fev.evdwl + fev.ecoul; + + if (c.vflag_global) { + ev.v[0] += fev.v[0]; + ev.v[1] += fev.v[1]; + ev.v[2] += fev.v[2]; + ev.v[3] += fev.v[3]; + ev.v[4] += fev.v[4]; + ev.v[5] += fev.v[5]; + } + + if (c.vflag_atom) { + d_vatom(i,0) += fev.v[0]; + d_vatom(i,1) += fev.v[1]; + d_vatom(i,2) += fev.v[2]; + d_vatom(i,3) += fev.v[3]; + d_vatom(i,4) += fev.v[4]; + d_vatom(i,5) += fev.v[5]; + } + }); + }); + return ev; + } + KOKKOS_INLINE_FUNCTION void ev_tally(EV_FLOAT &ev, const int &i, const int &j, const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx, @@ -355,6 +678,16 @@ struct PairComputeFunctor { else energy_virial += compute_item<1,0>(i,list,typename DoCoul::type()); } + + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team) const { + compute_item_team(team,list,typename DoCoul::type()); + } 
+ + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team, value_type &energy_virial) const { + energy_virial += compute_item_team_ev(team,list,typename DoCoul::type()); + } }; template @@ -489,6 +822,15 @@ struct PairComputeFunctor { void operator()(const int i, value_type &energy_virial) const { energy_virial += compute_item<1,0>(i,list,typename DoCoul::type()); } + + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team) const + {} + + KOKKOS_INLINE_FUNCTION + void operator()(const typename Kokkos::TeamPolicy<>::member_type& team, value_type &energy_virial) const + {} + }; // Filter out Neighflags which are not supported for PairStyle @@ -507,20 +849,57 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable return ev; } +template +int GetTeamSize(FunctorStyle& functor, int team_size, int vector_length) { + int team_size_max = Kokkos::TeamPolicy<>::team_size_max(functor); + +#ifdef KOKKOS_ENABLE_CUDA + if(team_size*vector_length > team_size_max) + team_size = team_size_max/vector_length; +#else + team_size = 1; +#endif + return team_size; +} + // Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL,N2 template EV_FLOAT pair_compute_neighlist (PairStyle* fpair, typename Kokkos::Impl::enable_if<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos*>::type list) { EV_FLOAT ev; - if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { - PairComputeFunctor ff(fpair,list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - ff.contribute(); + + if (!fpair->lmp->kokkos->neigh_thread_set) + if (list->inum <= 16384 && NEIGHFLAG == FULL) + fpair->lmp->kokkos->neigh_thread = 1; + + if (fpair->lmp->kokkos->neigh_thread) { + int vector_length = 8; + int atoms_per_team = 32; + + if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { + PairComputeFunctor ff(fpair,list); + atoms_per_team = 
GetTeamSize(ff, atoms_per_team, vector_length); + Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); + else Kokkos::parallel_for(policy,ff); + } else { + PairComputeFunctor ff(fpair,list); + atoms_per_team = GetTeamSize(ff, atoms_per_team, vector_length); + Kokkos::TeamPolicy > policy(list->inum,atoms_per_team,vector_length); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev); + else Kokkos::parallel_for(policy,ff); + } } else { - PairComputeFunctor ff(fpair,list); - if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); - else Kokkos::parallel_for(list->inum,ff); - ff.contribute(); + if(fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) { + PairComputeFunctor ff(fpair,list); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + ff.contribute(); + } else { + PairComputeFunctor ff(fpair,list); + if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev); + else Kokkos::parallel_for(list->inum,ff); + ff.contribute(); + } } return ev; } diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index c233ca6264..7c01adc510 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -1656,7 +1656,7 @@ void PPPMKokkos::make_rho() iy = nyhi_out-nylo_out + 1; copymode = 1; - Kokkos::TeamPolicy config(lmp->kokkos->num_threads,1); + Kokkos::TeamPolicy config(lmp->kokkos->nthreads,1); Kokkos::parallel_for(config,*this); copymode = 0; #endif diff --git a/src/KOKKOS/rand_pool_wrap_kokkos.cpp b/src/KOKKOS/rand_pool_wrap_kokkos.cpp index 39b91f1600..51ebcb154e 100644 --- a/src/KOKKOS/rand_pool_wrap_kokkos.cpp +++ b/src/KOKKOS/rand_pool_wrap_kokkos.cpp @@ -25,7 +25,7 @@ using namespace LAMMPS_NS; RandPoolWrap::RandPoolWrap(int, LAMMPS *lmp) : Pointers(lmp) { random_thr = NULL; - nthreads = lmp->kokkos->num_threads; + nthreads = 
lmp->kokkos->nthreads; } /* ---------------------------------------------------------------------- */ @@ -59,7 +59,7 @@ void RandPoolWrap::init(RanMars* random, int seed) // allocate pool of RNGs // generate a random number generator instance for // all threads != 0. make sure we use unique seeds. - nthreads = lmp->kokkos->num_threads; + nthreads = lmp->kokkos->nthreads; random_thr = new RanMars*[nthreads]; for (int tid = 1; tid < nthreads; ++tid) { random_thr[tid] = new RanMars(lmp, seed + comm->me diff --git a/src/KOKKOS/verlet_kokkos.cpp b/src/KOKKOS/verlet_kokkos.cpp index d75a7e491f..b80d5e0646 100644 --- a/src/KOKKOS/verlet_kokkos.cpp +++ b/src/KOKKOS/verlet_kokkos.cpp @@ -93,7 +93,6 @@ void VerletKokkos::setup(int flag) } update->setupflag = 1; - lmp->kokkos->auto_sync = 0; // setup domain, communication and neighboring // acquire ghosts @@ -187,6 +186,7 @@ void VerletKokkos::setup(int flag) } if (force->newton) comm->reverse_comm(); + lmp->kokkos->auto_sync = 0; modify->setup(vflag); output->setup(flag); lmp->kokkos->auto_sync = 1; @@ -202,7 +202,6 @@ void VerletKokkos::setup(int flag) void VerletKokkos::setup_minimal(int flag) { update->setupflag = 1; - lmp->kokkos->auto_sync = 0; // setup domain, communication and neighboring // acquire ghosts @@ -294,7 +293,6 @@ void VerletKokkos::setup_minimal(int flag) if (force->newton) comm->reverse_comm(); modify->setup(vflag); - lmp->kokkos->auto_sync = 1; update->setupflag = 0; } diff --git a/src/USER-MISC/pair_extep.cpp b/src/USER-MISC/pair_extep.cpp index 132b857dde..bd5da71f4a 100644 --- a/src/USER-MISC/pair_extep.cpp +++ b/src/USER-MISC/pair_extep.cpp @@ -19,6 +19,7 @@ #include #include #include +#include #include "pair_extep.h" #include "atom.h" #include "neighbor.h" @@ -705,7 +706,7 @@ void PairExTeP::read_file(char *file) error->all(FLERR,"Illegal ExTeP parameter"); nparams++; - if (nparams >= pow(atom->ntypes,3)) break; + if (nparams >= pow(nelements,3)) break; } // deallocate words array @@ -746,14 
+747,24 @@ void PairExTeP::read_file(char *file) nwords = atom->count_words(line); if (nwords == 0) continue; - if (nwords != params_per_line) - error->all(FLERR,"Incorrect format in ExTeP potential file"); - // words = ptrs to all words in line nwords = 0; words[nwords++] = strtok(line," \t\n\r\f"); - while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + while ((nwords < params_per_line) + && (words[nwords++] = strtok(NULL," \t\n\r\f"))) continue; + + // skip line if it is a leftover from the previous section, + // which can be identified by having 3 elements (instead of 2) + // as first words. + + if (isupper(words[0][0]) && isupper(words[1][0]) && isupper(words[2][0])) + continue; + + // need to have two elements followed by a number in each line + if (!(isupper(words[0][0]) && isupper(words[1][0]) + && !isupper(words[2][0]))) + error->all(FLERR,"Incorrect format in ExTeP potential file"); // ielement,jelement = 1st args // if all 3 args are in element list, then parse this line @@ -1074,8 +1085,8 @@ void PairExTeP::costheta_d(double *rij_hat, double rij, // initialize spline for F_corr (based on PairLCBOP::F_conj) void PairExTeP::spline_init() { - for ( int iel=0; ielntypes; iel++) { - for ( int jel=0; jelntypes; jel++) { + for ( int iel=0; ielkokkos) { - nthreads = lmp->kokkos->num_threads * lmp->kokkos->numa; + nthreads = lmp->kokkos->nthreads * lmp->kokkos->numa; } else if (getenv("OMP_NUM_THREADS") == NULL) { nthreads = 1; if (me == 0) diff --git a/src/finish.cpp b/src/finish.cpp index 9ad8b44927..1baa6d6fda 100644 --- a/src/finish.cpp +++ b/src/finish.cpp @@ -176,9 +176,9 @@ void Finish::end(int flag) const char fmt2[] = "%.1f%% CPU use with %d MPI tasks x %d OpenMP threads\n"; if (screen) fprintf(screen,fmt2,cpu_loop,nprocs, - lmp->kokkos->num_threads); + lmp->kokkos->nthreads); if (logfile) fprintf(logfile,fmt2,cpu_loop,nprocs, - lmp->kokkos->num_threads); + lmp->kokkos->nthreads); } else { #if defined(_OPENMP) const char fmt2[] = @@ -579,7 
+579,7 @@ void Finish::end(int flag) } #endif - if (lmp->kokkos && lmp->kokkos->ngpu > 0) + if (lmp->kokkos && lmp->kokkos->ngpus > 0) if (const char* env_clb = getenv("CUDA_LAUNCH_BLOCKING")) if (!(strcmp(env_clb,"1") == 0)) { error->warning(FLERR,"Timing breakdown may not be accurate " diff --git a/src/neighbor.h b/src/neighbor.h index ffe181313b..3466dd426a 100644 --- a/src/neighbor.h +++ b/src/neighbor.h @@ -126,6 +126,8 @@ class Neighbor : protected Pointers { bigint memory_usage(); + bigint last_setup_bins; // step of last neighbor::setup_bins() call + protected: int me,nprocs; int firsttime; // flag for calling init_styles() only once @@ -139,8 +141,6 @@ class Neighbor : protected Pointers { int fix_check; // # of fixes that induce reneigh int *fixchecklist; // which fixes to check - bigint last_setup_bins; // step of last neighbor::setup_bins() call - double triggersq; // trigger = build when atom moves this dist double **xhold; // atom coords at last neighbor build