Compare commits
189 Commits
patch_11Ma
...
patch_22Ju
| Author | SHA1 | Date | |
|---|---|---|---|
| d237625d2a | |||
| 510834cb7b | |||
| f5f73b0172 | |||
| f0a2c57401 | |||
| 4ca3d30db7 | |||
| cbfb751e92 | |||
| 65c895721f | |||
| bfacfc46be | |||
| 919d95d375 | |||
| 258185690d | |||
| cea0227f14 | |||
| 3e58ebea4f | |||
| 7a22612bea | |||
| 9805b5d34a | |||
| 28504e91c0 | |||
| 47482afbc1 | |||
| 7d3a549de9 | |||
| a9b794b2ab | |||
| 3ae8fdccd8 | |||
| 70a7b37614 | |||
| 036647c102 | |||
| 37b901eadb | |||
| 5a3ef99272 | |||
| 03a7d1cd5d | |||
| 08331da93d | |||
| ab0f3debeb | |||
| fe14bfd1d2 | |||
| d55c7039ee | |||
| 87dcef7115 | |||
| 7e234497ff | |||
| 277b43b2fd | |||
| 6f61ed2af5 | |||
| 16cc613993 | |||
| 6842a527e0 | |||
| d094bf789c | |||
| 3750e0a427 | |||
| 40c4461a78 | |||
| d830fc2301 | |||
| 265b6c261e | |||
| 3411b0e576 | |||
| 47458f1ca0 | |||
| c4bcb7e73d | |||
| 18b543179c | |||
| 14c4438a72 | |||
| acd20512f0 | |||
| 5fcdfe6dcb | |||
| 7c9afc1be2 | |||
| fe483a769f | |||
| f312fbfe32 | |||
| f68c6dddb8 | |||
| c5b51df06e | |||
| 479cc9424d | |||
| 673202d05f | |||
| 3e79296dcf | |||
| fcf9ed0fc2 | |||
| 661848139c | |||
| 12d8370ec5 | |||
| 3c781afa6c | |||
| 37268e435b | |||
| 29e555213c | |||
| 965ac3cedd | |||
| 30e3e9ab88 | |||
| 6cb0c2e9c5 | |||
| ebd93cac69 | |||
| a2dd21dca5 | |||
| e78ed7d044 | |||
| 142de878f9 | |||
| 5a383aa518 | |||
| a4a3133b76 | |||
| fa5ecf88a5 | |||
| 7ca794beb9 | |||
| c386bdddf9 | |||
| 56c3a0be29 | |||
| cf3095e938 | |||
| 6056c1db4d | |||
| 38182dcc60 | |||
| ef3c0dbf7b | |||
| 4c18a07193 | |||
| e3e040227b | |||
| c27e617fd8 | |||
| b9a8f91753 | |||
| 95e259fe71 | |||
| 69a9f1bf94 | |||
| e7773808a5 | |||
| 0368202d12 | |||
| 3ba87e52e9 | |||
| 1422b0413b | |||
| 18c6d7f289 | |||
| 1b4068b575 | |||
| 341fa160fe | |||
| 6c42c9b378 | |||
| ce4a446cea | |||
| 933cf92e36 | |||
| 69903cb4aa | |||
| ad5cfb5ae6 | |||
| 2f9e6d4566 | |||
| 4ec31564fe | |||
| f5b7419108 | |||
| 1f193e02e0 | |||
| 838a1938bb | |||
| df3eb36519 | |||
| bb6dcb8ebd | |||
| e6f76451eb | |||
| cfb350d40b | |||
| 5daf1fe0d4 | |||
| 2e7b919774 | |||
| 858c211fdc | |||
| 6997aedf30 | |||
| cab602045e | |||
| 6944973484 | |||
| 9e82c86fbd | |||
| 68dd7d4f2b | |||
| b2da3ca3e9 | |||
| e1361a9dca | |||
| 94c6d2d546 | |||
| 585a164e78 | |||
| 5a4a7cebc1 | |||
| 133f0922b3 | |||
| 1a88ffd5ab | |||
| 307e471456 | |||
| f5d66e74c5 | |||
| bebf25dcb3 | |||
| 3f316f71ef | |||
| 8303d1a375 | |||
| 363d8ef775 | |||
| a449488b1d | |||
| e076d08ee9 | |||
| 1a5144bf37 | |||
| ca6920be72 | |||
| 55d8cc0341 | |||
| 6b73e29f64 | |||
| 09046ad3f7 | |||
| 06c18dd36c | |||
| 55bd823339 | |||
| 77e04a9213 | |||
| 233f03ea8e | |||
| f65b06de0e | |||
| 0f32d603b5 | |||
| 3440b1a2a7 | |||
| 8952b1107c | |||
| b3b3c225e8 | |||
| 4d34e55072 | |||
| 4820cef5d4 | |||
| e7598eeec0 | |||
| a5d29fa52e | |||
| 821ef0cf77 | |||
| 42f22a9723 | |||
| 0d8110bc19 | |||
| 44c5e88e24 | |||
| 308dac6821 | |||
| 944dab1351 | |||
| c3cc07bd3d | |||
| c50258e89c | |||
| 9674512997 | |||
| f864584bce | |||
| 075598b405 | |||
| 7780d92823 | |||
| 66caf1c1a3 | |||
| 95eb86d6c7 | |||
| d43c556263 | |||
| d7992f324d | |||
| a25895d31d | |||
| 4f762deff8 | |||
| 843f89fc92 | |||
| 6336c3b975 | |||
| a43e74180a | |||
| 6626cd5aaa | |||
| 6226ec8831 | |||
| 6c91cc1f76 | |||
| 4e42ee2cfc | |||
| 1224296365 | |||
| 25cef528c9 | |||
| 6359392834 | |||
| 71e1867dd2 | |||
| f90af1839e | |||
| 2ebd4eb26e | |||
| 3682bc47c2 | |||
| 2797afbf3e | |||
| 635383c00c | |||
| b6a8168e3f | |||
| 71fce2e413 | |||
| 994fd2af0e | |||
| d8138a5b4c | |||
| a001f2c248 | |||
| 8104568b6a | |||
| 96d1c421df | |||
| 96c5c92b1a | |||
| 192f83164c | |||
| 78d8be311c |
11
.github/CODEOWNERS
vendored
11
.github/CODEOWNERS
vendored
@ -43,3 +43,14 @@ src/USER-MISC/*_grem.* @dstelter92
|
||||
|
||||
# tools
|
||||
tools/msi2lmp/* @akohlmey
|
||||
|
||||
# cmake
|
||||
cmake/* @junghans @rbberger
|
||||
|
||||
# python
|
||||
python/* @rbberger
|
||||
|
||||
# docs
|
||||
doc/utils/*/* @rbberger
|
||||
doc/Makefile @rbberger
|
||||
doc/README @rbberger
|
||||
|
||||
@ -2,9 +2,9 @@
|
||||
# CMake build system
|
||||
# This file is part of LAMMPS
|
||||
# Created by Christoph Junghans and Richard Berger
|
||||
cmake_minimum_required(VERSION 3.1)
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
|
||||
project(lammps)
|
||||
project(lammps CXX)
|
||||
set(SOVERSION 0)
|
||||
set(LAMMPS_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../src)
|
||||
set(LAMMPS_LIB_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../lib)
|
||||
@ -23,14 +23,22 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CXX_FLAGS)
|
||||
set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
|
||||
endif(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CXX_FLAGS)
|
||||
|
||||
# remove any style headers in the src dir
|
||||
file(GLOB SRC_STYLE_FILES ${LAMMPS_SOURCE_DIR}/style_*.h)
|
||||
if(SRC_STYLE_FILES)
|
||||
file(REMOVE ${SRC_STYLE_FILES})
|
||||
file(GLOB SRC_FILES ${LAMMPS_SOURCE_DIR}/*.cpp)
|
||||
list(SORT SRC_FILES)
|
||||
# check for files installed by make-based buildsystem
|
||||
# only run this time consuming check if there are new files
|
||||
if(NOT SRC_FILES STREQUAL SRC_FILES_CACHED)
|
||||
file(GLOB SRC_PKG_FILES ${LAMMPS_SOURCE_DIR}/*/*.cpp)
|
||||
message(STATUS "Running check for installed package (this might take a while)")
|
||||
foreach(_SRC SRC_PKG_FILES)
|
||||
get_filename_component(FILENAME "${_SRC}" NAME)
|
||||
if(EXISTS ${LAMMPS_SOURCE_DIR}/${FILENAME})
|
||||
message(FATAL_ERROR "Found packages installed by the make-based buildsystem, please run 'make -C ${LAMMPS_SOURCE_DIR} no-all purge'")
|
||||
endif()
|
||||
endforeach()
|
||||
set(SRC_FILES_CACHED "${SRC_FILES}" CACHE INTERNAL "List of file in LAMMPS_SOURCE_DIR" FORCE)
|
||||
endif()
|
||||
|
||||
enable_language(CXX)
|
||||
|
||||
######################################################################
|
||||
# compiler tests
|
||||
# these need to be done early (before further tests).
|
||||
@ -41,6 +49,11 @@ if (${CMAKE_CXX_COMPILER_ID} STREQUAL "Intel")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -restrict")
|
||||
endif()
|
||||
|
||||
option(ENABLE_COVERAGE "Enable code coverage" OFF)
|
||||
if(ENABLE_COVERAGE)
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} --coverage")
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
# User input options #
|
||||
########################################################################
|
||||
@ -48,21 +61,27 @@ option(BUILD_SHARED_LIBS "Build shared libs" OFF)
|
||||
if(BUILD_SHARED_LIBS) # for all pkg libs, mpi_stubs and linalg
|
||||
set(CMAKE_POSITION_INDEPENDENT_CODE ON)
|
||||
endif()
|
||||
option(DEVELOPER_MODE "Enable developer mode" OFF)
|
||||
mark_as_advanced(DEVELOPER_MODE)
|
||||
option(CMAKE_VERBOSE_MAKEFILE "Generate verbose Makefiles" OFF)
|
||||
include(GNUInstallDirs)
|
||||
|
||||
set(LAMMPS_LINK_LIBS)
|
||||
set(LAMMPS_DEPS)
|
||||
set(LAMMPS_API_DEFINES)
|
||||
option(ENABLE_MPI "Build MPI version" OFF)
|
||||
if(ENABLE_MPI)
|
||||
|
||||
find_package(MPI QUIET)
|
||||
option(BUILD_MPI "Build MPI version" ${MPI_FOUND})
|
||||
if(BUILD_MPI)
|
||||
find_package(MPI REQUIRED)
|
||||
include_directories(${MPI_C_INCLUDE_PATH})
|
||||
include_directories(${MPI_CXX_INCLUDE_PATH})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${MPI_CXX_LIBRARIES})
|
||||
option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF)
|
||||
if(LAMMPS_LONGLONG_TO_LONG)
|
||||
add_definitions(-DLAMMPS_LONGLONG_TO_LONG)
|
||||
endif()
|
||||
else()
|
||||
enable_language(C)
|
||||
file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.c)
|
||||
add_library(mpi_stubs STATIC ${MPI_SOURCES})
|
||||
include_directories(${LAMMPS_SOURCE_DIR}/STUBS)
|
||||
@ -108,14 +127,14 @@ set(OTHER_PACKAGES KIM PYTHON MSCG MPIIO VORONOI POEMS LATTE
|
||||
USER-SMTBQ USER-SPH USER-TALLY USER-UEF USER-VTK USER-QUIP USER-QMMM)
|
||||
set(ACCEL_PACKAGES USER-OMP KOKKOS OPT USER-INTEL GPU)
|
||||
foreach(PKG ${DEFAULT_PACKAGES})
|
||||
option(ENABLE_${PKG} "Build ${PKG} Package" ${ENABLE_ALL})
|
||||
option(PKG_${PKG} "Build ${PKG} Package" ${ENABLE_ALL})
|
||||
endforeach()
|
||||
foreach(PKG ${ACCEL_PACKAGES} ${OTHER_PACKAGES})
|
||||
option(ENABLE_${PKG} "Build ${PKG} Package" OFF)
|
||||
option(PKG_${PKG} "Build ${PKG} Package" OFF)
|
||||
endforeach()
|
||||
|
||||
macro(pkg_depends PKG1 PKG2)
|
||||
if(ENABLE_${PKG1} AND NOT ENABLE_${PKG2})
|
||||
if(PKG_${PKG1} AND NOT (PKG_${PKG2} OR BUILD_${PKG2}))
|
||||
message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}")
|
||||
endif()
|
||||
endmacro()
|
||||
@ -123,39 +142,51 @@ endmacro()
|
||||
pkg_depends(MPIIO MPI)
|
||||
pkg_depends(QEQ MANYBODY)
|
||||
pkg_depends(USER-ATC MANYBODY)
|
||||
pkg_depends(USER-H5MD MPI)
|
||||
pkg_depends(USER-LB MPI)
|
||||
pkg_depends(USER-MISC MANYBODY)
|
||||
pkg_depends(USER-PHONON KSPACE)
|
||||
pkg_depends(CORESHELL KSPACE)
|
||||
|
||||
######################################################
|
||||
# packages with special compiler needs or external libs
|
||||
######################################################
|
||||
if(ENABLE_REAX OR ENABLE_MEAM OR ENABLE_USER-QUIP OR ENABLE_USER-QMMM OR ENABLE_LATTE)
|
||||
if(PKG_REAX OR PKG_MEAM OR PKG_USER-QUIP OR PKG_USER-QMMM OR PKG_LATTE)
|
||||
enable_language(Fortran)
|
||||
include(CheckFortranCompilerFlag)
|
||||
check_Fortran_compiler_flag("-fno-second-underscore" FC_HAS_NO_SECOND_UNDERSCORE)
|
||||
endif()
|
||||
|
||||
if(ENABLE_KOKKOS OR ENABLE_MSCG)
|
||||
# starting with CMake 3.1 this is all you have to do to enforce C++11
|
||||
set(CMAKE_CXX_STANDARD 11) # C++11...
|
||||
set(CMAKE_CXX_STANDARD_REQUIRED ON) #...is required...
|
||||
set(CMAKE_CXX_EXTENSIONS OFF) #...without compiler extensions like gnu++11
|
||||
if(PKG_MEAM OR PKG_USER-H5MD OR PKG_USER-QMMM)
|
||||
enable_language(C)
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-OMP OR ENABLE_KOKKOS OR ENABLE_USER-INTEL)
|
||||
find_package(OpenMP QUIET)
|
||||
option(BUILD_OMP "Build with OpenMP support" ${OpenMP_FOUND})
|
||||
if(BUILD_OMP OR PKG_USER-OMP OR PKG_KOKKOS OR PKG_USER-INTEL)
|
||||
find_package(OpenMP REQUIRED)
|
||||
set (CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
|
||||
set (CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
|
||||
endif()
|
||||
|
||||
if(ENABLE_KSPACE)
|
||||
set(FFT "KISSFFT" CACHE STRING "FFT library for KSPACE package")
|
||||
set_property(CACHE FFT PROPERTY STRINGS KISSFFT FFTW3 MKL FFTW2)
|
||||
if(PKG_KSPACE)
|
||||
option(FFT_SINGLE "Use single precision FFT instead of double" OFF)
|
||||
set(FFTW "FFTW3")
|
||||
if(FFT_SINGLE)
|
||||
set(FFTW "FFTW3F")
|
||||
add_definitions(-DFFT_SINGLE)
|
||||
endif()
|
||||
find_package(${FFTW} QUIET)
|
||||
if(${FFTW}_FOUND)
|
||||
set(FFT "${FFTW}" CACHE STRING "FFT library for KSPACE package")
|
||||
else()
|
||||
set(FFT "KISSFFT" CACHE STRING "FFT library for KSPACE package")
|
||||
endif()
|
||||
set_property(CACHE FFT PROPERTY STRINGS KISSFFT ${FFTW} MKL)
|
||||
if(NOT FFT STREQUAL "KISSFFT")
|
||||
find_package(${FFT} REQUIRED)
|
||||
add_definitions(-DFFT_${FFT})
|
||||
if(NOT FFT STREQUAL "FFTW3F")
|
||||
add_definitions(-DFFT_FFTW)
|
||||
else()
|
||||
add_definitions(-DFFT_${FFT})
|
||||
endif()
|
||||
include_directories(${${FFT}_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${${FFT}_LIBRARIES})
|
||||
endif()
|
||||
@ -166,22 +197,17 @@ if(ENABLE_KSPACE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_MSCG OR ENABLE_USER-ATC OR ENABLE_USER-AWPMD OR ENABLE_USER-QUIP OR ENABLE_LATTE)
|
||||
if(PKG_MSCG OR PKG_USER-ATC OR PKG_USER-AWPMD OR PKG_USER-QUIP OR PKG_LATTE)
|
||||
find_package(LAPACK)
|
||||
if(NOT LAPACK_FOUND)
|
||||
enable_language(Fortran)
|
||||
file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/*.f)
|
||||
file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/*.[fF])
|
||||
add_library(linalg STATIC ${LAPACK_SOURCES})
|
||||
include(CheckFortranCompilerFlag)
|
||||
check_Fortran_compiler_flag("-fno-second-underscore" FC_HAS_NO_SECOND_UNDERSCORE)
|
||||
if(FC_HAS_NO_SECOND_UNDERSCORE)
|
||||
target_compile_options(linalg PRIVATE -fno-second-underscore)
|
||||
endif()
|
||||
set(LAPACK_LIBRARIES linalg)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if(ENABLE_PYTHON)
|
||||
if(PKG_PYTHON)
|
||||
find_package(PythonInterp REQUIRED)
|
||||
find_package(PythonLibs REQUIRED)
|
||||
add_definitions(-DLMP_PYTHON)
|
||||
@ -197,16 +223,25 @@ if(ENABLE_PYTHON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_package(JPEG)
|
||||
if(JPEG_FOUND)
|
||||
find_package(JPEG QUIET)
|
||||
option(WITH_JPEG "Enable JPEG support" ${JPEG_FOUND})
|
||||
if(WITH_JPEG)
|
||||
find_package(JPEG REQUIRED)
|
||||
add_definitions(-DLAMMPS_JPEG)
|
||||
include_directories(${JPEG_INCLUDE_DIR})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${JPEG_LIBRARIES})
|
||||
endif()
|
||||
|
||||
find_package(PNG)
|
||||
find_package(ZLIB)
|
||||
find_package(PNG QUIET)
|
||||
find_package(ZLIB QUIET)
|
||||
if(PNG_FOUND AND ZLIB_FOUND)
|
||||
option(WITH_PNG "Enable PNG support" ON)
|
||||
else()
|
||||
option(WITH_PNG "Enable PNG support" OFF)
|
||||
endif()
|
||||
if(WITH_PNG)
|
||||
find_package(PNG REQUIRED)
|
||||
find_package(ZLIB REQUIRED)
|
||||
include_directories(${PNG_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${PNG_LIBRARIES} ${ZLIB_LIBRARIES})
|
||||
add_definitions(-DLAMMPS_PNG)
|
||||
@ -214,25 +249,50 @@ endif()
|
||||
|
||||
find_program(GZIP_EXECUTABLE gzip)
|
||||
find_package_handle_standard_args(GZIP REQUIRED_VARS GZIP_EXECUTABLE)
|
||||
if(GZIP_FOUND)
|
||||
option(WITH_GZIP "Enable GZIP support" ${GZIP_FOUND})
|
||||
if(WITH_GZIP)
|
||||
if(NOT GZIP_FOUND)
|
||||
message(FATAL_ERROR "gzip executable not found")
|
||||
endif()
|
||||
add_definitions(-DLAMMPS_GZIP)
|
||||
endif()
|
||||
|
||||
find_program(FFMPEG_EXECUTABLE ffmpeg)
|
||||
find_package_handle_standard_args(FFMPEG REQUIRED_VARS FFMPEG_EXECUTABLE)
|
||||
if(FFMPEG_FOUND)
|
||||
option(WITH_FFMPEG "Enable FFMPEG support" ${FFMPEG_FOUND})
|
||||
if(WITH_FFMPEG)
|
||||
if(NOT FFMPEG_FOUND)
|
||||
message(FATAL_ERROR "ffmpeg executable not found")
|
||||
endif()
|
||||
add_definitions(-DLAMMPS_FFMPEG)
|
||||
endif()
|
||||
|
||||
if(ENABLE_VORONOI)
|
||||
find_package(VORO REQUIRED) #some distros
|
||||
if(PKG_VORONOI)
|
||||
option(DOWNLOAD_VORO "Download voro++ (instead of using the system's one)" OFF)
|
||||
if(DOWNLOAD_VORO)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(voro_build
|
||||
URL http://math.lbl.gov/voro++/download/dir/voro++-0.4.6.tar.gz
|
||||
URL_MD5 2338b824c3b7b25590e18e8df5d68af9
|
||||
CONFIGURE_COMMAND "" BUILD_IN_SOURCE 1 INSTALL_COMMAND ""
|
||||
)
|
||||
ExternalProject_get_property(voro_build SOURCE_DIR)
|
||||
set(VORO_LIBRARIES ${SOURCE_DIR}/src/libvoro++.a)
|
||||
set(VORO_INCLUDE_DIRS ${SOURCE_DIR}/src)
|
||||
list(APPEND LAMMPS_DEPS voro_build)
|
||||
else()
|
||||
find_package(VORO)
|
||||
if(NOT VORO_FOUND)
|
||||
message(FATAL_ERROR "VORO not found, help CMake to find it by setting VORO_LIBRARY and VORO_INCLUDE_DIR, or set DOWNLOAD_VORO=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
include_directories(${VORO_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_LATTE)
|
||||
find_package(LATTE QUIET)
|
||||
if(NOT LATTE_FOUND)
|
||||
if(PKG_LATTE)
|
||||
option(DOWNLOAD_LATTE "Download latte (instead of using the system's one)" OFF)
|
||||
if(DOWNLOAD_LATTE)
|
||||
message(STATUS "LATTE not found - we will build our own")
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(latte_build
|
||||
@ -244,55 +304,76 @@ if(ENABLE_LATTE)
|
||||
ExternalProject_get_property(latte_build INSTALL_DIR)
|
||||
set(LATTE_LIBRARIES ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/liblatte.a)
|
||||
list(APPEND LAMMPS_DEPS latte_build)
|
||||
else()
|
||||
find_package(LATTE)
|
||||
if(NOT LATTE_FOUND)
|
||||
message(FATAL_ERROR "LATTE not found, help CMake to find it by setting LATTE_LIBRARY, or set DOWNLOAD_LATTE=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND LAMMPS_LINK_LIBS ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES} ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-MOLFILE)
|
||||
if(PKG_USER-MOLFILE)
|
||||
add_library(molfile INTERFACE)
|
||||
target_include_directories(molfile INTERFACE ${LAMMPS_LIB_SOURCE_DIR}/molfile)
|
||||
target_link_libraries(molfile INTERFACE ${CMAKE_DL_LIBS})
|
||||
list(APPEND LAMMPS_LINK_LIBS molfile)
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-NETCDF)
|
||||
if(PKG_USER-NETCDF)
|
||||
find_package(NetCDF REQUIRED)
|
||||
include_directories(NETCDF_INCLUDE_DIR)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARY})
|
||||
add_definitions(-DLMP_HAS_NETCDF -DNC_64BIT_DATA=0x0020)
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-SMD)
|
||||
find_package(Eigen3 REQUIRED)
|
||||
if(PKG_USER-SMD)
|
||||
option(DOWNLOAD_Eigen3 "Download Eigen3 (instead of using the system's one)" OFF)
|
||||
if(DOWNLOAD_Eigen3)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(Eigen3_build
|
||||
URL http://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz
|
||||
URL_MD5 1a47e78efe365a97de0c022d127607c3
|
||||
CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND ""
|
||||
)
|
||||
ExternalProject_get_property(Eigen3_build SOURCE_DIR)
|
||||
set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR})
|
||||
list(APPEND LAMMPS_DEPS Eigen3_build)
|
||||
else()
|
||||
find_package(Eigen3)
|
||||
if(NOT Eigen3_FOUND)
|
||||
message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_Eigen3=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
include_directories(${EIGEN3_INCLUDE_DIR})
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-QUIP)
|
||||
if(PKG_USER-QUIP)
|
||||
find_package(QUIP REQUIRED)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${LAPACK_LIBRARIES} ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-QMMM)
|
||||
if(PKG_USER-QMMM)
|
||||
message(WARNING "Building QMMM with CMake is still experimental")
|
||||
find_package(QE REQUIRED)
|
||||
include_directories(${QE_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${QE_LIBRARIES} ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${QE_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-VTK)
|
||||
if(PKG_USER-VTK)
|
||||
find_package(VTK REQUIRED NO_MODULE)
|
||||
include(${VTK_USE_FILE})
|
||||
add_definitions(-DLAMMPS_VTK)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_KIM)
|
||||
find_package(KIM QUIET)
|
||||
if(NOT KIM_FOUND)
|
||||
message(STATUS "KIM not found - we will build our own")
|
||||
if(PKG_KIM)
|
||||
option(DOWNLOAD_KIM "Download kim-api (instead of using the system's one)" OFF)
|
||||
if(DOWNLOAD_KIM)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(kim_build
|
||||
URL https://github.com/openkim/kim-api/archive/v1.9.4.tar.gz
|
||||
URL_MD5 f4d35a1705eed46d64c7c0ab448ff3e0
|
||||
URL https://github.com/openkim/kim-api/archive/v1.9.5.tar.gz
|
||||
URL_MD5 9f66efc128da33039e30659f36fc6d00
|
||||
BUILD_IN_SOURCE 1
|
||||
CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
|
||||
)
|
||||
@ -300,41 +381,62 @@ if(ENABLE_KIM)
|
||||
set(KIM_INCLUDE_DIRS ${INSTALL_DIR}/include/kim-api-v1)
|
||||
set(KIM_LIBRARIES ${INSTALL_DIR}/lib/libkim-api-v1.so)
|
||||
list(APPEND LAMMPS_DEPS kim_build)
|
||||
else()
|
||||
find_package(KIM)
|
||||
if(NOT KIM_FOUND)
|
||||
message(FATAL_ERROR "KIM not found, help CMake to find it by setting KIM_LIBRARY and KIM_INCLUDE_DIR, or set DOWNLOAD_KIM=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND LAMMPS_LINK_LIBS ${KIM_LIBRARIES})
|
||||
include_directories(${KIM_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
if(ENABLE_MSCG)
|
||||
if(PKG_MSCG)
|
||||
find_package(GSL REQUIRED)
|
||||
set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/mscg)
|
||||
set(MSCG_TARBALL ${LAMMPS_LIB_MSCG_BIN_DIR}/MS-CG-master.zip)
|
||||
set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_MSCG_BIN_DIR}/MSCG-release-master/src)
|
||||
if(NOT EXISTS ${LAMMPS_LIB_MSCG_BIN_DIR})
|
||||
if(NOT EXISTS ${MSCG_TARBALL})
|
||||
message(STATUS "Downloading ${MSCG_TARBALL}")
|
||||
file(DOWNLOAD
|
||||
https://github.com/uchicago-voth/MSCG-release/archive/master.zip
|
||||
${MSCG_TARBALL} SHOW_PROGRESS) #EXPECTED_MD5 cannot be due due to master
|
||||
option(DOWNLOAD_MSCG "Download latte (instead of using the system's one)" OFF)
|
||||
if(DOWNLOAD_MSCG)
|
||||
include(ExternalProject)
|
||||
if(NOT LAPACK_FOUND)
|
||||
set(EXTRA_MSCG_OPTS "-DLAPACK_LIBRARIES=${CMAKE_CURRENT_BINARY_DIR}/liblinalg.a")
|
||||
endif()
|
||||
ExternalProject_Add(mscg_build
|
||||
URL https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz
|
||||
URL_MD5 8c45e269ee13f60b303edd7823866a91
|
||||
SOURCE_SUBDIR src/CMake
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> -DCMAKE_POSITION_INDEPENDENT_CODE=${CMAKE_POSITION_INDEPENDENT_CODE} ${EXTRA_MSCG_OPTS}
|
||||
BUILD_COMMAND make mscg INSTALL_COMMAND ""
|
||||
)
|
||||
ExternalProject_get_property(mscg_build BINARY_DIR)
|
||||
set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a)
|
||||
ExternalProject_get_property(mscg_build SOURCE_DIR)
|
||||
set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src)
|
||||
list(APPEND LAMMPS_DEPS mscg_build)
|
||||
if(NOT LAPACK_FOUND)
|
||||
file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS})
|
||||
add_dependencies(mscg_build linalg)
|
||||
endif()
|
||||
else()
|
||||
find_package(MSCG)
|
||||
if(NOT MSCG_FOUND)
|
||||
message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it")
|
||||
endif()
|
||||
message(STATUS "Unpacking ${MSCG_TARBALL}")
|
||||
execute_process(COMMAND ${CMAKE_COMMAND} -E tar xvf ${MSCG_TARBALL}
|
||||
WORKING_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/mscg)
|
||||
endif()
|
||||
file(GLOB MSCG_SOURCES ${LAMMPS_LIB_MSCG_BIN_DIR}/*.cpp)
|
||||
add_library(mscg STATIC ${MSCG_SOURCES})
|
||||
list(APPEND LAMMPS_LINK_LIBS mscg)
|
||||
target_compile_options(mscg PRIVATE -DDIMENSION=3 -D_exclude_gromacs=1)
|
||||
target_include_directories(mscg PUBLIC ${LAMMPS_LIB_MSCG_BIN_DIR})
|
||||
target_link_libraries(mscg ${GSL_LIBRARIES} ${LAPACK_LIBRARIES})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${MSCG_LIBRARIES} ${GSL_LIBRARIES} ${LAPACK_LIBRARIES})
|
||||
include_directories(${MSCG_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
if(PKG_COMPRESS)
|
||||
find_package(ZLIB REQUIRED)
|
||||
include_directories(${ZLIB_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${ZLIB_LIBRARIES})
|
||||
endif()
|
||||
|
||||
########################################################################
|
||||
# Basic system tests (standard libraries, headers, functions, types) #
|
||||
########################################################################
|
||||
include(CheckIncludeFile)
|
||||
include(CheckIncludeFileCXX)
|
||||
foreach(HEADER math.h)
|
||||
check_include_file(${HEADER} FOUND_${HEADER})
|
||||
check_include_file_cxx(${HEADER} FOUND_${HEADER})
|
||||
if(NOT FOUND_${HEADER})
|
||||
message(FATAL_ERROR "Could not find needed header - ${HEADER}")
|
||||
endif(NOT FOUND_${HEADER})
|
||||
@ -378,7 +480,7 @@ foreach(PKG ${DEFAULT_PACKAGES} ${OTHER_PACKAGES})
|
||||
DetectAndRemovePackageHeader(${LAMMPS_SOURCE_DIR}/${FNAME})
|
||||
endforeach()
|
||||
|
||||
if(ENABLE_${PKG})
|
||||
if(PKG_${PKG})
|
||||
# detects styles in package and adds them to global list
|
||||
RegisterStyles(${${PKG}_SOURCES_DIR})
|
||||
|
||||
@ -392,7 +494,7 @@ endforeach()
|
||||
############################################
|
||||
foreach(SIMPLE_LIB REAX MEAM POEMS USER-ATC USER-AWPMD USER-COLVARS USER-H5MD
|
||||
USER-QMMM)
|
||||
if(ENABLE_${SIMPLE_LIB})
|
||||
if(PKG_${SIMPLE_LIB})
|
||||
string(REGEX REPLACE "^USER-" "" PKG_LIB "${SIMPLE_LIB}")
|
||||
string(TOLOWER "${PKG_LIB}" PKG_LIB)
|
||||
file(GLOB_RECURSE ${PKG_LIB}_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/${PKG_LIB}/*.F
|
||||
@ -413,40 +515,26 @@ foreach(SIMPLE_LIB REAX MEAM POEMS USER-ATC USER-AWPMD USER-COLVARS USER-H5MD
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(ENABLE_USER-AWPMD)
|
||||
if(PKG_USER-AWPMD)
|
||||
target_link_libraries(awpmd ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-ATC)
|
||||
if(PKG_USER-ATC)
|
||||
target_link_libraries(atc ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-H5MD)
|
||||
if(PKG_USER-H5MD)
|
||||
find_package(HDF5 REQUIRED)
|
||||
target_link_libraries(h5md ${HDF5_LIBRARIES})
|
||||
target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
if(ENABLE_MEAM AND FC_HAS_NO_SECOND_UNDERSCORE)
|
||||
foreach(FSRC ${meam_SOURCES})
|
||||
string(REGEX REPLACE "^.*\\." "" FEXT "${FSRC}")
|
||||
list(FIND CMAKE_Fortran_SOURCE_FILE_EXTENSIONS "${FEXT}" FINDEX)
|
||||
if(FINDEX GREATER -1)
|
||||
set_property(SOURCE ${FSRC} APPEND PROPERTY COMPILE_FLAGS "-fno-second-underscore")
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
|
||||
if(ENABLE_REAX AND FC_HAS_NO_SECOND_UNDERSCORE)
|
||||
target_compile_options(reax PRIVATE -fno-second-underscore)
|
||||
endif()
|
||||
|
||||
|
||||
######################################################################
|
||||
# packages which selectively include variants based on enabled styles
|
||||
# e.g. accelerator packages
|
||||
######################################################################
|
||||
if(ENABLE_USER-OMP)
|
||||
if(PKG_USER-OMP)
|
||||
set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP)
|
||||
set(USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/thr_data.cpp
|
||||
${USER-OMP_SOURCES_DIR}/thr_omp.cpp
|
||||
@ -463,7 +551,7 @@ if(ENABLE_USER-OMP)
|
||||
include_directories(${USER-OMP_SOURCES_DIR})
|
||||
endif()
|
||||
|
||||
if(ENABLE_KOKKOS)
|
||||
if(PKG_KOKKOS)
|
||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||
set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos)
|
||||
add_definitions(-DLMP_KOKKOS)
|
||||
@ -499,7 +587,7 @@ if(ENABLE_KOKKOS)
|
||||
RegisterNBinStyle(${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.h)
|
||||
RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.h)
|
||||
|
||||
if(ENABLE_USER-DPD)
|
||||
if(PKG_USER-DPD)
|
||||
get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES)
|
||||
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.cpp)
|
||||
RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.h)
|
||||
@ -512,7 +600,7 @@ if(ENABLE_KOKKOS)
|
||||
include_directories(${KOKKOS_PKG_SOURCES_DIR})
|
||||
endif()
|
||||
|
||||
if(ENABLE_OPT)
|
||||
if(PKG_OPT)
|
||||
set(OPT_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/OPT)
|
||||
set(OPT_SOURCES)
|
||||
set_property(GLOBAL PROPERTY "OPT_SOURCES" "${OPT_SOURCES}")
|
||||
@ -526,7 +614,30 @@ if(ENABLE_OPT)
|
||||
include_directories(${OPT_SOURCES_DIR})
|
||||
endif()
|
||||
|
||||
if(ENABLE_USER-INTEL)
|
||||
if(PKG_USER-INTEL)
|
||||
if(NOT DEVELOPER_MODE)
|
||||
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
message(FATAL_ERROR "USER-INTEL is only useful together with intel compiler")
|
||||
endif()
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
|
||||
message(FATAL_ERROR "USER-INTEL is needed at least 2016 intel compiler, found ${CMAKE_CXX_COMPILER_VERSION}")
|
||||
endif()
|
||||
endif()
|
||||
option(INJECT_INTEL_FLAG "Inject OMG fast flags for USER-INTEL" ON)
|
||||
if(INJECT_INTEL_FLAG AND CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xCOMMON-AVX512")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost")
|
||||
endif()
|
||||
include(CheckCXXCompilerFlag)
|
||||
foreach(_FLAG -qopenmp -qno-offload -fno-alias -ansi-alias -restrict -DLMP_INTEL_USELRT -DLMP_USE_MKL_RNG -O2 "-fp-model fast=2" -no-prec-div -qoverride-limits -qopt-zmm-usage=high)
|
||||
check_cxx_compiler_flag("${__FLAG}" COMPILER_SUPPORTS${_FLAG})
|
||||
if(COMPILER_SUPPORTS${_FLAG})
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${_FLAG}")
|
||||
endif()
|
||||
endforeach()
|
||||
endif()
|
||||
set(USER-INTEL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-INTEL)
|
||||
set(USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/intel_preprocess.h
|
||||
${USER-INTEL_SOURCES_DIR}/intel_buffers.h
|
||||
@ -550,7 +661,10 @@ if(ENABLE_USER-INTEL)
|
||||
include_directories(${USER-INTEL_SOURCES_DIR})
|
||||
endif()
|
||||
|
||||
if(ENABLE_GPU)
|
||||
if(PKG_GPU)
|
||||
if (CMAKE_VERSION VERSION_LESS "3.1")
|
||||
message(FATAL_ERROR "For the GPU package you need at least cmake-3.1")
|
||||
endif()
|
||||
set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
|
||||
set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h
|
||||
${GPU_SOURCES_DIR}/fix_gpu.h
|
||||
@ -647,7 +761,7 @@ if(ENABLE_GPU)
|
||||
add_library(gpu STATIC ${GPU_LIB_SOURCES})
|
||||
target_link_libraries(gpu ${OpenCL_LIBRARIES})
|
||||
target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS})
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC} -DMPI_GERYON -DUCL_NO_EXIT)
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
|
||||
target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)
|
||||
|
||||
list(APPEND LAMMPS_LINK_LIBS gpu)
|
||||
@ -687,6 +801,12 @@ include_directories(${LAMMPS_STYLE_HEADERS_DIR})
|
||||
# Actually add executable and lib to build
|
||||
############################################
|
||||
add_library(lammps ${LIB_SOURCES})
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
list (FIND LANGUAGES "Fortran" _index)
|
||||
if (${_index} GREATER -1)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
|
||||
endif()
|
||||
list(REMOVE_DUPLICATES LAMMPS_LINK_LIBS)
|
||||
target_link_libraries(lammps ${LAMMPS_LINK_LIBS})
|
||||
if(LAMMPS_DEPS)
|
||||
add_dependencies(lammps ${LAMMPS_DEPS})
|
||||
@ -708,35 +828,93 @@ if(ENABLE_TESTING)
|
||||
add_test(ShowHelp lmp${LAMMPS_MACHINE} -help)
|
||||
endif()
|
||||
|
||||
##################################
|
||||
###############################################################################
|
||||
# Testing
|
||||
#
|
||||
# Requires latest gcovr (for GCC 8.1 support):
|
||||
# pip install git+https://github.com/gcovr/gcovr.git
|
||||
###############################################################################
|
||||
if(ENABLE_COVERAGE)
|
||||
find_program(GCOVR_BINARY gcovr)
|
||||
find_package_handle_standard_args(GCOVR DEFAULT_MSG GCOVR_BINARY)
|
||||
|
||||
if(GCOVR_FOUND)
|
||||
get_filename_component(ABSOLUTE_LAMMPS_SOURCE_DIR ${LAMMPS_SOURCE_DIR} ABSOLUTE)
|
||||
|
||||
add_custom_target(
|
||||
gen_coverage_xml
|
||||
COMMAND ${GCOVR_BINARY} -s -x -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.xml
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "Generating XML Coverage Report..."
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
gen_coverage_html
|
||||
COMMAND ${GCOVR_BINARY} -s --html --html-details -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.html
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "Generating HTML Coverage Report..."
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
###############################################################################
|
||||
# Print package summary
|
||||
##################################
|
||||
###############################################################################
|
||||
foreach(PKG ${DEFAULT_PACKAGES} ${OTHER_PACKAGES} ${ACCEL_PACKAGES})
|
||||
if(ENABLE_${PKG})
|
||||
if(PKG_${PKG})
|
||||
message(STATUS "Building package: ${PKG}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
string(TOUPPER "${CMAKE_BUILD_TYPE}" BTYPE)
|
||||
get_directory_property(CPPFLAGS DIRECTORY ${CMAKE_SOURCE_DIR} COMPILE_DEFINITIONS)
|
||||
include(FeatureSummary)
|
||||
feature_summary(INCLUDE_QUIET_PACKAGES WHAT ALL)
|
||||
message(STATUS "<<< Build configuration >>>
|
||||
Build type ${CMAKE_BUILD_TYPE}
|
||||
Install path ${CMAKE_INSTALL_PREFIX}
|
||||
Compilers and Flags:
|
||||
C++ Compiler ${CMAKE_CXX_COMPILER}
|
||||
Type ${CMAKE_CXX_COMPILER_ID}
|
||||
C++ Flags ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}")
|
||||
Version ${CMAKE_CXX_COMPILER_VERSION}
|
||||
C++ Flags ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}
|
||||
Defines ${CPPFLAGS}")
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
if(LANGUAGES MATCHES ".*Fortran.*")
|
||||
list (FIND LANGUAGES "Fortran" _index)
|
||||
if (${_index} GREATER -1)
|
||||
message(STATUS "Fortran Compiler ${CMAKE_Fortran_COMPILER}
|
||||
Type ${CMAKE_Fortran_COMPILER_ID}
|
||||
Version ${CMAKE_Fortran_COMPILER_VERSION}
|
||||
Fortran Flags ${CMAKE_Fortran_FLAGS} ${CMAKE_Fortran_FLAGS_${BTYPE}}")
|
||||
endif()
|
||||
message(STATUS "Linker flags:
|
||||
list (FIND LANGUAGES "C" _index)
|
||||
if (${_index} GREATER -1)
|
||||
message(STATUS "C Compiler ${CMAKE_C_COMPILER}
|
||||
Type ${CMAKE_C_COMPILER_ID}
|
||||
Version ${CMAKE_C_COMPILER_VERSION}
|
||||
C Flags ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${BTYPE}}")
|
||||
endif()
|
||||
if(CMAKE_EXE_LINKER_FLAGS)
|
||||
message(STATUS "Linker flags:
|
||||
Executable ${CMAKE_EXE_LINKER_FLAGS}")
|
||||
endif()
|
||||
if(BUILD_SHARED_LIBS)
|
||||
message(STATUS "Shared libries ${CMAKE_SHARED_LINKER_FLAGS}")
|
||||
message(STATUS "Shared libraries ${CMAKE_SHARED_LINKER_FLAGS}")
|
||||
else()
|
||||
message(STATUS "Static libries ${CMAKE_STATIC_LINKER_FLAGS}")
|
||||
message(STATUS "Static libraries ${CMAKE_STATIC_LINKER_FLAGS}")
|
||||
endif()
|
||||
message(STATUS "Link libraries: ${LAMMPS_LINK_LIBS}")
|
||||
|
||||
if(BUILD_MPI)
|
||||
message(STATUS "Using mpi with headers in ${MPI_CXX_INCLUDE_PATH} and ${MPI_CXX_LIBRARIES}")
|
||||
endif()
|
||||
if(ENABLED_GPU)
|
||||
message(STATUS "GPU Api: ${GPU_API}")
|
||||
if(GPU_API STREQUAL "CUDA")
|
||||
message(STATUS "GPU Arch: ${GPU_ARCH}")
|
||||
elseif(GPU_API STREQUAL "OpenCL")
|
||||
message(STATUS "OCL Tune: ${OCL_TUNE}")
|
||||
endif()
|
||||
endif()
|
||||
if(PKG_KSPACE)
|
||||
message(STATUS "Using ${FFT} as FFT")
|
||||
endif()
|
||||
|
||||
@ -1,22 +0,0 @@
|
||||
# - Find fftw2
|
||||
# Find the native FFTW2 headers and libraries.
|
||||
#
|
||||
# FFTW2_INCLUDE_DIRS - where to find fftw2.h, etc.
|
||||
# FFTW2_LIBRARIES - List of libraries when using fftw2.
|
||||
# FFTW2_FOUND - True if fftw2 found.
|
||||
#
|
||||
|
||||
find_path(FFTW2_INCLUDE_DIR fftw.h)
|
||||
|
||||
find_library(FFTW2_LIBRARY NAMES fftw)
|
||||
|
||||
set(FFTW2_LIBRARIES ${FFTW2_LIBRARY})
|
||||
set(FFTW2_INCLUDE_DIRS ${FFTW2_INCLUDE_DIR})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
# handle the QUIETLY and REQUIRED arguments and set FFTW2_FOUND to TRUE
|
||||
# if all listed variables are TRUE
|
||||
|
||||
find_package_handle_standard_args(FFTW2 DEFAULT_MSG FFTW2_LIBRARY FFTW2_INCLUDE_DIR)
|
||||
|
||||
mark_as_advanced(FFTW2_INCLUDE_DIR FFTW2_LIBRARY )
|
||||
25
cmake/Modules/FindFFTW3F.cmake
Normal file
25
cmake/Modules/FindFFTW3F.cmake
Normal file
@ -0,0 +1,25 @@
|
||||
# - Find fftw3f
|
||||
# Find the native FFTW3F headers and libraries.
|
||||
#
|
||||
# FFTW3F_INCLUDE_DIRS - where to find fftw3f.h, etc.
|
||||
# FFTW3F_LIBRARIES - List of libraries when using fftw3f.
|
||||
# FFTW3F_FOUND - True if fftw3f found.
|
||||
#
|
||||
|
||||
find_package(PkgConfig)
|
||||
|
||||
pkg_check_modules(PC_FFTW3F fftw3f)
|
||||
find_path(FFTW3F_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3F_INCLUDE_DIRS})
|
||||
|
||||
find_library(FFTW3F_LIBRARY NAMES fftw3f HINTS ${PC_FFTW3F_LIBRARY_DIRS})
|
||||
|
||||
set(FFTW3F_LIBRARIES ${FFTW3F_LIBRARY})
|
||||
set(FFTW3F_INCLUDE_DIRS ${FFTW3F_INCLUDE_DIR})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
# handle the QUIETLY and REQUIRED arguments and set FFTW3F_FOUND to TRUE
|
||||
# if all listed variables are TRUE
|
||||
|
||||
find_package_handle_standard_args(FFTW3F DEFAULT_MSG FFTW3F_LIBRARY FFTW3F_INCLUDE_DIR)
|
||||
|
||||
mark_as_advanced(FFTW3F_INCLUDE_DIR FFTW3F_LIBRARY )
|
||||
22
cmake/Modules/FindMSCG.cmake
Normal file
22
cmake/Modules/FindMSCG.cmake
Normal file
@ -0,0 +1,22 @@
|
||||
# - Find mscg
|
||||
# Find the native MSCG headers and libraries.
|
||||
#
|
||||
# MSCG_INCLUDE_DIRS - where to find mscg.h, etc.
|
||||
# MSCG_LIBRARIES - List of libraries when using mscg.
|
||||
# MSCG_FOUND - True if mscg found.
|
||||
#
|
||||
|
||||
find_path(MSCG_INCLUDE_DIR mscg.h PATH_SUFFIXES mscg)
|
||||
|
||||
find_library(MSCG_LIBRARY NAMES mscg)
|
||||
|
||||
set(MSCG_LIBRARIES ${MSCG_LIBRARY})
|
||||
set(MSCG_INCLUDE_DIRS ${MSCG_INCLUDE_DIR})
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
# handle the QUIETLY and REQUIRED arguments and set MSCG_FOUND to TRUE
|
||||
# if all listed variables are TRUE
|
||||
|
||||
find_package_handle_standard_args(MSCG DEFAULT_MSG MSCG_LIBRARY MSCG_INCLUDE_DIR)
|
||||
|
||||
mark_as_advanced(MSCG_INCLUDE_DIR MSCG_LIBRARY )
|
||||
1644
cmake/README.md
1644
cmake/README.md
File diff suppressed because it is too large
Load Diff
@ -9,6 +9,7 @@ TXT2RST = $(VENV)/bin/txt2rst
|
||||
ANCHORCHECK = $(VENV)/bin/doc_anchor_check
|
||||
|
||||
PYTHON = $(shell which python3)
|
||||
VIRTUALENV = virtualenv
|
||||
HAS_PYTHON3 = NO
|
||||
HAS_VIRTUALENV = NO
|
||||
|
||||
@ -16,7 +17,13 @@ ifeq ($(shell which python3 >/dev/null 2>&1; echo $$?), 0)
|
||||
HAS_PYTHON3 = YES
|
||||
endif
|
||||
|
||||
ifeq ($(shell which virtualenv-3 >/dev/null 2>&1; echo $$?), 0)
|
||||
VIRTUALENV = virtualenv-3
|
||||
HAS_VIRTUALENV = YES
|
||||
endif
|
||||
|
||||
ifeq ($(shell which virtualenv >/dev/null 2>&1; echo $$?), 0)
|
||||
VIRTUALENV = virtualenv
|
||||
HAS_VIRTUALENV = YES
|
||||
endif
|
||||
|
||||
@ -158,7 +165,7 @@ $(VENV):
|
||||
@if [ "$(HAS_PYTHON3)" == "NO" ] ; then echo "Python3 was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
|
||||
@if [ "$(HAS_VIRTUALENV)" == "NO" ] ; then echo "virtualenv was not found! Please check README.md for further instructions" 1>&2; exit 1; fi
|
||||
@( \
|
||||
virtualenv -p $(PYTHON) $(VENV); \
|
||||
$(VIRTUALENV) -p $(PYTHON) $(VENV); \
|
||||
. $(VENV)/bin/activate; \
|
||||
pip install Sphinx; \
|
||||
pip install sphinxcontrib-images; \
|
||||
|
||||
BIN
doc/src/Eqs/dihedral_table_cut.jpg
Normal file
BIN
doc/src/Eqs/dihedral_table_cut.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 30 KiB |
11
doc/src/Eqs/dihedral_table_cut.tex
Normal file
11
doc/src/Eqs/dihedral_table_cut.tex
Normal file
@ -0,0 +1,11 @@
|
||||
\documentclass[12pt]{article}
|
||||
\pagestyle{empty}
|
||||
\begin{document}
|
||||
|
||||
\begin{eqnarray*}
|
||||
f(\theta) & = & K \qquad\qquad\qquad\qquad\qquad\qquad \theta < \theta_1 \\
|
||||
f(\theta) & = & K \left(1-\frac{(\theta - \theta_1)^2}{(\theta_2 - \theta_1)^2}\right) \qquad \theta_1 < \theta < \theta_2
|
||||
\end{eqnarray*}
|
||||
|
||||
\end{document}
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
<!-- HTML_ONLY -->
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="11 May 2018 version">
|
||||
<META NAME="docnumber" CONTENT="22 Jun 2018 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -19,7 +19,7 @@
|
||||
:line
|
||||
|
||||
LAMMPS Documentation :c,h1
|
||||
11 May 2018 version :c,h2
|
||||
22 Jun 2018 version :c,h2
|
||||
|
||||
Version info: :h3
|
||||
|
||||
|
||||
@ -129,6 +129,17 @@ region 1 block $((xlo+xhi)/2+sqrt(v_area)) 2 INF INF EDGE EDGE :pre
|
||||
|
||||
so that you do not have to define (or discard) a temporary variable X.
|
||||
|
||||
Additionally, the "immediate" variable expression may be followed by a
|
||||
colon, followed by a C-style format string, e.g. ":%f" or ":%.10g".
|
||||
The format string must be appropriate for a double-precision
|
||||
floating-point value. The format string is used to output the result
|
||||
of the variable expression evaluation. If a format string is not
|
||||
specified a high-precision "%.20g" is used as the default.
|
||||
|
||||
This can be useful for formatting print output to a desired precision:
|
||||
|
||||
print "Final energy per atom: $(pe/atoms:%10.3f) eV/atom" :pre
|
||||
|
||||
Note that neither the curly-bracket nor immediate form of variables can
|
||||
contain nested $ characters for other variables to substitute for.
|
||||
Thus you cannot do this:
|
||||
@ -1212,7 +1223,8 @@ package"_Section_start.html#start_3.
|
||||
"nharmonic (o)"_dihedral_nharmonic.html,
|
||||
"quadratic (o)"_dihedral_quadratic.html,
|
||||
"spherical (o)"_dihedral_spherical.html,
|
||||
"table (o)"_dihedral_table.html :tb(c=4,ea=c)
|
||||
"table (o)"_dihedral_table.html,
|
||||
"table/cut"_dihedral_table_cut.html :tb(c=4,ea=c)
|
||||
|
||||
:line
|
||||
|
||||
|
||||
@ -803,6 +803,13 @@ lo value must be less than the hi value for all 3 dimensions. :dd
|
||||
The box command cannot be used after a read_data, read_restart, or
|
||||
create_box command. :dd
|
||||
|
||||
{BUG: restartinfo=1 but no restart support in pair style} :dt
|
||||
|
||||
The pair style has a bug, where it does not support reading
|
||||
and writing information to a restart file, but does not set
|
||||
the member variable restartinfo to 0 as required in that case. :dd
|
||||
|
||||
|
||||
{CPU neighbor lists must be used for ellipsoid/sphere mix.} :dt
|
||||
|
||||
When using Gay-Berne or RE-squared pair styles with both ellipsoidal and
|
||||
|
||||
@ -37,8 +37,8 @@ keyword = {special} :l
|
||||
|
||||
create_bonds many all all 1 1.0 1.2
|
||||
create_bonds many surf solvent 3 2.0 2.4
|
||||
create_bond single/bond 1 1 2
|
||||
create_bond single/angle 5 52 98 107 special no :pre
|
||||
create_bonds single/bond 1 1 2
|
||||
create_bonds single/angle 5 52 98 107 special no :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
|
||||
205
doc/src/dihedral_table_cut.txt
Normal file
205
doc/src/dihedral_table_cut.txt
Normal file
@ -0,0 +1,205 @@
|
||||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Section_commands.html#comm)
|
||||
|
||||
:line
|
||||
|
||||
dihedral_style table/cut command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
dihedral_style table/cut style Ntable :pre
|
||||
|
||||
style = {linear} or {spline} = method of interpolation
|
||||
Ntable = size of the internal lookup table :ul
|
||||
|
||||
[Examples:]
|
||||
|
||||
dihedral_style table/cut spline 400
|
||||
dihedral_style table/cut linear 1000
|
||||
dihedral_coeff 1 aat 1.0 177 180 file.table DIH_TABLE1
|
||||
dihedral_coeff 2 aat 0.5 170 180 file.table DIH_TABLE2 :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
The {table/cut} dihedral style creates interpolation tables of length
|
||||
{Ntable} from dihedral potential and derivative values listed in a
|
||||
file(s) as a function of the dihedral angle "phi". In addition, an
|
||||
analytic cutoff that is quadratic in the bond-angle (theta) is applied
|
||||
in order to regularize the dihedral interaction. The dihedral table
|
||||
files are read by the "dihedral_coeff"_dihedral_coeff.html command.
|
||||
|
||||
The interpolation tables are created by fitting cubic splines to the
|
||||
file values and interpolating energy and derivative values at each of
|
||||
{Ntable} dihedral angles. During a simulation, these tables are used
|
||||
to interpolate energy and force values on individual atoms as
|
||||
needed. The interpolation is done in one of 2 styles: {linear} or
|
||||
{spline}.
|
||||
|
||||
For the {linear} style, the dihedral angle (phi) is used to find 2
|
||||
surrounding table values from which an energy or its derivative is
|
||||
computed by linear interpolation.
|
||||
|
||||
For the {spline} style, cubic spline coefficients are computed and
|
||||
stored at each of the {Ntable} evenly-spaced values in the
|
||||
interpolated table. For a given dihedral angle (phi), the appropriate
|
||||
coefficients are chosen from this list, and a cubic polynomial is used
|
||||
to compute the energy and the derivative at this angle.
|
||||
|
||||
The following coefficients must be defined for each dihedral type via
|
||||
the "dihedral_coeff"_dihedral_coeff.html command as in the example
|
||||
above.
|
||||
|
||||
style (aat)
|
||||
cutoff prefactor
|
||||
cutoff angle1
|
||||
cutoff angle2
|
||||
filename
|
||||
keyword :ul
|
||||
|
||||
The cutoff dihedral style uses a tabulated dihedral interaction with a
|
||||
cutoff function:
|
||||
|
||||
:c,image(Eqs/dihedral_table_cut.jpg)
|
||||
|
||||
The cutoff prefactor specifies a prefactor to the cutoff function. While this value
|
||||
would ordinarily equal 1 there may be situations where the value should change.
|
||||
|
||||
The cutoff angle1 specifies the angle (in degrees) below which the dihedral
|
||||
interaction is unmodified, i.e. the cutoff function is 1.
|
||||
|
||||
The cutoff function is applied between angle1 and angle2, which is the angle at
|
||||
which the cutoff function drops to zero. The value of zero effectively "turns
|
||||
off" the dihedral interaction.
|
||||
|
||||
The filename specifies a file containing tabulated energy and
|
||||
derivative values. The keyword specifies a section of the file. The
|
||||
format of this file is described below.
|
||||
|
||||
:line
|
||||
|
||||
The format of a tabulated file is as follows (without the
|
||||
parenthesized comments). It can begin with one or more comment
|
||||
or blank lines.
|
||||
|
||||
# Table of the potential and its negative derivative :pre
|
||||
|
||||
DIH_TABLE1 (keyword is the first text on line)
|
||||
N 30 DEGREES (N, NOF, DEGREES, RADIANS, CHECKU/F)
|
||||
(blank line)
|
||||
1 -168.0 -1.40351172223 0.0423346818422
|
||||
2 -156.0 -1.70447981034 0.00811786522531
|
||||
3 -144.0 -1.62956100432 -0.0184129719987
|
||||
...
|
||||
30 180.0 -0.707106781187 0.0719306095245 :pre
|
||||
|
||||
# Example 2: table of the potential. Forces omitted :pre
|
||||
|
||||
DIH_TABLE2
|
||||
N 30 NOF CHECKU testU.dat CHECKF testF.dat :pre
|
||||
|
||||
1 -168.0 -1.40351172223
|
||||
2 -156.0 -1.70447981034
|
||||
3 -144.0 -1.62956100432
|
||||
...
|
||||
30 180.0 -0.707106781187 :pre
|
||||
|
||||
A section begins with a non-blank line whose 1st character is not a
|
||||
"#"; blank lines or lines starting with "#" can be used as comments
|
||||
between sections. The first line begins with a keyword which
|
||||
identifies the section. The line can contain additional text, but the
|
||||
initial text must match the argument specified in the
|
||||
"dihedral_coeff"_dihedral_coeff.html command. The next line lists (in
|
||||
any order) one or more parameters for the table. Each parameter is a
|
||||
keyword followed by one or more numeric values.
|
||||
|
||||
Following a blank line, the next N lines list the tabulated values. On
|
||||
each line, the 1st value is the index from 1 to N, the 2nd value is
|
||||
the angle value, the 3rd value is the energy (in energy units), and
|
||||
the 4th is -dE/d(phi) (also in energy units). The 3rd term is the
|
||||
energy of the 4-atom configuration for the specified angle. The 4th
|
||||
term (when present) is the negative derivative of the energy with
|
||||
respect to the angle (in degrees, or radians depending on whether the
|
||||
user selected DEGREES or RADIANS). Thus the units of the last term
|
||||
are still energy, not force. The dihedral angle values must increase
|
||||
from one line to the next.
|
||||
|
||||
Dihedral table splines are cyclic. There is no discontinuity at 180
|
||||
degrees (or at any other angle). Although in the examples above, the
|
||||
angles range from -180 to 180 degrees, in general, the first angle in
|
||||
the list can have any value (positive, zero, or negative). However
|
||||
the {range} of angles represented in the table must be {strictly} less
|
||||
than 360 degrees (2pi radians) to avoid angle overlap. (You may not
|
||||
supply entries in the table for both 180 and -180, for example.) If
|
||||
the user's table covers only a narrow range of dihedral angles,
|
||||
strange numerical behavior can occur in the large remaining gap.
|
||||
|
||||
[Parameters:]
|
||||
|
||||
The parameter "N" is required and its value is the number of table
|
||||
entries that follow. Note that this may be different than the N
|
||||
specified in the "dihedral_style table"_dihedral_style.html command.
|
||||
Let {Ntable} be the number of table entries requested in the dihedral_style
|
||||
command, and let {Nfile} be the parameter following "N" in the
|
||||
tabulated file ("30" in the sparse example above). What LAMMPS does
|
||||
is a preliminary interpolation by creating splines using the {Nfile}
|
||||
tabulated values as nodal points. It uses these to interpolate as
|
||||
needed to generate energy and derivative values at {Ntable} different
|
||||
points (which are evenly spaced over a 360 degree range, even if the
|
||||
angles in the file are not). The resulting tables of length {Ntable}
|
||||
are then used as described above, when computing energy and force for
|
||||
individual dihedral angles and their atoms. This means that if you
|
||||
want the interpolation tables of length {Ntable} to match exactly what
|
||||
is in the tabulated file (with effectively no preliminary
|
||||
interpolation), you should set {Ntable} = {Nfile}. To ensure the
|
||||
nodal points in the user's file are aligned with the interpolated
|
||||
table entries, the angles in the table should be integer multiples of
|
||||
360/{Ntable} degrees, or 2*PI/{Ntable} radians (depending on your
|
||||
choice of angle units).
|
||||
|
||||
The optional "NOF" keyword allows the user to omit the forces
|
||||
(negative energy derivatives) from the table file (normally located in
|
||||
the 4th column). In their place, forces will be calculated
|
||||
automatically by differentiating the potential energy function
|
||||
indicated by the 3rd column of the table (using either linear or
|
||||
spline interpolation).
|
||||
|
||||
The optional "DEGREES" keyword allows the user to specify angles in
|
||||
degrees instead of radians (default).
|
||||
|
||||
The optional "RADIANS" keyword allows the user to specify angles in
|
||||
radians instead of degrees. (Note: This changes the way the forces
|
||||
are scaled in the 4th column of the data file.)
|
||||
|
||||
The optional "CHECKU" keyword is followed by a filename. This allows
|
||||
the user to save all of the {Ntable} different entries in the
|
||||
interpolated energy table to a file to make sure that the interpolated
|
||||
function agrees with the user's expectations. (Note: You can
|
||||
temporarily increase the {Ntable} parameter to a high value for this
|
||||
purpose. "{Ntable}" is explained above.)
|
||||
|
||||
The optional "CHECKF" keyword is analogous to the "CHECKU" keyword.
|
||||
It is followed by a filename, and it allows the user to check the
|
||||
interpolated force table. This option is available even if the user
|
||||
selected the "NOF" option.
|
||||
|
||||
Note that one file can contain many sections, each with a tabulated
|
||||
potential. LAMMPS reads the file section by section until it finds one
|
||||
that matches the specified keyword.
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This dihedral style can only be used if LAMMPS was built with the
|
||||
USER-MISC package. See the "Making LAMMPS"_Section_start.html#start_3
|
||||
section for more info on packages.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"dihedral_coeff"_dihedral_coeff.html, "dihedral_style table"_dihedral_table.html
|
||||
|
||||
[Default:] none
|
||||
|
||||
:link(dihedralcut-Salerno)
|
||||
[(Salerno)] Salerno, Bernstein, J Chem Theory Comput, --, ---- (2018).
|
||||
@ -19,6 +19,7 @@ Dihedral Styles :h1
|
||||
dihedral_quadratic
|
||||
dihedral_spherical
|
||||
dihedral_table
|
||||
dihedral_table_cut
|
||||
dihedral_zero
|
||||
dihedral_charmm
|
||||
dihedral_class2
|
||||
|
||||
@ -15,7 +15,7 @@ dump_modify dump-ID keyword values ... :pre
|
||||
dump-ID = ID of dump to modify :ulb,l
|
||||
one or more keyword/value pairs may be appended :l
|
||||
these keywords apply to various dump styles :l
|
||||
keyword = {append} or {at} or {buffer} or {delay} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l
|
||||
keyword = {append} or {at} or {buffer} or {delay} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {maxfiles} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l
|
||||
{append} arg = {yes} or {no}
|
||||
{at} arg = N
|
||||
N = index of frame written upon first dump
|
||||
@ -37,6 +37,8 @@ keyword = {append} or {at} or {buffer} or {delay} or {element} or {every} or {fi
|
||||
{image} arg = {yes} or {no}
|
||||
{label} arg = string
|
||||
string = character string (e.g. BONDS) to use in header of dump local file
|
||||
{maxfiles} arg = Fmax
|
||||
Fmax = keep only the most recent {Fmax} snapshots (one snapshot per file)
|
||||
{nfile} arg = Nf
|
||||
Nf = write this many files, one from each of Nf processors
|
||||
{pad} arg = Nchar = # of characters to convert timestep to
|
||||
@ -364,6 +366,20 @@ e.g. BONDS or ANGLES.
|
||||
|
||||
:line
|
||||
|
||||
The {maxfiles} keyword can only be used when a '*' wildcard is
|
||||
included in the dump file name, i.e. when writing a new file(s) for
|
||||
each snapshot. The specified {Fmax} is how many snapshots will be
|
||||
kept. Once this number is reached, the file(s) containing the oldest
|
||||
snapshot is deleted before a new dump file is written. If the
|
||||
specified {Fmax} <= 0, then all files are retained.
|
||||
|
||||
This can be useful for debugging, especially if you don't know on what
|
||||
timestep something bad will happen, e.g. when LAMMPS will exit with an
|
||||
error. You can dump every timestep, and limit the number of dump
|
||||
files produced, even if you run for 1000s of steps.
|
||||
|
||||
:line
|
||||
|
||||
The {nfile} or {fileper} keywords can be used in conjunction with the
|
||||
"%" wildcard character in the specified dump file name, for all dump
|
||||
styles except the {dcd}, {image}, {movie}, {xtc}, and {xyz} styles
|
||||
@ -901,6 +917,7 @@ flush = yes
|
||||
format = %d and %g for each integer or floating point value
|
||||
image = no
|
||||
label = ENTRIES
|
||||
maxfiles = -1
|
||||
nfile = 1
|
||||
pad = 0
|
||||
pbc = no
|
||||
|
||||
@ -205,6 +205,14 @@ a bond coefficient over time, very similar to how the {pair} keyword
|
||||
operates. The only difference is that now a bond coefficient for a
|
||||
given bond type is adapted.
|
||||
|
||||
A wild-card asterisk can be used in place of or in conjunction with
|
||||
the bond type argument to set the coefficients for multiple bond types.
|
||||
This takes the form "*" or "*n" or "n*" or "m*n". If N = the number of
|
||||
bond types, then an asterisk with no numeric values means all types
|
||||
from 1 to N. A leading asterisk means all types from 1 to n (inclusive).
|
||||
A trailing asterisk means all types from n to N (inclusive). A middle
|
||||
asterisk means all types from m to n (inclusive).
|
||||
|
||||
Currently {bond} does not support bond_style hybrid nor bond_style
|
||||
hybrid/overlay as bond styles. The only bonds that currently are
|
||||
working with fix_adapt are
|
||||
|
||||
@ -20,14 +20,15 @@ ID, group-ID are documented in "fix"_fix.html command. Group-ID is ignored. :ulb
|
||||
bond/react = style name of this fix command :l
|
||||
zero or more common keyword/value pairs may be appended directly after 'bond/react' :l
|
||||
these apply to all reaction specifications (below) :l
|
||||
common_keyword = {stabilization}
|
||||
{stabilization} values = group-ID xmax
|
||||
group-ID = user-assigned ID of an internally-created dynamic group that excludes reacting atoms, and can be used by a subsequent time integration fix such as nvt, npt, or nve (cannot be 'all')
|
||||
{xmax} value = distance
|
||||
distance = xmax value that is used by an internally created "nve/limit"_fix_nve_limit.html integrator
|
||||
react = mandatory argument indicating new reaction specification
|
||||
react-ID = user-assigned name for the reaction
|
||||
react-group-ID = only atoms in this group are available for the reaction
|
||||
common_keyword = {stabilization} :l
|
||||
{stabilization} values = {no} or {yes} {group-ID} {xmax}
|
||||
{no} = no reaction site stabilization
|
||||
{yes} = perform reaction site stabilization
|
||||
{group-ID} = user-assigned ID for all non-reacting atoms (group created internally)
|
||||
{xmax} = xmax value that is used by an internally created "nve/limit"_fix_nve_limit.html integrator :pre
|
||||
react = mandatory argument indicating new reaction specification :l
|
||||
react-ID = user-assigned name for the reaction :l
|
||||
react-group-ID = only atoms in this group are available for the reaction :l
|
||||
Nevery = attempt reaction every this many steps :l
|
||||
Rmin = bonding pair atoms must be separated by more than Rmin to initiate reaction (distance units) :l
|
||||
Rmax = bonding pair atoms must be separated by less than Rmax to initiate reaction (distance units) :l
|
||||
@ -47,7 +48,7 @@ react = mandatory argument indicating new reaction specification
|
||||
|
||||
molecule mol1 pre_reacted_topology.txt
|
||||
molecule mol2 post_reacted_topology.txt
|
||||
fix 5 all bond/react stabilization no react myrxn1 all 1 0 3.25 mol1 mol2 map_file.txt
|
||||
fix 5 all bond/react stabilization no react myrxn1 all 1 0 3.25 mol1 mol2 map_file.txt :pre
|
||||
|
||||
molecule mol1 pre_reacted_rxn1.txt
|
||||
molecule mol2 post_reacted_rxn1.txt
|
||||
@ -56,12 +57,12 @@ molecule mol4 post_reacted_rxn2.txt
|
||||
fix 5 all bond/react stabilization yes nvt_grp .03 &
|
||||
react myrxn1 all 1 0 3.25 mol1 mol2 map_file_rxn1.txt prob 0.50 12345 &
|
||||
react myrxn2 all 1 0 2.75 mol3 mol4 map_file_rxn2.txt prob 0.25 12345
|
||||
fix 6 nvt_grp nvt temp 300 300 100 # system-wide thermostat must be defined after bond/react :pre
|
||||
fix 6 nvt_grp nvt temp 300 300 100 # set thermostat after bond/react :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Initiate complex covalent bonding (topology) changes. These topology
|
||||
changes will be referred to as "reactions" throughout this
|
||||
changes will be referred to as 'reactions' throughout this
|
||||
documentation. Topology changes are defined in pre- and post-reaction
|
||||
molecule templates and can include creation and deletion of bonds,
|
||||
angles, dihedrals, impropers, bond-types, angle-types, dihedral-types,
|
||||
@ -81,10 +82,10 @@ occurred 3) build a molecule template of the reaction site after the
|
||||
reaction has occurred 4) create a map that relates the
|
||||
template-atom-IDs of each atom between pre- and post-reaction molecule
|
||||
templates 5) fill a simulation box with molecules and run a simulation
|
||||
with fix/bond react.
|
||||
with fix bond/react.
|
||||
|
||||
Only one 'fix bond/react' command can be used at a time. Multiple
|
||||
reactions can be simultaneously applied by specifying multiple 'react'
|
||||
reactions can be simultaneously applied by specifying multiple {react}
|
||||
arguments to a single 'fix bond/react' command. This syntax is
|
||||
necessary because the 'common keywords' are applied to all reactions.
|
||||
|
||||
@ -99,10 +100,11 @@ typically be set to the maximum distance that non-reacting atoms move
|
||||
during the simulation.
|
||||
|
||||
The group-ID set using the {stabilization} keyword should be a
|
||||
previously unused group-ID. The fix bond/react command creates a
|
||||
"dynamic group"_group.html of this name that excludes reacting atoms.
|
||||
This dynamic group-ID should then be used by a subsequent system-wide
|
||||
time integrator, as shown in the second example above. It is currently
|
||||
previously unused group-ID. It cannot be specified as 'all'. The fix
|
||||
bond/react command creates a "dynamic group"_group.html of this name
|
||||
that includes all non-reacting atoms. This dynamic group-ID should
|
||||
then be used by a subsequent system-wide time integrator such as nvt,
|
||||
npt, or nve, as shown in the second example above. It is currently
|
||||
necessary to place the time integration command after the fix
|
||||
bond/react command due to the internal dynamic grouping performed by
|
||||
fix bond/react.
|
||||
@ -111,9 +113,9 @@ NOTE: The internally created group currently applies to all atoms in
|
||||
the system, i.e. you should generally not have a separate thermostat
|
||||
which acts on the 'all' group.
|
||||
|
||||
The following comments pertain to each 'react' argument:
|
||||
The following comments pertain to each {react} argument:
|
||||
|
||||
A check for possible new reaction sites is performed every Nevery
|
||||
A check for possible new reaction sites is performed every {Nevery}
|
||||
timesteps.
|
||||
|
||||
Two conditions must be met for a reaction to occur. First a bonding
|
||||
@ -124,20 +126,20 @@ modified to match the post-reaction template.
|
||||
|
||||
A bonding atom pair will be identified if several conditions are met.
|
||||
First, a pair of atoms within the specified react-group-ID of type
|
||||
typei and typej must be separated by a distance between Rmin and Rmax. It
|
||||
is possible that multiple bonding atom pairs are identified: if the
|
||||
bonding atoms in the pre-reacted template are not 1-2, 1-3, or 1-4
|
||||
neighbors, the closest bonding atom partner is set as its bonding
|
||||
partner; otherwise, the farthest potential partner is chosen. Then, if
|
||||
both an atomi and atomj have each other as their nearest bonding
|
||||
partners, these two atoms are identified as the bonding atom pair of
|
||||
the reaction site. Once this unique bonding atom pair is identified
|
||||
for each reaction, there could be two or more reactions that involve a
|
||||
given atom on the same timestep. If this is the case, only one such
|
||||
reaction is permitted to occur. This reaction is chosen randomly from
|
||||
all potential reactions. This capability allows e.g. for different
|
||||
reaction pathways to proceed from identical reaction sites with
|
||||
user-specified probabilities.
|
||||
typei and typej must be separated by a distance between {Rmin} and
|
||||
{Rmax}. It is possible that multiple bonding atom pairs are
|
||||
identified: if the bonding atoms in the pre-reacted template are not
|
||||
1-2, 1-3, or 1-4 neighbors, the closest bonding atom partner is set as
|
||||
its bonding partner; otherwise, the farthest potential partner is
|
||||
chosen. Then, if both an atomi and atomj have each other as their
|
||||
nearest bonding partners, these two atoms are identified as the
|
||||
bonding atom pair of the reaction site. Once this unique bonding atom
|
||||
pair is identified for each reaction, there could be two or more
|
||||
reactions that involve a given atom on the same timestep. If this is
|
||||
the case, only one such reaction is permitted to occur. This reaction
|
||||
is chosen randomly from all potential reactions. This capability
|
||||
allows e.g. for different reaction pathways to proceed from identical
|
||||
reaction sites with user-specified probabilities.
|
||||
|
||||
The pre-reacted molecule template is specified by a molecule command.
|
||||
This molecule template file contains a sample reaction site and its
|
||||
@ -175,77 +177,43 @@ A discussion of correctly handling this is also provided on the
|
||||
|
||||
The map file is a text document with the following format:
|
||||
|
||||
Format of the map file
|
||||
A map file has a header and a body. The header of the map file
|
||||
contains one mandatory keyword and one optional keyword. The mandatory
|
||||
keyword is 'equivalences' and the optional keyword is 'edgeIDs':
|
||||
|
||||
A map file has a header and a body. The header appears first. The
|
||||
first line of the header is always skipped; it typically contains a
|
||||
description of the file. Lines can have a trailing comment starting
|
||||
with '#' that is ignored. If the line is blank (only whitespace after
|
||||
comment is deleted), it is skipped. If the line contains a header
|
||||
keyword, the corresponding value(s) is read from the line. If it
|
||||
doesn't contain a header keyword, the line begins the body of the
|
||||
file.
|
||||
N {equivalences} = # of atoms N in the reaction molecule templates
|
||||
N {edgeIDs} = # of edge atoms N in the pre-reacted molecule template :pre
|
||||
|
||||
The header contains one mandatory keyword and one optional keyword.
|
||||
The mandatory keyword is 'equivalences' and the optional keyword is
|
||||
'edgeIDs'. These specify the number of atoms in the pre- and
|
||||
post-reacted templates and the number of edge atoms in the pre-reacted
|
||||
template, respectively.
|
||||
|
||||
The body contains two mandatory sections and one optional section. The
|
||||
first section begins with the keyword 'BondingIDs' and lists the atom
|
||||
IDs of the bonding atom pair in the pre-reacted molecule template. The
|
||||
second mandatory section begins with the keyword 'Equivalences' and
|
||||
lists a one-to-one correspondence between atom IDs of the pre- and
|
||||
post-reacted templates. The optional section begins with the keyword
|
||||
'EdgeIDs' and lists the atom IDs of edge atoms in the pre-reacted
|
||||
The body of the map file contains two mandatory sections and one
|
||||
optional section. The first mandatory section begins with the keyword
|
||||
'BondingIDs' and lists the atom IDs of the bonding atom pair in the
|
||||
pre-reacted molecule template. The second mandatory section begins
|
||||
with the keyword 'Equivalences' and lists a one-to-one correspondence
|
||||
between atom IDs of the pre- and post-reacted templates. The first
|
||||
column is an atom ID of the pre-reacted molecule template, and the
|
||||
second column is the corresponding atom ID of the post-reacted
|
||||
molecule template. The optional section begins with the keyword
|
||||
'EdgeIDs' and lists the atom IDs of edge atoms in the pre-reacted
|
||||
molecule template.
|
||||
|
||||
Format of the header of the map file
|
||||
|
||||
These are the recognized header keywords. Header lines can come in any
|
||||
order. The value(s) are read from the beginning of the line. Thus the
|
||||
keyword 'equivalences' should be in a line like "25 equivalences".
|
||||
|
||||
equivalences = # of atoms in the pre- and post-reacted molecule templates
|
||||
edgeIDs = # of edge atoms in the pre-reacted molecule template :pre
|
||||
|
||||
The edgeIDs keyword is optional.
|
||||
|
||||
Format of the body of the map file
|
||||
|
||||
These are the section keywords for the body of the file.
|
||||
|
||||
BondingIDs, EdgeIDs = list of atom IDs of bonding and edge atoms in
|
||||
the pre-reacted molecule template
|
||||
|
||||
Equivalences = a two column list where the first column is an atom ID
|
||||
of the pre-reacted molecule template, and the second column is the
|
||||
corresponding atom ID of the post-reacted molecule template
|
||||
|
||||
The bondingIDs section will always contain two atom IDs, corresponding
|
||||
to the bonding atom pair of the pre-reacted molecule template. The
|
||||
Equivalences section will contain as many rows as there are atoms in
|
||||
the pre- and post-reacted molecule templates. The edgeIDs section is
|
||||
optional, but would contain an atom ID for each edge atom in the
|
||||
pre-reacted molecule template.
|
||||
|
||||
A sample map file is given below:
|
||||
|
||||
:line
|
||||
|
||||
# This is a map file :pre
|
||||
# this is a map file :pre
|
||||
|
||||
2 edgeIDs
|
||||
7 equivalences :pre
|
||||
|
||||
BondingIDs :pre
|
||||
|
||||
3 5 :pre
|
||||
3
|
||||
5 :pre
|
||||
|
||||
EdgeIDs :pre
|
||||
|
||||
1 7 :pre
|
||||
1
|
||||
7 :pre
|
||||
|
||||
Equivalences :pre
|
||||
|
||||
@ -264,13 +232,13 @@ within LAMMPS that store bond topology are updated to reflect the
|
||||
post-reacted molecule template. All force fields with fixed bonds,
|
||||
angles, dihedrals or impropers are supported.
|
||||
|
||||
A few capabilities to note: 1) You may specify as many 'react'
|
||||
A few capabilities to note: 1) You may specify as many {react}
|
||||
arguments as desired. For example, you could break down a complicated
|
||||
reaction mechanism into several reaction steps, each defined by its
|
||||
own 'react' argument. 2) While typically a bond is formed or removed
|
||||
own {react} argument. 2) While typically a bond is formed or removed
|
||||
between the bonding atom pairs specified in the pre-reacted molecule
|
||||
template, this is not required. 3) By reversing the order of the pre-
|
||||
and post- reacted molecule templates in another 'react' argument, you
|
||||
and post- reacted molecule templates in another {react} argument, you
|
||||
can allow for the possibility of one or more reverse reactions.
|
||||
|
||||
The optional keywords deal with the probability of a given reaction
|
||||
@ -304,7 +272,7 @@ you can use the internally-created dynamic group named
|
||||
would thermostat the group of all atoms currently involved in a
|
||||
reaction:
|
||||
|
||||
fix 1 bond_react_MASTER_group temp/rescale 1 300 300 10 1
|
||||
fix 1 bond_react_MASTER_group temp/rescale 1 300 300 10 1 :pre
|
||||
|
||||
NOTE: This command must be added after the fix bond/react command, and
|
||||
will apply to all reactions.
|
||||
@ -324,10 +292,11 @@ local command.
|
||||
[Restart, fix_modify, output, run start/stop, minimize info:]
|
||||
|
||||
No information about this fix is written to "binary restart
|
||||
files"_restart.html. None of the "fix_modify"_fix_modify.html options
|
||||
are relevant to this fix.
|
||||
files"_restart.html, aside from internally-created per-atom
|
||||
properties. None of the "fix_modify"_fix_modify.html options are
|
||||
relevant to this fix.
|
||||
|
||||
This fix computes one statistic for each 'react' argument that it
|
||||
This fix computes one statistic for each {react} argument that it
|
||||
stores in a global vector, of length 'number of react arguments', that
|
||||
can be accessed by various "output
|
||||
commands"_Section_howto.html#howto_15. The vector values calculated by
|
||||
@ -359,5 +328,5 @@ The option defaults are stabilization = no, stabilize_steps = 60
|
||||
|
||||
:line
|
||||
|
||||
:link(Gissinger) [(Gissinger)] Gissinger, Jensen and Wise, Polymer,
|
||||
128, 211 (2017).
|
||||
:link(Gissinger)
|
||||
[(Gissinger)] Gissinger, Jensen and Wise, Polymer, 128, 211 (2017).
|
||||
|
||||
@ -19,7 +19,9 @@ Tmin = minimum dt allowed which can be NULL (time units)
|
||||
Tmax = maximum dt allowed which can be NULL (time units)
|
||||
Xmax = maximum distance for an atom to move in one timestep (distance units)
|
||||
zero or more keyword/value pairs may be appended
|
||||
keyword = {units} :ul
|
||||
keyword = {emax} or {units} :ul
|
||||
{emax} value = Emax
|
||||
Emax = maximum kinetic energy change for an atom in one timestep (energy units)
|
||||
{units} value = {lattice} or {box}
|
||||
lattice = Xmax is defined in lattice units
|
||||
box = Xmax is defined in simulation box units :pre
|
||||
@ -27,12 +29,17 @@ keyword = {units} :ul
|
||||
[Examples:]
|
||||
|
||||
fix 5 all dt/reset 10 1.0e-5 0.01 0.1
|
||||
fix 5 all dt/reset 10 0.01 2.0 0.2 units box :pre
|
||||
fix 5 all dt/reset 10 0.01 2.0 0.2 units box
|
||||
fix 5 all dt/reset 5 NULL 0.001 0.5 emax 30 units box :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Reset the timestep size every N steps during a run, so that no atom
|
||||
moves further than Xmax, based on current atom velocities and forces.
|
||||
moves further than the specified {Xmax} distance, based on current
|
||||
atom velocities and forces. Optionally an additional criterion is
|
||||
imposed by the {emax} keyword, so that no atom's kinetic energy
|
||||
changes by more than the specified {Emax}.
|
||||
|
||||
This can be useful when starting from a configuration with overlapping
|
||||
atoms, where forces will be large. Or it can be useful when running
|
||||
an impact simulation where one or more high-energy atoms collide with
|
||||
@ -48,7 +55,12 @@ current velocity and force. Since performing this calculation exactly
|
||||
would require the solution to a quartic equation, a cheaper estimate
|
||||
is generated. The estimate is conservative in that the atom's
|
||||
displacement is guaranteed not to exceed {Xmax}, though it may be
|
||||
smaller.
|
||||
smaller.
|
||||
|
||||
In addition if the {emax} keyword is used, the specified {Emax} value
|
||||
is enforced as a limit on how much an atom's kinetic energy can
|
||||
change. If the timestep required is even smaller than for the {Xmax}
|
||||
displacement, then the smaller timestep is used.
|
||||
|
||||
Given this putative timestep for each atom, the minimum timestep value
|
||||
across all atoms is computed. Then the {Tmin} and {Tmax} bounds are
|
||||
@ -87,4 +99,5 @@ minimization"_minimize.html.
|
||||
|
||||
[Default:]
|
||||
|
||||
The option default is units = lattice.
|
||||
The option defaults are units = lattice, and no emax kinetic energy
|
||||
limit.
|
||||
|
||||
@ -582,6 +582,7 @@ dihedral_opls.html
|
||||
dihedral_quadratic.html
|
||||
dihedral_spherical.html
|
||||
dihedral_table.html
|
||||
dihedral_table_cut.html
|
||||
dihedral_zero.html
|
||||
|
||||
lammps_commands_improper.html
|
||||
|
||||
@ -98,19 +98,20 @@ molecule (header keyword = inertia).
|
||||
NOTE: The molecule command can be used to define molecules with bonds,
|
||||
angles, dihedrals, impropers, or special bond lists of neighbors
|
||||
within a molecular topology, so that you can later add the molecules
|
||||
to your simulation, via one or more of the commands listed above. If
|
||||
such molecules do not already exist when LAMMPS creates the simulation
|
||||
box, via the "create_box"_create_box.html or
|
||||
"read_data"_read_data.html command, when you later add them you may
|
||||
overflow the pre-allocated data structures which store molecular
|
||||
topology information with each atom, and an error will be generated.
|
||||
Both the "create_box"_create_box.html command and the data files read
|
||||
by the "read_data"_read_data.html command have "extra" options which
|
||||
to your simulation, via one or more of the commands listed above.
|
||||
Since this topology-related information requires that suitable storage
|
||||
is reserved when LAMMPS creates the simulation box (e.g. when using
|
||||
the "create_box"_create_box.html command or the
|
||||
"read_data"_read_data.html command) suitable space has to be reserved
|
||||
so you do not overflow those pre-allocated data structures when adding
|
||||
molecules later. Both the "create_box"_create_box.html command and
|
||||
the "read_data"_read_data.html command have "extra" options which
|
||||
insure space is allocated for storing topology info for molecules that
|
||||
are added later.
|
||||
|
||||
The format of an individual molecule file is similar to the data file
|
||||
read by the "read_data"_read_data.html commands, and is as follows.
|
||||
The format of an individual molecule file is similar (but
|
||||
not identical) to the data file read by the "read_data"_read_data.html
|
||||
command, and is as follows.
|
||||
|
||||
A molecule file has a header and a body. The header appears first.
|
||||
The first line of the header is always skipped; it typically contains
|
||||
@ -455,7 +456,11 @@ of SHAKE clusters.
|
||||
|
||||
:line
|
||||
|
||||
[Restrictions:] none
|
||||
[Restrictions:]
|
||||
|
||||
This command must come after the simulation box is defined by a
|
||||
"read_data"_read_data.html, "read_restart"_read_restart.html, or
|
||||
"create_box"_create_box.html command.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
|
||||
@ -12,12 +12,14 @@ pair_style born/omp command :h3
|
||||
pair_style born/gpu command :h3
|
||||
pair_style born/coul/long command :h3
|
||||
pair_style born/coul/long/cs command :h3
|
||||
pair_style born/coul/long/cs/gpu command :h3
|
||||
pair_style born/coul/long/gpu command :h3
|
||||
pair_style born/coul/long/omp command :h3
|
||||
pair_style born/coul/msm command :h3
|
||||
pair_style born/coul/msm/omp command :h3
|
||||
pair_style born/coul/wolf command :h3
|
||||
pair_style born/coul/wolf/cs command :h3
|
||||
pair_style born/coul/wolf/cs/gpu command :h3
|
||||
pair_style born/coul/wolf/gpu command :h3
|
||||
pair_style born/coul/wolf/omp command :h3
|
||||
pair_style born/coul/dsf command :h3
|
||||
|
||||
@ -20,6 +20,7 @@ pair_style coul/dsf/kk command :h3
|
||||
pair_style coul/dsf/omp command :h3
|
||||
pair_style coul/long command :h3
|
||||
pair_style coul/long/cs command :h3
|
||||
pair_style coul/long/cs/gpu command :h3
|
||||
pair_style coul/long/omp command :h3
|
||||
pair_style coul/long/gpu command :h3
|
||||
pair_style coul/long/kk command :h3
|
||||
|
||||
@ -95,9 +95,9 @@ This pair style can only be used via the {pair} keyword of the
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
This pair style is part of the USER-MISC package. It is only enabled
|
||||
if LAMMPS was built with that package. See
|
||||
the "Making LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
This pair style is part of the MANYBODY package. It is only enabled if
|
||||
LAMMPS was built with that package. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
This pair style requires the "newton"_newton.html setting to be "on"
|
||||
for pair interactions.
|
||||
@ -117,4 +117,5 @@ appropriate units if your simulation doesn't use "metal" units.
|
||||
:line
|
||||
|
||||
:link(Gao)
|
||||
[(Gao)] Gao and Weber, Nuclear Instruments and Methods in Physics Research B 191 (2012) 504.
|
||||
[(Gao)] Gao and Weber, Nuclear Instruments and Methods in Physics
|
||||
Research B 191 (2012) 504.
|
||||
|
||||
@ -47,13 +47,14 @@ the "(Aktulga)"_#Aktulga paper. The {reax/c} style was initially
|
||||
implemented as a stand-alone C code and is now integrated into LAMMPS
|
||||
as a package.
|
||||
|
||||
The {reax/c/kk} style is a Kokkos version of the ReaxFF potential that is
|
||||
derived from the {reax/c} style. The Kokkos version can run on GPUs and
|
||||
can also use OpenMP multithreading. For more information about the Kokkos package,
|
||||
see "Section 4"_Section_packages.html#kokkos and "Section 5.3.3"_accelerate_kokkos.html.
|
||||
One important consideration when using the {reax/c/kk} style is the choice of either
|
||||
half or full neighbor lists. This setting can be changed using the Kokkos "package"_package.html
|
||||
command.
|
||||
The {reax/c/kk} style is a Kokkos version of the ReaxFF potential that
|
||||
is derived from the {reax/c} style. The Kokkos version can run on GPUs
|
||||
and can also use OpenMP multithreading. For more information about the
|
||||
Kokkos package, see "Section 4"_Section_packages.html#kokkos and
|
||||
"Section 5.3.3"_accelerate_kokkos.html. One important consideration
|
||||
when using the {reax/c/kk} style is the choice of either half or full
|
||||
neighbor lists. This setting can be changed using the Kokkos
|
||||
"package"_package.html command.
|
||||
|
||||
The {reax/c} style differs from the "pair_style reax"_pair_reax.html
|
||||
command in the low-level implementation details. The {reax} style is a
|
||||
@ -80,9 +81,8 @@ parameterizations for different classes of materials. You can submit
|
||||
a contact request at the Materials Computation Center (MCC) website
|
||||
"https://www.mri.psu.edu/materials-computation-center/connect-mcc"_https://www.mri.psu.edu/materials-computation-center/connect-mcc,
|
||||
describing the material(s) you are interested in modeling with ReaxFF.
|
||||
They can tell
|
||||
you what is currently available or what it would take to create a
|
||||
suitable ReaxFF parameterization.
|
||||
They can tell you what is currently available or what it would take to
|
||||
create a suitable ReaxFF parameterization.
|
||||
|
||||
The {cfile} setting can be specified as NULL, in which case default
|
||||
settings are used. A control file can be specified which defines
|
||||
@ -120,28 +120,31 @@ assign to each atom will be used for computing the electrostatic
|
||||
interactions in the system.
|
||||
See the "fix qeq/reax"_fix_qeq_reax.html command for details.
|
||||
|
||||
Using the optional keyword {lgvdw} with the value {yes} turns on
|
||||
the low-gradient correction of the ReaxFF/C for long-range
|
||||
London Dispersion, as described in the "(Liu)"_#Liu_2011 paper. Force field
|
||||
Using the optional keyword {lgvdw} with the value {yes} turns on the
|
||||
low-gradient correction of the ReaxFF/C for long-range London
|
||||
Dispersion, as described in the "(Liu)"_#Liu_2011 paper. Force field
|
||||
file {ffield.reax.lg} is designed for this correction, and is trained
|
||||
for several energetic materials (see "Liu"). When using lg-correction,
|
||||
recommended value for parameter {thb} is 0.01, which can be set in the
|
||||
control file. Note: Force field files are different for the original
|
||||
or lg corrected pair styles, using wrong ffield file generates an error message.
|
||||
or lg corrected pair styles, using the wrong ffield file generates an
|
||||
error message.
|
||||
|
||||
Using the optional keyword {enobonds} with the value {yes}, the energy
|
||||
of atoms with no bonds (i.e. isolated atoms) is included in the total
|
||||
potential energy and the per-atom energy of that atom. If the value
|
||||
{no} is specified then the energy of atoms with no bonds is set to zero.
|
||||
The latter behavior is usually not desired, as it causes discontinuities
|
||||
in the potential energy when the bonding of an atom drops to zero.
|
||||
{no} is specified then the energy of atoms with no bonds is set to
|
||||
zero. The latter behavior is usually not desired, as it causes
|
||||
discontinuities in the potential energy when the bonding of an atom
|
||||
drops to zero.
|
||||
|
||||
Optional keywords {safezone} and {mincap} are used for allocating
|
||||
reax/c arrays. Increasing these values can avoid memory problems, such
|
||||
as segmentation faults and bondchk failed errors, that could occur under
|
||||
certain conditions. These keywords aren't used by the Kokkos version, which
|
||||
instead uses a more robust memory allocation scheme that checks if the sizes of
|
||||
the arrays have been exceeded and automatically allocates more memory.
|
||||
reax/c arrays. Increasing these values can avoid memory problems,
|
||||
such as segmentation faults and bondchk failed errors, that could
|
||||
occur under certain conditions. These keywords aren't used by the
|
||||
Kokkos version, which instead uses a more robust memory allocation
|
||||
scheme that checks if the sizes of the arrays have been exceeded and
|
||||
automatically allocates more memory.
|
||||
|
||||
The thermo variable {evdwl} stores the sum of all the ReaxFF potential
|
||||
energy contributions, with the exception of the Coulombic and charge
|
||||
@ -153,7 +156,8 @@ This pair style tallies a breakdown of the total ReaxFF potential
|
||||
energy into sub-categories, which can be accessed via the "compute
|
||||
pair"_compute_pair.html command as a vector of values of length 14.
|
||||
The 14 values correspond to the following sub-categories (the variable
|
||||
names in italics match those used in the original FORTRAN ReaxFF code):
|
||||
names in italics match those used in the original FORTRAN ReaxFF
|
||||
code):
|
||||
|
||||
{eb} = bond energy
|
||||
{ea} = atom energy
|
||||
@ -340,8 +344,8 @@ reax"_pair_reax.html
|
||||
|
||||
[Default:]
|
||||
|
||||
The keyword defaults are checkqeq = yes, enobonds = yes, lgvdw = no, safezone = 1.2,
|
||||
mincap = 50.
|
||||
The keyword defaults are checkqeq = yes, enobonds = yes, lgvdw = no,
|
||||
safezone = 1.2, mincap = 50.
|
||||
|
||||
:line
|
||||
|
||||
|
||||
@ -192,8 +192,8 @@ This pair style can only be used via the {pair} keyword of the
|
||||
[Restrictions:]
|
||||
|
||||
This pair style is part of the MANYBODY package. It is only enabled
|
||||
if LAMMPS was built with that package. See
|
||||
the "Making LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
if LAMMPS was built with that package. See the "Making
|
||||
LAMMPS"_Section_start.html#start_3 section for more info.
|
||||
|
||||
This pair style requires the "newton"_newton.html setting to be "on"
|
||||
for pair interactions.
|
||||
|
||||
@ -296,17 +296,24 @@ list of runs (e.g. 1000) without having to list N strings in the input
|
||||
script.
|
||||
|
||||
For the {string} style, a single string is assigned to the variable.
|
||||
The only difference between this and using the {index} style with a
|
||||
single string is that a variable with {string} style can be redefined.
|
||||
E.g. by another command later in the input script, or if the script is
|
||||
read again in a loop.
|
||||
Two differences between this and using the {index} style exist:
|
||||
a variable with {string} style can be redefined, e.g. by another command later
|
||||
in the input script, or if the script is read again in a loop. The other
|
||||
difference is that {string} performs variable substitution even if the
|
||||
string parameter is quoted.
|
||||
|
||||
For the {format} style, an equal-style variable is specified along
|
||||
with a C-style format string, e.g. "%f" or "%.10g", which must be
|
||||
appropriate for formatting a double-precision floating-point value.
|
||||
This allows an equal-style variable to be formatted specifically for
|
||||
output as a string, e.g. by the "print"_print.html command, if the
|
||||
default format "%.15g" has too much precision.
|
||||
The default format is "%.15g". This variable style allows an
|
||||
equal-style variable to be formatted precisely when it is evaluated.
|
||||
|
||||
If you simply wish to print a variable value with desired precision to
|
||||
the screen or logfile via the "print"_print.html or "fix
|
||||
print"_fix_print.html commands, you can also do this by specifying an
|
||||
"immediate" variable with a trailing colon and format string, as part
|
||||
of the string argument of those commands. This is explained in
|
||||
"Section 3.2"_Section_commands.html#cmd_2.
|
||||
|
||||
For the {getenv} style, a single string is assigned to the variable
|
||||
which should be the name of an environment variable. When the
|
||||
|
||||
@ -6,10 +6,10 @@
|
||||
// in.lammps = LAMMPS input script
|
||||
// in.quest = Quest input script
|
||||
|
||||
#include "mpi.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include <mpi.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "stdint.h"
|
||||
|
||||
#include "many2one.h"
|
||||
|
||||
@ -7,10 +7,10 @@
|
||||
// Sfactor = multiplier on strain effect
|
||||
// in.spparks = SPPARKS input script
|
||||
|
||||
#include "mpi.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include <mpi.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "lammps_data_write.h"
|
||||
#include "many2many.h"
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include <mpi.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include "error.h"
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <cstdio>
|
||||
#include <cstring>
|
||||
#include "files.h"
|
||||
|
||||
#define MAXLINE 256
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "irregular.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include <mpi.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include "lammps_data_write.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
#ifndef LAMMPS_DATA_WRITE_H
|
||||
#define LAMMPS_DATA_WRITE_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include <cstdio>
|
||||
#include "send2one.h"
|
||||
|
||||
class LAMMPSDataWrite : public Send2One {
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include <mpi.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include "many2many.h"
|
||||
#include "irregular.h"
|
||||
#include "memory.h"
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include "mpi.h"
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include <mpi.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include "many2one.h"
|
||||
#include "memory.h"
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include <mpi.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
|
||||
@ -1,5 +1,5 @@
|
||||
#include <mpi.h>
|
||||
#include <stdlib.h>
|
||||
#include <cstdlib>
|
||||
#include "one2many.h"
|
||||
#include "memory.h"
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
#include "mpi.h"
|
||||
#include "stdlib.h"
|
||||
#include "stdio.h"
|
||||
#include <mpi.h>
|
||||
#include <cstdlib>
|
||||
#include <cstdio>
|
||||
#include "send2one.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
|
||||
@ -23,10 +23,10 @@
|
||||
// Tdelta = incremental temperature for each of N runs
|
||||
// See README for compilation instructions
|
||||
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "mpi.h"
|
||||
#include <mpi.h>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
|
||||
#include "lammps.h" // these are LAMMPS include files
|
||||
#include "input.h"
|
||||
|
||||
@ -19,15 +19,16 @@
|
||||
// in.lammps = LAMMPS input script
|
||||
// See README for compilation instructions
|
||||
|
||||
#include "stdio.h"
|
||||
#include "stdlib.h"
|
||||
#include "string.h"
|
||||
#include "mpi.h"
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <cstring>
|
||||
#include <mpi.h>
|
||||
|
||||
#include "lammps.h" // these are LAMMPS include files
|
||||
#include "input.h"
|
||||
#include "atom.h"
|
||||
#include "library.h"
|
||||
// these are LAMMPS include files
|
||||
#include <lammps/lammps.h>
|
||||
#include <lammps/input.h>
|
||||
#include <lammps/atom.h>
|
||||
#include <lammps/library.h>
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
|
||||
49
examples/latte/data.graphene.boxrel
Normal file
49
examples/latte/data.graphene.boxrel
Normal file
@ -0,0 +1,49 @@
|
||||
LAMMPS Description
|
||||
|
||||
32 atoms
|
||||
|
||||
1 atom types
|
||||
|
||||
0.0000000000000000 10.000000000000000 xlo xhi
|
||||
0.0000000000000000 8.0000000000000000 ylo yhi
|
||||
0.0000000000000000 20.000000000000000 zlo zhi
|
||||
4.8985871965894128E-016 1.2246467991473533E-015 1.2246467991473533E-015 xy xz yz
|
||||
|
||||
Masses
|
||||
|
||||
1 12.010000000000000
|
||||
|
||||
Atoms
|
||||
|
||||
1 1 1 0.0 4.93100 4.25000 0.00500
|
||||
2 1 1 0.0 8.62100 2.12100 0.14000
|
||||
3 1 1 0.0 3.70700 2.12600 0.14700
|
||||
4 1 1 0.0 7.38200 4.25400 0.07800
|
||||
5 1 1 0.0 2.47900 4.25400 0.08000
|
||||
6 1 1 0.0 6.15800 6.37400 -0.01000
|
||||
7 1 1 0.0 1.23700 6.38300 0.06600
|
||||
8 1 1 0.0 1.24000 2.12100 0.14600
|
||||
9 1 1 0.0 6.15500 2.12600 0.12900
|
||||
10 1 1 0.0 0.00700 4.25200 0.12200
|
||||
11 1 1 0.0 8.62100 6.38500 0.04100
|
||||
12 1 1 0.0 3.70000 6.37400 -0.01000
|
||||
13 1 1 0.0 0.00600 1.41600 0.13000
|
||||
14 1 1 0.0 4.93000 1.40800 0.14700
|
||||
15 1 1 0.0 8.61800 3.54600 0.11500
|
||||
16 1 1 0.0 3.70800 3.55300 0.08400
|
||||
17 1 1 0.0 7.39400 5.68000 0.03500
|
||||
18 1 1 0.0 2.46500 5.68000 0.03500
|
||||
19 1 1 0.0 6.16000 7.80500 0.02700
|
||||
20 1 1 0.0 1.23800 7.81100 0.06000
|
||||
21 1 1 0.0 2.47300 1.41800 0.16100
|
||||
22 1 1 0.0 7.38900 1.41700 0.14800
|
||||
23 1 1 0.0 1.24200 3.54700 0.12600
|
||||
24 1 1 0.0 6.15300 3.55300 0.07400
|
||||
25 1 1 0.0 0.00700 5.67800 0.09700
|
||||
26 1 1 0.0 4.93100 5.66800 -0.03100
|
||||
27 1 1 0.0 8.62000 7.81300 0.03900
|
||||
28 1 1 0.0 3.70100 7.80200 0.03700
|
||||
29 1 1 0.0 0.00700 -0.01000 0.08900
|
||||
30 1 1 0.0 4.93100 -0.01500 0.16100
|
||||
31 1 1 0.0 2.47300 -0.01200 0.14400
|
||||
32 1 1 0.0 7.38900 -0.01300 0.14800
|
||||
44
examples/latte/in.graphene.boxrel
Normal file
44
examples/latte/in.graphene.boxrel
Normal file
@ -0,0 +1,44 @@
|
||||
# Graphene box relaxation with LATTE
|
||||
|
||||
units metal
|
||||
atom_style full
|
||||
atom_modify sort 0 0.0 # turn off sorting of the coordinates
|
||||
|
||||
read_data data.graphene.boxrel
|
||||
|
||||
# replicate system if requested
|
||||
|
||||
variable x index 1
|
||||
variable y index 1
|
||||
variable z index 1
|
||||
|
||||
variable nrep equal v_x*v_y*v_z
|
||||
if "${nrep} > 1" then "replicate $x $y $z"
|
||||
|
||||
# initialize system
|
||||
|
||||
velocity all create 0.0 87287 loop geom
|
||||
|
||||
pair_style zero 1.0
|
||||
pair_coeff * *
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 1 delay 0 check yes
|
||||
|
||||
timestep 0.00025
|
||||
|
||||
fix 1 all box/relax iso 0.0 vmax 0.001
|
||||
|
||||
fix 2 all latte NULL
|
||||
fix_modify 2 energy yes
|
||||
|
||||
thermo_style custom etotal
|
||||
|
||||
# minimization
|
||||
|
||||
thermo 1
|
||||
fix 3 all print 1 "Total Energy ="
|
||||
min_style cg
|
||||
min_modify dmax 0.1
|
||||
min_modify line quadratic
|
||||
minimize 1.0e-4 1.0e-4 10000 10000
|
||||
@ -37,5 +37,6 @@ thermo_style custom step temp pe etotal press
|
||||
# minimization
|
||||
|
||||
thermo 10
|
||||
min_style fire
|
||||
minimize 1.0e-9 1.0e-9 500 500
|
||||
|
||||
min_style fire
|
||||
minimize 1.0e-4 1.0e-4 500 500
|
||||
|
||||
@ -11,7 +11,6 @@ LATTE INPUT FILE
|
||||
CONTROL{
|
||||
xControl= 1
|
||||
BASISTYPE= NONORTHO
|
||||
COORDSFILE= "./coords.dat"
|
||||
PARAMPATH= "./TBparam"
|
||||
KBT= 0.0
|
||||
ENTROPYKIND= 1
|
||||
@ -32,9 +31,3 @@ CONTROL{
|
||||
XBODISORDER= 5
|
||||
KON= 0
|
||||
}
|
||||
|
||||
#Controls for QMD (if using lammps MAXITER must be set to -1)
|
||||
MDCONTROL{
|
||||
MAXITER= -1
|
||||
}
|
||||
|
||||
|
||||
@ -1,406 +0,0 @@
|
||||
The log file for latte_lib
|
||||
|
||||
CONTROL{ }
|
||||
|
||||
WARNING: variable JobName= is missing. I will use a default value instead ...
|
||||
WARNING: variable PARAMPATH= is missing. I will use a default value instead ...
|
||||
WARNING: variable DEBUGON= is missing. I will use a default value instead ...
|
||||
WARNING: variable FERMIM= is missing. I will use a default value instead ...
|
||||
WARNING: variable CGORLIB= is missing. I will use a default value instead ...
|
||||
WARNING: variable NORECS= is missing. I will use a default value instead ...
|
||||
WARNING: variable VDWON= is missing. I will use a default value instead ...
|
||||
WARNING: variable ORDERNMOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable LCNON= is missing. I will use a default value instead ...
|
||||
WARNING: variable LCNITER= is missing. I will use a default value instead ...
|
||||
WARNING: variable MDON= is missing. I will use a default value instead ...
|
||||
WARNING: variable PBCON= is missing. I will use a default value instead ...
|
||||
WARNING: variable RESTART= is missing. I will use a default value instead ...
|
||||
WARNING: variable NGPU= is missing. I will use a default value instead ...
|
||||
WARNING: variable COMPFORCE= is missing. I will use a default value instead ...
|
||||
WARNING: variable DOSFIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable INTS2FIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable NFITSTEP= is missing. I will use a default value instead ...
|
||||
WARNING: variable QFIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPFITON= is missing. I will use a default value instead ...
|
||||
WARNING: variable ALLFITON= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPSTEP= is missing. I will use a default value instead ...
|
||||
WARNING: variable BISTEP= is missing. I will use a default value instead ...
|
||||
WARNING: variable PP2FIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable BINT2FIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPNMOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPNGEOM= is missing. I will use a default value instead ...
|
||||
WARNING: variable PARREP= is missing. I will use a default value instead ...
|
||||
WARNING: variable VERBOSE= is missing. I will use a default value instead ...
|
||||
WARNING: variable MIXER= is missing. I will use a default value instead ...
|
||||
WARNING: variable RESTARTLIB= is missing. I will use a default value instead ...
|
||||
WARNING: variable CGTOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable ELEC_ETOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable COULACC= is missing. I will use a default value instead ...
|
||||
WARNING: variable COULCUT= is missing. I will use a default value instead ...
|
||||
WARNING: variable COULR1= is missing. I will use a default value instead ...
|
||||
WARNING: variable CHTOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable BETA= is missing. I will use a default value instead ...
|
||||
WARNING: variable MCSIGMA= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPBETA= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPSIGMA= is missing. I will use a default value instead ...
|
||||
WARNING: variable ER= is missing. I will use a default value instead ...
|
||||
WARNING: variable INITIALIZED= is missing. I will use a default value instead ...
|
||||
|
||||
|
||||
############### Parameters used for this run ################
|
||||
CONTROL{
|
||||
xControl= 1
|
||||
DEBUGON= 0
|
||||
FERMIM= 6
|
||||
CGORLIB= 1
|
||||
NORECS= 1
|
||||
ENTROPYKIND= 1
|
||||
PPOTON= 1
|
||||
VDWON= 0
|
||||
SPINON= 0
|
||||
ELECTRO= 1
|
||||
ELECMETH= 0
|
||||
MAXSCF= 450
|
||||
MINSP2ITER= 22
|
||||
FULLQCONV= 1
|
||||
QITER= 3
|
||||
ORDERNMOL= 0
|
||||
SPARSEON= 1
|
||||
THRESHOLDON= 1
|
||||
FILLINSTOP= 100
|
||||
BLKSZ= 4
|
||||
MSPARSE= 1500
|
||||
LCNON= 0
|
||||
LCNITER= 4
|
||||
RELAX= 0
|
||||
MAXITER= 100000
|
||||
MDON= 1
|
||||
PBCON= 1
|
||||
RESTART= 0
|
||||
CHARGE= 0
|
||||
XBO= 1
|
||||
XBODISON= 1
|
||||
XBODISORDER= 5
|
||||
NGPU= 2
|
||||
KON= 0
|
||||
COMPFORCE= 1
|
||||
DOSFIT= 0
|
||||
INTS2FIT= 1
|
||||
NFITSTEP= 5000
|
||||
QFIT= 0
|
||||
PPFITON= 0
|
||||
ALLFITON= 0
|
||||
PPSTEP= 500
|
||||
BISTEP= 500
|
||||
PP2FIT= 2
|
||||
BINT2FIT= 6
|
||||
PPNMOL= 10
|
||||
PPNGEOM= 200
|
||||
PARREP= 0
|
||||
VERBOSE= 0
|
||||
MIXER= 0
|
||||
RESTARTLIB= 0
|
||||
CGTOL= 9.9999999747524271E-007
|
||||
KBT= 0.0000000000000000
|
||||
SPINTOL= 1.0000000000000000E-004
|
||||
ELEC_ETOL= 1.0000000474974513E-003
|
||||
ELEC_QTOL= 1.0000000000000000E-008
|
||||
COULACC= 9.9999999747524271E-007
|
||||
COULCUT= -500.00000000000000
|
||||
COULR1= 500.00000000000000
|
||||
BREAKTOL= 9.9999999999999995E-007
|
||||
QMIX= 0.25000000000000000
|
||||
SPINMIX= 0.25000000000000000
|
||||
MDMIX= 0.25000000000000000
|
||||
NUMTHRESH= 9.9999999999999995E-007
|
||||
CHTOL= 9.9999997764825821E-003
|
||||
SKIN= 1.0000000000000000
|
||||
RLXFTOL= 9.9999999999999995E-008
|
||||
BETA= 1000.0000000000000
|
||||
MCSIGMA= 0.20000000298023224
|
||||
PPBETA= 1000.0000000000000
|
||||
PPSIGMA= 9.9999997764825821E-003
|
||||
ER= 1.0000000000000000
|
||||
JobName=MyJob
|
||||
BASISTYPE=NONORTHO
|
||||
SP2CONV=REL
|
||||
RELAXTYPE=SD
|
||||
PARAMPATH=./TBparam
|
||||
COORDSFILE=./coords.dat
|
||||
INITIALIZED= F
|
||||
}
|
||||
|
||||
./TBparam/electrons.dat
|
||||
MDCONTROL{ }
|
||||
|
||||
WARNING: variable RNDIST= is missing. I will use a default value instead ...
|
||||
WARNING: variable SEEDINIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable NPTTYPE= is missing. I will use a default value instead ...
|
||||
WARNING: variable UDNEIGH= is missing. I will use a default value instead ...
|
||||
WARNING: variable DUMPFREQ= is missing. I will use a default value instead ...
|
||||
WARNING: variable RSFREQ= is missing. I will use a default value instead ...
|
||||
WARNING: variable WRTFREQ= is missing. I will use a default value instead ...
|
||||
WARNING: variable TOINITTEMP5= is missing. I will use a default value instead ...
|
||||
WARNING: variable THERMPER= is missing. I will use a default value instead ...
|
||||
WARNING: variable THERMRUN= is missing. I will use a default value instead ...
|
||||
WARNING: variable NVTON= is missing. I will use a default value instead ...
|
||||
WARNING: variable NPTON= is missing. I will use a default value instead ...
|
||||
WARNING: variable AVEPER= is missing. I will use a default value instead ...
|
||||
WARNING: variable SEED= is missing. I will use a default value instead ...
|
||||
WARNING: variable SHOCKON= is missing. I will use a default value instead ...
|
||||
WARNING: variable SHOCKSTART= is missing. I will use a default value instead ...
|
||||
WARNING: variable SHOCKDIR= is missing. I will use a default value instead ...
|
||||
WARNING: variable MDADAPT= is missing. I will use a default value instead ...
|
||||
WARNING: variable GETHUG= is missing. I will use a default value instead ...
|
||||
WARNING: variable RSLEVEL= is missing. I will use a default value instead ...
|
||||
WARNING: variable DT= is missing. I will use a default value instead ...
|
||||
WARNING: variable TEMPERATURE= is missing. I will use a default value instead ...
|
||||
WARNING: variable FRICTION= is missing. I will use a default value instead ...
|
||||
WARNING: variable PTARGET= is missing. I will use a default value instead ...
|
||||
WARNING: variable UPARTICLE= is missing. I will use a default value instead ...
|
||||
WARNING: variable USHOCK= is missing. I will use a default value instead ...
|
||||
WARNING: variable C0= is missing. I will use a default value instead ...
|
||||
WARNING: variable E0= is missing. I will use a default value instead ...
|
||||
WARNING: variable V0= is missing. I will use a default value instead ...
|
||||
WARNING: variable P0= is missing. I will use a default value instead ...
|
||||
WARNING: variable DUMMY= is missing. I will use a default value instead ...
|
||||
|
||||
|
||||
############### Parameters used for this run ################
|
||||
MDCONTROL{
|
||||
MAXITER= -1
|
||||
UDNEIGH= 1
|
||||
DUMPFREQ= 250
|
||||
RSFREQ= 500
|
||||
WRTFREQ= 25
|
||||
TOINITTEMP5= 1
|
||||
THERMPER= 500
|
||||
THERMRUN= 50000
|
||||
NVTON= 0
|
||||
NPTON= 0
|
||||
AVEPER= 1000
|
||||
SEED= 54
|
||||
SHOCKON= 0
|
||||
SHOCKSTART= 100000
|
||||
SHOCKDIR= 1
|
||||
MDADAPT= 0
|
||||
GETHUG= 0
|
||||
RSLEVEL= 0
|
||||
DT= 0.25000000000000000
|
||||
TEMPERATURE= 300.00000000000000
|
||||
FRICTION= 1000.0000000000000
|
||||
PTARGET= 0.0000000000000000
|
||||
UPARTICLE= 500.00000000000000
|
||||
USHOCK= -4590.0000000000000
|
||||
C0= 1300.0000000000000
|
||||
E0= -795.72497558593750
|
||||
V0= 896.98486328125000
|
||||
P0= 8.3149001002311707E-002
|
||||
RNDIST=GAUSSIAN
|
||||
SEEDINIT=UNIFORM
|
||||
NPTTYPE=ISO
|
||||
DUMMY= F
|
||||
}
|
||||
|
||||
LIBCALLS 0
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15165627147849 13.850829743067372 0.0000000000000000 3.9653384620309846
|
||||
LIBCALLS 1
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15142147081917 13.850596160685321 0.0000000000000000 3.9653428217526296
|
||||
LIBCALLS 2
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15072431717670 13.849902902335046 0.0000000000000000 3.9653556077235628
|
||||
LIBCALLS 3
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14958682134301 13.848772166382796 0.0000000000000000 3.9653762812719782
|
||||
LIBCALLS 4
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14804481054080 13.847240065975685 0.0000000000000000 3.9654039257311324
|
||||
LIBCALLS 5
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14614669298459 13.845355347298943 0.0000000000000000 3.9654372593625880
|
||||
LIBCALLS 6
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14395200541782 13.843177681164811 0.0000000000000000 3.9654747563744728
|
||||
LIBCALLS 7
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14152950027858 13.840775605612510 0.0000000000000000 3.9655146828204026
|
||||
LIBCALLS 8
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13895477239572 13.838224210058369 0.0000000000000000 3.9655551214573213
|
||||
LIBCALLS 9
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13630808318862 13.835602658269416 0.0000000000000000 3.9655940696401335
|
||||
LIBCALLS 10
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13367156672246 13.832991646694552 0.0000000000000000 3.9656294961085377
|
||||
LIBCALLS 11
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13112695791978 13.830470890853416 0.0000000000000000 3.9656594331001127
|
||||
LIBCALLS 12
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12875304084571 13.828116721514562 0.0000000000000000 3.9656820468287637
|
||||
LIBCALLS 13
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12662314462005 13.825999860613845 0.0000000000000000 3.9656956633599689
|
||||
LIBCALLS 14
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12480303363179 13.824183432931337 0.0000000000000000 3.9656988576578489
|
||||
LIBCALLS 15
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12334906554690 13.822721254684298 0.0000000000000000 3.9656905013961525
|
||||
LIBCALLS 16
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12230649281338 13.821656427050725 0.0000000000000000 3.9656697961568699
|
||||
LIBCALLS 17
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12170820445976 13.821020251989051 0.0000000000000000 3.9656362957330207
|
||||
LIBCALLS 18
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12157378544725 13.820831478957400 0.0000000000000000 3.9655899465557289
|
||||
LIBCALLS 19
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12190902409918 13.821095885466233 0.0000000000000000 3.9655310732858191
|
||||
LIBCALLS 20
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12270578464654 13.821806190548854 0.0000000000000000 3.9654603894825375
|
||||
LIBCALLS 21
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12394226924755 13.822942298269552 0.0000000000000000 3.9653789701528157
|
||||
LIBCALLS 22
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12558369933174 13.824471866833779 0.0000000000000000 3.9652882392864672
|
||||
LIBCALLS 23
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12758334335854 13.826351196916939 0.0000000000000000 3.9651899208403507
|
||||
LIBCALLS 24
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12988392857540 13.828526429544008 0.0000000000000000 3.9650859962581815
|
||||
LIBCALLS 25
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13241933900565 13.830935038404082 0.0000000000000000 3.9649786471076300
|
||||
LIBCALLS 26
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13511663668885 13.833507593821677 0.0000000000000000 3.9648702062183578
|
||||
LIBCALLS 27
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13789821166085 13.836169765592846 0.0000000000000000 3.9647630647732250
|
||||
LIBCALLS 28
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14068416314257 13.838844520440762 0.0000000000000000 3.9646596094056243
|
||||
LIBCALLS 29
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14339478125902 13.841454456993119 0.0000000000000000 3.9645621614306648
|
||||
LIBCALLS 30
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14595299166797 13.843924209084781 0.0000000000000000 3.9644728862209537
|
||||
LIBCALLS 31
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14828672908391 13.846182838096166 0.0000000000000000 3.9643937231592781
|
||||
LIBCALLS 32
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15033121417270 13.848166127650318 0.0000000000000000 3.9643263326484774
|
||||
LIBCALLS 33
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15203097820654 13.849818691045462 0.0000000000000000 3.9642720350529470
|
||||
LIBCALLS 34
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15334158494318 13.851095804201121 0.0000000000000000 3.9642317563508436
|
||||
LIBCALLS 35
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15423101277941 13.851964884709183 0.0000000000000000 3.9642060118064197
|
||||
LIBCALLS 36
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15468060067406 13.852406550643760 0.0000000000000000 3.9641948735126151
|
||||
LIBCALLS 37
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15468556770435 13.852415210893483 0.0000000000000000 3.9641979705462513
|
||||
LIBCALLS 38
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15425506702360 13.851999160128511 0.0000000000000000 3.9642145018322728
|
||||
LIBCALLS 39
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15341177086162 13.851180175004831 0.0000000000000000 3.9642432622019754
|
||||
LIBCALLS 40
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15219100341108 13.849992631968849 0.0000000000000000 3.9642826797086155
|
||||
LIBCALLS 41
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15063948253476 13.848482189284203 0.0000000000000000 3.9643308764467280
|
||||
LIBCALLS 42
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14881366363778 13.846704095034502 0.0000000000000000 3.9643857194231229
|
||||
LIBCALLS 43
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14677783841711 13.844721197666447 0.0000000000000000 3.9644449063996254
|
||||
LIBCALLS 44
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14460195130079 13.842601745208173 0.0000000000000000 3.9645060327113080
|
||||
LIBCALLS 45
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14235930197236 13.840417063344470 0.0000000000000000 3.9645666751650537
|
||||
LIBCALLS 46
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14012416839108 13.838239201362184 0.0000000000000000 3.9646244709241216
|
||||
LIBCALLS 47
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13796944534135 13.836138629087953 0.0000000000000000 3.9646771958199687
|
||||
LIBCALLS 48
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13596436459642 13.834182058508610 0.0000000000000000 3.9647228360374207
|
||||
LIBCALLS 49
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13417236277201 13.832430452024822 0.0000000000000000 3.9647596471475066
|
||||
LIBCALLS 50
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13264918465853 13.830937266579358 0.0000000000000000 3.9647862263274365
|
||||
LIBCALLS 51
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13144121811348 13.829746970164395 0.0000000000000000 3.9648015300858930
|
||||
LIBCALLS 52
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13058418584075 13.828893856279002 0.0000000000000000 3.9648049379175174
|
||||
LIBCALLS 53
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13010212355317 13.828401171909800 0.0000000000000000 3.9647962482159476
|
||||
LIBCALLS 54
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13000675986638 13.828280567696357 0.0000000000000000 3.9647757005033171
|
||||
LIBCALLS 55
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13029725443062 13.828531873218640 0.0000000000000000 3.9647439679967813
|
||||
LIBCALLS 56
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13096031859556 13.829143196581525 0.0000000000000000 3.9647021412055241
|
||||
LIBCALLS 57
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13197071275096 13.830091344339912 0.0000000000000000 3.9646517009757813
|
||||
LIBCALLS 58
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13329208290526 13.831342554670950 0.0000000000000000 3.9645944691057076
|
||||
LIBCALLS 59
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13487817952188 13.832853532802908 0.0000000000000000 3.9645325717081379
|
||||
LIBCALLS 60
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13667431785007 13.834572772174083 0.0000000000000000 3.9644683636269380
|
||||
LIBCALLS 61
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13861917436014 13.836442137716100 0.0000000000000000 3.9644043716683206
|
||||
LIBCALLS 62
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14064674344610 13.838398678492441 0.0000000000000000 3.9643432117931376
|
||||
LIBCALLS 63
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14268847880851 13.840376626541268 0.0000000000000000 3.9642875107994442
|
||||
LIBCALLS 64
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14467552446979 13.842309527587247 0.0000000000000000 3.9642398279114381
|
||||
LIBCALLS 65
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14654097615647 13.844132438475109 0.0000000000000000 3.9642025589783412
|
||||
LIBCALLS 66
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14822207995957 13.845784117078871 0.0000000000000000 3.9641778771678413
|
||||
LIBCALLS 67
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14966231911774 13.847209123749478 0.0000000000000000 3.9641676470155103
|
||||
LIBCALLS 68
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15081329445576 13.848359751049152 0.0000000000000000 3.9641733618391299
|
||||
LIBCALLS 69
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15163634076458 13.849197700537186 0.0000000000000000 3.9641960937768981
|
||||
LIBCALLS 70
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15210380659516 13.849695432596437 0.0000000000000000 3.9642364336978391
|
||||
LIBCALLS 71
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15219997215792 13.849837127658775 0.0000000000000000 3.9642944914660605
|
||||
LIBCALLS 72
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15192153900722 13.849619213627008 0.0000000000000000 3.9643698667021590
|
||||
LIBCALLS 73
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15127769530471 13.849050434626310 0.0000000000000000 3.9644616585289247
|
||||
LIBCALLS 74
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.15028974592457 13.848151458176057 0.0000000000000000 3.9645684873567908
|
||||
LIBCALLS 75
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14899032381624 13.846954040343237 0.0000000000000000 3.9646885325372980
|
||||
LIBCALLS 76
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14742221364327 13.845499789571511 0.0000000000000000 3.9648195821504211
|
||||
LIBCALLS 77
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14563684020112 13.843838588134755 0.0000000000000000 3.9649591055666282
|
||||
LIBCALLS 78
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14369246883172 13.842026744273829 0.0000000000000000 3.9651043223068876
|
||||
LIBCALLS 79
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14165219754119 13.840124957235691 0.0000000000000000 3.9652522794782556
|
||||
LIBCALLS 80
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13958181195608 13.838196181062383 0.0000000000000000 3.9653999492835532
|
||||
LIBCALLS 81
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13754757713065 13.836303471774007 0.0000000000000000 3.9655443071963385
|
||||
LIBCALLS 82
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13561405478509 13.834507896249461 0.0000000000000000 3.9656824354232736
|
||||
LIBCALLS 83
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13384198639028 13.832866571528193 0.0000000000000000 3.9658115908515681
|
||||
LIBCALLS 84
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13228634940748 13.831430891696755 0.0000000000000000 3.9659292903699495
|
||||
LIBCALLS 85
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13099461122306 13.830244986101496 0.0000000000000000 3.9660333724384569
|
||||
LIBCALLS 86
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13000526350720 13.829344440260281 0.0000000000000000 3.9661220782532145
|
||||
LIBCALLS 87
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12934661713206 13.828755299191645 0.0000000000000000 3.9661940662588862
|
||||
LIBCALLS 88
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12903595764971 13.828493364127572 0.0000000000000000 3.9662484623936765
|
||||
LIBCALLS 89
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12907904533250 13.828563786156602 0.0000000000000000 3.9662848954537067
|
||||
LIBCALLS 90
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.12946994320248 13.828960955791626 0.0000000000000000 3.9663034756730777
|
||||
LIBCALLS 91
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13019123489619 13.829668684955367 0.0000000000000000 3.9663048073711558
|
||||
LIBCALLS 92
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13121457766835 13.830660675785223 0.0000000000000000 3.9662899643566578
|
||||
LIBCALLS 93
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13250159637499 13.831901269302985 0.0000000000000000 3.9662604605307470
|
||||
LIBCALLS 94
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13400508153813 13.833346464674193 0.0000000000000000 3.9662181906403653
|
||||
LIBCALLS 95
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13567049003717 13.834945196074795 0.0000000000000000 3.9661653991148187
|
||||
LIBCALLS 96
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13743766487022 13.836640848231452 0.0000000000000000 3.9661045863001441
|
||||
LIBCALLS 97
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.13924277096038 13.838372983906890 0.0000000000000000 3.9660384593805307
|
||||
LIBCALLS 98
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14102036682124 13.840079246589914 0.0000000000000000 3.9659698320311318
|
||||
LIBCALLS 99
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14270555407057 13.841697390518378 0.0000000000000000 3.9659015537535014
|
||||
LIBCALLS 100
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -261.14423615166146 13.843167378892108 0.0000000000000000 3.9658364191978137
|
||||
@ -1,406 +0,0 @@
|
||||
The log file for latte_lib
|
||||
|
||||
CONTROL{ }
|
||||
|
||||
WARNING: variable JobName= is missing. I will use a default value instead ...
|
||||
WARNING: variable PARAMPATH= is missing. I will use a default value instead ...
|
||||
WARNING: variable DEBUGON= is missing. I will use a default value instead ...
|
||||
WARNING: variable FERMIM= is missing. I will use a default value instead ...
|
||||
WARNING: variable CGORLIB= is missing. I will use a default value instead ...
|
||||
WARNING: variable NORECS= is missing. I will use a default value instead ...
|
||||
WARNING: variable VDWON= is missing. I will use a default value instead ...
|
||||
WARNING: variable ORDERNMOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable LCNON= is missing. I will use a default value instead ...
|
||||
WARNING: variable LCNITER= is missing. I will use a default value instead ...
|
||||
WARNING: variable MDON= is missing. I will use a default value instead ...
|
||||
WARNING: variable PBCON= is missing. I will use a default value instead ...
|
||||
WARNING: variable RESTART= is missing. I will use a default value instead ...
|
||||
WARNING: variable NGPU= is missing. I will use a default value instead ...
|
||||
WARNING: variable COMPFORCE= is missing. I will use a default value instead ...
|
||||
WARNING: variable DOSFIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable INTS2FIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable NFITSTEP= is missing. I will use a default value instead ...
|
||||
WARNING: variable QFIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPFITON= is missing. I will use a default value instead ...
|
||||
WARNING: variable ALLFITON= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPSTEP= is missing. I will use a default value instead ...
|
||||
WARNING: variable BISTEP= is missing. I will use a default value instead ...
|
||||
WARNING: variable PP2FIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable BINT2FIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPNMOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPNGEOM= is missing. I will use a default value instead ...
|
||||
WARNING: variable PARREP= is missing. I will use a default value instead ...
|
||||
WARNING: variable VERBOSE= is missing. I will use a default value instead ...
|
||||
WARNING: variable MIXER= is missing. I will use a default value instead ...
|
||||
WARNING: variable RESTARTLIB= is missing. I will use a default value instead ...
|
||||
WARNING: variable CGTOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable ELEC_ETOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable COULACC= is missing. I will use a default value instead ...
|
||||
WARNING: variable COULCUT= is missing. I will use a default value instead ...
|
||||
WARNING: variable COULR1= is missing. I will use a default value instead ...
|
||||
WARNING: variable CHTOL= is missing. I will use a default value instead ...
|
||||
WARNING: variable BETA= is missing. I will use a default value instead ...
|
||||
WARNING: variable MCSIGMA= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPBETA= is missing. I will use a default value instead ...
|
||||
WARNING: variable PPSIGMA= is missing. I will use a default value instead ...
|
||||
WARNING: variable ER= is missing. I will use a default value instead ...
|
||||
WARNING: variable INITIALIZED= is missing. I will use a default value instead ...
|
||||
|
||||
|
||||
############### Parameters used for this run ################
|
||||
CONTROL{
|
||||
xControl= 1
|
||||
DEBUGON= 0
|
||||
FERMIM= 6
|
||||
CGORLIB= 1
|
||||
NORECS= 1
|
||||
ENTROPYKIND= 1
|
||||
PPOTON= 1
|
||||
VDWON= 0
|
||||
SPINON= 0
|
||||
ELECTRO= 1
|
||||
ELECMETH= 0
|
||||
MAXSCF= 450
|
||||
MINSP2ITER= 22
|
||||
FULLQCONV= 1
|
||||
QITER= 3
|
||||
ORDERNMOL= 0
|
||||
SPARSEON= 1
|
||||
THRESHOLDON= 1
|
||||
FILLINSTOP= 100
|
||||
BLKSZ= 4
|
||||
MSPARSE= 1500
|
||||
LCNON= 0
|
||||
LCNITER= 4
|
||||
RELAX= 0
|
||||
MAXITER= 100000
|
||||
MDON= 1
|
||||
PBCON= 1
|
||||
RESTART= 0
|
||||
CHARGE= 0
|
||||
XBO= 1
|
||||
XBODISON= 1
|
||||
XBODISORDER= 5
|
||||
NGPU= 2
|
||||
KON= 0
|
||||
COMPFORCE= 1
|
||||
DOSFIT= 0
|
||||
INTS2FIT= 1
|
||||
NFITSTEP= 5000
|
||||
QFIT= 0
|
||||
PPFITON= 0
|
||||
ALLFITON= 0
|
||||
PPSTEP= 500
|
||||
BISTEP= 500
|
||||
PP2FIT= 2
|
||||
BINT2FIT= 6
|
||||
PPNMOL= 10
|
||||
PPNGEOM= 200
|
||||
PARREP= 0
|
||||
VERBOSE= 0
|
||||
MIXER= 0
|
||||
RESTARTLIB= 0
|
||||
CGTOL= 9.9999999747524271E-007
|
||||
KBT= 0.0000000000000000
|
||||
SPINTOL= 1.0000000000000000E-004
|
||||
ELEC_ETOL= 1.0000000474974513E-003
|
||||
ELEC_QTOL= 1.0000000000000000E-008
|
||||
COULACC= 9.9999999747524271E-007
|
||||
COULCUT= -500.00000000000000
|
||||
COULR1= 500.00000000000000
|
||||
BREAKTOL= 9.9999999999999995E-007
|
||||
QMIX= 0.25000000000000000
|
||||
SPINMIX= 0.25000000000000000
|
||||
MDMIX= 0.25000000000000000
|
||||
NUMTHRESH= 9.9999999999999995E-007
|
||||
CHTOL= 9.9999997764825821E-003
|
||||
SKIN= 1.0000000000000000
|
||||
RLXFTOL= 9.9999999999999995E-008
|
||||
BETA= 1000.0000000000000
|
||||
MCSIGMA= 0.20000000298023224
|
||||
PPBETA= 1000.0000000000000
|
||||
PPSIGMA= 9.9999997764825821E-003
|
||||
ER= 1.0000000000000000
|
||||
JobName=MyJob
|
||||
BASISTYPE=NONORTHO
|
||||
SP2CONV=REL
|
||||
RELAXTYPE=SD
|
||||
PARAMPATH=./TBparam
|
||||
COORDSFILE=./coords.dat
|
||||
INITIALIZED= F
|
||||
}
|
||||
|
||||
./TBparam/electrons.dat
|
||||
MDCONTROL{ }
|
||||
|
||||
WARNING: variable RNDIST= is missing. I will use a default value instead ...
|
||||
WARNING: variable SEEDINIT= is missing. I will use a default value instead ...
|
||||
WARNING: variable NPTTYPE= is missing. I will use a default value instead ...
|
||||
WARNING: variable UDNEIGH= is missing. I will use a default value instead ...
|
||||
WARNING: variable DUMPFREQ= is missing. I will use a default value instead ...
|
||||
WARNING: variable RSFREQ= is missing. I will use a default value instead ...
|
||||
WARNING: variable WRTFREQ= is missing. I will use a default value instead ...
|
||||
WARNING: variable TOINITTEMP5= is missing. I will use a default value instead ...
|
||||
WARNING: variable THERMPER= is missing. I will use a default value instead ...
|
||||
WARNING: variable THERMRUN= is missing. I will use a default value instead ...
|
||||
WARNING: variable NVTON= is missing. I will use a default value instead ...
|
||||
WARNING: variable NPTON= is missing. I will use a default value instead ...
|
||||
WARNING: variable AVEPER= is missing. I will use a default value instead ...
|
||||
WARNING: variable SEED= is missing. I will use a default value instead ...
|
||||
WARNING: variable SHOCKON= is missing. I will use a default value instead ...
|
||||
WARNING: variable SHOCKSTART= is missing. I will use a default value instead ...
|
||||
WARNING: variable SHOCKDIR= is missing. I will use a default value instead ...
|
||||
WARNING: variable MDADAPT= is missing. I will use a default value instead ...
|
||||
WARNING: variable GETHUG= is missing. I will use a default value instead ...
|
||||
WARNING: variable RSLEVEL= is missing. I will use a default value instead ...
|
||||
WARNING: variable DT= is missing. I will use a default value instead ...
|
||||
WARNING: variable TEMPERATURE= is missing. I will use a default value instead ...
|
||||
WARNING: variable FRICTION= is missing. I will use a default value instead ...
|
||||
WARNING: variable PTARGET= is missing. I will use a default value instead ...
|
||||
WARNING: variable UPARTICLE= is missing. I will use a default value instead ...
|
||||
WARNING: variable USHOCK= is missing. I will use a default value instead ...
|
||||
WARNING: variable C0= is missing. I will use a default value instead ...
|
||||
WARNING: variable E0= is missing. I will use a default value instead ...
|
||||
WARNING: variable V0= is missing. I will use a default value instead ...
|
||||
WARNING: variable P0= is missing. I will use a default value instead ...
|
||||
WARNING: variable DUMMY= is missing. I will use a default value instead ...
|
||||
|
||||
|
||||
############### Parameters used for this run ################
|
||||
MDCONTROL{
|
||||
MAXITER= -1
|
||||
UDNEIGH= 1
|
||||
DUMPFREQ= 250
|
||||
RSFREQ= 500
|
||||
WRTFREQ= 25
|
||||
TOINITTEMP5= 1
|
||||
THERMPER= 500
|
||||
THERMRUN= 50000
|
||||
NVTON= 0
|
||||
NPTON= 0
|
||||
AVEPER= 1000
|
||||
SEED= 54
|
||||
SHOCKON= 0
|
||||
SHOCKSTART= 100000
|
||||
SHOCKDIR= 1
|
||||
MDADAPT= 0
|
||||
GETHUG= 0
|
||||
RSLEVEL= 0
|
||||
DT= 0.25000000000000000
|
||||
TEMPERATURE= 300.00000000000000
|
||||
FRICTION= 1000.0000000000000
|
||||
PTARGET= 0.0000000000000000
|
||||
UPARTICLE= 500.00000000000000
|
||||
USHOCK= -4590.0000000000000
|
||||
C0= 1300.0000000000000
|
||||
E0= -795.72497558593750
|
||||
V0= 896.98486328125000
|
||||
P0= 8.3149001002311707E-002
|
||||
RNDIST=GAUSSIAN
|
||||
SEEDINIT=UNIFORM
|
||||
NPTTYPE=ISO
|
||||
DUMMY= F
|
||||
}
|
||||
|
||||
LIBCALLS 0
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -110.94281402417451 9.3197859655447317 0.0000000000000000 3.3331152608769714
|
||||
LIBCALLS 1
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -111.00875524736128 9.3653691493930946 0.0000000000000000 3.3307590218500454
|
||||
LIBCALLS 2
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -111.20542679804305 9.5022104076319209 0.0000000000000000 3.3237269236958826
|
||||
LIBCALLS 3
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -111.52938059528239 9.7304811436977623 0.0000000000000000 3.3121168872278743
|
||||
LIBCALLS 4
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -111.97463249071366 10.050121693432235 0.0000000000000000 3.2961492065207088
|
||||
LIBCALLS 5
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.53270518796754 10.460328095449432 0.0000000000000000 3.2761112890303719
|
||||
LIBCALLS 6
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.19233973551384 10.958848347453728 0.0000000000000000 3.2524094948032394
|
||||
LIBCALLS 7
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.93936061504219 11.541120618354967 0.0000000000000000 3.2255715906285793
|
||||
LIBCALLS 8
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.75657630591589 12.199315594286325 0.0000000000000000 3.1962412869596100
|
||||
LIBCALLS 9
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -115.62363727592754 12.921383532128770 0.0000000000000000 3.1652236023838971
|
||||
LIBCALLS 10
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -116.51738028417616 13.690253224922545 0.0000000000000000 3.1333864449223818
|
||||
LIBCALLS 11
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -117.41167836078414 14.483370804317431 0.0000000000000000 3.1018474945925432
|
||||
LIBCALLS 12
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -118.27888830961329 15.272791625586624 0.0000000000000000 3.0716022180609772
|
||||
LIBCALLS 13
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.09006809777934 16.026020995592610 0.0000000000000000 3.0437832241644842
|
||||
LIBCALLS 14
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.81665859965702 16.707725410478066 0.0000000000000000 3.0194382402972129
|
||||
LIBCALLS 15
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.43171665196000 17.282293509806884 0.0000000000000000 2.9995944159949395
|
||||
LIBCALLS 16
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.91202932933264 17.717025741135480 0.0000000000000000 2.9850159611897484
|
||||
LIBCALLS 17
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.23935305628714 17.985521384886379 0.0000000000000000 2.9763132734231292
|
||||
LIBCALLS 18
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.40195013006486 18.070687763205626 0.0000000000000000 2.9738279411203812
|
||||
LIBCALLS 19
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.39540873020161 17.966785565900089 0.0000000000000000 2.9776410698341418
|
||||
LIBCALLS 20
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.22299732491055 17.680085363043698 0.0000000000000000 2.9875419962840417
|
||||
LIBCALLS 21
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.89520311723561 17.228004261852682 0.0000000000000000 3.0030824758482719
|
||||
LIBCALLS 22
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.42892991839108 16.636927104987372 0.0000000000000000 3.0235548851138652
|
||||
LIBCALLS 23
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.84603562384113 15.939176953031323 0.0000000000000000 3.0480682132279808
|
||||
LIBCALLS 24
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.17151378155378 15.169713318754383 0.0000000000000000 3.0757033760823562
|
||||
LIBCALLS 25
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -118.43237009319661 14.363090728730079 0.0000000000000000 3.1053593079625457
|
||||
LIBCALLS 26
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -117.65587959220025 13.551051330611342 0.0000000000000000 3.1359367589132958
|
||||
LIBCALLS 27
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -116.86794783202731 12.760928656005802 0.0000000000000000 3.1665525874091585
|
||||
LIBCALLS 28
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -116.09314111752745 12.014864684105008 0.0000000000000000 3.1962157162544820
|
||||
LIBCALLS 29
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -115.35329645548983 11.329720850249741 0.0000000000000000 3.2241713466126849
|
||||
LIBCALLS 30
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.66766945168203 10.717501941208962 0.0000000000000000 3.2497326120829619
|
||||
LIBCALLS 31
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.05267853351812 10.186102377105355 0.0000000000000000 3.2723439005172468
|
||||
LIBCALLS 32
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.52195471723405 9.7402032028335377 0.0000000000000000 3.2915777178346559
|
||||
LIBCALLS 33
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.08654808143162 9.3821857555240076 0.0000000000000000 3.3070881064986164
|
||||
LIBCALLS 34
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.75494140290169 9.1129669843369658 0.0000000000000000 3.3186769594405297
|
||||
LIBCALLS 35
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.53346080566452 8.9326971516334606 0.0000000000000000 3.3261797960311763
|
||||
LIBCALLS 36
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.42631053676025 8.8412887543407273 0.0000000000000000 3.3295101207595583
|
||||
LIBCALLS 37
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.43567911088179 8.8387604511711384 0.0000000000000000 3.3286360397306387
|
||||
LIBCALLS 38
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.56180874683180 8.9253908783870841 0.0000000000000000 3.3235794828927934
|
||||
LIBCALLS 39
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.80290981416660 9.1016780459478674 0.0000000000000000 3.3144303393175201
|
||||
LIBCALLS 40
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.15529209572232 9.3681021116147463 0.0000000000000000 3.3012719922659173
|
||||
LIBCALLS 41
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.61284717182851 9.7246892073080176 0.0000000000000000 3.2843276907821406
|
||||
LIBCALLS 42
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.16711238367500 10.170382433756300 0.0000000000000000 3.2638758866524444
|
||||
LIBCALLS 43
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.80697882175535 10.702240750749448 0.0000000000000000 3.2402928278295451
|
||||
LIBCALLS 44
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -115.51862249254057 11.314512276989859 0.0000000000000000 3.2140189987358694
|
||||
LIBCALLS 45
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -116.28534475502829 11.997664972113199 0.0000000000000000 3.1855791836729437
|
||||
LIBCALLS 46
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -117.08723294353808 12.737504349188432 0.0000000000000000 3.1557205936583181
|
||||
LIBCALLS 47
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -117.90172272355942 13.514542609912253 0.0000000000000000 3.1252466759266087
|
||||
LIBCALLS 48
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -118.70392627447073 14.303827027310493 0.0000000000000000 3.0950533786893732
|
||||
LIBCALLS 49
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.46728361372288 15.075425279261220 0.0000000000000000 3.0661202668284480
|
||||
LIBCALLS 50
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.16480071670361 15.795723720235596 0.0000000000000000 3.0394030522382605
|
||||
LIBCALLS 51
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.77012122199473 16.429579578207949 0.0000000000000000 3.0158910566711334
|
||||
LIBCALLS 52
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.25943485841766 16.943195338409559 0.0000000000000000 2.9964108616830281
|
||||
LIBCALLS 53
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.61275582007269 17.307379355481601 0.0000000000000000 2.9817016064731785
|
||||
LIBCALLS 54
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.81557415209883 17.500688554193868 0.0000000000000000 2.9722905637821611
|
||||
LIBCALLS 55
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.85979389563140 17.511877645177901 0.0000000000000000 2.9685356305551474
|
||||
LIBCALLS 56
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.74454585055143 17.341170281709367 0.0000000000000000 2.9705149057151141
|
||||
LIBCALLS 57
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.47625724150488 17.000096879575938 0.0000000000000000 2.9780008785307088
|
||||
LIBCALLS 58
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.06771474420596 16.509959464438374 0.0000000000000000 2.9906138266349656
|
||||
LIBCALLS 59
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.53702830874704 15.899266098308772 0.0000000000000000 3.0078351734174715
|
||||
LIBCALLS 60
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.90667912574422 15.200652842845301 0.0000000000000000 3.0288733658622142
|
||||
LIBCALLS 61
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.20142467775943 14.447825469624703 0.0000000000000000 3.0529481020908245
|
||||
LIBCALLS 62
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -118.44747494197328 13.672949108115853 0.0000000000000000 3.0790791220573088
|
||||
LIBCALLS 63
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -117.67063237406208 12.904741667499017 0.0000000000000000 3.1063745183559131
|
||||
LIBCALLS 64
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -116.89550228683500 12.167344616151606 0.0000000000000000 3.1339818740985033
|
||||
LIBCALLS 65
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -116.14487351718614 11.479908971904207 0.0000000000000000 3.1610748652786995
|
||||
LIBCALLS 66
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -115.43917601644073 10.856755674815151 0.0000000000000000 3.1869042214936911
|
||||
LIBCALLS 67
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.79630542914917 10.307930318909381 0.0000000000000000 3.2107896540741994
|
||||
LIBCALLS 68
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.23118520942130 9.8399835349372715 0.0000000000000000 3.2322754400486997
|
||||
LIBCALLS 69
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.75645667348935 9.4568320682906393 0.0000000000000000 3.2508686207040949
|
||||
LIBCALLS 70
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.38220191758144 9.1605931457952803 0.0000000000000000 3.2662052636761625
|
||||
LIBCALLS 71
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.11651461323785 8.9523172650382463 0.0000000000000000 3.2778578161416640
|
||||
LIBCALLS 72
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.96490300473705 8.8325758589074610 0.0000000000000000 3.2856373346184280
|
||||
LIBCALLS 73
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -112.93101384064629 8.8018792766284140 0.0000000000000000 3.2893376450243901
|
||||
LIBCALLS 74
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.01657988020818 8.8609123616606951 0.0000000000000000 3.2887786713823335
|
||||
LIBCALLS 75
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.22122702505257 9.0105808374276855 0.0000000000000000 3.2838806809960044
|
||||
LIBCALLS 76
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.54255812607462 9.2518619694254909 0.0000000000000000 3.2746170980725564
|
||||
LIBCALLS 77
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -113.97595003796289 9.5854566564348804 0.0000000000000000 3.2610495238703536
|
||||
LIBCALLS 78
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -114.51445216471619 10.011242264155852 0.0000000000000000 3.2433103887056101
|
||||
LIBCALLS 79
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -115.14835871057100 10.527538366743359 0.0000000000000000 3.2217018278255036
|
||||
LIBCALLS 80
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -115.86512618816471 11.130220642932718 0.0000000000000000 3.1966546818138903
|
||||
LIBCALLS 81
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -116.64916580084807 11.811746817430592 0.0000000000000000 3.1687509169099037
|
||||
LIBCALLS 82
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -117.48162972769103 12.560201275368994 0.0000000000000000 3.1387793445426220
|
||||
LIBCALLS 83
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -118.34080112521505 13.358507776606700 0.0000000000000000 3.1076005013428842
|
||||
LIBCALLS 84
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.20206255799097 14.183999576696523 0.0000000000000000 3.0762625451098367
|
||||
LIBCALLS 85
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.03875955947012 15.008549885925623 0.0000000000000000 3.0458557745855401
|
||||
LIBCALLS 86
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.82281065648482 15.799445052997022 0.0000000000000000 3.0175902569508040
|
||||
LIBCALLS 87
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.52638053902615 16.521105731022047 0.0000000000000000 2.9925661691795984
|
||||
LIBCALLS 88
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -122.12297505178334 17.137613862262167 0.0000000000000000 2.9718740800190462
|
||||
LIBCALLS 89
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -122.58954501498538 17.615819283155187 0.0000000000000000 2.9563457612376758
|
||||
LIBCALLS 90
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -122.90768650775293 17.928615619513138 0.0000000000000000 2.9466637669908935
|
||||
LIBCALLS 91
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -123.06510359278838 18.057846294334183 0.0000000000000000 2.9432773288779130
|
||||
LIBCALLS 92
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -123.05653995529889 17.996310208253615 0.0000000000000000 2.9463730237128352
|
||||
LIBCALLS 93
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -122.88443709725219 17.748486968230267 0.0000000000000000 2.9557418006906766
|
||||
LIBCALLS 94
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -122.55804625906457 17.329857520510558 0.0000000000000000 2.9710497340098647
|
||||
LIBCALLS 95
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -122.09316916859144 16.764989519228550 0.0000000000000000 2.9916333369114647
|
||||
LIBCALLS 96
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -121.51050736457847 16.084787212290774 0.0000000000000000 3.0167038701280053
|
||||
LIBCALLS 97
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.83475656442954 15.323405512114466 0.0000000000000000 3.0451593241515909
|
||||
LIBCALLS 98
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -120.09218577985371 14.515310319889227 0.0000000000000000 3.0759929793994090
|
||||
LIBCALLS 99
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -119.30969482099719 13.692843612811791 0.0000000000000000 3.1081426979179545
|
||||
LIBCALLS 100
|
||||
Energy Components (TRRHOH, EREP, ENTE, ECOUL) -118.51358261827596 12.884492109393644 0.0000000000000000 3.1405428597121636
|
||||
@ -1,152 +0,0 @@
|
||||
LAMMPS (1 Sep 2017)
|
||||
# simple water model with LATTE
|
||||
|
||||
units metal
|
||||
atom_style full
|
||||
atom_modify sort 0 0.0 # turn off sorting of the coordinates
|
||||
|
||||
read_data data.water
|
||||
orthogonal box = (0 0 0) to (6.267 6.267 6.267)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
24 atoms
|
||||
0 = max # of 1-2 neighbors
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
|
||||
# replicate system if requested
|
||||
|
||||
variable x index 1
|
||||
variable y index 1
|
||||
variable z index 1
|
||||
|
||||
variable nrep equal v_x*v_y*v_z
|
||||
if "${nrep} > 1" then "replicate $x $y $z"
|
||||
|
||||
# initialize system
|
||||
|
||||
velocity all create 0.0 87287 loop geom
|
||||
|
||||
pair_style zero 1.0
|
||||
pair_coeff * *
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 1 delay 0 check yes
|
||||
|
||||
timestep 0.00025
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
fix 2 all latte NULL
|
||||
fix_modify 2 energy yes
|
||||
|
||||
thermo_style custom step temp pe etotal press
|
||||
|
||||
# minimization
|
||||
|
||||
thermo 10
|
||||
min_style fire
|
||||
minimize 1.0e-9 1.0e-9 500 500
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2
|
||||
ghost atom cutoff = 2
|
||||
binsize = 1, bins = 7 7 7
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair zero, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.629 | 5.629 | 5.629 Mbytes
|
||||
Step Temp PotEng TotEng Press
|
||||
0 0 -104.95614 -104.95614 48229.712
|
||||
10 349.44219 -105.50971 -104.47083 62149.591
|
||||
20 1253.6752 -107.00898 -103.28182 116444.44
|
||||
30 134.63588 -107.56184 -107.16157 59854.143
|
||||
40 2.4043703 -108.15301 -108.14586 32685.77
|
||||
50 162.13426 -108.40551 -107.92349 62104.273
|
||||
60 134.03149 -108.70118 -108.30271 49400.525
|
||||
70 64.159014 -108.78034 -108.5896 37243.303
|
||||
80 240.49926 -109.10766 -108.39266 42158.884
|
||||
90 0.60467192 -109.61818 -109.61639 14107.515
|
||||
100 1.4691163 -109.65556 -109.65119 21596.775
|
||||
110 30.500628 -109.69267 -109.602 16104.639
|
||||
120 120.62379 -109.83749 -109.47888 9474.971
|
||||
130 8.4742975 -109.99986 -109.97467 10104.102
|
||||
140 3.4732679 -110.01209 -110.00176 11990.442
|
||||
150 24.749482 -110.04313 -109.96955 10851.569
|
||||
160 4.1106505 -110.13288 -110.12066 8257.3969
|
||||
170 0.0065628716 -110.18061 -110.18059 7876.8748
|
||||
180 2.0542078 -110.1837 -110.17759 7996.0533
|
||||
190 20.134782 -110.21071 -110.15085 7556.1811
|
||||
200 2.3397267 -110.3244 -110.31745 3767.062
|
||||
210 4.3544709 -110.34438 -110.33143 4889.145
|
||||
220 1.1872367 -110.37457 -110.37104 4162.6543
|
||||
230 2.2798399 -110.38081 -110.37403 4321.0943
|
||||
240 11.835907 -110.39611 -110.36092 4187.5757
|
||||
250 0.13741849 -110.41453 -110.41412 3720.7527
|
||||
260 4.2283185 -110.42036 -110.40779 3743.3494
|
||||
270 0.47243724 -110.44349 -110.44208 3172.1866
|
||||
280 0.06090137 -110.45428 -110.4541 3065.9348
|
||||
290 5.3413962 -110.46285 -110.44697 3121.2924
|
||||
300 8.2032986 -110.48519 -110.4608 2705.5001
|
||||
310 2.0783529 -110.48807 -110.48189 2740.7989
|
||||
320 16.629185 -110.51002 -110.46058 2581.7434
|
||||
330 0.19723065 -110.53444 -110.53385 1942.0228
|
||||
340 6.2758334 -110.54361 -110.52495 1924.0965
|
||||
350 1.4539052 -110.59108 -110.58676 -449.41056
|
||||
360 0.0514233 -110.60143 -110.60128 1284.8259
|
||||
370 1.7240145 -110.60394 -110.59881 1468.0004
|
||||
380 13.28516 -110.62337 -110.58387 1573.4714
|
||||
390 1.2247432 -110.63525 -110.63161 1113.4557
|
||||
400 0.3946985 -110.63694 -110.63576 1083.0801
|
||||
410 2.9831433 -110.641 -110.63213 1112.419
|
||||
420 0.068550589 -110.66029 -110.66009 897.09211
|
||||
430 0.83976182 -110.66259 -110.66009 918.69832
|
||||
440 4.4760907 -110.66844 -110.65513 915.24435
|
||||
450 1.2841241 -110.67482 -110.671 953.30422
|
||||
460 2.5707455 -110.68509 -110.67745 775.21273
|
||||
470 0.99721544 -110.68646 -110.6835 812.74984
|
||||
480 6.8379261 -110.69468 -110.67435 787.9705
|
||||
490 0.18134438 -110.69628 -110.69574 675.52792
|
||||
500 2.0946523 -110.69918 -110.69295 696.82065
|
||||
Loop time of 31.775 on 1 procs for 500 steps with 24 atoms
|
||||
|
||||
884.8% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
Minimization stats:
|
||||
Stopping criterion = max iterations
|
||||
Energy initial, next-to-last, final =
|
||||
-104.95614332 -110.698546127 -110.699182193
|
||||
Force two-norm initial, final = 19.119 0.234621
|
||||
Force max component initial, final = 11.7759 0.0903198
|
||||
Final line search alpha, max atom move = 0 0
|
||||
Iterations, force evaluations = 500 500
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.00016952 | 0.00016952 | 0.00016952 | 0.0 | 0.00
|
||||
Bond | 2.8372e-05 | 2.8372e-05 | 2.8372e-05 | 0.0 | 0.00
|
||||
Neigh | 3.0994e-05 | 3.0994e-05 | 3.0994e-05 | 0.0 | 0.00
|
||||
Comm | 0.00060034 | 0.00060034 | 0.00060034 | 0.0 | 0.00
|
||||
Output | 0.00057817 | 0.00057817 | 0.00057817 | 0.0 | 0.00
|
||||
Modify | 31.771 | 31.771 | 31.771 | 0.0 | 99.99
|
||||
Other | | 0.002469 | | | 0.01
|
||||
|
||||
Nlocal: 24 ave 24 max 24 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 71 ave 71 max 71 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 27 ave 27 max 27 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 27
|
||||
Ave neighs/atom = 1.125
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 2
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:31
|
||||
170
examples/latte/log.21Jun18.latte.graphene.boxrelax.g++.1
Normal file
170
examples/latte/log.21Jun18.latte.graphene.boxrelax.g++.1
Normal file
@ -0,0 +1,170 @@
|
||||
LAMMPS (11 May 2018)
|
||||
# Simple water model with LATTE
|
||||
|
||||
units metal
|
||||
atom_style full
|
||||
atom_modify sort 0 0.0 # turn off sorting of the coordinates
|
||||
|
||||
read_data data.graphene.boxrel
|
||||
triclinic box = (0 0 0) to (10 8 20) with tilt (4.89859e-16 1.22465e-15 1.22465e-15)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
32 atoms
|
||||
0 = max # of 1-2 neighbors
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
|
||||
# replicate system if requested
|
||||
|
||||
variable x index 1
|
||||
variable y index 1
|
||||
variable z index 1
|
||||
|
||||
variable nrep equal v_x*v_y*v_z
|
||||
if "${nrep} > 1" then "replicate $x $y $z"
|
||||
|
||||
# initialize system
|
||||
|
||||
velocity all create 0.0 87287 loop geom
|
||||
|
||||
pair_style zero 1.0
|
||||
pair_coeff * *
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 1 delay 0 check yes
|
||||
|
||||
timestep 0.00025
|
||||
|
||||
fix 1 all box/relax iso 0.0 vmax 0.001
|
||||
|
||||
fix 2 all latte NULL
|
||||
fix_modify 2 energy yes
|
||||
|
||||
thermo_style custom etotal
|
||||
|
||||
# minimization
|
||||
|
||||
thermo 1
|
||||
fix 3 all print 1 "Total Energy ="
|
||||
min_style cg
|
||||
min_modify dmax 0.1
|
||||
min_modify line quadratic
|
||||
minimize 1.0e-4 1.0e-4 10000 10000
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2
|
||||
ghost atom cutoff = 2
|
||||
binsize = 1, bins = 11 9 20
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair zero, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton/tri
|
||||
stencil: half/bin/3d/newton/tri
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 6.779 | 6.779 | 6.779 Mbytes
|
||||
TotEng
|
||||
-247.46002
|
||||
-247.67224
|
||||
-247.87937
|
||||
-248.08148
|
||||
-248.27865
|
||||
-248.47096
|
||||
-248.65851
|
||||
-248.84137
|
||||
-249.01964
|
||||
-249.19342
|
||||
-249.36281
|
||||
-249.52791
|
||||
-249.68883
|
||||
-249.8457
|
||||
-249.99865
|
||||
-250.1478
|
||||
-250.29332
|
||||
-250.43535
|
||||
-250.57409
|
||||
-250.70972
|
||||
-250.84247
|
||||
-250.97258
|
||||
-251.10035
|
||||
-251.2261
|
||||
-251.35021
|
||||
-251.47314
|
||||
-251.59543
|
||||
-251.71776
|
||||
-251.84096
|
||||
-251.9661
|
||||
-252.09459
|
||||
-252.22833
|
||||
-252.37003
|
||||
-252.52371
|
||||
-252.69578
|
||||
-252.89752
|
||||
-253.15197
|
||||
-253.52044
|
||||
-254.31418
|
||||
-255.6175
|
||||
-256.8162
|
||||
-258.1227
|
||||
-259.38401
|
||||
-260.74831
|
||||
-262.03991
|
||||
-263.5463
|
||||
-264.70486
|
||||
-267.69144
|
||||
-267.88682
|
||||
-269.03519
|
||||
-270.60187
|
||||
-270.65382
|
||||
-270.74279
|
||||
-271.55883
|
||||
-271.81248
|
||||
-271.87529
|
||||
-273.01494
|
||||
-273.23948
|
||||
-273.28719
|
||||
-273.35272
|
||||
-273.41591
|
||||
-273.46274
|
||||
-273.54755
|
||||
-273.58318
|
||||
-273.73111
|
||||
-273.75754
|
||||
Loop time of 39.4155 on 1 procs for 65 steps with 32 atoms
|
||||
|
||||
1582.4% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
Minimization stats:
|
||||
Stopping criterion = energy tolerance
|
||||
Energy initial, next-to-last, final =
|
||||
-247.460020579 -273.731112592 -273.757543461
|
||||
Force two-norm initial, final = 201.608 9.43485
|
||||
Force max component initial, final = 188.924 2.41297
|
||||
Final line search alpha, max atom move = 0.000223273 0.00053875
|
||||
Iterations, force evaluations = 65 65
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.00012159 | 0.00012159 | 0.00012159 | 0.0 | 0.00
|
||||
Bond | 5.1975e-05 | 5.1975e-05 | 5.1975e-05 | 0.0 | 0.00
|
||||
Neigh | 4.1962e-05 | 4.1962e-05 | 4.1962e-05 | 0.0 | 0.00
|
||||
Comm | 0.00026107 | 0.00026107 | 0.00026107 | 0.0 | 0.00
|
||||
Output | 0.0013342 | 0.0013342 | 0.0013342 | 0.0 | 0.00
|
||||
Modify | 39.412 | 39.412 | 39.412 | 0.0 | 99.99
|
||||
Other | | 0.00127 | | | 0.00
|
||||
|
||||
Nlocal: 32 ave 32 max 32 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 100 ave 100 max 100 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 48 ave 48 max 48 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 48
|
||||
Ave neighs/atom = 1.5
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 1
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:40
|
||||
103
examples/latte/log.21Jun18.latte.sucrose.g++.1
Normal file
103
examples/latte/log.21Jun18.latte.sucrose.g++.1
Normal file
@ -0,0 +1,103 @@
|
||||
LAMMPS (11 May 2018)
|
||||
# simple sucrose model with LATTE
|
||||
|
||||
units metal
|
||||
atom_style full
|
||||
atom_modify sort 0 0.0 # turn off sorting of the coordinates
|
||||
|
||||
read_data data.sucrose
|
||||
orthogonal box = (0 0 0) to (17.203 18.009 21.643)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
45 atoms
|
||||
0 = max # of 1-2 neighbors
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
|
||||
# replicate system if requested
|
||||
|
||||
variable x index 1
|
||||
variable y index 1
|
||||
variable z index 1
|
||||
|
||||
variable nrep equal v_x*v_y*v_z
|
||||
if "${nrep} > 1" then "replicate $x $y $z"
|
||||
|
||||
# initialize system
|
||||
|
||||
velocity all create 0.0 87287 loop geom
|
||||
|
||||
pair_style zero 1.0
|
||||
pair_coeff * *
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 1 delay 0 check yes
|
||||
|
||||
timestep 0.00025
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
fix 2 all latte NULL
|
||||
fix_modify 2 energy yes
|
||||
|
||||
thermo_style custom step temp pe etotal press
|
||||
|
||||
# dynamics
|
||||
|
||||
thermo 10
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2
|
||||
ghost atom cutoff = 2
|
||||
binsize = 1, bins = 18 19 22
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair zero, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 0.5064 | 0.5064 | 0.5064 Mbytes
|
||||
Step Temp PotEng TotEng Press
|
||||
0 0 -251.26617 -251.26617 16.617234
|
||||
10 0.025263709 -251.26631 -251.26617 8.0576708
|
||||
20 0.034232467 -251.26636 -251.26617 1.6673442
|
||||
30 0.059079556 -251.2665 -251.26617 11.058458
|
||||
40 0.055499766 -251.26648 -251.26617 14.837775
|
||||
50 0.058499509 -251.2665 -251.26617 6.7183113
|
||||
60 0.071094535 -251.26657 -251.26617 6.6133687
|
||||
70 0.084309439 -251.26665 -251.26617 12.372721
|
||||
80 0.1089929 -251.26679 -251.26617 8.8355516
|
||||
90 0.11378257 -251.26681 -251.26617 5.1177922
|
||||
100 0.13003966 -251.26691 -251.26617 8.2431185
|
||||
Loop time of 27.8386 on 1 procs for 100 steps with 45 atoms
|
||||
|
||||
Performance: 0.078 ns/day, 309.318 hours/ns, 3.592 timesteps/s
|
||||
1799.6% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 8.3685e-05 | 8.3685e-05 | 8.3685e-05 | 0.0 | 0.00
|
||||
Bond | 7.4148e-05 | 7.4148e-05 | 7.4148e-05 | 0.0 | 0.00
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.00016689 | 0.00016689 | 0.00016689 | 0.0 | 0.00
|
||||
Output | 0.00032401 | 0.00032401 | 0.00032401 | 0.0 | 0.00
|
||||
Modify | 27.837 | 27.837 | 27.837 | 0.0 |100.00
|
||||
Other | | 0.0005403 | | | 0.00
|
||||
|
||||
Nlocal: 45 ave 45 max 45 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 0 ave 0 max 0 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 59 ave 59 max 59 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 59
|
||||
Ave neighs/atom = 1.31111
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:28
|
||||
103
examples/latte/log.21Jun18.latte.water.g++.1
Normal file
103
examples/latte/log.21Jun18.latte.water.g++.1
Normal file
@ -0,0 +1,103 @@
|
||||
LAMMPS (11 May 2018)
|
||||
# simple water model with LATTE
|
||||
|
||||
units metal
|
||||
atom_style full
|
||||
atom_modify sort 0 0.0 # turn off sorting of the coordinates
|
||||
|
||||
read_data data.water
|
||||
orthogonal box = (0 0 0) to (6.267 6.267 6.267)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
24 atoms
|
||||
0 = max # of 1-2 neighbors
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
|
||||
# replicate system if requested
|
||||
|
||||
variable x index 1
|
||||
variable y index 1
|
||||
variable z index 1
|
||||
|
||||
variable nrep equal v_x*v_y*v_z
|
||||
if "${nrep} > 1" then "replicate $x $y $z"
|
||||
|
||||
# initialize system
|
||||
|
||||
velocity all create 0.0 87287 loop geom
|
||||
|
||||
pair_style zero 1.0
|
||||
pair_coeff * *
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 1 delay 0 check yes
|
||||
|
||||
timestep 0.00025
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
fix 2 all latte NULL
|
||||
fix_modify 2 energy yes
|
||||
|
||||
thermo_style custom step temp pe etotal press
|
||||
|
||||
# dynamics
|
||||
|
||||
thermo 10
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2
|
||||
ghost atom cutoff = 2
|
||||
binsize = 1, bins = 7 7 7
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair zero, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.629 | 5.629 | 5.629 Mbytes
|
||||
Step Temp PotEng TotEng Press
|
||||
0 0 -104.95594 -104.95594 48236.006
|
||||
10 336.5303 -105.96026 -104.95976 97997.303
|
||||
20 529.06385 -106.53021 -104.95731 131520.49
|
||||
30 753.62616 -107.1995 -104.95898 49297.371
|
||||
40 716.6565 -107.08802 -104.95741 28307.272
|
||||
50 824.04417 -107.40822 -104.95835 102167.48
|
||||
60 933.56056 -107.73478 -104.95932 92508.792
|
||||
70 851.18518 -107.48766 -104.95711 13993.28
|
||||
80 999.80265 -107.93146 -104.95906 36700.417
|
||||
90 998.77707 -107.92569 -104.95634 107233.7
|
||||
100 1281.4446 -108.76961 -104.95989 49703.193
|
||||
Loop time of 10.6388 on 1 procs for 100 steps with 24 atoms
|
||||
|
||||
Performance: 0.203 ns/day, 118.209 hours/ns, 9.400 timesteps/s
|
||||
6459.7% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 7.6771e-05 | 7.6771e-05 | 7.6771e-05 | 0.0 | 0.00
|
||||
Bond | 7.5817e-05 | 7.5817e-05 | 7.5817e-05 | 0.0 | 0.00
|
||||
Neigh | 4.6015e-05 | 4.6015e-05 | 4.6015e-05 | 0.0 | 0.00
|
||||
Comm | 0.00031829 | 0.00031829 | 0.00031829 | 0.0 | 0.00
|
||||
Output | 0.00032401 | 0.00032401 | 0.00032401 | 0.0 | 0.00
|
||||
Modify | 10.637 | 10.637 | 10.637 | 0.0 | 99.99
|
||||
Other | | 0.00052 | | | 0.00
|
||||
|
||||
Nlocal: 24 ave 24 max 24 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 77 ave 77 max 77 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 31 ave 31 max 31 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 31
|
||||
Ave neighs/atom = 1.29167
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 2
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:10
|
||||
108
examples/latte/log.21Jun18.latte.water.min.g++.1
Normal file
108
examples/latte/log.21Jun18.latte.water.min.g++.1
Normal file
@ -0,0 +1,108 @@
|
||||
LAMMPS (11 May 2018)
|
||||
# simple water model with LATTE
|
||||
|
||||
units metal
|
||||
atom_style full
|
||||
atom_modify sort 0 0.0 # turn off sorting of the coordinates
|
||||
|
||||
read_data data.water
|
||||
orthogonal box = (0 0 0) to (6.267 6.267 6.267)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
24 atoms
|
||||
0 = max # of 1-2 neighbors
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
|
||||
# replicate system if requested
|
||||
|
||||
variable x index 1
|
||||
variable y index 1
|
||||
variable z index 1
|
||||
|
||||
variable nrep equal v_x*v_y*v_z
|
||||
if "${nrep} > 1" then "replicate $x $y $z"
|
||||
|
||||
# initialize system
|
||||
|
||||
velocity all create 0.0 87287 loop geom
|
||||
|
||||
pair_style zero 1.0
|
||||
pair_coeff * *
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 1 delay 0 check yes
|
||||
|
||||
timestep 0.00025
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
fix 2 all latte NULL
|
||||
fix_modify 2 energy yes
|
||||
|
||||
thermo_style custom step temp pe etotal press
|
||||
|
||||
# minimization
|
||||
|
||||
thermo 10
|
||||
|
||||
min_style fire
|
||||
minimize 1.0e-4 1.0e-4 500 500
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 2
|
||||
ghost atom cutoff = 2
|
||||
binsize = 1, bins = 7 7 7
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair zero, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 5.629 | 5.629 | 5.629 Mbytes
|
||||
Step Temp PotEng TotEng Press
|
||||
0 0 -104.95594 -104.95594 48236.006
|
||||
10 349.4534 -105.50948 -104.47056 62157.729
|
||||
20 1253.6636 -107.00863 -103.28151 116456.71
|
||||
30 134.64051 -107.56155 -107.16127 59864.196
|
||||
40 2.4044989 -108.1527 -108.14556 32695.648
|
||||
47 137.26885 -108.30413 -107.89603 60177.442
|
||||
Loop time of 6.42677 on 1 procs for 47 steps with 24 atoms
|
||||
|
||||
6481.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
Minimization stats:
|
||||
Stopping criterion = energy tolerance
|
||||
Energy initial, next-to-last, final =
|
||||
-104.955944301 -108.302982895 -108.304126127
|
||||
Force two-norm initial, final = 19.119 3.44609
|
||||
Force max component initial, final = 11.7758 1.3408
|
||||
Final line search alpha, max atom move = 0 0
|
||||
Iterations, force evaluations = 47 47
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 4.6253e-05 | 4.6253e-05 | 4.6253e-05 | 0.0 | 0.00
|
||||
Bond | 3.1948e-05 | 3.1948e-05 | 3.1948e-05 | 0.0 | 0.00
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.00014353 | 0.00014353 | 0.00014353 | 0.0 | 0.00
|
||||
Output | 0.00012302 | 0.00012302 | 0.00012302 | 0.0 | 0.00
|
||||
Modify | 6.426 | 6.426 | 6.426 | 0.0 | 99.99
|
||||
Other | | 0.0004699 | | | 0.01
|
||||
|
||||
Nlocal: 24 ave 24 max 24 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 71 ave 71 max 71 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 37 ave 37 max 37 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 37
|
||||
Ave neighs/atom = 1.54167
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:06
|
||||
@ -1,4 +1,6 @@
|
||||
#ifndef _WIN32
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
#include "Function.h"
|
||||
#include "ATC_Error.h"
|
||||
#include "LammpsInterface.h"
|
||||
@ -59,9 +61,13 @@ namespace ATC {
|
||||
{
|
||||
string type = args[0];
|
||||
int narg = nargs -1;
|
||||
#ifdef _WIN32
|
||||
double *dargs = (double *) _alloca(sizeof(double) * narg);
|
||||
#else
|
||||
double *dargs = (double *) alloca(sizeof(double) * narg);
|
||||
#endif
|
||||
for (int i = 0; i < narg; ++i) dargs[i] = atof(args[i+1]);
|
||||
|
||||
|
||||
return function(type, narg, dargs);
|
||||
}
|
||||
|
||||
@ -193,7 +199,11 @@ XT_Function_Mgr * XT_Function_Mgr::myInstance_ = NULL;
|
||||
{
|
||||
string type = args[0];
|
||||
int narg = nargs -1;
|
||||
#ifdef _WIN32
|
||||
double *dargs = (double *) _alloca(sizeof(double) * narg);
|
||||
#else
|
||||
double *dargs = (double *) alloca(sizeof(double) * narg);
|
||||
#endif
|
||||
for (int i = 0; i < narg; ++i) dargs[i] = atof(args[i+1]);
|
||||
|
||||
return function(type, narg, dargs);
|
||||
|
||||
53
lib/gpu/Makefile.linux_multi
Normal file
53
lib/gpu/Makefile.linux_multi
Normal file
@ -0,0 +1,53 @@
|
||||
# /* ----------------------------------------------------------------------
|
||||
# Generic Linux Makefile for CUDA
|
||||
# - Change CUDA_ARCH for your GPU
|
||||
# ------------------------------------------------------------------------- */
|
||||
|
||||
# which file will be copied to Makefile.lammps
|
||||
|
||||
EXTRAMAKE = Makefile.lammps.standard
|
||||
|
||||
ifeq ($(CUDA_HOME),)
|
||||
CUDA_HOME = /usr/local/cuda
|
||||
endif
|
||||
|
||||
NVCC = nvcc
|
||||
|
||||
# Kepler CUDA
|
||||
#CUDA_ARCH = -arch=sm_35
|
||||
# newer CUDA
|
||||
#CUDA_ARCH = -arch=sm_13
|
||||
# older CUDA
|
||||
#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
|
||||
|
||||
CUDA_ARCH = -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61
|
||||
|
||||
# this setting should match LAMMPS Makefile
|
||||
# one of LAMMPS_SMALLBIG (default), LAMMPS_BIGBIG and LAMMPS_SMALLSMALL
|
||||
|
||||
LMP_INC = -DLAMMPS_SMALLBIG
|
||||
|
||||
# precision for GPU calculations
|
||||
# -D_SINGLE_SINGLE # Single precision for all calculations
|
||||
# -D_DOUBLE_DOUBLE # Double precision for all calculations
|
||||
# -D_SINGLE_DOUBLE # Accumulation of forces, etc. in double
|
||||
|
||||
CUDA_PRECISION = -D_SINGLE_DOUBLE
|
||||
|
||||
CUDA_INCLUDE = -I$(CUDA_HOME)/include
|
||||
CUDA_LIB = -L$(CUDA_HOME)/lib64
|
||||
CUDA_OPTS = -DUNIX -O3 -Xptxas -v --use_fast_math $(LMP_INC) -Xcompiler "-fPIC -std=c++98"
|
||||
|
||||
CUDR_CPP = mpicxx -DMPI_GERYON -DUCL_NO_EXIT -DMPICH_IGNORE_CXX_SEEK -DOMPI_SKIP_MPICXX=1 -fPIC
|
||||
CUDR_OPTS = -O2 $(LMP_INC) # -xHost -no-prec-div -ansi-alias
|
||||
|
||||
BIN_DIR = ./
|
||||
OBJ_DIR = ./
|
||||
LIB_DIR = ./
|
||||
AR = ar
|
||||
BSH = /bin/sh
|
||||
|
||||
CUDPP_OPT = -DUSE_CUDPP -Icudpp_mini
|
||||
|
||||
include Nvidia.makefile_multi
|
||||
|
||||
@ -77,7 +77,12 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
|
||||
$(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o
|
||||
$(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
|
||||
$(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o
|
||||
|
||||
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
||||
@ -133,7 +138,12 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
|
||||
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
|
||||
$(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h \
|
||||
$(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h
|
||||
$(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h \
|
||||
$(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj_cubin.h \
|
||||
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h \
|
||||
$(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
|
||||
|
||||
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
||||
|
||||
@ -809,6 +819,66 @@ $(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/l
|
||||
$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/dipole_long_lj.cubin: lal_dipole_long_lj.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_dipole_long_lj.cu
|
||||
|
||||
$(OBJ_DIR)/dipole_long_lj_cubin.h: $(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj.cubin
|
||||
$(BIN2C) -c -n dipole_long_lj $(OBJ_DIR)/dipole_long_lj.cubin > $(OBJ_DIR)/dipole_long_lj_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_long_lj.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj.cpp $(OBJ_DIR)/dipole_long_lj_cubin.h $(OBJ_DIR)/lal_base_dipole.o
|
||||
$(CUDR) -o $@ -c lal_dipole_long_lj.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_long_lj_ext.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj_ext.cpp lal_base_dipole.h
|
||||
$(CUDR) -o $@ -c lal_dipole_long_lj_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_expand_coul_long.cubin: lal_lj_expand_coul_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_lj_expand_coul_long.cu
|
||||
|
||||
$(OBJ_DIR)/lj_expand_coul_long_cubin.h: $(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long.cubin
|
||||
$(BIN2C) -c -n lj_expand_coul_long $(OBJ_DIR)/lj_expand_coul_long.cubin > $(OBJ_DIR)/lj_expand_coul_long_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long.cpp $(OBJ_DIR)/lj_expand_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_expand_coul_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_long_cs.cubin: lal_coul_long_cs.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_coul_long_cs.cu
|
||||
|
||||
$(OBJ_DIR)/coul_long_cs_cubin.h: $(OBJ_DIR)/coul_long_cs.cubin $(OBJ_DIR)/coul_long_cs.cubin
|
||||
$(BIN2C) -c -n coul_long_cs $(OBJ_DIR)/coul_long_cs.cubin > $(OBJ_DIR)/coul_long_cs_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long_cs.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs.cpp $(OBJ_DIR)/coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_coul_long.o
|
||||
$(CUDR) -o $@ -c lal_coul_long_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long_cs_ext.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs_ext.cpp lal_coul_long.h
|
||||
$(CUDR) -o $@ -c lal_coul_long_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_long_cs.cubin: lal_born_coul_long_cs.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_long_cs.cu
|
||||
|
||||
$(OBJ_DIR)/born_coul_long_cs_cubin.h: $(OBJ_DIR)/born_coul_long_cs.cubin $(OBJ_DIR)/born_coul_long_cs.cubin
|
||||
$(BIN2C) -c -n born_coul_long_cs $(OBJ_DIR)/born_coul_long_cs.cubin > $(OBJ_DIR)/born_coul_long_cs_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs.cpp $(OBJ_DIR)/born_coul_long_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_long.o
|
||||
$(CUDR) -o $@ -c lal_born_coul_long_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs_ext.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs_ext.cpp lal_born_coul_long.h
|
||||
$(CUDR) -o $@ -c lal_born_coul_long_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_wolf_cs.cubin: lal_born_coul_wolf_cs.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --cubin -DNV_KERNEL -o $@ lal_born_coul_wolf_cs.cu
|
||||
|
||||
$(OBJ_DIR)/born_coul_wolf_cs_cubin.h: $(OBJ_DIR)/born_coul_wolf_cs.cubin $(OBJ_DIR)/born_coul_wolf_cs.cubin
|
||||
$(BIN2C) -c -n born_coul_wolf_cs $(OBJ_DIR)/born_coul_wolf_cs.cubin > $(OBJ_DIR)/born_coul_wolf_cs_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs.cpp $(OBJ_DIR)/born_coul_wolf_cs_cubin.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_wolf.o
|
||||
$(CUDR) -o $@ -c lal_born_coul_wolf_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs_ext.cpp lal_born_coul_wolf.h
|
||||
$(CUDR) -o $@ -c lal_born_coul_wolf_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||
|
||||
|
||||
854
lib/gpu/Nvidia.makefile_multi
Normal file
854
lib/gpu/Nvidia.makefile_multi
Normal file
@ -0,0 +1,854 @@
|
||||
CUDA = $(NVCC) $(CUDA_INCLUDE) $(CUDA_OPTS) -Icudpp_mini $(CUDA_ARCH) \
|
||||
$(CUDA_PRECISION)
|
||||
CUDR = $(CUDR_CPP) $(CUDR_OPTS) $(CUDA_PRECISION) $(CUDA_INCLUDE) \
|
||||
$(CUDPP_OPT)
|
||||
CUDA_LINK = $(CUDA_LIB) -lcudart
|
||||
BIN2C = $(CUDA_HOME)/bin/bin2c
|
||||
|
||||
GPU_LIB = $(LIB_DIR)/libgpu.a
|
||||
|
||||
# Headers for Geryon
|
||||
UCL_H = $(wildcard ./geryon/ucl*.h)
|
||||
NVC_H = $(wildcard ./geryon/nvc*.h) $(UCL_H)
|
||||
NVD_H = $(wildcard ./geryon/nvd*.h) $(UCL_H) lal_preprocessor.h
|
||||
# Headers for Pair Stuff
|
||||
PAIR_H = lal_atom.h lal_answer.h lal_neighbor_shared.h \
|
||||
lal_neighbor.h lal_precision.h lal_device.h \
|
||||
lal_balance.h lal_pppm.h
|
||||
|
||||
ALL_H = $(NVD_H) $(PAIR_H)
|
||||
|
||||
EXECS = $(BIN_DIR)/nvc_get_devices
|
||||
ifdef CUDPP_OPT
|
||||
CUDPP = $(OBJ_DIR)/cudpp.o $(OBJ_DIR)/cudpp_plan.o \
|
||||
$(OBJ_DIR)/cudpp_maximal_launch.o $(OBJ_DIR)/cudpp_plan_manager.o \
|
||||
$(OBJ_DIR)/radixsort_app.cu_o $(OBJ_DIR)/scan_app.cu_o
|
||||
endif
|
||||
OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
|
||||
$(OBJ_DIR)/lal_neighbor.o $(OBJ_DIR)/lal_neighbor_shared.o \
|
||||
$(OBJ_DIR)/lal_device.o $(OBJ_DIR)/lal_base_atomic.o \
|
||||
$(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_base_ellipsoid.o \
|
||||
$(OBJ_DIR)/lal_base_dipole.o $(OBJ_DIR)/lal_base_three.o \
|
||||
$(OBJ_DIR)/lal_base_dpd.o \
|
||||
$(OBJ_DIR)/lal_pppm.o $(OBJ_DIR)/lal_pppm_ext.o \
|
||||
$(OBJ_DIR)/lal_gayberne.o $(OBJ_DIR)/lal_gayberne_ext.o \
|
||||
$(OBJ_DIR)/lal_re_squared.o $(OBJ_DIR)/lal_re_squared_ext.o \
|
||||
$(OBJ_DIR)/lal_lj.o $(OBJ_DIR)/lal_lj_ext.o \
|
||||
$(OBJ_DIR)/lal_lj96.o $(OBJ_DIR)/lal_lj96_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_expand.o $(OBJ_DIR)/lal_lj_expand_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_coul.o $(OBJ_DIR)/lal_lj_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_coul_long.o $(OBJ_DIR)/lal_lj_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_dsf.o $(OBJ_DIR)/lal_lj_dsf_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_class2_long.o $(OBJ_DIR)/lal_lj_class2_long_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_long.o $(OBJ_DIR)/lal_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_morse.o $(OBJ_DIR)/lal_morse_ext.o \
|
||||
$(OBJ_DIR)/lal_charmm_long.o $(OBJ_DIR)/lal_charmm_long_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_sdk.o $(OBJ_DIR)/lal_lj_sdk_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_sdk_long.o $(OBJ_DIR)/lal_lj_sdk_long_ext.o \
|
||||
$(OBJ_DIR)/lal_eam.o $(OBJ_DIR)/lal_eam_ext.o \
|
||||
$(OBJ_DIR)/lal_eam_fs_ext.o $(OBJ_DIR)/lal_eam_alloy_ext.o \
|
||||
$(OBJ_DIR)/lal_buck.o $(OBJ_DIR)/lal_buck_ext.o \
|
||||
$(OBJ_DIR)/lal_buck_coul.o $(OBJ_DIR)/lal_buck_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_buck_coul_long.o $(OBJ_DIR)/lal_buck_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_table.o $(OBJ_DIR)/lal_table_ext.o \
|
||||
$(OBJ_DIR)/lal_yukawa.o $(OBJ_DIR)/lal_yukawa_ext.o \
|
||||
$(OBJ_DIR)/lal_born.o $(OBJ_DIR)/lal_born_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_wolf.o $(OBJ_DIR)/lal_born_coul_wolf_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_long.o $(OBJ_DIR)/lal_born_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_dipole_lj.o $(OBJ_DIR)/lal_dipole_lj_ext.o \
|
||||
$(OBJ_DIR)/lal_dipole_lj_sf.o $(OBJ_DIR)/lal_dipole_lj_sf_ext.o \
|
||||
$(OBJ_DIR)/lal_colloid.o $(OBJ_DIR)/lal_colloid_ext.o \
|
||||
$(OBJ_DIR)/lal_gauss.o $(OBJ_DIR)/lal_gauss_ext.o \
|
||||
$(OBJ_DIR)/lal_yukawa_colloid.o $(OBJ_DIR)/lal_yukawa_colloid_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \
|
||||
$(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \
|
||||
$(OBJ_DIR)/lal_vashishta.o $(OBJ_DIR)/lal_vashishta_ext.o \
|
||||
$(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \
|
||||
$(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
|
||||
$(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_coul_msm.o $(OBJ_DIR)/lal_lj_coul_msm_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o $(OBJ_DIR)/lal_lj_gromacs_ext.o \
|
||||
$(OBJ_DIR)/lal_dpd.o $(OBJ_DIR)/lal_dpd_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff.o $(OBJ_DIR)/lal_tersoff_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff_zbl.o $(OBJ_DIR)/lal_tersoff_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_tersoff_mod.o $(OBJ_DIR)/lal_tersoff_mod_ext.o \
|
||||
$(OBJ_DIR)/lal_coul.o $(OBJ_DIR)/lal_coul_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
|
||||
$(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
|
||||
$(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o
|
||||
|
||||
CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
|
||||
$(OBJ_DIR)/atom.cubin $(OBJ_DIR)/atom_cubin.h \
|
||||
$(OBJ_DIR)/neighbor_cpu.cubin $(OBJ_DIR)/neighbor_cpu_cubin.h \
|
||||
$(OBJ_DIR)/neighbor_gpu.cubin $(OBJ_DIR)/neighbor_gpu_cubin.h \
|
||||
$(OBJ_DIR)/pppm_f.cubin $(OBJ_DIR)/pppm_f_cubin.h \
|
||||
$(OBJ_DIR)/pppm_d.cubin $(OBJ_DIR)/pppm_d_cubin.h \
|
||||
$(OBJ_DIR)/ellipsoid_nbor.cubin $(OBJ_DIR)/ellipsoid_nbor_cubin.h \
|
||||
$(OBJ_DIR)/gayberne.cubin $(OBJ_DIR)/gayberne_lj.cubin \
|
||||
$(OBJ_DIR)/gayberne_cubin.h $(OBJ_DIR)/gayberne_lj_cubin.h \
|
||||
$(OBJ_DIR)/re_squared.cubin $(OBJ_DIR)/re_squared_lj.cubin \
|
||||
$(OBJ_DIR)/re_squared_cubin.h $(OBJ_DIR)/re_squared_lj_cubin.h \
|
||||
$(OBJ_DIR)/lj.cubin $(OBJ_DIR)/lj_cubin.h \
|
||||
$(OBJ_DIR)/lj96.cubin $(OBJ_DIR)/lj96_cubin.h \
|
||||
$(OBJ_DIR)/lj_expand.cubin $(OBJ_DIR)/lj_expand_cubin.h \
|
||||
$(OBJ_DIR)/lj_coul.cubin $(OBJ_DIR)/lj_coul_cubin.h \
|
||||
$(OBJ_DIR)/lj_coul_long.cubin $(OBJ_DIR)/lj_coul_long_cubin.h \
|
||||
$(OBJ_DIR)/lj_dsf.cubin $(OBJ_DIR)/lj_dsf_cubin.h \
|
||||
$(OBJ_DIR)/lj_class2_long.cubin $(OBJ_DIR)/lj_class2_long_cubin.h \
|
||||
$(OBJ_DIR)/coul_long.cubin $(OBJ_DIR)/coul_long_cubin.h \
|
||||
$(OBJ_DIR)/morse.cubin $(OBJ_DIR)/morse_cubin.h \
|
||||
$(OBJ_DIR)/charmm_long.cubin $(OBJ_DIR)/charmm_long_cubin.h \
|
||||
$(OBJ_DIR)/lj_sdk.cubin $(OBJ_DIR)/lj_sdk_cubin.h \
|
||||
$(OBJ_DIR)/lj_sdk_long.cubin $(OBJ_DIR)/lj_sdk_long_cubin.h \
|
||||
$(OBJ_DIR)/eam.cubin $(OBJ_DIR)/eam_cubin.h \
|
||||
$(OBJ_DIR)/buck.cubin $(OBJ_DIR)/buck_cubin.h \
|
||||
$(OBJ_DIR)/buck_coul_long.cubin $(OBJ_DIR)/buck_coul_long_cubin.h \
|
||||
$(OBJ_DIR)/buck_coul.cubin $(OBJ_DIR)/buck_coul_cubin.h \
|
||||
$(OBJ_DIR)/table.cubin $(OBJ_DIR)/table_cubin.h \
|
||||
$(OBJ_DIR)/yukawa.cubin $(OBJ_DIR)/yukawa_cubin.h \
|
||||
$(OBJ_DIR)/born.cubin $(OBJ_DIR)/born_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_wolf.cubin $(OBJ_DIR)/born_coul_wolf_cubin.h \
|
||||
$(OBJ_DIR)/born_coul_long.cubin $(OBJ_DIR)/born_coul_long_cubin.h \
|
||||
$(OBJ_DIR)/dipole_lj.cubin $(OBJ_DIR)/dipole_lj_cubin.h \
|
||||
$(OBJ_DIR)/dipole_lj_sf.cubin $(OBJ_DIR)/dipole_lj_sf_cubin.h \
|
||||
$(OBJ_DIR)/colloid.cubin $(OBJ_DIR)/colloid_cubin.h \
|
||||
$(OBJ_DIR)/gauss.cubin $(OBJ_DIR)/gauss_cubin.h \
|
||||
$(OBJ_DIR)/yukawa_colloid.cubin $(OBJ_DIR)/yukawa_colloid_cubin.h \
|
||||
$(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye_cubin.h \
|
||||
$(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf_cubin.h \
|
||||
$(OBJ_DIR)/sw.cubin $(OBJ_DIR)/sw_cubin.h \
|
||||
$(OBJ_DIR)/vashishta.cubin $(OBJ_DIR)/vashishta_cubin.h \
|
||||
$(OBJ_DIR)/beck.cubin $(OBJ_DIR)/beck_cubin.h \
|
||||
$(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie_cubin.h \
|
||||
$(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \
|
||||
$(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm_cubin.h \
|
||||
$(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs_cubin.h \
|
||||
$(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd_cubin.h \
|
||||
$(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff_cubin.h \
|
||||
$(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl_cubin.h \
|
||||
$(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod_cubin.h \
|
||||
$(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul_cubin.h \
|
||||
$(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye_cubin.h \
|
||||
$(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl_cubin.h \
|
||||
$(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic_cubin.h \
|
||||
$(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm_cubin.h \
|
||||
$(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj_cubin.h \
|
||||
$(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long_cubin.h
|
||||
|
||||
# Aggregate target: the object directory, the GPU library archive ($(GPU_LIB))
# and the helper executables listed in $(EXECS).
# NOTE(review): $(OBJ_DIR) is only a sibling prerequisite here; nothing orders
# it before the individual object rules under `make -j` -- consider an
# order-only prerequisite on those rules.
all: $(OBJ_DIR) $(GPU_LIB) $(EXECS)
|
||||
|
||||
$(OBJ_DIR):
|
||||
mkdir -p $@
|
||||
|
||||
$(OBJ_DIR)/cudpp.o: cudpp_mini/cudpp.cpp
|
||||
$(CUDR) -o $@ -c cudpp_mini/cudpp.cpp -Icudpp_mini
|
||||
|
||||
$(OBJ_DIR)/cudpp_plan.o: cudpp_mini/cudpp_plan.cpp
|
||||
$(CUDR) -o $@ -c cudpp_mini/cudpp_plan.cpp -Icudpp_mini
|
||||
|
||||
$(OBJ_DIR)/cudpp_maximal_launch.o: cudpp_mini/cudpp_maximal_launch.cpp
|
||||
$(CUDR) -o $@ -c cudpp_mini/cudpp_maximal_launch.cpp -Icudpp_mini
|
||||
|
||||
$(OBJ_DIR)/cudpp_plan_manager.o: cudpp_mini/cudpp_plan_manager.cpp
|
||||
$(CUDR) -o $@ -c cudpp_mini/cudpp_plan_manager.cpp -Icudpp_mini
|
||||
|
||||
$(OBJ_DIR)/radixsort_app.cu_o: cudpp_mini/radixsort_app.cu
|
||||
$(CUDA) -o $@ -c cudpp_mini/radixsort_app.cu
|
||||
|
||||
$(OBJ_DIR)/scan_app.cu_o: cudpp_mini/scan_app.cu
|
||||
$(CUDA) -o $@ -c cudpp_mini/scan_app.cu
|
||||
|
||||
$(OBJ_DIR)/atom.cubin: lal_atom.cu lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_atom.cu
|
||||
|
||||
$(OBJ_DIR)/atom_cubin.h: $(OBJ_DIR)/atom.cubin
|
||||
$(BIN2C) -c -n atom $(OBJ_DIR)/atom.cubin > $(OBJ_DIR)/atom_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_atom.o: lal_atom.cpp lal_atom.h $(NVD_H) $(OBJ_DIR)/atom_cubin.h
|
||||
$(CUDR) -o $@ -c lal_atom.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_ans.o: lal_answer.cpp lal_answer.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c lal_answer.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/neighbor_cpu.cubin: lal_neighbor_cpu.cu lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_neighbor_cpu.cu
|
||||
|
||||
$(OBJ_DIR)/neighbor_cpu_cubin.h: $(OBJ_DIR)/neighbor_cpu.cubin
|
||||
$(BIN2C) -c -n neighbor_cpu $(OBJ_DIR)/neighbor_cpu.cubin > $(OBJ_DIR)/neighbor_cpu_cubin.h
|
||||
|
||||
$(OBJ_DIR)/neighbor_gpu.cubin: lal_neighbor_gpu.cu lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_neighbor_gpu.cu
|
||||
|
||||
$(OBJ_DIR)/neighbor_gpu_cubin.h: $(OBJ_DIR)/neighbor_gpu.cubin
|
||||
$(BIN2C) -c -n neighbor_gpu $(OBJ_DIR)/neighbor_gpu.cubin > $(OBJ_DIR)/neighbor_gpu_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_neighbor_shared.o: lal_neighbor_shared.cpp lal_neighbor_shared.h $(OBJ_DIR)/neighbor_cpu_cubin.h $(OBJ_DIR)/neighbor_gpu_cubin.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c lal_neighbor_shared.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_neighbor.o: lal_neighbor.cpp lal_neighbor.h lal_neighbor_shared.h $(NVD_H)
|
||||
$(CUDR) -o $@ -c lal_neighbor.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/device.cubin: lal_device.cu lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_device.cu
|
||||
|
||||
$(OBJ_DIR)/device_cubin.h: $(OBJ_DIR)/device.cubin
|
||||
$(BIN2C) -c -n device $(OBJ_DIR)/device.cubin > $(OBJ_DIR)/device_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_device.o: lal_device.cpp lal_device.h $(ALL_H) $(OBJ_DIR)/device_cubin.h
|
||||
$(CUDR) -o $@ -c lal_device.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_base_atomic.o: $(ALL_H) lal_base_atomic.h lal_base_atomic.cpp
|
||||
$(CUDR) -o $@ -c lal_base_atomic.cpp
|
||||
|
||||
$(OBJ_DIR)/lal_base_charge.o: $(ALL_H) lal_base_charge.h lal_base_charge.cpp
|
||||
$(CUDR) -o $@ -c lal_base_charge.cpp
|
||||
|
||||
$(OBJ_DIR)/lal_base_ellipsoid.o: $(ALL_H) lal_base_ellipsoid.h lal_base_ellipsoid.cpp $(OBJ_DIR)/ellipsoid_nbor_cubin.h
|
||||
$(CUDR) -o $@ -c lal_base_ellipsoid.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_base_dipole.o: $(ALL_H) lal_base_dipole.h lal_base_dipole.cpp
|
||||
$(CUDR) -o $@ -c lal_base_dipole.cpp
|
||||
|
||||
$(OBJ_DIR)/lal_base_three.o: $(ALL_H) lal_base_three.h lal_base_three.cpp
|
||||
$(CUDR) -o $@ -c lal_base_three.cpp
|
||||
|
||||
$(OBJ_DIR)/lal_base_dpd.o: $(ALL_H) lal_base_dpd.h lal_base_dpd.cpp
|
||||
$(CUDR) -o $@ -c lal_base_dpd.cpp
|
||||
|
||||
$(OBJ_DIR)/pppm_f.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -Dgrdtyp=float -Dgrdtyp4=float4 -o $@ lal_pppm.cu
|
||||
|
||||
$(OBJ_DIR)/pppm_f_cubin.h: $(OBJ_DIR)/pppm_f.cubin
|
||||
$(BIN2C) -c -n pppm_f $(OBJ_DIR)/pppm_f.cubin > $(OBJ_DIR)/pppm_f_cubin.h
|
||||
|
||||
$(OBJ_DIR)/pppm_d.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -Dgrdtyp=double -Dgrdtyp4=double4 -o $@ lal_pppm.cu
|
||||
|
||||
$(OBJ_DIR)/pppm_d_cubin.h: $(OBJ_DIR)/pppm_d.cubin
|
||||
$(BIN2C) -c -n pppm_d $(OBJ_DIR)/pppm_d.cubin > $(OBJ_DIR)/pppm_d_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_pppm.o: $(ALL_H) lal_pppm.h lal_pppm.cpp $(OBJ_DIR)/pppm_f_cubin.h $(OBJ_DIR)/pppm_d_cubin.h
|
||||
$(CUDR) -o $@ -c lal_pppm.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_pppm_ext.o: $(ALL_H) lal_pppm.h lal_pppm_ext.cpp
|
||||
$(CUDR) -o $@ -c lal_pppm_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/ellipsoid_nbor.cubin: lal_ellipsoid_nbor.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_ellipsoid_nbor.cu
|
||||
|
||||
$(OBJ_DIR)/ellipsoid_nbor_cubin.h: $(OBJ_DIR)/ellipsoid_nbor.cubin
|
||||
$(BIN2C) -c -n ellipsoid_nbor $(OBJ_DIR)/ellipsoid_nbor.cubin > $(OBJ_DIR)/ellipsoid_nbor_cubin.h
|
||||
|
||||
$(OBJ_DIR)/gayberne.cubin: lal_gayberne.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_gayberne.cu
|
||||
|
||||
$(OBJ_DIR)/gayberne_lj.cubin: lal_gayberne_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_gayberne_lj.cu
|
||||
|
||||
$(OBJ_DIR)/gayberne_cubin.h: $(OBJ_DIR)/gayberne.cubin
|
||||
$(BIN2C) -c -n gayberne $(OBJ_DIR)/gayberne.cubin > $(OBJ_DIR)/gayberne_cubin.h
|
||||
|
||||
$(OBJ_DIR)/gayberne_lj_cubin.h: $(OBJ_DIR)/gayberne_lj.cubin
|
||||
$(BIN2C) -c -n gayberne_lj $(OBJ_DIR)/gayberne_lj.cubin > $(OBJ_DIR)/gayberne_lj_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_gayberne.o: $(ALL_H) lal_gayberne.h lal_gayberne.cpp $(OBJ_DIR)/gayberne_cubin.h $(OBJ_DIR)/gayberne_lj_cubin.h $(OBJ_DIR)/lal_base_ellipsoid.o
|
||||
$(CUDR) -o $@ -c lal_gayberne.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_gayberne_ext.o: $(ALL_H) $(OBJ_DIR)/lal_gayberne.o lal_gayberne_ext.cpp
|
||||
$(CUDR) -o $@ -c lal_gayberne_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/re_squared.cubin: lal_re_squared.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_re_squared.cu
|
||||
|
||||
$(OBJ_DIR)/re_squared_lj.cubin: lal_re_squared_lj.cu lal_precision.h lal_ellipsoid_extra.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_re_squared_lj.cu
|
||||
|
||||
$(OBJ_DIR)/re_squared_cubin.h: $(OBJ_DIR)/re_squared.cubin
|
||||
$(BIN2C) -c -n re_squared $(OBJ_DIR)/re_squared.cubin > $(OBJ_DIR)/re_squared_cubin.h
|
||||
|
||||
$(OBJ_DIR)/re_squared_lj_cubin.h: $(OBJ_DIR)/re_squared_lj.cubin
|
||||
$(BIN2C) -c -n re_squared_lj $(OBJ_DIR)/re_squared_lj.cubin > $(OBJ_DIR)/re_squared_lj_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_re_squared.o: $(ALL_H) lal_re_squared.h lal_re_squared.cpp $(OBJ_DIR)/re_squared_cubin.h $(OBJ_DIR)/re_squared_lj_cubin.h $(OBJ_DIR)/lal_base_ellipsoid.o
|
||||
$(CUDR) -o $@ -c lal_re_squared.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_re_squared_ext.o: $(ALL_H) $(OBJ_DIR)/lal_re_squared.o lal_re_squared_ext.cpp
|
||||
$(CUDR) -o $@ -c lal_re_squared_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj.cubin: lal_lj.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj.cu
|
||||
|
||||
# Generate lj_cubin.h from lj.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_cubin.h: $(OBJ_DIR)/lj.cubin
	$(BIN2C) -c -n lj $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj.o: $(ALL_H) lal_lj.h lal_lj.cpp $(OBJ_DIR)/lj_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_ext.o: $(ALL_H) lal_lj.h lal_lj_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_coul.cubin: lal_lj_coul.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_coul.cu
|
||||
|
||||
# Generate lj_coul_cubin.h from lj_coul.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_coul_cubin.h: $(OBJ_DIR)/lj_coul.cubin
	$(BIN2C) -c -n lj_coul $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul.o: $(ALL_H) lal_lj_coul.h lal_lj_coul.cpp $(OBJ_DIR)/lj_coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_coul.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul_ext.o: $(ALL_H) lal_lj_coul.h lal_lj_coul_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_coul_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_class2_long.cubin: lal_lj_class2_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_class2_long.cu
|
||||
|
||||
# Generate lj_class2_long_cubin.h from lj_class2_long.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_class2_long_cubin.h: $(OBJ_DIR)/lj_class2_long.cubin
	$(BIN2C) -c -n lj_class2_long $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj_class2_long.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long.cpp $(OBJ_DIR)/lj_class2_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_class2_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_class2_long_ext.o: $(ALL_H) lal_lj_class2_long.h lal_lj_class2_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_class2_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_long.cubin: lal_coul_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_coul_long.cu
|
||||
|
||||
# Generate coul_long_cubin.h from coul_long.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/coul_long_cubin.h: $(OBJ_DIR)/coul_long.cubin
	$(BIN2C) -c -n coul_long $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long.o: $(ALL_H) lal_coul_long.h lal_coul_long.cpp $(OBJ_DIR)/coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_coul_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long_ext.o: $(ALL_H) lal_coul_long.h lal_coul_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_coul_long.cubin: lal_lj_coul_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_coul_long.cu
|
||||
|
||||
# Generate lj_coul_long_cubin.h from lj_coul_long.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_coul_long_cubin.h: $(OBJ_DIR)/lj_coul_long.cubin
	$(BIN2C) -c -n lj_coul_long $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul_long.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long.cpp $(OBJ_DIR)/lj_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_coul_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul_long_ext.o: $(ALL_H) lal_lj_coul_long.h lal_lj_coul_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_dsf.cubin: lal_lj_dsf.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_dsf.cu
|
||||
|
||||
# Generate lj_dsf_cubin.h from lj_dsf.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_dsf_cubin.h: $(OBJ_DIR)/lj_dsf.cubin
	$(BIN2C) -c -n lj_dsf $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj_dsf.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf.cpp $(OBJ_DIR)/lj_dsf_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_dsf.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_dsf_ext.o: $(ALL_H) lal_lj_dsf.h lal_lj_dsf_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_dsf_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/morse.cubin: lal_morse.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_morse.cu
|
||||
|
||||
# Generate morse_cubin.h from morse.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/morse_cubin.h: $(OBJ_DIR)/morse.cubin
	$(BIN2C) -c -n morse $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_morse.o: $(ALL_H) lal_morse.h lal_morse.cpp $(OBJ_DIR)/morse_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_morse.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_morse_ext.o: $(ALL_H) lal_morse.h lal_morse_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_morse_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/charmm_long.cubin: lal_charmm_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_charmm_long.cu
|
||||
|
||||
# Generate charmm_long_cubin.h from charmm_long.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/charmm_long_cubin.h: $(OBJ_DIR)/charmm_long.cubin
	$(BIN2C) -c -n charmm_long $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_charmm_long.o: $(ALL_H) lal_charmm_long.h lal_charmm_long.cpp $(OBJ_DIR)/charmm_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_charmm_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_charmm_long_ext.o: $(ALL_H) lal_charmm_long.h lal_charmm_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_charmm_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj96.cubin: lal_lj96.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj96.cu
|
||||
|
||||
# Generate lj96_cubin.h from lj96.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj96_cubin.h: $(OBJ_DIR)/lj96.cubin
	$(BIN2C) -c -n lj96 $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj96.o: $(ALL_H) lal_lj96.h lal_lj96.cpp $(OBJ_DIR)/lj96_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj96.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj96_ext.o: $(ALL_H) lal_lj96.h lal_lj96_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj96_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_expand.cubin: lal_lj_expand.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_expand.cu
|
||||
|
||||
# Generate lj_expand_cubin.h from lj_expand.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_expand_cubin.h: $(OBJ_DIR)/lj_expand.cubin
	$(BIN2C) -c -n lj_expand $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand.o: $(ALL_H) lal_lj_expand.h lal_lj_expand.cpp $(OBJ_DIR)/lj_expand_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj_expand.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand_ext.o: $(ALL_H) lal_lj_expand.h lal_lj_expand_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_expand_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_sdk.cubin: lal_lj_sdk.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_sdk.cu
|
||||
|
||||
# Generate lj_sdk_cubin.h from lj_sdk.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_sdk_cubin.h: $(OBJ_DIR)/lj_sdk.cubin
	$(BIN2C) -c -n lj_sdk $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_lj_sdk.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk.cpp $(OBJ_DIR)/lj_sdk_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj_sdk.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_sdk_ext.o: $(ALL_H) lal_lj_sdk.h lal_lj_sdk_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_sdk_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_sdk_long.cubin: lal_lj_sdk_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_sdk_long.cu
|
||||
|
||||
# Generate lj_sdk_long_cubin.h from lj_sdk_long.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/lj_sdk_long_cubin.h: $(OBJ_DIR)/lj_sdk_long.cubin
	$(BIN2C) -c -n lj_sdk_long $< > $@
|
||||
|
||||
# Host-side object for the lj_sdk_long style; needs the generated
# lj_sdk_long_cubin.h, hence -I$(OBJ_DIR).
# Tracks lal_base_charge.o (not lal_base_atomic.o) so it agrees with
# lal_lj_sdk_long_ext.o, which lists lal_base_charge.h, and with the other
# *_long charge styles in this makefile.
$(OBJ_DIR)/lal_lj_sdk_long.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long.cpp $(OBJ_DIR)/lj_sdk_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
	$(CUDR) -o $@ -c lal_lj_sdk_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_sdk_long_ext.o: $(ALL_H) lal_lj_sdk_long.h lal_lj_sdk_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_sdk_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/eam.cubin: lal_eam.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_eam.cu
|
||||
|
||||
# Generate eam_cubin.h from eam.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/eam_cubin.h: $(OBJ_DIR)/eam.cubin
	$(BIN2C) -c -n eam $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_eam.o: $(ALL_H) lal_eam.h lal_eam.cpp $(OBJ_DIR)/eam_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_eam.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_eam_ext.o: $(ALL_H) lal_eam.h lal_eam_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_eam_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_eam_fs_ext.o: $(ALL_H) lal_eam.h lal_eam_fs_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_eam_fs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_eam_alloy_ext.o: $(ALL_H) lal_eam.h lal_eam_alloy_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_eam_alloy_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/buck.cubin: lal_buck.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_buck.cu
|
||||
|
||||
# Generate buck_cubin.h from buck.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/buck_cubin.h: $(OBJ_DIR)/buck.cubin
	$(BIN2C) -c -n buck $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_buck.o: $(ALL_H) lal_buck.h lal_buck.cpp $(OBJ_DIR)/buck_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_buck.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_buck_ext.o: $(ALL_H) lal_buck.h lal_buck_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_buck_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/buck_coul.cubin: lal_buck_coul.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_buck_coul.cu
|
||||
|
||||
# Generate buck_coul_cubin.h from buck_coul.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/buck_coul_cubin.h: $(OBJ_DIR)/buck_coul.cubin
	$(BIN2C) -c -n buck_coul $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_buck_coul.o: $(ALL_H) lal_buck_coul.h lal_buck_coul.cpp $(OBJ_DIR)/buck_coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_buck_coul.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_buck_coul_ext.o: $(ALL_H) lal_buck_coul.h lal_buck_coul_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_buck_coul_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/buck_coul_long.cubin: lal_buck_coul_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_buck_coul_long.cu
|
||||
|
||||
# Generate buck_coul_long_cubin.h from buck_coul_long.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/buck_coul_long_cubin.h: $(OBJ_DIR)/buck_coul_long.cubin
	$(BIN2C) -c -n buck_coul_long $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_buck_coul_long.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long.cpp $(OBJ_DIR)/buck_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_buck_coul_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_buck_coul_long_ext.o: $(ALL_H) lal_buck_coul_long.h lal_buck_coul_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_buck_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/table.cubin: lal_table.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_table.cu
|
||||
|
||||
# Generate table_cubin.h from table.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/table_cubin.h: $(OBJ_DIR)/table.cubin
	$(BIN2C) -c -n table $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_table.o: $(ALL_H) lal_table.h lal_table.cpp $(OBJ_DIR)/table_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_table.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_table_ext.o: $(ALL_H) lal_table.h lal_table_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_table_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/yukawa.cubin: lal_yukawa.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_yukawa.cu
|
||||
|
||||
# Generate yukawa_cubin.h from yukawa.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/yukawa_cubin.h: $(OBJ_DIR)/yukawa.cubin
	$(BIN2C) -c -n yukawa $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_yukawa.o: $(ALL_H) lal_yukawa.h lal_yukawa.cpp $(OBJ_DIR)/yukawa_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_yukawa.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_yukawa_ext.o: $(ALL_H) lal_yukawa.h lal_yukawa_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_yukawa_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born.cubin: lal_born.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_born.cu
|
||||
|
||||
# Generate born_cubin.h from born.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/born_cubin.h: $(OBJ_DIR)/born.cubin
	$(BIN2C) -c -n born $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_born.o: $(ALL_H) lal_born.h lal_born.cpp $(OBJ_DIR)/born_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_born.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_ext.o: $(ALL_H) lal_born.h lal_born_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_born_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_wolf.cubin: lal_born_coul_wolf.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_born_coul_wolf.cu
|
||||
|
||||
# Generate born_coul_wolf_cubin.h from born_coul_wolf.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/born_coul_wolf_cubin.h: $(OBJ_DIR)/born_coul_wolf.cubin
	$(BIN2C) -c -n born_coul_wolf $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf.cpp $(OBJ_DIR)/born_coul_wolf_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_born_coul_wolf.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_ext.o: $(ALL_H) lal_born_coul_wolf.h lal_born_coul_wolf_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_born_coul_wolf_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_long.cubin: lal_born_coul_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_born_coul_long.cu
|
||||
|
||||
# Generate born_coul_long_cubin.h from born_coul_long.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/born_coul_long_cubin.h: $(OBJ_DIR)/born_coul_long.cubin
	$(BIN2C) -c -n born_coul_long $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long.cpp $(OBJ_DIR)/born_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_born_coul_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long_ext.o: $(ALL_H) lal_born_coul_long.h lal_born_coul_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_born_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/dipole_lj.cubin: lal_dipole_lj.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_dipole_lj.cu
|
||||
|
||||
# Generate dipole_lj_cubin.h from dipole_lj.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/dipole_lj_cubin.h: $(OBJ_DIR)/dipole_lj.cubin
	$(BIN2C) -c -n dipole_lj $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_lj.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj.cpp $(OBJ_DIR)/dipole_lj_cubin.h $(OBJ_DIR)/lal_base_dipole.o
|
||||
$(CUDR) -o $@ -c lal_dipole_lj.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_lj_ext.o: $(ALL_H) lal_dipole_lj.h lal_dipole_lj_ext.cpp lal_base_dipole.h
|
||||
$(CUDR) -o $@ -c lal_dipole_lj_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/dipole_lj_sf.cubin: lal_dipole_lj_sf.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_dipole_lj_sf.cu
|
||||
|
||||
# Generate dipole_lj_sf_cubin.h from dipole_lj_sf.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/dipole_lj_sf_cubin.h: $(OBJ_DIR)/dipole_lj_sf.cubin
	$(BIN2C) -c -n dipole_lj_sf $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_lj_sf.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf.cpp $(OBJ_DIR)/dipole_lj_sf_cubin.h $(OBJ_DIR)/lal_base_dipole.o
|
||||
$(CUDR) -o $@ -c lal_dipole_lj_sf.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_lj_sf_ext.o: $(ALL_H) lal_dipole_lj_sf.h lal_dipole_lj_sf_ext.cpp lal_base_dipole.h
|
||||
$(CUDR) -o $@ -c lal_dipole_lj_sf_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/colloid.cubin: lal_colloid.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_colloid.cu
|
||||
|
||||
# Generate colloid_cubin.h from colloid.cubin with $(BIN2C).
# The .cubin prerequisite is listed once; $< is that input and $@ is the header.
$(OBJ_DIR)/colloid_cubin.h: $(OBJ_DIR)/colloid.cubin
	$(BIN2C) -c -n colloid $< > $@
|
||||
|
||||
$(OBJ_DIR)/lal_colloid.o: $(ALL_H) lal_colloid.h lal_colloid.cpp $(OBJ_DIR)/colloid_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_colloid.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_colloid_ext.o: $(ALL_H) lal_colloid.h lal_colloid_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_colloid_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/gauss.cubin: lal_gauss.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_gauss.cu
|
||||
|
||||
$(OBJ_DIR)/gauss_cubin.h: $(OBJ_DIR)/gauss.cubin $(OBJ_DIR)/gauss.cubin
|
||||
$(BIN2C) -c -n gauss $(OBJ_DIR)/gauss.cubin > $(OBJ_DIR)/gauss_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_gauss.o: $(ALL_H) lal_gauss.h lal_gauss.cpp $(OBJ_DIR)/gauss_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_gauss.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_gauss_ext.o: $(ALL_H) lal_gauss.h lal_gauss_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_gauss_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/yukawa_colloid.cubin: lal_yukawa_colloid.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_yukawa_colloid.cu
|
||||
|
||||
$(OBJ_DIR)/yukawa_colloid_cubin.h: $(OBJ_DIR)/yukawa_colloid.cubin $(OBJ_DIR)/yukawa_colloid.cubin
|
||||
$(BIN2C) -c -n yukawa_colloid $(OBJ_DIR)/yukawa_colloid.cubin > $(OBJ_DIR)/yukawa_colloid_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_yukawa_colloid.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid.cpp $(OBJ_DIR)/yukawa_colloid_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_yukawa_colloid.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_yukawa_colloid_ext.o: $(ALL_H) lal_yukawa_colloid.h lal_yukawa_colloid_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_yukawa_colloid_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_coul_debye.cubin: lal_lj_coul_debye.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_coul_debye.cu
|
||||
|
||||
$(OBJ_DIR)/lj_coul_debye_cubin.h: $(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye.cubin
|
||||
$(BIN2C) -c -n lj_coul_debye $(OBJ_DIR)/lj_coul_debye.cubin > $(OBJ_DIR)/lj_coul_debye_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul_debye.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye.cpp $(OBJ_DIR)/lj_coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_coul_debye.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul_debye_ext.o: $(ALL_H) lal_lj_coul_debye.h lal_lj_coul_debye_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_coul_debye_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_dsf.cubin: lal_coul_dsf.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_coul_dsf.cu
|
||||
|
||||
$(OBJ_DIR)/coul_dsf_cubin.h: $(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf.cubin
|
||||
$(BIN2C) -c -n coul_dsf $(OBJ_DIR)/coul_dsf.cubin > $(OBJ_DIR)/coul_dsf_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_coul_dsf.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf.cpp $(OBJ_DIR)/coul_dsf_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_coul_dsf.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_dsf_ext.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_coul_dsf_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/vashishta.cubin: lal_vashishta.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_vashishta.cu
|
||||
|
||||
$(OBJ_DIR)/vashishta_cubin.h: $(OBJ_DIR)/vashishta.cubin $(OBJ_DIR)/vashishta.cubin
|
||||
$(BIN2C) -c -n vashishta $(OBJ_DIR)/vashishta.cubin > $(OBJ_DIR)/vashishta_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_vashishta.o: $(ALL_H) lal_vashishta.h lal_vashishta.cpp $(OBJ_DIR)/vashishta_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_vashishta.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_vashishta_ext.o: $(ALL_H) lal_vashishta.h lal_vashishta_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_vashishta_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/sw.cubin: lal_sw.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_sw.cu
|
||||
|
||||
$(OBJ_DIR)/sw_cubin.h: $(OBJ_DIR)/sw.cubin $(OBJ_DIR)/sw.cubin
|
||||
$(BIN2C) -c -n sw $(OBJ_DIR)/sw.cubin > $(OBJ_DIR)/sw_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_sw.o: $(ALL_H) lal_sw.h lal_sw.cpp $(OBJ_DIR)/sw_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_sw.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_sw_ext.o: $(ALL_H) lal_sw.h lal_sw_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_sw_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/beck.cubin: lal_beck.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_beck.cu
|
||||
|
||||
$(OBJ_DIR)/beck_cubin.h: $(OBJ_DIR)/beck.cubin $(OBJ_DIR)/beck.cubin
|
||||
$(BIN2C) -c -n beck $(OBJ_DIR)/beck.cubin > $(OBJ_DIR)/beck_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_beck.o: $(ALL_H) lal_beck.h lal_beck.cpp $(OBJ_DIR)/beck_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_beck.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_beck_ext.o: $(ALL_H) lal_beck.h lal_beck_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_beck_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/mie.cubin: lal_mie.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_mie.cu
|
||||
|
||||
$(OBJ_DIR)/mie_cubin.h: $(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie.cubin
|
||||
$(BIN2C) -c -n mie $(OBJ_DIR)/mie.cubin > $(OBJ_DIR)/mie_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_mie.o: $(ALL_H) lal_mie.h lal_mie.cpp $(OBJ_DIR)/mie_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_mie.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_mie_ext.o: $(ALL_H) lal_mie.h lal_mie_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_mie_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/soft.cubin: lal_soft.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_soft.cu
|
||||
|
||||
$(OBJ_DIR)/soft_cubin.h: $(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft.cubin
|
||||
$(BIN2C) -c -n soft $(OBJ_DIR)/soft.cubin > $(OBJ_DIR)/soft_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_soft.o: $(ALL_H) lal_soft.h lal_soft.cpp $(OBJ_DIR)/soft_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_soft.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_soft_ext.o: $(ALL_H) lal_soft.h lal_soft_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_soft_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_coul_msm.cubin: lal_lj_coul_msm.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_coul_msm.cu
|
||||
|
||||
$(OBJ_DIR)/lj_coul_msm_cubin.h: $(OBJ_DIR)/lj_coul_msm.cubin $(OBJ_DIR)/lj_coul_msm.cubin
|
||||
$(BIN2C) -c -n lj_coul_msm $(OBJ_DIR)/lj_coul_msm.cubin > $(OBJ_DIR)/lj_coul_msm_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul_msm.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm.cpp $(OBJ_DIR)/lj_coul_msm_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_coul_msm.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_coul_msm_ext.o: $(ALL_H) lal_lj_coul_msm.h lal_lj_coul_msm_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_coul_msm_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_gromacs.cubin: lal_lj_gromacs.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_gromacs.cu
|
||||
|
||||
$(OBJ_DIR)/lj_gromacs_cubin.h: $(OBJ_DIR)/lj_gromacs.cubin $(OBJ_DIR)/lj_gromacs.cubin
|
||||
$(BIN2C) -c -n lj_gromacs $(OBJ_DIR)/lj_gromacs.cubin > $(OBJ_DIR)/lj_gromacs_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_lj_gromacs.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs.cpp $(OBJ_DIR)/lj_gromacs_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj_gromacs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_gromacs_ext.o: $(ALL_H) lal_lj_gromacs.h lal_lj_gromacs_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_gromacs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/dpd.cubin: lal_dpd.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_dpd.cu
|
||||
|
||||
$(OBJ_DIR)/dpd_cubin.h: $(OBJ_DIR)/dpd.cubin $(OBJ_DIR)/dpd.cubin
|
||||
$(BIN2C) -c -n dpd $(OBJ_DIR)/dpd.cubin > $(OBJ_DIR)/dpd_cubin.h
|
||||
|
||||
$(OBJ_DIR)/ufm.cubin: lal_ufm.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_ufm.cu
|
||||
|
||||
$(OBJ_DIR)/ufm_cubin.h: $(OBJ_DIR)/ufm.cubin $(OBJ_DIR)/ufm.cubin
|
||||
$(BIN2C) -c -n ufm $(OBJ_DIR)/ufm.cubin > $(OBJ_DIR)/ufm_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_ufm.o: $(ALL_H) lal_ufm.h lal_ufm.cpp $(OBJ_DIR)/ufm_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_ufm.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_dpd.o: $(ALL_H) lal_dpd.h lal_dpd.cpp $(OBJ_DIR)/dpd_cubin.h $(OBJ_DIR)/lal_base_dpd.o
|
||||
$(CUDR) -o $@ -c lal_dpd.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_dpd_ext.o: $(ALL_H) lal_dpd.h lal_dpd_ext.cpp lal_base_dpd.h
|
||||
$(CUDR) -o $@ -c lal_dpd_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff.cubin: lal_tersoff.cu lal_precision.h lal_tersoff_extra.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_tersoff.cu
|
||||
|
||||
$(OBJ_DIR)/tersoff_cubin.h: $(OBJ_DIR)/tersoff.cubin $(OBJ_DIR)/tersoff.cubin
|
||||
$(BIN2C) -c -n tersoff $(OBJ_DIR)/tersoff.cubin > $(OBJ_DIR)/tersoff_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff.o: $(ALL_H) lal_tersoff.h lal_tersoff.cpp $(OBJ_DIR)/tersoff_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_tersoff.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_ext.o: $(ALL_H) lal_tersoff.h lal_tersoff_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_tersoff_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff_zbl.cubin: lal_tersoff_zbl.cu lal_precision.h lal_tersoff_zbl_extra.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_tersoff_zbl.cu
|
||||
|
||||
$(OBJ_DIR)/tersoff_zbl_cubin.h: $(OBJ_DIR)/tersoff_zbl.cubin $(OBJ_DIR)/tersoff_zbl.cubin
|
||||
$(BIN2C) -c -n tersoff_zbl $(OBJ_DIR)/tersoff_zbl.cubin > $(OBJ_DIR)/tersoff_zbl_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_zbl.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl.cpp $(OBJ_DIR)/tersoff_zbl_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_tersoff_zbl.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_zbl_ext.o: $(ALL_H) lal_tersoff_zbl.h lal_tersoff_zbl_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_tersoff_zbl_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/tersoff_mod.cubin: lal_tersoff_mod.cu lal_precision.h lal_tersoff_mod_extra.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_tersoff_mod.cu
|
||||
|
||||
$(OBJ_DIR)/tersoff_mod_cubin.h: $(OBJ_DIR)/tersoff_mod.cubin $(OBJ_DIR)/tersoff_mod.cubin
|
||||
$(BIN2C) -c -n tersoff_mod $(OBJ_DIR)/tersoff_mod.cubin > $(OBJ_DIR)/tersoff_mod_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_mod.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod.cpp $(OBJ_DIR)/tersoff_mod_cubin.h $(OBJ_DIR)/lal_base_three.o
|
||||
$(CUDR) -o $@ -c lal_tersoff_mod.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_tersoff_mod_ext.o: $(ALL_H) lal_tersoff_mod.h lal_tersoff_mod_ext.cpp lal_base_three.h
|
||||
$(CUDR) -o $@ -c lal_tersoff_mod_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul.cubin: lal_coul.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_coul.cu
|
||||
|
||||
$(OBJ_DIR)/coul_cubin.h: $(OBJ_DIR)/coul.cubin $(OBJ_DIR)/coul.cubin
|
||||
$(BIN2C) -c -n coul $(OBJ_DIR)/coul.cubin > $(OBJ_DIR)/coul_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_coul.o: $(ALL_H) lal_coul.h lal_coul.cpp $(OBJ_DIR)/coul_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_coul.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_ext.o: $(ALL_H) lal_coul.h lal_coul_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_coul_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_debye.cubin: lal_coul_debye.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_coul_debye.cu
|
||||
|
||||
$(OBJ_DIR)/coul_debye_cubin.h: $(OBJ_DIR)/coul_debye.cubin $(OBJ_DIR)/coul_debye.cubin
|
||||
$(BIN2C) -c -n coul_debye $(OBJ_DIR)/coul_debye.cubin > $(OBJ_DIR)/coul_debye_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_coul_debye.o: $(ALL_H) lal_coul_debye.h lal_coul_debye.cpp $(OBJ_DIR)/coul_debye_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_coul_debye.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_debye_ext.o: $(ALL_H) lal_coul_debye.h lal_coul_debye_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_coul_debye_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/zbl.cubin: lal_zbl.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_zbl.cu
|
||||
|
||||
$(OBJ_DIR)/zbl_cubin.h: $(OBJ_DIR)/zbl.cubin $(OBJ_DIR)/zbl.cubin
|
||||
$(BIN2C) -c -n zbl $(OBJ_DIR)/zbl.cubin > $(OBJ_DIR)/zbl_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_zbl.o: $(ALL_H) lal_zbl.h lal_zbl.cpp $(OBJ_DIR)/zbl_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_zbl.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_zbl_ext.o: $(ALL_H) lal_zbl.h lal_zbl_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_zbl_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_cubic.cubin: lal_lj_cubic.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_cubic.cu
|
||||
|
||||
$(OBJ_DIR)/lj_cubic_cubin.h: $(OBJ_DIR)/lj_cubic.cubin $(OBJ_DIR)/lj_cubic.cubin
|
||||
$(BIN2C) -c -n lj_cubic $(OBJ_DIR)/lj_cubic.cubin > $(OBJ_DIR)/lj_cubic_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_lj_cubic.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic.cpp $(OBJ_DIR)/lj_cubic_cubin.h $(OBJ_DIR)/lal_base_atomic.o
|
||||
$(CUDR) -o $@ -c lal_lj_cubic.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_cubic_ext.o: $(ALL_H) lal_lj_cubic.h lal_lj_cubic_ext.cpp lal_base_atomic.h
|
||||
$(CUDR) -o $@ -c lal_lj_cubic_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/dipole_long_lj.cubin: lal_dipole_long_lj.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_dipole_long_lj.cu
|
||||
|
||||
$(OBJ_DIR)/dipole_long_lj_cubin.h: $(OBJ_DIR)/dipole_long_lj.cubin $(OBJ_DIR)/dipole_long_lj.cubin
|
||||
$(BIN2C) -c -n dipole_long_lj $(OBJ_DIR)/dipole_long_lj.cubin > $(OBJ_DIR)/dipole_long_lj_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_long_lj.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj.cpp $(OBJ_DIR)/dipole_long_lj_cubin.h $(OBJ_DIR)/lal_base_dipole.o
|
||||
$(CUDR) -o $@ -c lal_dipole_long_lj.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_long_lj_ext.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj_ext.cpp lal_base_dipole.h
|
||||
$(CUDR) -o $@ -c lal_dipole_long_lj_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_expand_coul_long.cubin: lal_lj_expand_coul_long.cu lal_precision.h lal_preprocessor.h
|
||||
$(CUDA) --fatbin -DNV_KERNEL -o $@ lal_lj_expand_coul_long.cu
|
||||
|
||||
$(OBJ_DIR)/lj_expand_coul_long_cubin.h: $(OBJ_DIR)/lj_expand_coul_long.cubin $(OBJ_DIR)/lj_expand_coul_long.cubin
|
||||
$(BIN2C) -c -n lj_expand_coul_long $(OBJ_DIR)/lj_expand_coul_long.cubin > $(OBJ_DIR)/lj_expand_coul_long_cubin.h
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long.cpp $(OBJ_DIR)/lj_expand_coul_long_cubin.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(CUDR) -o $@ -c lal_lj_expand_coul_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
|
||||
$(CUDR) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/nvc_get_devices: ./geryon/ucl_get_devices.cpp $(NVD_H)
|
||||
$(CUDR) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_CUDADR $(CUDA_LIB) -lcuda
|
||||
|
||||
$(GPU_LIB): $(OBJS) $(CUDPP)
|
||||
$(AR) -crusv $(GPU_LIB) $(OBJS) $(CUDPP)
|
||||
@cp $(EXTRAMAKE) Makefile.lammps
|
||||
|
||||
clean:
|
||||
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CUDPP) $(CBNS) *.linkinfo
|
||||
|
||||
veryclean: clean
|
||||
-rm -rf *~ *.linkinfo
|
||||
|
||||
cleanlib:
|
||||
-rm -f $(EXECS) $(GPU_LIB) $(OBJS) $(CBNS) *.linkinfo
|
||||
@ -66,7 +66,12 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
|
||||
$(OBJ_DIR)/lal_coul_debye.o $(OBJ_DIR)/lal_coul_debye_ext.o \
|
||||
$(OBJ_DIR)/lal_zbl.o $(OBJ_DIR)/lal_zbl_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_cubic.o $(OBJ_DIR)/lal_lj_cubic_ext.o \
|
||||
$(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o
|
||||
$(OBJ_DIR)/lal_ufm.o $(OBJ_DIR)/lal_ufm_ext.o \
|
||||
$(OBJ_DIR)/lal_dipole_long_lj.o $(OBJ_DIR)/lal_dipole_long_lj_ext.o \
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o $(OBJ_DIR)/lal_lj_expand_coul_long_ext.o \
|
||||
$(OBJ_DIR)/lal_coul_long_cs.o $(OBJ_DIR)/lal_coul_long_cs_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs.o $(OBJ_DIR)/lal_born_coul_long_cs_ext.o \
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs.o $(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o
|
||||
|
||||
KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
||||
$(OBJ_DIR)/neighbor_cpu_cl.h $(OBJ_DIR)/pppm_cl.h \
|
||||
@ -95,7 +100,9 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
|
||||
$(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/coul_cl.h \
|
||||
$(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
|
||||
$(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/vashishta_cl.h \
|
||||
$(OBJ_DIR)/ufm_cl.h
|
||||
$(OBJ_DIR)/ufm_cl.h $(OBJ_DIR)/dipole_long_lj_cl.h \
|
||||
$(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/coul_long_cs_cl.h \
|
||||
$(OBJ_DIR)/born_coul_long_cs_cl.h $(OBJ_DIR)/born_coul_wolf_cs_cl.h
|
||||
|
||||
|
||||
OCL_EXECS = $(BIN_DIR)/ocl_get_devices
|
||||
@ -588,7 +595,52 @@ $(OBJ_DIR)/lal_ufm.o: $(ALL_H) lal_ufm.h lal_ufm.cpp $(OBJ_DIR)/ufm_cl.h $(OBJ_
|
||||
$(OBJ_DIR)/lal_ufm_ext.o: $(ALL_H) lal_ufm.h lal_ufm_ext.cpp lal_base_atomic.h
|
||||
$(OCL) -o $@ -c lal_ufm_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp
|
||||
$(OBJ_DIR)/dipole_long_lj_cl.h: lal_dipole_long_lj.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh dipole_long_lj $(PRE1_H) lal_dipole_long_lj.cu $(OBJ_DIR)/dipole_long_lj_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_long_lj.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj.cpp $(OBJ_DIR)/dipole_long_lj_cl.h $(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(OCL) -o $@ -c lal_dipole_long_lj.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_dipole_long_lj_ext.o: $(ALL_H) lal_dipole_long_lj.h lal_dipole_long_lj_ext.cpp lal_base_dipole.h
|
||||
$(OCL) -o $@ -c lal_dipole_long_lj_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lj_expand_coul_long_cl.h: lal_lj_expand_coul_long.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh lj_expand_coul_long $(PRE1_H) lal_lj_expand_coul_long.cu $(OBJ_DIR)/lj_expand_coul_long_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long.cpp $(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/lj_expand_coul_long_cl.h $(OBJ_DIR)/lal_base_charge.o
|
||||
$(OCL) -o $@ -c lal_lj_expand_coul_long.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_lj_expand_coul_long_ext.o: $(ALL_H) lal_lj_expand_coul_long.h lal_lj_expand_coul_long_ext.cpp lal_base_charge.h
|
||||
$(OCL) -o $@ -c lal_lj_expand_coul_long_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/coul_long_cs_cl.h: lal_coul_long_cs.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh coul_long_cs $(PRE1_H) lal_coul_long_cs.cu $(OBJ_DIR)/coul_long_cs_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long_cs.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs.cpp $(OBJ_DIR)/coul_long_cs_cl.h $(OBJ_DIR)/coul_long_cs_cl.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_coul_long.o
|
||||
$(OCL) -o $@ -c lal_coul_long_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_coul_long_cs_ext.o: $(ALL_H) lal_coul_long_cs.h lal_coul_long_cs_ext.cpp lal_coul_long.h
|
||||
$(OCL) -o $@ -c lal_coul_long_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_long_cs_cl.h: lal_born_coul_long_cs.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh born_coul_long_cs $(PRE1_H) lal_born_coul_long_cs.cu $(OBJ_DIR)/born_coul_long_cs_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs.cpp $(OBJ_DIR)/born_coul_long_cs_cl.h $(OBJ_DIR)/born_coul_long_cs_cl.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_long.o
|
||||
$(OCL) -o $@ -c lal_born_coul_long_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_long_cs_ext.o: $(ALL_H) lal_born_coul_long_cs.h lal_born_coul_long_cs_ext.cpp lal_born_coul_long.h
|
||||
$(OCL) -o $@ -c lal_born_coul_long_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/born_coul_wolf_cs_cl.h: lal_born_coul_wolf_cs.cu $(PRE1_H)
|
||||
$(BSH) ./geryon/file_to_cstr.sh born_coul_wolf_cs $(PRE1_H) lal_born_coul_wolf_cs.cu $(OBJ_DIR)/born_coul_wolf_cs_cl.h;
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs.cpp $(OBJ_DIR)/born_coul_wolf_cs_cl.h $(OBJ_DIR)/born_coul_wolf_cs_cl.h $(OBJ_DIR)/lal_base_charge.o $(OBJ_DIR)/lal_born_coul_wolf.o
|
||||
$(OCL) -o $@ -c lal_born_coul_wolf_cs.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(OBJ_DIR)/lal_born_coul_wolf_cs_ext.o: $(ALL_H) lal_born_coul_wolf_cs.h lal_born_coul_wolf_cs_ext.cpp lal_born_coul_wolf.h
|
||||
$(OCL) -o $@ -c lal_born_coul_wolf_cs_ext.cpp -I$(OBJ_DIR)
|
||||
|
||||
$(BIN_DIR)/ocl_get_devices: ./geryon/ucl_get_devices.cpp $(OCL_H)
|
||||
$(OCL) -o $@ ./geryon/ucl_get_devices.cpp -DUCL_OPENCL $(OCL_LINK)
|
||||
|
||||
$(OCL_LIB): $(OBJS) $(PTXS)
|
||||
|
||||
@ -280,6 +280,9 @@ class UCL_Device {
|
||||
/// Return the OpenCL type for the device
|
||||
inline cl_device_id & cl_device() { return _cl_device; }
|
||||
|
||||
/// Select the platform that has accelerators
|
||||
inline void set_platform_accelerator(int pid=-1);
|
||||
|
||||
private:
|
||||
int _num_platforms; // Number of platforms
|
||||
int _platform; // UCL_Device ID for current platform
|
||||
@ -311,8 +314,8 @@ UCL_Device::UCL_Device() {
|
||||
return;
|
||||
} else
|
||||
_num_platforms=static_cast<int>(nplatforms);
|
||||
|
||||
set_platform(0);
|
||||
// note that platform 0 may not necessarily be associated with accelerators
|
||||
set_platform_accelerator();
|
||||
}
|
||||
|
||||
UCL_Device::~UCL_Device() {
|
||||
@ -320,6 +323,7 @@ UCL_Device::~UCL_Device() {
|
||||
}
|
||||
|
||||
void UCL_Device::clear() {
|
||||
_properties.clear();
|
||||
if (_device>-1) {
|
||||
for (size_t i=0; i<_cq.size(); i++) {
|
||||
CL_DESTRUCT_CALL(clReleaseCommandQueue(_cq.back()));
|
||||
@ -529,75 +533,105 @@ int UCL_Device::set(int num) {
|
||||
return create_context();
|
||||
}
|
||||
|
||||
// List all devices along with all properties
|
||||
// List all devices from all platforms along with all properties
|
||||
void UCL_Device::print_all(std::ostream &out) {
|
||||
if (num_devices() == 0)
|
||||
out << "There is no device supporting OpenCL\n";
|
||||
for (int i=0; i<num_devices(); ++i) {
|
||||
out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
|
||||
out << " Type of device: "
|
||||
<< device_type_name(i).c_str() << std::endl;
|
||||
out << " Double precision support: ";
|
||||
if (double_precision(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Total amount of global memory: "
|
||||
<< gigabytes(i) << " GB\n";
|
||||
out << " Number of compute units/multiprocessors: "
|
||||
<< _properties[i].compute_units << std::endl;
|
||||
//out << " Number of cores: "
|
||||
// << cores(i) << std::endl;
|
||||
out << " Total amount of constant memory: "
|
||||
<< _properties[i].const_mem << " bytes\n";
|
||||
out << " Total amount of local/shared memory per block: "
|
||||
<< _properties[i].shared_mem << " bytes\n";
|
||||
//out << " Total number of registers available per block: "
|
||||
// << _properties[i].regsPerBlock << std::endl;
|
||||
//out << " Warp size: "
|
||||
// << _properties[i].warpSize << std::endl;
|
||||
out << " Maximum group size (# of threads per block) "
|
||||
<< _properties[i].work_group_size << std::endl;
|
||||
out << " Maximum item sizes (# threads for each dim) "
|
||||
<< _properties[i].work_item_size[0] << " x "
|
||||
<< _properties[i].work_item_size[1] << " x "
|
||||
<< _properties[i].work_item_size[2] << std::endl;
|
||||
//out << " Maximum sizes of each dimension of a grid: "
|
||||
// << _properties[i].maxGridSize[0] << " x "
|
||||
// << _properties[i].maxGridSize[1] << " x "
|
||||
// << _properties[i].maxGridSize[2] << std::endl;
|
||||
//out << " Maximum memory pitch: "
|
||||
// << _properties[i].memPitch) << " bytes\n";
|
||||
//out << " Texture alignment: "
|
||||
// << _properties[i].textureAlignment << " bytes\n";
|
||||
out << " Clock rate: "
|
||||
<< clock_rate(i) << " GHz\n";
|
||||
//out << " Concurrent copy and execution: ";
|
||||
out << " ECC support: ";
|
||||
if (_properties[i].ecc_support)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Device fission into equal partitions: ";
|
||||
if (fission_equal(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Device fission by counts: ";
|
||||
if (fission_by_counts(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Device fission by affinity: ";
|
||||
if (fission_by_affinity(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Maximum subdevices from fission: "
|
||||
<< max_sub_devices(i) << std::endl;
|
||||
// --- loop through the platforms
|
||||
for (int n=0; n<_num_platforms; n++) {
|
||||
|
||||
set_platform(n);
|
||||
|
||||
out << "\nPlatform " << n << ":\n";
|
||||
|
||||
if (num_devices() == 0)
|
||||
out << "There is no device supporting OpenCL\n";
|
||||
for (int i=0; i<num_devices(); ++i) {
|
||||
out << "\nDevice " << i << ": \"" << name(i).c_str() << "\"\n";
|
||||
out << " Type of device: "
|
||||
<< device_type_name(i).c_str() << std::endl;
|
||||
out << " Double precision support: ";
|
||||
if (double_precision(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Total amount of global memory: "
|
||||
<< gigabytes(i) << " GB\n";
|
||||
out << " Number of compute units/multiprocessors: "
|
||||
<< _properties[i].compute_units << std::endl;
|
||||
//out << " Number of cores: "
|
||||
// << cores(i) << std::endl;
|
||||
out << " Total amount of constant memory: "
|
||||
<< _properties[i].const_mem << " bytes\n";
|
||||
out << " Total amount of local/shared memory per block: "
|
||||
<< _properties[i].shared_mem << " bytes\n";
|
||||
//out << " Total number of registers available per block: "
|
||||
// << _properties[i].regsPerBlock << std::endl;
|
||||
//out << " Warp size: "
|
||||
// << _properties[i].warpSize << std::endl;
|
||||
out << " Maximum group size (# of threads per block) "
|
||||
<< _properties[i].work_group_size << std::endl;
|
||||
out << " Maximum item sizes (# threads for each dim) "
|
||||
<< _properties[i].work_item_size[0] << " x "
|
||||
<< _properties[i].work_item_size[1] << " x "
|
||||
<< _properties[i].work_item_size[2] << std::endl;
|
||||
//out << " Maximum sizes of each dimension of a grid: "
|
||||
// << _properties[i].maxGridSize[0] << " x "
|
||||
// << _properties[i].maxGridSize[1] << " x "
|
||||
// << _properties[i].maxGridSize[2] << std::endl;
|
||||
//out << " Maximum memory pitch: "
|
||||
// << _properties[i].memPitch) << " bytes\n";
|
||||
//out << " Texture alignment: "
|
||||
// << _properties[i].textureAlignment << " bytes\n";
|
||||
out << " Clock rate: "
|
||||
<< clock_rate(i) << " GHz\n";
|
||||
//out << " Concurrent copy and execution: ";
|
||||
out << " ECC support: ";
|
||||
if (_properties[i].ecc_support)
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Device fission into equal partitions: ";
|
||||
if (fission_equal(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Device fission by counts: ";
|
||||
if (fission_by_counts(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Device fission by affinity: ";
|
||||
if (fission_by_affinity(i))
|
||||
out << "Yes\n";
|
||||
else
|
||||
out << "No\n";
|
||||
out << " Maximum subdevices from fission: "
|
||||
<< max_sub_devices(i) << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Select the platform that is associated with accelerators
|
||||
// if pid < 0, select the first platform
|
||||
void UCL_Device::set_platform_accelerator(int pid) {
|
||||
if (pid < 0) {
|
||||
int found = 0;
|
||||
for (int n=0; n<_num_platforms; n++) {
|
||||
set_platform(n);
|
||||
for (int i=0; i<num_devices(); i++) {
|
||||
if (_properties[i].device_type==CL_DEVICE_TYPE_CPU ||
|
||||
_properties[i].device_type==CL_DEVICE_TYPE_GPU ||
|
||||
_properties[i].device_type==CL_DEVICE_TYPE_ACCELERATOR) {
|
||||
found = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found) break;
|
||||
}
|
||||
} else {
|
||||
set_platform(pid);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace ucl_opencl
|
||||
|
||||
#endif
|
||||
|
||||
@ -57,7 +57,7 @@ int BornCoulLongT::init(const int ntypes, double **host_cutsq, double **host_rho
|
||||
const double g_ewald) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,born_coul_long,"k_born_long");
|
||||
_screen,born_coul_long,"k_born_coul_long");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -29,7 +29,7 @@ texture<int2> q_tex;
|
||||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
__kernel void k_born_long(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_born_coul_long(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const int lj_types,
|
||||
@ -110,7 +110,7 @@ __kernel void k_born_long(const __global numtyp4 *restrict x_,
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else forcecoul = (numtyp)0.0;
|
||||
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
@ -127,7 +127,7 @@ __kernel void k_born_long(const __global numtyp4 *restrict x_,
|
||||
if (eflag>0) {
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < coeff1[mtype].w) {
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||
+ coeff2[mtype].z*r2inv*r6inv;
|
||||
energy+=factor_lj*(e-coeff2[mtype].w);
|
||||
@ -149,7 +149,7 @@ __kernel void k_born_long(const __global numtyp4 *restrict x_,
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_born_long_fast(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_born_coul_long_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
@ -232,7 +232,7 @@ __kernel void k_born_long_fast(const __global numtyp4 *restrict x_,
|
||||
forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
|
||||
} else forcecoul = (numtyp)0.0;
|
||||
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
|
||||
numtyp r = ucl_sqrt(rsq);
|
||||
rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
|
||||
r6inv = r2inv*r2inv*r2inv;
|
||||
@ -249,7 +249,7 @@ __kernel void k_born_long_fast(const __global numtyp4 *restrict x_,
|
||||
if (eflag>0) {
|
||||
if (rsq < cut_coulsq)
|
||||
e_coul += prefactor*(_erfc-factor_coul);
|
||||
if (rsq < coeff1[mtype].w) {
|
||||
if (rsq < cutsq_sigma[mtype].y) {
|
||||
numtyp e=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
|
||||
+ coeff2[mtype].z*r2inv*r6inv;
|
||||
energy+=factor_lj*(e-coeff2[mtype].w);
|
||||
|
||||
@ -78,7 +78,7 @@ class BornCoulLong : public BaseCharge<numtyp, acctyp> {
|
||||
|
||||
numtyp _cut_coulsq, _qqrd2e, _g_ewald;
|
||||
|
||||
private:
|
||||
protected:
|
||||
bool _allocated;
|
||||
void loop(const bool _eflag, const bool _vflag);
|
||||
};
|
||||
|
||||
95
lib/gpu/lal_born_coul_long_cs.cpp
Normal file
95
lib/gpu/lal_born_coul_long_cs.cpp
Normal file
@ -0,0 +1,95 @@
|
||||
/***************************************************************************
|
||||
born_coul_long_cs.cpp
|
||||
-------------------
|
||||
Trung Dac Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the born/coul/long/cs pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
#include "born_coul_long_cs_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *born_coul_long_cs=0;
|
||||
#else
|
||||
#include "born_coul_long_cs_cubin.h"
|
||||
#endif
|
||||
|
||||
#include "lal_born_coul_long_cs.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define BornCoulLongCST BornCoulLongCS<numtyp, acctyp>
|
||||
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int BornCoulLongCST::init(const int ntypes, double **host_cutsq, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2, double **host_born3,
|
||||
double **host_a, double **host_c, double **host_d,
|
||||
double **host_sigma, double **host_offset,
|
||||
double *host_special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
double **host_cut_ljsq, const double host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e,
|
||||
const double g_ewald) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,born_coul_long_cs,"k_born_coul_long_cs");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
int lj_types=ntypes;
|
||||
this->shared_types=false;
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
lj_types=max_shared_types;
|
||||
this->shared_types=true;
|
||||
}
|
||||
this->_lj_types=lj_types;
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->coeff1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,this->coeff1,host_write,host_rhoinv,
|
||||
host_born1,host_born2,host_born3);
|
||||
|
||||
this->coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,this->coeff2,host_write,host_a,host_c,
|
||||
host_d,host_offset);
|
||||
|
||||
this->cutsq_sigma.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,this->cutsq_sigma,host_write,host_cutsq,
|
||||
host_cut_ljsq,host_sigma);
|
||||
|
||||
this->sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<4; i++) {
|
||||
host_write[i]=host_special_lj[i];
|
||||
host_write[i+4]=host_special_coul[i];
|
||||
}
|
||||
ucl_copy(this->sp_lj,host_write,8,false);
|
||||
|
||||
this->_cut_coulsq=host_cut_coulsq;
|
||||
this->_qqrd2e=qqrd2e;
|
||||
this->_g_ewald=g_ewald;
|
||||
|
||||
this->_allocated=true;
|
||||
this->_max_bytes=this->coeff1.row_bytes()+this->coeff2.row_bytes()
|
||||
+this->cutsq_sigma.row_bytes()+this->sp_lj.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template class BornCoulLongCS<PRECISION,ACC_PRECISION>;
|
||||
325
lib/gpu/lal_born_coul_long_cs.cu
Normal file
325
lib/gpu/lal_born_coul_long_cs.cu
Normal file
@ -0,0 +1,325 @@
|
||||
// **************************************************************************
|
||||
// born_coul_long_cs.cu
|
||||
// -------------------
|
||||
// Trung Dac Nguyen (Northwestern)
|
||||
//
|
||||
// Device code for acceleration of the born/coul/long/cs pair style
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin : June 2018
|
||||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
// Constants for the core/shell (cs) variant of the long-range Coulomb term.
// CS_EWALD_P and B0..B5 are the coefficients of the polynomial used by the
// kernels in this file to approximate erfc().
// Note: CS_EWALD_P is different from EWALD_P in lal_preprocessor.h.
// All values are cast to acctyp; acctyp is needed for these parameters.
#define CS_EWALD_P (acctyp)9.95473818e-1
#define B0         (acctyp)-0.1335096380159268
#define B1         (acctyp)-2.57839507e-1
#define B2         (acctyp)-1.37203639e-1
#define B3         (acctyp)-8.88822059e-3
#define B4         (acctyp)-5.80844129e-3
#define B5         (acctyp)1.14652755e-1

// EPSILON is added to rsq so that r = 0 does not divide by zero.
// EPS_EWALD is added to r, and EPS_EWALD_SQR to rsq, for pairs whose
// Coulomb special factor is below 1.
#define EPSILON       (acctyp)(1.0e-20)
#define EPS_EWALD     (acctyp)(1.0e-6)
#define EPS_EWALD_SQR (acctyp)(1.0e-12)
|
||||
|
||||
// born/coul/long/cs pair kernel, general version: the per-type-pair tables
// (coeff1, coeff2, cutsq_sigma) are read from global memory and indexed by
// itype*lj_types+jtype.  For each neighbor inside the pair cutoff it
// accumulates the force on atom i and, when requested, the Born energy
// (eflag), the Coulomb energy (eflag) and the virial (vflag); the totals are
// handed to store_answers_q().
__kernel void k_born_coul_long_cs(const __global numtyp4 *restrict x_,
                          const __global numtyp4 *restrict coeff1,
                          const __global numtyp4 *restrict coeff2,
                          const int lj_types,
                          const __global numtyp *restrict sp_lj_in,
                          const __global int *dev_nbor,
                          const __global int *dev_packed,
                          __global acctyp4 *restrict ans,
                          __global acctyp *restrict engv,
                          const int eflag, const int vflag, const int inum,
                          const int nbor_pitch,
                          const __global numtyp *restrict q_,
                          const __global numtyp4 *restrict cutsq_sigma,
                          const numtyp cut_coulsq, const numtyp qqrd2e,
                          const numtyp g_ewald, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Local copy of the special factors: factor_lj is looked up at
  // sbmask(j), factor_coul at sbmask(j)+4.
  __local numtyp sp_lj[8];
  for (int s=0; s<8; s++)
    sp_lj[s]=sp_lj_in[s];

  // Per-thread accumulators
  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp virial[6];
  for (int v=0; v<6; v++)
    virial[v]=(acctyp)0;

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    int itype=ix.w;

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // The special-bond bits are read from j before they are masked away
      const numtyp factor_lj = sp_lj[sbmask(j)];
      const numtyp factor_coul = sp_lj[sbmask(j)+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int jtype=jx.w;
      int mtype=itype*lj_types+jtype;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      // Skip pairs that are not inside the overall cutoff (cutsq)
      if (!(rsq<cutsq_sigma[mtype].x))
        continue;

      rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
      numtyp r2inv = ucl_recip(rsq);

      // ---- Coulomb part ----
      numtyp prefactor, erfc_approx;
      numtyp force_coul = (numtyp)0.0;
      if (rsq < cut_coulsq) {
        numtyp r = ucl_sqrt(rsq);
        fetch(prefactor,j,q_tex);
        prefactor *= qqrd2e * qtmp;
        if (factor_coul<(numtyp)1.0) {
          // Scaled (special) pair: EPS_EWALD is added to r in the erfc
          // argument and in the prefactor
          numtyp grij = g_ewald * (r+EPS_EWALD);
          numtyp expm2 = ucl_exp(-grij*grij);
          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          erfc_approx = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= (r+EPS_EWALD);
          force_coul = prefactor * (erfc_approx + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
          // Additionally r2inv needs to be accordingly modified since the later
          // scaling of the overall force shall be consistent
          r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
        } else {
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          erfc_approx = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= r;
          force_coul = prefactor*(erfc_approx + EWALD_F*grij*expm2);
        }
      }

      // ---- Born part ----
      numtyp rexp, r6inv;
      numtyp force_born = (numtyp)0.0;
      if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
        numtyp r = ucl_sqrt(rsq);
        rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
        r6inv = r2inv*r2inv*r2inv;
        force_born = (coeff1[mtype].y*r*rexp - coeff1[mtype].z*r6inv
                   + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
      }

      numtyp force = (force_coul + force_born) * r2inv;

      f.x+=delx*force;
      f.y+=dely*force;
      f.z+=delz*force;

      if (eflag>0) {
        if (rsq < cut_coulsq) {
          numtyp ec_pair = prefactor*erfc_approx;
          if (factor_coul<(numtyp)1.0)
            ec_pair -= ((numtyp)1.0-factor_coul)*prefactor;
          e_coul += ec_pair;
        }
        if (rsq < cutsq_sigma[mtype].y) {
          numtyp eb_pair=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
            + coeff2[mtype].z*r2inv*r6inv;
          energy+=factor_lj*(eb_pair-coeff2[mtype].w);
        }
      }
      if (vflag>0) {
        virial[0] += delx*delx*force;
        virial[1] += dely*dely*force;
        virial[2] += delz*delz*force;
        virial[3] += delx*dely*force;
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
// born/coul/long/cs pair kernel, "fast" version: coeff1 (and coeff2 when
// eflag>0) are first staged into __local arrays of
// MAX_SHARED_TYPES*MAX_SHARED_TYPES entries, and the type-pair index is
// MAX_SHARED_TYPES*itype + jtype.  cutsq_sigma is still read through its
// __global pointer.  The per-pair arithmetic matches k_born_coul_long_cs.
__kernel void k_born_coul_long_cs_fast(const __global numtyp4 *restrict x_,
                               const __global numtyp4 *restrict coeff1_in,
                               const __global numtyp4 *restrict coeff2_in,
                               const __global numtyp *restrict sp_lj_in,
                               const __global int *dev_nbor,
                               const __global int *dev_packed,
                               __global acctyp4 *restrict ans,
                               __global acctyp *restrict engv,
                               const int eflag, const int vflag, const int inum,
                               const int nbor_pitch,
                               const __global numtyp *restrict q_,
                               const __global numtyp4 *restrict cutsq_sigma,
                               const numtyp cut_coulsq, const numtyp qqrd2e,
                               const numtyp g_ewald, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Cooperative load of the constant tables: thread tid copies entry tid
  __local numtyp4 coeff1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp sp_lj[8];
  if (tid<8) {
    sp_lj[tid]=sp_lj_in[tid];
  }
  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    coeff1[tid]=coeff1_in[tid];
    // coeff2 is only needed for the energy
    if (eflag>0) {
      coeff2[tid]=coeff2_in[tid];
    }
  }

  // Per-thread accumulators
  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp virial[6];
  for (int v=0; v<6; v++)
    virial[v]=(acctyp)0;

  // The staged tables must be complete before any thread reads them
  __syncthreads();

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    int iw=ix.w;
    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // The special-bond bits are read from j before they are masked away
      const numtyp factor_lj = sp_lj[sbmask(j)];
      const numtyp factor_coul = sp_lj[sbmask(j)+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int mtype=itype+jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      // Skip pairs that are not inside the overall cutoff (cutsq)
      if (!(rsq<cutsq_sigma[mtype].x))
        continue;

      rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
      numtyp r2inv = ucl_recip(rsq);

      // ---- Coulomb part ----
      numtyp prefactor, erfc_approx;
      numtyp force_coul = (numtyp)0.0;
      if (rsq < cut_coulsq) {
        numtyp r = ucl_sqrt(rsq);
        fetch(prefactor,j,q_tex);
        prefactor *= qqrd2e * qtmp;
        if (factor_coul<(numtyp)1.0) {
          // Scaled (special) pair: EPS_EWALD is added to r in the erfc
          // argument and in the prefactor
          numtyp grij = g_ewald * (r+EPS_EWALD);
          numtyp expm2 = ucl_exp(-grij*grij);
          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          erfc_approx = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= (r+EPS_EWALD);
          force_coul = prefactor * (erfc_approx + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
          // Additionally r2inv needs to be accordingly modified since the later
          // scaling of the overall force shall be consistent
          r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
        } else {
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
          numtyp u = (numtyp)1.0 - t;
          erfc_approx = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
          prefactor /= r;
          force_coul = prefactor*(erfc_approx + EWALD_F*grij*expm2);
        }
      }

      // ---- Born part ----
      numtyp rexp, r6inv;
      numtyp force_born = (numtyp)0.0;
      if (rsq < cutsq_sigma[mtype].y) { // cut_ljsq
        numtyp r = ucl_sqrt(rsq);
        rexp = ucl_exp((cutsq_sigma[mtype].z-r)*coeff1[mtype].x);
        r6inv = r2inv*r2inv*r2inv;
        force_born = (coeff1[mtype].y*r*rexp - coeff1[mtype].z*r6inv
                   + coeff1[mtype].w*r2inv*r6inv)*factor_lj;
      }

      numtyp force = (force_coul + force_born) * r2inv;

      f.x+=delx*force;
      f.y+=dely*force;
      f.z+=delz*force;

      if (eflag>0) {
        if (rsq < cut_coulsq) {
          numtyp ec_pair = prefactor*erfc_approx;
          if (factor_coul<(numtyp)1.0)
            ec_pair -= ((numtyp)1.0-factor_coul)*prefactor;
          e_coul += ec_pair;
        }
        if (rsq < cutsq_sigma[mtype].y) {
          numtyp eb_pair=coeff2[mtype].x*rexp - coeff2[mtype].y*r6inv
            + coeff2[mtype].z*r2inv*r6inv;
          energy+=factor_lj*(eb_pair-coeff2[mtype].w);
        }
      }
      if (vflag>0) {
        virial[0] += delx*delx*force;
        virial[1] += dely*dely*force;
        virial[2] += delz*delz*force;
        virial[3] += delx*dely*force;
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
53
lib/gpu/lal_born_coul_long_cs.h
Normal file
53
lib/gpu/lal_born_coul_long_cs.h
Normal file
@ -0,0 +1,53 @@
|
||||
/***************************************************************************
|
||||
born_coul_long_cs.h
|
||||
-------------------
|
||||
Trung Dac Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the born/coul/long/cs pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_BORN_COUL_LONG_CS_H
|
||||
#define LAL_BORN_COUL_LONG_CS_H
|
||||
|
||||
#include "lal_born_coul_long.h"
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class BornCoulLongCS : public BornCoulLong<numtyp, acctyp> {
|
||||
public:
|
||||
BornCoulLongCS() {}
|
||||
~BornCoulLongCS() {}
|
||||
|
||||
/// Clear any previous data and set up for a new LAMMPS run
|
||||
/** \param max_nbors initial number of rows in the neighbor matrix
|
||||
* \param cell_size cutoff + skin
|
||||
* \param gpu_split fraction of particles handled by device
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(const int ntypes, double **host_cutsq, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2, double **host_born3,
|
||||
double **host_a, double **host_c, double **host_d,
|
||||
double **host_sigma, double **host_offset, double *host_special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen, double **host_cut_ljsq,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
132
lib/gpu/lal_born_coul_long_cs_ext.cpp
Normal file
132
lib/gpu/lal_born_coul_long_cs_ext.cpp
Normal file
@ -0,0 +1,132 @@
|
||||
/***************************************************************************
|
||||
born_coul_long_cs_ext.cpp
|
||||
-------------------
|
||||
Trung Dac Nguyen (ORNL)
|
||||
|
||||
Functions for LAMMPS access to born/coul/long/cs acceleration routines.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
#include "lal_born_coul_long_cs.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace LAMMPS_AL;
|
||||
|
||||
static BornCoulLongCS<PRECISION,ACC_PRECISION> BCLCSMF;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
int bornclcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2, double **host_born3,
|
||||
double **host_a, double **host_c, double **host_d,
|
||||
double **sigma, double **offset, double *special_lj,
|
||||
const int inum, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size, int &gpu_mode,
|
||||
FILE *screen, double **host_cut_ljsq, double host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e,
|
||||
const double g_ewald) {
|
||||
BCLCSMF.clear();
|
||||
gpu_mode=BCLCSMF.device->gpu_mode();
|
||||
double gpu_split=BCLCSMF.device->particle_split();
|
||||
int first_gpu=BCLCSMF.device->first_device();
|
||||
int last_gpu=BCLCSMF.device->last_device();
|
||||
int world_me=BCLCSMF.device->world_me();
|
||||
int gpu_rank=BCLCSMF.device->gpu_rank();
|
||||
int procs_per_gpu=BCLCSMF.device->procs_per_gpu();
|
||||
|
||||
BCLCSMF.device->init_message(screen,"born/coul/long/cs",first_gpu,last_gpu);
|
||||
|
||||
bool message=false;
|
||||
if (BCLCSMF.device->replica_me()==0 && screen)
|
||||
message=true;
|
||||
|
||||
if (message) {
|
||||
fprintf(screen,"Initializing Device and compiling on process 0...");
|
||||
fflush(screen);
|
||||
}
|
||||
|
||||
int init_ok=0;
|
||||
if (world_me==0)
|
||||
init_ok=BCLCSMF.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
|
||||
host_born3, host_a, host_c, host_d, sigma, offset,
|
||||
special_lj, inum, nall, 300, maxspecial, cell_size,
|
||||
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
BCLCSMF.device->world_barrier();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
|
||||
for (int i=0; i<procs_per_gpu; i++) {
|
||||
if (message) {
|
||||
if (last_gpu-first_gpu==0)
|
||||
fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
|
||||
else
|
||||
fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
|
||||
last_gpu,i);
|
||||
fflush(screen);
|
||||
}
|
||||
if (gpu_rank==i && world_me!=0)
|
||||
init_ok=BCLCSMF.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
|
||||
host_born3, host_a, host_c, host_d, sigma, offset,
|
||||
special_lj, inum, nall, 300, maxspecial, cell_size,
|
||||
gpu_split, screen, host_cut_ljsq, host_cut_coulsq,
|
||||
host_special_coul, qqrd2e, g_ewald);
|
||||
|
||||
BCLCSMF.device->gpu_barrier();
|
||||
if (message)
|
||||
fprintf(screen,"Done.\n");
|
||||
}
|
||||
if (message)
|
||||
fprintf(screen,"\n");
|
||||
|
||||
if (init_ok==0)
|
||||
BCLCSMF.estimate_gpu_overhead();
|
||||
return init_ok;
|
||||
}
|
||||
|
||||
// Release the data held by the file-local born/coul/long/cs instance by
// calling its clear().
void bornclcs_gpu_clear()
{
  BCLCSMF.clear();
}
|
||||
|
||||
int** bornclcs_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
double *prd) {
|
||||
return BCLCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||
host_q, boxlo, prd);
|
||||
}
|
||||
|
||||
// Forward one compute step to the file-local instance, using the compute()
// overload that receives a host neighbor list (ilist, numj, firstneigh).
// Any value returned by that overload is discarded.
void bornclcs_gpu_compute(const int ago, const int inum_full, const int nall,
                          double **host_x, int *host_type, int *ilist,
                          int *numj, int **firstneigh,
                          const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom,
                          int &host_start, const double cpu_time,
                          bool &success, double *host_q,
                          const int nlocal, double *boxlo, double *prd)
{
  BCLCSMF.compute(ago, inum_full, nall, host_x, host_type,
                  ilist, numj, firstneigh,
                  eflag, vflag, eatom, vatom,
                  host_start, cpu_time, success,
                  host_q, nlocal, boxlo, prd);
}
|
||||
|
||||
double bornclcs_gpu_bytes() {
|
||||
return BCLCSMF.host_memory_usage();
|
||||
}
|
||||
|
||||
|
||||
@ -57,7 +57,7 @@ int BornCoulWolfT::init(const int ntypes, double **host_cutsq, double **host_rho
|
||||
const double alf, const double e_shift, const double f_shift) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,born_coul_wolf,"k_born_wolf");
|
||||
_screen,born_coul_wolf,"k_born_coul_wolf");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -31,7 +31,7 @@ texture<int2> q_tex;
|
||||
|
||||
#define MY_PIS (acctyp)1.77245385090551602729
|
||||
|
||||
__kernel void k_born_wolf(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_born_coul_wolf(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1,
|
||||
const __global numtyp4 *restrict coeff2,
|
||||
const int lj_types,
|
||||
@ -165,7 +165,7 @@ __kernel void k_born_wolf(const __global numtyp4 *restrict x_,
|
||||
} // if ii
|
||||
}
|
||||
|
||||
__kernel void k_born_wolf_fast(const __global numtyp4 *restrict x_,
|
||||
__kernel void k_born_coul_wolf_fast(const __global numtyp4 *restrict x_,
|
||||
const __global numtyp4 *restrict coeff1_in,
|
||||
const __global numtyp4 *restrict coeff2_in,
|
||||
const __global numtyp *restrict sp_lj_in,
|
||||
|
||||
@ -13,8 +13,8 @@
|
||||
email : nguyentd@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_BORN_COUL_LONG_H
|
||||
#define LAL_BORN_COUL_LONG_H
|
||||
#ifndef LAL_BORN_COUL_WOLF_H
|
||||
#define LAL_BORN_COUL_WOLF_H
|
||||
|
||||
#include "lal_base_charge.h"
|
||||
|
||||
@ -79,7 +79,7 @@ class BornCoulWolf : public BaseCharge<numtyp, acctyp> {
|
||||
|
||||
numtyp _cut_coulsq,_qqrd2e,_alf,_e_shift,_f_shift;
|
||||
|
||||
private:
|
||||
protected:
|
||||
bool _allocated;
|
||||
void loop(const bool _eflag, const bool _vflag);
|
||||
};
|
||||
|
||||
97
lib/gpu/lal_born_coul_wolf_cs.cpp
Normal file
97
lib/gpu/lal_born_coul_wolf_cs.cpp
Normal file
@ -0,0 +1,97 @@
|
||||
/***************************************************************************
|
||||
born_coul_wolf_cs.cpp
|
||||
-------------------
|
||||
Trung Dac Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the born/coul/wolf/cs pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
#include "born_coul_wolf_cs_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *born_coul_wolf_cs=0;
|
||||
#else
|
||||
#include "born_coul_wolf_cs_cubin.h"
|
||||
#endif
|
||||
|
||||
#include "lal_born_coul_wolf_cs.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define BornCoulWolfCST BornCoulWolfCS<numtyp, acctyp>
|
||||
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int BornCoulWolfCST::init(const int ntypes, double **host_cutsq, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2, double **host_born3,
|
||||
double **host_a, double **host_c, double **host_d,
|
||||
double **host_sigma, double **host_offset,
|
||||
double *host_special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
double **host_cut_ljsq, const double host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e,
|
||||
const double alf, const double e_shift, const double f_shift) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,born_coul_wolf_cs,"k_born_coul_wolf_cs");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
int lj_types=ntypes;
|
||||
this->shared_types=false;
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
lj_types=max_shared_types;
|
||||
this->shared_types=true;
|
||||
}
|
||||
this->_lj_types=lj_types;
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->coeff1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,this->coeff1,host_write,host_rhoinv,
|
||||
host_born1,host_born2,host_born3);
|
||||
|
||||
this->coeff2.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,this->coeff2,host_write,host_a,host_c,
|
||||
host_d,host_offset);
|
||||
|
||||
this->cutsq_sigma.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,this->cutsq_sigma,host_write,host_cutsq,
|
||||
host_cut_ljsq,host_sigma);
|
||||
|
||||
this->sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<4; i++) {
|
||||
host_write[i]=host_special_lj[i];
|
||||
host_write[i+4]=host_special_coul[i];
|
||||
}
|
||||
ucl_copy(this->sp_lj,host_write,8,false);
|
||||
|
||||
this->_cut_coulsq=host_cut_coulsq;
|
||||
this->_qqrd2e=qqrd2e;
|
||||
this->_alf=alf;
|
||||
this->_e_shift=e_shift;
|
||||
this->_f_shift=f_shift;
|
||||
|
||||
this->_allocated=true;
|
||||
this->_max_bytes=this->coeff1.row_bytes()+this->coeff2.row_bytes()
|
||||
+this->cutsq_sigma.row_bytes()+this->sp_lj.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template class BornCoulWolfCS<PRECISION,ACC_PRECISION>;
|
||||
306
lib/gpu/lal_born_coul_wolf_cs.cu
Normal file
306
lib/gpu/lal_born_coul_wolf_cs.cu
Normal file
@ -0,0 +1,306 @@
|
||||
// **************************************************************************
|
||||
// born_coul_wolf_cs.cu
|
||||
// -------------------
|
||||
// Trung Dac Nguyen (Northwestern)
|
||||
//
|
||||
// Device code for acceleration of the born/coul/wolf/cs pair style
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
// EPSILON is added to rsq so that r = 0 does not divide by zero.
#define EPSILON (acctyp)(1.0e-20)
// MY_PIS is the numeric value of sqrt(pi), cast to acctyp.
#define MY_PIS  (acctyp)1.77245385090551602729
|
||||
|
||||
// Born + shifted Wolf Coulomb pair kernel (core/shell variant), general path.
// Per-type-pair tables (coeff1, coeff2, cutsq_sigma) are read from global
// memory and indexed with itype*lj_types+jtype.
//   sp_lj_in    : 8 special-bond factors; [0..3] scale the Born term,
//                 [4..7] the Coulomb term
//   cutsq_sigma : .x is compared against rsq as the overall cutoff, .y as the
//                 Born cutoff, .z enters the Born exponential
//   ans / engv  : per-atom outputs written by store_answers_q
__kernel void k_born_coul_wolf_cs(const __global numtyp4 *restrict x_,
                          const __global numtyp4 *restrict coeff1,
                          const __global numtyp4 *restrict coeff2,
                          const int lj_types,
                          const __global numtyp *restrict sp_lj_in,
                          const __global int *dev_nbor,
                          const __global int *dev_packed,
                          __global acctyp4 *restrict ans,
                          __global acctyp *restrict engv,
                          const int eflag, const int vflag, const int inum,
                          const int nbor_pitch,
                          const __global numtyp *restrict q_,
                          const __global numtyp4 *restrict cutsq_sigma,
                          const numtyp cut_coulsq, const numtyp qqrd2e,
                          const numtyp alf, const numtyp e_shift,
                          const numtyp f_shift, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Every thread copies the same 8 special-bond factors
  __local numtyp sp_lj[8];
  for (int k=0; k<8; k++)
    sp_lj[k]=sp_lj_in[k];

  // Per-thread accumulators
  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp virial[6];
  for (int k=0; k<6; k++)
    virial[k]=(acctyp)0;

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    int itype=ix.w;

    if (eflag>0) {
      // Wolf self-energy; the division by t_per_atom is presumably there
      // because each thread working on atom i adds this term -- confirm
      acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) *
        qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
      e_coul += (acctyp)2.0*e_self;
    }

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // Special-bond bits are decoded before the index is masked
      const int sb=sbmask(j);
      const numtyp factor_lj = sp_lj[sb];
      const numtyp factor_coul = sp_lj[sb+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int jtype=jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      const int mtype=itype*lj_types+jtype;
      const numtyp4 cut=cutsq_sigma[mtype];

      // Outside the overall cutoff: nothing to accumulate for this pair
      if (!(rsq<cut.x)) continue;

      rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
      acctyp r2inv = ucl_recip(rsq);

      numtyp forcecoul,forceborn,force,prefactor,rexp;
      acctyp v_sh,r6inv;

      const int in_born = (rsq < cut.y);      // cut_ljsq
      const int in_coul = (rsq < cut_coulsq);

      if (in_born) {
        const numtyp4 c1=coeff1[mtype];
        numtyp r = ucl_sqrt(rsq);
        rexp = ucl_exp((cut.z-r)*c1.x);
        r6inv = r2inv*r2inv*r2inv;
        forceborn = (c1.y*r*rexp - c1.z*r6inv
          + c1.w*r2inv*r6inv)*factor_lj;
      } else {
        forceborn = (numtyp)0.0;
      }

      if (in_coul) {
        numtyp r = ucl_rsqrt(r2inv);
        acctyp arij = alf * r;
        acctyp erfcd = ucl_exp(-arij*arij);
        fetch(prefactor,j,q_tex);
        prefactor *= qqrd2e * qtmp/r;

        const acctyp erfcc = erfc(arij);
        v_sh = (erfcc - e_shift*r)*prefactor;
        acctyp dvdrr = (erfcc/rsq + (numtyp)2.0*alf/MY_PIS * erfcd/r) + f_shift;
        forcecoul = prefactor * dvdrr*rsq;
        if (factor_coul < (numtyp)1.0) forcecoul -= ((numtyp)1.0-factor_coul)*prefactor;
      } else {
        forcecoul = (numtyp)0.0;
      }

      force = (forceborn + forcecoul) * r2inv;

      f.x+=delx*force;
      f.y+=dely*force;
      f.z+=delz*force;

      if (eflag>0) {
        if (in_coul) {
          acctyp e=v_sh;
          if (factor_coul < (numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
          e_coul += e;
        }
        if (in_born) {
          const numtyp4 c2=coeff2[mtype];
          numtyp e=c2.x*rexp - c2.y*r6inv
            + c2.z*r2inv*r6inv;
          energy+=factor_lj*(e-c2.w);
        }
      }
      if (vflag>0) {
        virial[0] += delx*delx*force;
        virial[1] += dely*dely*force;
        virial[2] += delz*delz*force;
        virial[3] += delx*dely*force;
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
// Born + shifted Wolf Coulomb pair kernel (core/shell variant), fast path.
// The coefficient tables are first staged into __local arrays of
// MAX_SHARED_TYPES*MAX_SHARED_TYPES entries and indexed with
// MAX_SHARED_TYPES*itype+jtype; cutsq_sigma is still read from global memory.
__kernel void k_born_coul_wolf_cs_fast(const __global numtyp4 *restrict x_,
                               const __global numtyp4 *restrict coeff1_in,
                               const __global numtyp4 *restrict coeff2_in,
                               const __global numtyp *restrict sp_lj_in,
                               const __global int *dev_nbor,
                               const __global int *dev_packed,
                               __global acctyp4 *restrict ans,
                               __global acctyp *restrict engv,
                               const int eflag, const int vflag, const int inum,
                               const int nbor_pitch,
                               const __global numtyp *restrict q_,
                               const __global numtyp4 *restrict cutsq_sigma,
                               const numtyp cut_coulsq, const numtyp qqrd2e,
                               const numtyp alf, const numtyp e_shift,
                               const numtyp f_shift, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Stage the tables: one entry per thread, indexed by tid
  __local numtyp4 coeff1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp4 coeff2[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp sp_lj[8];
  if (tid<8)
    sp_lj[tid]=sp_lj_in[tid];
  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    coeff1[tid]=coeff1_in[tid];
    // coeff2 is only read in the energy section, so it is copied only then
    if (eflag>0)
      coeff2[tid]=coeff2_in[tid];
  }

  // Per-thread accumulators
  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp virial[6];
  for (int k=0; k<6; k++)
    virial[k]=(acctyp)0;

  // The staged tables must be complete before any thread reads them
  __syncthreads();

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    int iw=ix.w;
    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);

    if (eflag>0) {
      // Wolf self-energy; the division by t_per_atom is presumably there
      // because each thread working on atom i adds this term -- confirm
      acctyp e_self = -((acctyp)0.5*e_shift + alf/MY_PIS) *
        qtmp*qtmp*qqrd2e/(acctyp)t_per_atom;
      e_coul += (acctyp)2.0*e_self;
    }

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // Special-bond bits are decoded before the index is masked
      const int sb=sbmask(j);
      const numtyp factor_lj = sp_lj[sb];
      const numtyp factor_coul = sp_lj[sb+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int mtype=itype+jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      acctyp rsq = delx*delx+dely*dely+delz*delz;

      const numtyp4 cut=cutsq_sigma[mtype];

      // Outside the overall cutoff: nothing to accumulate for this pair
      if (!(rsq<cut.x)) continue;

      rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;
      acctyp r2inv = ucl_recip(rsq);

      numtyp forcecoul,forceborn,force,prefactor,rexp;
      acctyp v_sh,r6inv;

      const int in_born = (rsq < cut.y);
      const int in_coul = (rsq < cut_coulsq);

      if (in_born) {
        const numtyp4 c1=coeff1[mtype];
        r6inv = r2inv*r2inv*r2inv;
        numtyp r = ucl_sqrt(rsq);
        rexp = ucl_exp((cut.z-r)*c1.x);
        forceborn = (c1.y*r*rexp - c1.z*r6inv
          + c1.w*r2inv*r6inv)*factor_lj;
      } else {
        forceborn = (numtyp)0.0;
      }

      if (in_coul) {
        numtyp r = ucl_sqrt(rsq);
        acctyp arij = alf * r;
        acctyp erfcd = ucl_exp(-arij*arij);
        fetch(prefactor,j,q_tex);
        prefactor *= qqrd2e * qtmp/r;

        const acctyp erfcc = erfc(arij);
        v_sh = (erfcc - e_shift*r)*prefactor;
        acctyp dvdrr = (erfcc/rsq + (numtyp)2.0*alf/MY_PIS * erfcd/r) + f_shift;
        forcecoul = prefactor * dvdrr*rsq;
        if (factor_coul < (numtyp)1.0) forcecoul -= ((numtyp)1.0-factor_coul)*prefactor;
      } else {
        forcecoul = (numtyp)0.0;
      }

      force = (forceborn + forcecoul) * r2inv;

      f.x+=delx*force;
      f.y+=dely*force;
      f.z+=delz*force;

      if (eflag>0) {
        if (in_coul) {
          acctyp e=v_sh;
          if (factor_coul < (numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
          e_coul += e;
        }
        if (in_born) {
          const numtyp4 c2=coeff2[mtype];
          numtyp e=c2.x*rexp - c2.y*r6inv
            + c2.z*r2inv*r6inv;
          energy+=factor_lj*(e-c2.w);
        }
      }
      if (vflag>0) {
        virial[0] += delx*delx*force;
        virial[1] += dely*dely*force;
        virial[2] += delz*delz*force;
        virial[3] += delx*dely*force;
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
54
lib/gpu/lal_born_coul_wolf_cs.h
Normal file
54
lib/gpu/lal_born_coul_wolf_cs.h
Normal file
@ -0,0 +1,54 @@
|
||||
/***************************************************************************
|
||||
born_coul_wolf_cs.h
|
||||
-------------------
|
||||
Trung Dac Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the born/coul/wolf/cs pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_BORN_COUL_WOLF_CS_H
|
||||
#define LAL_BORN_COUL_WOLF_CS_H
|
||||
|
||||
#include "lal_born_coul_wolf.h"
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class BornCoulWolfCS : public BornCoulWolf<numtyp, acctyp> {
|
||||
public:
|
||||
BornCoulWolfCS() {}
|
||||
~BornCoulWolfCS() {}
|
||||
|
||||
/// Clear any previous data and set up for a new LAMMPS run
|
||||
/** \param max_nbors initial number of rows in the neighbor matrix
|
||||
* \param cell_size cutoff + skin
|
||||
* \param gpu_split fraction of particles handled by device
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(const int ntypes, double **host_cutsq, double **host_rhoinv,
|
||||
double **host_born1, double **host_born2, double **host_born3,
|
||||
double **host_a, double **host_c, double **host_d,
|
||||
double **host_sigma, double **host_offset, double *host_special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen, double **host_cut_ljsq,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double alf, const double e_shift,
|
||||
const double f_shift);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
134
lib/gpu/lal_born_coul_wolf_cs_ext.cpp
Normal file
134
lib/gpu/lal_born_coul_wolf_cs_ext.cpp
Normal file
@ -0,0 +1,134 @@
|
||||
/***************************************************************************
|
||||
born_coul_wolf_cs_ext.cpp
|
||||
-------------------
|
||||
Trung Dac Nguyen (Northwestern)
|
||||
|
||||
Functions for LAMMPS access to born/coul/wolf/cs acceleration routines.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
#include "lal_born_coul_wolf_cs.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace LAMMPS_AL;
|
||||
|
||||
static BornCoulWolfCS<PRECISION,ACC_PRECISION> BornCWCST;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialise the born/coul/wolf/cs accelerator object for a new run.
//
// Process 0 initialises first (this is where the device code is compiled,
// per the progress message), then the remaining processes sharing a device
// initialise one gpu_rank at a time, separated by gpu barriers.
//
// gpu_mode is an output: it is overwritten with the device's gpu_mode().
// max_nbors is forwarded to init() as the initial neighbor-matrix size
// (previously a literal 300 was passed and the argument was ignored).
// Returns the status of init() on this process (0 on success); the GPU
// overhead estimate is only run when that status is 0.
int borncwcs_gpu_init(const int ntypes, double **cutsq, double **host_rhoinv,
                      double **host_born1, double **host_born2, double **host_born3,
                      double **host_a, double **host_c, double **host_d,
                      double **sigma, double **offset, double *special_lj, const int inum,
                      const int nall, const int max_nbors, const int maxspecial,
                      const double cell_size, int &gpu_mode, FILE *screen,
                      double **host_cut_ljsq, double host_cut_coulsq,
                      double *host_special_coul, const double qqrd2e,
                      const double alf, const double e_shift, const double f_shift) {
  BornCWCST.clear();
  gpu_mode=BornCWCST.device->gpu_mode();
  double gpu_split=BornCWCST.device->particle_split();
  int first_gpu=BornCWCST.device->first_device();
  int last_gpu=BornCWCST.device->last_device();
  int world_me=BornCWCST.device->world_me();
  int gpu_rank=BornCWCST.device->gpu_rank();
  int procs_per_gpu=BornCWCST.device->procs_per_gpu();

  BornCWCST.device->init_message(screen,"born/coul/wolf/cs",first_gpu,last_gpu);

  // Only replica 0 with a valid screen prints progress messages
  bool message=false;
  if (BornCWCST.device->replica_me()==0 && screen)
    message=true;

  if (message) {
    fprintf(screen,"Initializing Device and compiling on process 0...");
    fflush(screen);
  }

  int init_ok=0;
  if (world_me==0)
    init_ok=BornCWCST.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
                           host_born3, host_a, host_c, host_d, sigma,
                           offset, special_lj, inum, nall, max_nbors,
                           maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
                           host_cut_coulsq, host_special_coul, qqrd2e,
                           alf, e_shift, f_shift);

  BornCWCST.device->world_barrier();
  if (message)
    fprintf(screen,"Done.\n");

  // Remaining processes initialise in turn, one gpu_rank per iteration
  for (int i=0; i<procs_per_gpu; i++) {
    if (message) {
      if (last_gpu-first_gpu==0)
        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
      else
        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
                last_gpu,i);
      fflush(screen);
    }
    if (gpu_rank==i && world_me!=0)
      init_ok=BornCWCST.init(ntypes, cutsq, host_rhoinv, host_born1, host_born2,
                             host_born3, host_a, host_c, host_d, sigma,
                             offset, special_lj, inum, nall, max_nbors,
                             maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
                             host_cut_coulsq, host_special_coul, qqrd2e,
                             alf, e_shift, f_shift);

    BornCWCST.device->gpu_barrier();
    if (message)
      fprintf(screen,"Done.\n");
  }
  if (message)
    fprintf(screen,"\n");

  if (init_ok==0)
    BornCWCST.estimate_gpu_overhead();
  return init_ok;
}
|
||||
|
||||
// Forward to clear() on the file-static born/coul/wolf/cs accelerator object.
void borncwcs_gpu_clear()
{
  BornCWCST.clear();
}
|
||||
|
||||
int** borncwcs_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
double *prd) {
|
||||
return BornCWCST.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||
host_q, boxlo, prd);
|
||||
}
|
||||
|
||||
// Forward to the compute() overload of the accelerator object that takes a
// caller-supplied neighbor list (ilist, numj, firstneigh).
void borncwcs_gpu_compute(const int ago, const int inum_full, const int nall,
                          double **host_x, int *host_type, int *ilist, int *numj,
                          int **firstneigh, const bool eflag, const bool vflag,
                          const bool eatom, const bool vatom, int &host_start,
                          const double cpu_time, bool &success, double *host_q,
                          const int nlocal, double *boxlo, double *prd)
{
  BornCWCST.compute(ago, inum_full, nall, host_x, host_type,
                    ilist, numj, firstneigh,
                    eflag, vflag, eatom, vatom,
                    host_start, cpu_time, success,
                    host_q, nlocal, boxlo, prd);
}
|
||||
|
||||
double borncwcs_gpu_bytes() {
|
||||
return BornCWCST.host_memory_usage();
|
||||
}
|
||||
|
||||
|
||||
@ -72,7 +72,7 @@ class CoulLong : public BaseCharge<numtyp, acctyp> {
|
||||
|
||||
numtyp _cut_coulsq, _qqrd2e, _g_ewald;
|
||||
|
||||
private:
|
||||
protected:
|
||||
bool _allocated;
|
||||
void loop(const bool _eflag, const bool _vflag);
|
||||
};
|
||||
|
||||
78
lib/gpu/lal_coul_long_cs.cpp
Normal file
78
lib/gpu/lal_coul_long_cs.cpp
Normal file
@ -0,0 +1,78 @@
|
||||
/***************************************************************************
|
||||
coul_long_cs.cpp
|
||||
-------------------
|
||||
Trung Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the coul/long/cs pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin : June 2018
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#if defined(USE_OPENCL)
|
||||
#include "coul_long_cs_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *coul_long_cs=0;
|
||||
#else
|
||||
#include "coul_long_cs_cubin.h"
|
||||
#endif
|
||||
|
||||
#include "lal_coul_long_cs.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define CoulLongCST CoulLongCS<numtyp, acctyp>
|
||||
|
||||
extern Device<PRECISION,ACC_PRECISION> pair_gpu_device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int CoulLongCST::init(const int ntypes, double **host_scale,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,
|
||||
gpu_split,_screen,coul_long_cs,"k_coul_long_cs");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
int lj_types=ntypes;
|
||||
this->shared_types=false;
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
lj_types=max_shared_types;
|
||||
this->shared_types=true;
|
||||
}
|
||||
this->_lj_types=lj_types;
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->scale.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack1(ntypes,lj_types,this->scale,host_write,host_scale);
|
||||
|
||||
this->sp_cl.alloc(4,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<4; i++) {
|
||||
host_write[i]=host_special_coul[i];
|
||||
}
|
||||
ucl_copy(this->sp_cl,host_write,4,false);
|
||||
|
||||
this->_cut_coulsq=host_cut_coulsq;
|
||||
this->_qqrd2e=qqrd2e;
|
||||
this->_g_ewald=g_ewald;
|
||||
|
||||
this->_allocated=true;
|
||||
this->_max_bytes=this->scale.row_bytes()+this->sp_cl.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template class CoulLongCS<PRECISION,ACC_PRECISION>;
|
||||
367
lib/gpu/lal_coul_long_cs.cu
Normal file
367
lib/gpu/lal_coul_long_cs.cu
Normal file
@ -0,0 +1,367 @@
|
||||
// **************************************************************************
|
||||
// coul_long_cs.cu
|
||||
// -------------------
|
||||
// Trung Nguyen (Northwestern)
|
||||
//
|
||||
// Device code for acceleration of the coul/long/cs pair style
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin : June 2018
|
||||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
// Constants for the erfc() approximation used by the coul/long/cs kernels.
// Note: CS_EWALD_P differs from the EWALD_P defined in lal_preprocessor.h.
// All of these are cast to acctyp; acctyp is needed for these parameters.
#define CS_EWALD_P ((acctyp)9.95473818e-1)
// Polynomial coefficients B0..B5 of the erfc approximation
#define B0 ((acctyp)-0.1335096380159268)
#define B1 ((acctyp)-2.57839507e-1)
#define B2 ((acctyp)-1.37203639e-1)
#define B3 ((acctyp)-8.88822059e-3)
#define B4 ((acctyp)-5.80844129e-3)
#define B5 ((acctyp)1.14652755e-1)

// EPSILON is added to rsq so that r = 0 does not divide by zero.
// EPS_EWALD is the minimal distance added to r for special-bond pairs and
// EPS_EWALD_SQR the matching offset added to rsq.
#define EPSILON ((acctyp)1.0e-20)
#define EPS_EWALD ((acctyp)1.0e-6)
#define EPS_EWALD_SQR ((acctyp)1.0e-12)
|
||||
|
||||
#if (ARCH < 300)
|
||||
|
||||
/* store_answers_lq, ARCH < 300 variant.
 * When t_per_atom > 1 the force components and e_coul (and, if vflag > 0,
 * the six virial terms) are summed across the t_per_atom threads through
 * the __local array red_acc. The thread with offset == 0 then writes the
 * results: engv receives a zero and e_coul*0.5 when eflag > 0, followed by
 * the six virial terms times 0.5 when vflag > 0, each inum entries apart;
 * ans[ii] receives f. */
#define store_answers_lq(f, e_coul, virial, ii, inum, tid,                  \
                         t_per_atom, offset, eflag, vflag, ans, engv)       \
  if (t_per_atom>1) {                                                       \
    __local acctyp red_acc[6][BLOCK_PAIR];                                  \
                                                                            \
    red_acc[0][tid]=f.x;                                                    \
    red_acc[1][tid]=f.y;                                                    \
    red_acc[2][tid]=f.z;                                                    \
    red_acc[3][tid]=e_coul;                                                 \
                                                                            \
    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
      if (offset < s) {                                                     \
        for (int c=0; c<4; c++)                                             \
          red_acc[c][tid] += red_acc[c][tid+s];                             \
      }                                                                     \
    }                                                                       \
                                                                            \
    f.x=red_acc[0][tid];                                                    \
    f.y=red_acc[1][tid];                                                    \
    f.z=red_acc[2][tid];                                                    \
    e_coul=red_acc[3][tid];                                                 \
                                                                            \
    if (vflag>0) {                                                          \
      for (int c=0; c<6; c++)                                               \
        red_acc[c][tid]=virial[c];                                          \
                                                                            \
      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
        if (offset < s) {                                                   \
          for (int c=0; c<6; c++)                                           \
            red_acc[c][tid] += red_acc[c][tid+s];                           \
        }                                                                   \
      }                                                                     \
                                                                            \
      for (int c=0; c<6; c++)                                               \
        virial[c]=red_acc[c][tid];                                          \
    }                                                                       \
  }                                                                         \
                                                                            \
  if (offset==0) {                                                          \
    __global acctyp *ap1=engv+ii;                                           \
    if (eflag>0) {                                                          \
      *ap1=(acctyp)0;                                                       \
      ap1+=inum;                                                            \
      *ap1=e_coul*(acctyp)0.5;                                              \
      ap1+=inum;                                                            \
    }                                                                       \
    if (vflag>0) {                                                          \
      for (int c=0; c<6; c++) {                                             \
        *ap1=virial[c]*(acctyp)0.5;                                         \
        ap1+=inum;                                                          \
      }                                                                     \
    }                                                                       \
    ans[ii]=f;                                                              \
  }
|
||||
|
||||
#else
|
||||
|
||||
/* store_answers_lq, ARCH >= 300 variant.
 * Same outputs as the ARCH < 300 variant, but the sums across the
 * t_per_atom threads are formed with shfl_xor instead of a __local array.
 * The thread with offset == 0 writes engv (a zero and e_coul*0.5 when
 * eflag > 0, then the six virial terms times 0.5 when vflag > 0, each inum
 * entries apart) and ans[ii]. */
#define store_answers_lq(f, e_coul, virial, ii, inum, tid,                  \
                         t_per_atom, offset, eflag, vflag, ans, engv)       \
  if (t_per_atom>1) {                                                       \
    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
      f.x += shfl_xor(f.x, s, t_per_atom);                                  \
      f.y += shfl_xor(f.y, s, t_per_atom);                                  \
      f.z += shfl_xor(f.z, s, t_per_atom);                                  \
      e_coul += shfl_xor(e_coul, s, t_per_atom);                            \
    }                                                                       \
    if (vflag>0) {                                                          \
      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
        for (int c=0; c<6; c++)                                             \
          virial[c] += shfl_xor(virial[c], s, t_per_atom);                  \
      }                                                                     \
    }                                                                       \
  }                                                                         \
  if (offset==0) {                                                          \
    __global acctyp *ap1=engv+ii;                                           \
    if (eflag>0) {                                                          \
      *ap1=(acctyp)0;                                                       \
      ap1+=inum;                                                            \
      *ap1=e_coul*(acctyp)0.5;                                              \
      ap1+=inum;                                                            \
    }                                                                       \
    if (vflag>0) {                                                          \
      for (int c=0; c<6; c++) {                                             \
        *ap1=virial[c]*(acctyp)0.5;                                         \
        ap1+=inum;                                                          \
      }                                                                     \
    }                                                                       \
    ans[ii]=f;                                                              \
  }
|
||||
|
||||
#endif
|
||||
|
||||
// Real-space long-range Coulomb pair kernel (core/shell variant), general
// path. The per-type-pair scale table is read from global memory and indexed
// with itype*lj_types+jtype.
// For pairs with a special-bond factor below 1, EPS_EWALD is added to r and
// EPS_EWALD_SQR to rsq before the erfc approximation and the final scaling.
__kernel void k_coul_long_cs(const __global numtyp4 *restrict x_,
                          const __global numtyp *restrict scale,
                          const int lj_types,
                          const __global numtyp *restrict sp_cl_in,
                          const __global int *dev_nbor,
                          const __global int *dev_packed,
                          __global acctyp4 *restrict ans,
                          __global acctyp *restrict engv,
                          const int eflag, const int vflag, const int inum,
                          const int nbor_pitch,
                          const __global numtyp *restrict q_,
                          const numtyp cut_coulsq, const numtyp qqrd2e,
                          const numtyp g_ewald, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Every thread copies the same 4 special-bond Coulomb factors
  __local numtyp sp_cl[4];
  for (int k=0; k<4; k++)
    sp_cl[k]=sp_cl_in[k];

  // Per-thread accumulators
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp virial[6];
  for (int k=0; k<6; k++)
    virial[k]=(acctyp)0;

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    int itype=ix.w;
    numtyp qtmp; fetch(qtmp,i,q_tex);

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // Special-bond bits are decoded before the index is masked
      const numtyp factor_coul = sp_cl[sbmask(j)];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int jtype=jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      // Outside the Coulomb cutoff: nothing to accumulate for this pair
      if (!(rsq < cut_coulsq)) continue;

      const int mtype=itype*lj_types+jtype;
      rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;

      numtyp force,prefactor,_erfc;
      numtyp r2inv = ucl_recip(rsq);
      numtyp r = ucl_rsqrt(r2inv);
      fetch(prefactor,j,q_tex);
      prefactor *= qqrd2e * scale[mtype] * qtmp;

      if (factor_coul<(numtyp)1.0) {
        numtyp grij = g_ewald * (r+EPS_EWALD);
        numtyp expm2 = ucl_exp(-grij*grij);
        acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
        numtyp u = (numtyp)1.0 - t;
        _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
        prefactor /= (r+EPS_EWALD);
        force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
        // Additionally r2inv needs to be accordingly modified since the later
        // scaling of the overall force shall be consistent
        r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
      } else {
        numtyp grij = g_ewald * r;
        numtyp expm2 = ucl_exp(-grij*grij);
        acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
        numtyp u = (numtyp)1.0 - t;
        _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
        prefactor /= r;
        force = prefactor*(_erfc + EWALD_F*grij*expm2);
      }

      // Both branches scale by their own r2inv
      force *= r2inv;

      f.x+=delx*force;
      f.y+=dely*force;
      f.z+=delz*force;

      if (eflag>0) {
        numtyp e = prefactor*_erfc;
        if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
        e_coul += e;
      }
      if (vflag>0) {
        virial[0] += delx*delx*force;
        virial[1] += dely*dely*force;
        virial[2] += delz*delz*force;
        virial[3] += delx*dely*force;
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
    } // for nbor
    store_answers_lq(f,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                     vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
// Real-space long-range Coulomb pair kernel (core/shell variant), fast path.
// The scale table is first staged into a __local array of
// MAX_SHARED_TYPES*MAX_SHARED_TYPES entries and indexed with
// MAX_SHARED_TYPES*itype+jtype.
__kernel void k_coul_long_cs_fast(const __global numtyp4 *restrict x_,
                               const __global numtyp *restrict scale_in,
                               const __global numtyp *restrict sp_cl_in,
                               const __global int *dev_nbor,
                               const __global int *dev_packed,
                               __global acctyp4 *restrict ans,
                               __global acctyp *restrict engv,
                               const int eflag, const int vflag, const int inum,
                               const int nbor_pitch,
                               const __global numtyp *restrict q_,
                               const numtyp cut_coulsq, const numtyp qqrd2e,
                               const numtyp g_ewald, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Stage the tables: one entry per thread, indexed by tid
  __local numtyp scale[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp sp_cl[4];
  if (tid<4)
    sp_cl[tid]=sp_cl_in[tid];
  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES)
    scale[tid]=scale_in[tid];

  // Per-thread accumulators
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp virial[6];
  for (int k=0; k<6; k++)
    virial[k]=(acctyp)0;

  // The staged tables must be complete before any thread reads them
  __syncthreads();

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    int iw=ix.w;
    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // Special-bond bits are decoded before the index is masked
      const numtyp factor_coul = sp_cl[sbmask(j)];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int mtype=itype+jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      // Outside the Coulomb cutoff: nothing to accumulate for this pair
      if (!(rsq < cut_coulsq)) continue;

      rsq += EPSILON; // Add Epsilon for case: r = 0; Interaction must be removed by special bond;

      numtyp force,prefactor,_erfc;
      numtyp r2inv = ucl_recip(rsq);
      numtyp r = ucl_rsqrt(r2inv);
      fetch(prefactor,j,q_tex);
      prefactor *= qqrd2e * scale[mtype] * qtmp;

      if (factor_coul<(numtyp)1.0) {
        numtyp grij = g_ewald * (r+EPS_EWALD);
        numtyp expm2 = ucl_exp(-grij*grij);
        acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
        numtyp u = (numtyp)1.0 - t;
        _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
        prefactor /= (r+EPS_EWALD);
        force = prefactor * (_erfc + EWALD_F*grij*expm2 - ((numtyp)1.0-factor_coul));
        // Additionally r2inv needs to be accordingly modified since the later
        // scaling of the overall force shall be consistent
        r2inv = ucl_recip(rsq + EPS_EWALD_SQR);
      } else {
        numtyp grij = g_ewald * r;
        numtyp expm2 = ucl_exp(-grij*grij);
        acctyp t = ucl_recip((numtyp)1.0 + CS_EWALD_P*grij);
        numtyp u = (numtyp)1.0 - t;
        _erfc = t * ((numtyp)1.0 + u*(B0+u*(B1+u*(B2+u*(B3+u*(B4+u*B5)))))) * expm2;
        prefactor /= r;
        force = prefactor * (_erfc + EWALD_F*grij*expm2);
      }

      // Both branches scale by their own r2inv
      force *= r2inv;

      f.x+=delx*force;
      f.y+=dely*force;
      f.z+=delz*force;

      if (eflag>0) {
        numtyp e = prefactor*_erfc;
        if (factor_coul<(numtyp)1.0) e -= ((numtyp)1.0-factor_coul)*prefactor;
        e_coul += e;
      }
      if (vflag>0) {
        virial[0] += delx*delx*force;
        virial[1] += dely*dely*force;
        virial[2] += delz*delz*force;
        virial[3] += delx*dely*force;
        virial[4] += delx*delz*force;
        virial[5] += dely*delz*force;
      }
    } // for nbor
    store_answers_lq(f,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                     vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
50
lib/gpu/lal_coul_long_cs.h
Normal file
50
lib/gpu/lal_coul_long_cs.h
Normal file
@ -0,0 +1,50 @@
|
||||
/***************************************************************************
|
||||
coul_long_cs.h
|
||||
-------------------
|
||||
Trung Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the coul/long/cs pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin : June 2018
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_COUL_LONG_CS_H
|
||||
#define LAL_COUL_LONG_CS_H
|
||||
|
||||
#include "lal_coul_long.h"
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class CoulLongCS : public CoulLong<numtyp, acctyp> {
|
||||
public:
|
||||
CoulLongCS() {}
|
||||
~CoulLongCS() {}
|
||||
|
||||
/// Clear any previous data and set up for a new LAMMPS run
|
||||
/** \param max_nbors initial number of rows in the neighbor matrix
|
||||
* \param cell_size cutoff + skin
|
||||
* \param gpu_split fraction of particles handled by device
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(const int ntypes, double **scale,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
145
lib/gpu/lal_coul_long_cs_ext.cpp
Normal file
145
lib/gpu/lal_coul_long_cs_ext.cpp
Normal file
@ -0,0 +1,145 @@
|
||||
/***************************************************************************
|
||||
coul_long_cs_ext.cpp
|
||||
-------------------
|
||||
Trung Nguyen (Northwestern)
|
||||
|
||||
Functions for LAMMPS access to coul/long/cs acceleration routines.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin : June 2018
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
#include "lal_coul_long_cs.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace LAMMPS_AL;
|
||||
|
||||
static CoulLongCS<PRECISION,ACC_PRECISION> CLCSMF;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialize the coul/long/cs accelerator on this MPI rank.
//
// World rank 0 runs CLCSMF.init() first while every other rank waits at the
// world barrier; afterwards the ranks sharing a device call init() one at a
// time, ordered by gpu_rank and separated by the per-device barrier.
//
//   max_nbors  initial number of rows in the neighbor matrix; forwarded to
//              CLCSMF.init() (previously ignored in favor of a literal 300)
//   gpu_mode   output: set from the device's gpu_mode()
//   screen     optional stream for progress messages (may be NULL)
//
// Returns the status of CLCSMF.init() on this rank (0 on success). The GPU
// overhead estimate is only run when initialization succeeded.
int clcs_gpu_init(const int ntypes, double **host_scale,
                  const int inum, const int nall, const int max_nbors,
                  const int maxspecial, const double cell_size, int &gpu_mode,
                  FILE *screen, double host_cut_coulsq, double *host_special_coul,
                  const double qqrd2e, const double g_ewald) {
  CLCSMF.clear();
  gpu_mode=CLCSMF.device->gpu_mode();
  double gpu_split=CLCSMF.device->particle_split();
  int first_gpu=CLCSMF.device->first_device();
  int last_gpu=CLCSMF.device->last_device();
  int world_me=CLCSMF.device->world_me();
  int gpu_rank=CLCSMF.device->gpu_rank();
  int procs_per_gpu=CLCSMF.device->procs_per_gpu();

  CLCSMF.device->init_message(screen,"coul/long/cs",first_gpu,last_gpu);

  // Only the first process of a replica with a valid stream prints progress
  bool message=false;
  if (CLCSMF.device->replica_me()==0 && screen)
    message=true;

  if (message) {
    fprintf(screen,"Initializing Device and compiling on process 0...");
    fflush(screen);
  }

  int init_ok=0;
  if (world_me==0)
    init_ok=CLCSMF.init(ntypes, host_scale, inum, nall, max_nbors, maxspecial,
                        cell_size, gpu_split, screen, host_cut_coulsq,
                        host_special_coul, qqrd2e, g_ewald);

  CLCSMF.device->world_barrier();
  if (message)
    fprintf(screen,"Done.\n");

  // Remaining ranks initialize in turn, one gpu_rank per iteration
  for (int i=0; i<procs_per_gpu; i++) {
    if (message) {
      if (last_gpu-first_gpu==0)
        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
      else
        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
                last_gpu,i);
      fflush(screen);
    }
    if (gpu_rank==i && world_me!=0)
      init_ok=CLCSMF.init(ntypes, host_scale, inum, nall, max_nbors, maxspecial,
                          cell_size, gpu_split, screen, host_cut_coulsq,
                          host_special_coul, qqrd2e, g_ewald);

    CLCSMF.device->gpu_barrier();
    if (message)
      fprintf(screen,"Done.\n");
  }
  if (message)
    fprintf(screen,"\n");

  if (init_ok==0)
    CLCSMF.estimate_gpu_overhead();
  return init_ok;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
// Push updated scale coefficients to the device. Uses the same ordering as
// initialization: world rank 0 first, then the remaining ranks one gpu_rank
// at a time, separated by the per-device barrier.
void clcs_gpu_reinit(const int ntypes, double **host_scale) {
  const int my_world_rank=CLCSMF.device->world_me();
  const int my_gpu_rank=CLCSMF.device->gpu_rank();
  const int ranks_per_gpu=CLCSMF.device->procs_per_gpu();
  const bool is_world_root=(my_world_rank==0);

  if (is_world_root)
    CLCSMF.reinit(ntypes, host_scale);

  CLCSMF.device->world_barrier();

  for (int turn=0; turn<ranks_per_gpu; ++turn) {
    const bool my_turn=(my_gpu_rank==turn) && !is_world_root;
    if (my_turn)
      CLCSMF.reinit(ntypes, host_scale);

    CLCSMF.device->gpu_barrier();
  }
}
|
||||
|
||||
// Forward to clear() on the file-static coul/long/cs accelerator instance.
void clcs_gpu_clear()
{
  CLCSMF.clear();
}
|
||||
|
||||
int** clcs_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
double *prd) {
|
||||
return CLCSMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||
host_q, boxlo, prd);
|
||||
}
|
||||
|
||||
// Thin wrapper: forwards every argument to the CLCSMF.compute() overload that
// takes a caller-supplied neighbor list (ilist / numj / firstneigh).
void clcs_gpu_compute(const int ago, const int inum_full, const int nall,
                      double **host_x, int *host_type, int *ilist, int *numj,
                      int **firstneigh,
                      const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom,
                      int &host_start, const double cpu_time, bool &success,
                      double *host_q, const int nlocal,
                      double *boxlo, double *prd) {
  CLCSMF.compute(ago, inum_full, nall, host_x, host_type, ilist, numj,
                 firstneigh, eflag, vflag, eatom, vatom, host_start, cpu_time,
                 success, host_q, nlocal, boxlo, prd);
}
|
||||
|
||||
double clcs_gpu_bytes() {
|
||||
return CLCSMF.host_memory_usage();
|
||||
}
|
||||
|
||||
|
||||
@ -80,7 +80,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int first_gpu,
|
||||
char node_name[MPI_MAX_PROCESSOR_NAME];
|
||||
char *node_names = new char[MPI_MAX_PROCESSOR_NAME*_world_size];
|
||||
MPI_Get_processor_name(node_name,&name_length);
|
||||
MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names,
|
||||
MPI_Allgather(&node_name,MPI_MAX_PROCESSOR_NAME,MPI_CHAR,&node_names[0],
|
||||
MPI_MAX_PROCESSOR_NAME,MPI_CHAR,_comm_world);
|
||||
std::string node_string=std::string(node_name);
|
||||
|
||||
|
||||
173
lib/gpu/lal_dipole_long_lj.cpp
Normal file
173
lib/gpu/lal_dipole_long_lj.cpp
Normal file
@ -0,0 +1,173 @@
|
||||
/***************************************************************************
|
||||
dipole_long_lj.cpp
|
||||
-------------------
|
||||
Trung Dac Nguyen (ORNL)
|
||||
|
||||
Class for acceleration of the lj/cut/dipole/long pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : nguyentd@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#ifdef USE_OPENCL
|
||||
#include "dipole_long_lj_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *dipole_long_lj=0;
|
||||
#else
|
||||
#include "dipole_long_lj_cubin.h"
|
||||
#endif
|
||||
|
||||
#include "lal_dipole_long_lj.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define DipoleLongLJT DipoleLongLJ<numtyp, acctyp>
|
||||
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
DipoleLongLJT::DipoleLongLJ() : BaseDipole<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
DipoleLongLJT::~DipoleLongLJ() {
|
||||
clear();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int DipoleLongLJT::bytes_per_atom(const int max_nbors) const {
|
||||
return this->bytes_per_atom_atomic(max_nbors);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int DipoleLongLJT::init(const int ntypes,
|
||||
double **host_cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset,
|
||||
double *host_special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
double **host_cut_ljsq, const double host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e,
|
||||
const double g_ewald) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,dipole_long_lj,"k_dipole_long_lj");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
int lj_types=ntypes;
|
||||
shared_types=false;
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
lj_types=max_shared_types;
|
||||
shared_types=true;
|
||||
}
|
||||
_lj_types=lj_types;
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
|
||||
host_cut_ljsq);
|
||||
|
||||
lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
|
||||
host_offset);
|
||||
|
||||
cutsq.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack1(ntypes,lj_types,cutsq,host_write,host_cutsq);
|
||||
|
||||
sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<4; i++) {
|
||||
host_write[i]=host_special_lj[i];
|
||||
host_write[i+4]=host_special_coul[i];
|
||||
}
|
||||
ucl_copy(sp_lj,host_write,8,false);
|
||||
|
||||
_cut_coulsq=host_cut_coulsq;
|
||||
_qqrd2e=qqrd2e;
|
||||
_g_ewald=g_ewald;
|
||||
|
||||
_allocated=true;
|
||||
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+cutsq.row_bytes()+
|
||||
sp_lj.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void DipoleLongLJT::clear() {
|
||||
if (!_allocated)
|
||||
return;
|
||||
_allocated=false;
|
||||
|
||||
lj1.clear();
|
||||
lj3.clear();
|
||||
cutsq.clear();
|
||||
sp_lj.clear();
|
||||
this->clear_atomic();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double DipoleLongLJT::host_memory_usage() const {
|
||||
return this->host_memory_usage_atomic()+sizeof(DipoleLongLJ<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Calculate energies, forces, and torques
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
void DipoleLongLJT::loop(const bool _eflag, const bool _vflag) {
|
||||
// Compute the block size and grid size to keep all cores busy
|
||||
const int BX=this->block_size();
|
||||
int eflag, vflag;
|
||||
if (_eflag)
|
||||
eflag=1;
|
||||
else
|
||||
eflag=0;
|
||||
|
||||
if (_vflag)
|
||||
vflag=1;
|
||||
else
|
||||
vflag=0;
|
||||
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
this->k_pair_fast.set_size(GX,BX);
|
||||
this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &sp_lj,
|
||||
&this->nbor->dev_nbor,
|
||||
&this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag, &vflag,
|
||||
&ainum, &nbor_pitch, &this->atom->q,
|
||||
&this->atom->quat, &cutsq, &_cut_coulsq,
|
||||
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->x, &lj1, &lj3,
|
||||
&_lj_types, &sp_lj, &this->nbor->dev_nbor,
|
||||
&this->_nbor_data->begin(), &this->ans->force,
|
||||
&this->ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->atom->q,
|
||||
&this->atom->quat, &cutsq, &_cut_coulsq,
|
||||
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
||||
template class DipoleLongLJ<PRECISION,ACC_PRECISION>;
|
||||
640
lib/gpu/lal_dipole_long_lj.cu
Normal file
640
lib/gpu/lal_dipole_long_lj.cu
Normal file
@ -0,0 +1,640 @@
|
||||
// **************************************************************************
|
||||
// dipole_long_lj.cu
|
||||
// -------------------
|
||||
// Trung Dac Nguyen (ORNL)
|
||||
//
|
||||
// Device code for acceleration of the lj/cut/dipole/long pair style
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : nguyentd@ornl.gov
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
texture<float4> mu_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
texture<int4,1> mu_tex;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#define mu_tex mu_
|
||||
#endif
|
||||
|
||||
#if (ARCH < 300)
|
||||
|
||||
// store_answers_tq, local-memory variant (ARCH < 300).
//
// Sums the per-thread partial force, torque, energies and virial over the
// t_per_atom threads assigned to one atom using a __local scratch array, then
// lets the thread with offset==0 write the totals:
//   engv : [energy, e_coul] when eflag>0, followed by the six virial terms
//          when vflag>0, each halved and strided by inum
//   ans  : force at ans[ii], torque at ans[ii+inum]
//
// The Coulomb energy is taken from the macro parameter e_coul throughout; an
// earlier revision named the parameter `ecoul` but stored `e_coul`, which
// only worked when the caller's variable happened to be called e_coul.
// (Comments are kept outside the macro body because of the line splices.)
#define store_answers_tq(f, tor, energy, e_coul, virial, ii, inum, tid,     \
                         t_per_atom, offset, eflag, vflag, ans, engv)       \
  if (t_per_atom>1) {                                                       \
    __local acctyp red_acc[8][BLOCK_PAIR];                                  \
    red_acc[0][tid]=f.x;                                                    \
    red_acc[1][tid]=f.y;                                                    \
    red_acc[2][tid]=f.z;                                                    \
    red_acc[3][tid]=tor.x;                                                  \
    red_acc[4][tid]=tor.y;                                                  \
    red_acc[5][tid]=tor.z;                                                  \
    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
      if (offset < s) {                                                     \
        for (int r=0; r<6; r++)                                             \
          red_acc[r][tid] += red_acc[r][tid+s];                             \
      }                                                                     \
    }                                                                       \
    f.x=red_acc[0][tid];                                                    \
    f.y=red_acc[1][tid];                                                    \
    f.z=red_acc[2][tid];                                                    \
    tor.x=red_acc[3][tid];                                                  \
    tor.y=red_acc[4][tid];                                                  \
    tor.z=red_acc[5][tid];                                                  \
    if (eflag>0 || vflag>0) {                                               \
      for (int r=0; r<6; r++)                                               \
        red_acc[r][tid]=virial[r];                                          \
      red_acc[6][tid]=energy;                                               \
      red_acc[7][tid]=e_coul;                                               \
      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
        if (offset < s) {                                                   \
          for (int r=0; r<8; r++)                                           \
            red_acc[r][tid] += red_acc[r][tid+s];                           \
        }                                                                   \
      }                                                                     \
      for (int r=0; r<6; r++)                                               \
        virial[r]=red_acc[r][tid];                                          \
      energy=red_acc[6][tid];                                               \
      e_coul=red_acc[7][tid];                                               \
    }                                                                       \
  }                                                                         \
  if (offset==0) {                                                          \
    int ei=ii;                                                              \
    if (eflag>0) {                                                          \
      engv[ei]=energy*(acctyp)0.5;                                          \
      ei+=inum;                                                             \
      engv[ei]=e_coul*(acctyp)0.5;                                          \
      ei+=inum;                                                             \
    }                                                                       \
    if (vflag>0) {                                                          \
      for (int i=0; i<6; i++) {                                             \
        engv[ei]=virial[i]*(acctyp)0.5;                                     \
        ei+=inum;                                                           \
      }                                                                     \
    }                                                                       \
    ans[ii]=f;                                                              \
    ans[ii+inum]=tor;                                                       \
  }
|
||||
|
||||
#else
|
||||
|
||||
// store_answers_tq, shuffle variant (ARCH >= 300).
//
// Sums force, torque and both energies over the t_per_atom threads of an atom
// with shfl_xor; the six virial terms are summed the same way only when
// vflag>0. The thread with offset==0 then writes the halved energies/virial
// into engv (strided by inum) and the force/torque into ans[ii] and
// ans[ii+inum].
// (Comments are kept outside the macro body because of the line splices.)
#define store_answers_tq(f, tor, energy, e_coul, virial, ii, inum, tid,     \
                         t_per_atom, offset, eflag, vflag, ans, engv)       \
  if (t_per_atom>1) {                                                       \
    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
      f.x += shfl_xor(f.x, s, t_per_atom);                                  \
      f.y += shfl_xor(f.y, s, t_per_atom);                                  \
      f.z += shfl_xor(f.z, s, t_per_atom);                                  \
      tor.x += shfl_xor(tor.x, s, t_per_atom);                              \
      tor.y += shfl_xor(tor.y, s, t_per_atom);                              \
      tor.z += shfl_xor(tor.z, s, t_per_atom);                              \
      energy += shfl_xor(energy, s, t_per_atom);                            \
      e_coul += shfl_xor(e_coul, s, t_per_atom);                            \
    }                                                                       \
    if (vflag>0) {                                                          \
      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
        for (int k=0; k<6; k++)                                             \
          virial[k] += shfl_xor(virial[k], s, t_per_atom);                  \
      }                                                                     \
    }                                                                       \
  }                                                                         \
  if (offset==0) {                                                          \
    int slot=ii;                                                            \
    if (eflag>0) {                                                          \
      engv[slot]=energy*(acctyp)0.5;                                        \
      slot+=inum;                                                           \
      engv[slot]=e_coul*(acctyp)0.5;                                        \
      slot+=inum;                                                           \
    }                                                                       \
    if (vflag>0) {                                                          \
      for (int k=0; k<6; k++, slot+=inum)                                   \
        engv[slot]=virial[k]*(acctyp)0.5;                                   \
    }                                                                       \
    ans[ii]=f;                                                              \
    ans[ii+inum]=tor;                                                       \
  }
|
||||
|
||||
#endif
|
||||
|
||||
#define MY_PIS (acctyp)1.77245385090551602729
|
||||
|
||||
// Pair kernel for lj/cut/dipole/long, general path: the coefficient tables
// lj1, lj3 and cutsq are read from global memory and indexed with
// itype*lj_types+jtype.
//
// For every neighbor j of atom i with rsq < cutsq[mtype] it accumulates
//   - the LJ force (when rsq < lj1[mtype].z), scaled by the special factor
//     sp_lj[0..3];
//   - the charge/dipole force and the torque on atom i (when
//     rsq < cut_coulsq), combined as
//       factor_coul * (erfc-based b terms)
//       + (1-factor_coul) * ((erfc-1)-based d terms)
//     with factor_coul taken from sp_lj[4..7];
//   - energies when eflag>0 and the virial when vflag>0.
// The per-thread sums are reduced and written out by store_answers_tq.
__kernel void k_dipole_long_lj(const __global numtyp4 *restrict x_,
                               const __global numtyp4 *restrict lj1,
                               const __global numtyp4 *restrict lj3,
                               const int lj_types,
                               const __global numtyp *restrict sp_lj_in,
                               const __global int *dev_nbor,
                               const __global int *dev_packed,
                               __global acctyp4 *restrict ans,
                               __global acctyp *restrict engv,
                               const int eflag, const int vflag, const int inum,
                               const int nbor_pitch,
                               const __global numtyp *restrict q_,
                               const __global numtyp4 *restrict mu_,
                               const __global numtyp *restrict cutsq,
                               const numtyp cut_coulsq, const numtyp qqrd2e,
                               const numtyp g_ewald, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Special-bond factors: [0..3] LJ, [4..7] Coulomb
  __local numtyp sp_lj[8];
  sp_lj[0]=sp_lj_in[0];
  sp_lj[1]=sp_lj_in[1];
  sp_lj[2]=sp_lj_in[2];
  sp_lj[3]=sp_lj_in[3];
  sp_lj[4]=sp_lj_in[4];
  sp_lj[5]=sp_lj_in[5];
  sp_lj[6]=sp_lj_in[6];
  sp_lj[7]=sp_lj_in[7];

  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp4 tor;
  tor.x=(acctyp)0;
  tor.y=(acctyp)0;
  tor.z=(acctyp)0;
  acctyp virial[6];
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;

  // C-style casts: numtyp(x) is not valid OpenCL C
  numtyp pre1 = (numtyp)2.0 * g_ewald / MY_PIS;
  numtyp pre2 = (numtyp)4.0 * (g_ewald*g_ewald*g_ewald) / MY_PIS;
  numtyp pre3 = (numtyp)8.0 * (g_ewald*g_ewald*g_ewald*g_ewald*g_ewald) / MY_PIS;

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    numtyp4 mui; fetch4(mui,i,mu_tex); //mu_[i];
    int itype=ix.w;

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // The special-bond class is encoded in the neighbor index
      numtyp factor_lj, factor_coul;
      factor_lj = sp_lj[sbmask(j)];
      factor_coul = sp_lj[sbmask(j)+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      numtyp qj; fetch(qj,j,q_tex);
      numtyp4 muj; fetch4(muj,j,mu_tex); //mu_[j];
      int jtype=jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      int mtype=itype*lj_types+jtype;
      if (rsq<cutsq[mtype]) {
        numtyp r2inv=ucl_recip(rsq);
        numtyp force_lj,rinv,r6inv;
        numtyp pdotp, pidotr, pjdotr, _erfc;
        numtyp g0,g1,g2,b0,b1,b2,b3,d0,d1,d2,d3;
        numtyp zdix,zdiy,zdiz,zaix,zaiy,zaiz;
        numtyp g0b1_g1b2_g2b3,g0d1_g1d2_g2d3,facm1;
        numtyp fdx,fdy,fdz,fax,fay,faz;
        acctyp4 forcecoul, ticoul;
        acctyp4 force;

        forcecoul.x = forcecoul.y = forcecoul.z = (acctyp)0;
        ticoul.x = ticoul.y = ticoul.z = (acctyp)0;

        if (rsq < lj1[mtype].z) {
          r6inv = r2inv*r2inv*r2inv;
          force_lj = factor_lj*r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y)*r2inv;
        } else force_lj = (numtyp)0.0;

        if (rsq < cut_coulsq) {
          rinv = ucl_rsqrt(rsq);
          numtyp r = ucl_rsqrt(r2inv);   // 1/sqrt(1/rsq)
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;

          pdotp = mui.x*muj.x + mui.y*muj.y + mui.z*muj.z;
          pidotr = mui.x*delx + mui.y*dely + mui.z*delz;
          pjdotr = muj.x*delx + muj.y*dely + muj.z*delz;

          g0 = qtmp*qj;
          g1 = qtmp*pjdotr - qj*pidotr + pdotp;
          g2 = -pidotr*pjdotr;

          // Terms built on erfc, weighted by factor_coul
          if (factor_coul > (numtyp)0.0) {
            b0 = _erfc * rinv;
            b1 = (b0 + pre1*expm2) * r2inv;
            b2 = ((numtyp)3.0*b1 + pre2*expm2) * r2inv;
            b3 = ((numtyp)5.0*b2 + pre3*expm2) * r2inv;

            g0b1_g1b2_g2b3 = g0*b1 + g1*b2 + g2*b3;
            fdx = delx * g0b1_g1b2_g2b3 -
              b1 * (qtmp*muj.x - qj*mui.x) +
              b2 * (pjdotr*mui.x + pidotr*muj.x);
            fdy = dely * g0b1_g1b2_g2b3 -
              b1 * (qtmp*muj.y - qj*mui.y) +
              b2 * (pjdotr*mui.y + pidotr*muj.y);
            fdz = delz * g0b1_g1b2_g2b3 -
              b1 * (qtmp*muj.z - qj*mui.z) +
              b2 * (pjdotr*mui.z + pidotr*muj.z);

            zdix = delx * (qj*b1 + b2*pjdotr) - b1*muj.x;
            zdiy = dely * (qj*b1 + b2*pjdotr) - b1*muj.y;
            zdiz = delz * (qj*b1 + b2*pjdotr) - b1*muj.z;

            if (factor_coul < (numtyp)1.0) {
              fdx *= factor_coul;
              fdy *= factor_coul;
              fdz *= factor_coul;
              zdix *= factor_coul;
              zdiy *= factor_coul;
              zdiz *= factor_coul;
            }
          } else {
            fdx = fdy = fdz = (numtyp)0.0;
            zdix = zdiy = zdiz = (numtyp)0.0;
          }

          // Terms built on (erfc-1), weighted by (1-factor_coul)
          if (factor_coul < (numtyp)1.0) {
            d0 = (_erfc - (numtyp)1.0) * rinv;
            d1 = (d0 + pre1*expm2) * r2inv;
            d2 = ((numtyp)3.0*d1 + pre2*expm2) * r2inv;
            d3 = ((numtyp)5.0*d2 + pre3*expm2) * r2inv;

            g0d1_g1d2_g2d3 = g0*d1 + g1*d2 + g2*d3;
            fax = delx * g0d1_g1d2_g2d3 -
              d1 * (qtmp*muj.x - qj*mui.x) +
              d2 * (pjdotr*mui.x + pidotr*muj.x);
            fay = dely * g0d1_g1d2_g2d3 -
              d1 * (qtmp*muj.y - qj*mui.y) +
              d2 * (pjdotr*mui.y + pidotr*muj.y);
            faz = delz * g0d1_g1d2_g2d3 -
              d1 * (qtmp*muj.z - qj*mui.z) +
              d2 * (pjdotr*mui.z + pidotr*muj.z);

            zaix = delx * (qj*d1 + d2*pjdotr) - d1*muj.x;
            zaiy = dely * (qj*d1 + d2*pjdotr) - d1*muj.y;
            zaiz = delz * (qj*d1 + d2*pjdotr) - d1*muj.z;

            if (factor_coul > (numtyp)0.0) {
              facm1 = (numtyp)1.0 - factor_coul;
              fax *= facm1;
              fay *= facm1;
              faz *= facm1;
              zaix *= facm1;
              zaiy *= facm1;
              zaiz *= facm1;
            }
          } else {
            fax = fay = faz = (numtyp)0.0;
            zaix = zaiy = zaiz = (numtyp)0.0;
          }

          forcecoul.x = fdx + fax;
          forcecoul.y = fdy + fay;
          forcecoul.z = fdz + faz;

          // Torque on i: mu_i x (zdi + zai)
          ticoul.x = mui.y*(zdiz + zaiz) - mui.z*(zdiy + zaiy);
          ticoul.y = mui.z*(zdix + zaix) - mui.x*(zdiz + zaiz);
          ticoul.z = mui.x*(zdiy + zaiy) - mui.y*(zdix + zaix);

        } else {
          forcecoul.x = forcecoul.y = forcecoul.z = (numtyp)0.0;
          ticoul.x = ticoul.y = ticoul.z = (numtyp)0.0;
        }

        force.x = qqrd2e*forcecoul.x + delx*force_lj;
        force.y = qqrd2e*forcecoul.y + dely*force_lj;
        force.z = qqrd2e*forcecoul.z + delz*force_lj;
        f.x+=force.x;
        f.y+=force.y;
        f.z+=force.z;
        tor.x+=qqrd2e*ticoul.x;
        tor.y+=qqrd2e*ticoul.y;
        tor.z+=qqrd2e*ticoul.z;

        if (eflag>0) {
          acctyp e = (acctyp)0.0;
          if (rsq < cut_coulsq && factor_coul > (numtyp)0.0) {
            e = qqrd2e*(b0*g0 + b1*g1 + b2*g2);
            if (factor_coul < (numtyp)1.0) {
              // Apply the special-bond weighting to this pair's energy only;
              // scaling the accumulator e_coul here would also rescale the
              // contributions of all previously visited neighbors.
              e *= factor_coul;
              e += ((numtyp)1.0-factor_coul) * qqrd2e * (d0*g0 + d1*g1 + d2*g2);
            }
          }
          e_coul += e;

          if (rsq < lj1[mtype].z) {
            e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
            energy+=factor_lj*(e-lj3[mtype].z);
          }
        }
        if (vflag>0) {
          virial[0] += delx*force.x;
          virial[1] += dely*force.y;
          virial[2] += delz*force.z;
          virial[3] += delx*force.y;
          virial[4] += delx*force.z;
          virial[5] += dely*force.z;
        }
      }

    } // for nbor
    store_answers_tq(f,tor,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                     vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
// Pair kernel for lj/cut/dipole/long, fast path: the coefficient tables are
// first copied into __local arrays of MAX_SHARED_TYPES*MAX_SHARED_TYPES
// entries (lj3 only when eflag>0) and indexed with
// MAX_SHARED_TYPES*itype+jtype.
//
// For every neighbor j of atom i with rsq < cutsq[mtype] it accumulates
//   - the LJ force (when rsq < lj1[mtype].z), scaled by the special factor
//     sp_lj[0..3];
//   - the charge/dipole force and the torque on atom i (when
//     rsq < cut_coulsq), combined as
//       factor_coul * (erfc-based b terms)
//       + (1-factor_coul) * ((erfc-1)-based d terms)
//     with factor_coul taken from sp_lj[4..7];
//   - energies when eflag>0 and the virial when vflag>0.
// The per-thread sums are reduced and written out by store_answers_tq.
__kernel void k_dipole_long_lj_fast(const __global numtyp4 *restrict x_,
                                    const __global numtyp4 *restrict lj1_in,
                                    const __global numtyp4 *restrict lj3_in,
                                    const __global numtyp *restrict sp_lj_in,
                                    const __global int *dev_nbor,
                                    const __global int *dev_packed,
                                    __global acctyp4 *restrict ans,
                                    __global acctyp *restrict engv,
                                    const int eflag, const int vflag, const int inum,
                                    const int nbor_pitch,
                                    const __global numtyp *restrict q_,
                                    const __global numtyp4 *restrict mu_,
                                    const __global numtyp *restrict _cutsq,
                                    const numtyp cut_coulsq, const numtyp qqrd2e,
                                    const numtyp g_ewald, const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Stage the coefficient tables and special factors in local memory
  __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp cutsq[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp sp_lj[8];
  if (tid<8)
    sp_lj[tid]=sp_lj_in[tid];
  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    lj1[tid]=lj1_in[tid];
    cutsq[tid]=_cutsq[tid];
    if (eflag>0)
      lj3[tid]=lj3_in[tid];
  }

  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
  acctyp4 tor;
  tor.x=(acctyp)0;
  tor.y=(acctyp)0;
  tor.z=(acctyp)0;
  acctyp virial[6];
  for (int i=0; i<6; i++)
    virial[i]=(acctyp)0;

  // Local tables must be complete before any thread reads them
  __syncthreads();

  // C-style casts: numtyp(x) is not valid OpenCL C
  numtyp pre1 = (numtyp)2.0 * g_ewald / MY_PIS;
  numtyp pre2 = (numtyp)4.0 * (g_ewald*g_ewald*g_ewald) / MY_PIS;
  numtyp pre3 = (numtyp)8.0 * (g_ewald*g_ewald*g_ewald*g_ewald*g_ewald) / MY_PIS;

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    numtyp4 mui; fetch4(mui,i,mu_tex); //mu_[i];
    int iw=ix.w;
    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // The special-bond class is encoded in the neighbor index
      numtyp factor_lj, factor_coul;
      factor_lj = sp_lj[sbmask(j)];
      factor_coul = sp_lj[sbmask(j)+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      numtyp qj; fetch(qj,j,q_tex);
      numtyp4 muj; fetch4(muj,j,mu_tex); //mu_[j];
      int mtype=itype+jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<cutsq[mtype]) {
        numtyp r2inv=ucl_recip(rsq);
        numtyp force_lj,rinv,r6inv;
        numtyp pdotp, pidotr, pjdotr, _erfc;
        numtyp g0,g1,g2,b0,b1,b2,b3,d0,d1,d2,d3;
        numtyp zdix,zdiy,zdiz,zaix,zaiy,zaiz;
        numtyp g0b1_g1b2_g2b3,g0d1_g1d2_g2d3,facm1;
        numtyp fdx,fdy,fdz,fax,fay,faz;
        acctyp4 forcecoul, ticoul;
        acctyp4 force;

        forcecoul.x = forcecoul.y = forcecoul.z = (acctyp)0;
        ticoul.x = ticoul.y = ticoul.z = (acctyp)0;

        if (rsq < lj1[mtype].z) {
          r6inv = r2inv*r2inv*r2inv;
          force_lj = factor_lj*r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y)*r2inv;
        } else force_lj = (numtyp)0.0;

        if (rsq < cut_coulsq) {
          rinv = ucl_rsqrt(rsq);
          numtyp r = ucl_rsqrt(r2inv);   // 1/sqrt(1/rsq)
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;

          pdotp = mui.x*muj.x + mui.y*muj.y + mui.z*muj.z;
          pidotr = mui.x*delx + mui.y*dely + mui.z*delz;
          pjdotr = muj.x*delx + muj.y*dely + muj.z*delz;

          g0 = qtmp*qj;
          g1 = qtmp*pjdotr - qj*pidotr + pdotp;
          g2 = -pidotr*pjdotr;

          // Terms built on erfc, weighted by factor_coul
          if (factor_coul > (numtyp)0.0) {
            b0 = _erfc * rinv;
            b1 = (b0 + pre1*expm2) * r2inv;
            b2 = ((numtyp)3.0*b1 + pre2*expm2) * r2inv;
            b3 = ((numtyp)5.0*b2 + pre3*expm2) * r2inv;

            g0b1_g1b2_g2b3 = g0*b1 + g1*b2 + g2*b3;
            fdx = delx * g0b1_g1b2_g2b3 -
              b1 * (qtmp*muj.x - qj*mui.x) +
              b2 * (pjdotr*mui.x + pidotr*muj.x);
            fdy = dely * g0b1_g1b2_g2b3 -
              b1 * (qtmp*muj.y - qj*mui.y) +
              b2 * (pjdotr*mui.y + pidotr*muj.y);
            fdz = delz * g0b1_g1b2_g2b3 -
              b1 * (qtmp*muj.z - qj*mui.z) +
              b2 * (pjdotr*mui.z + pidotr*muj.z);

            zdix = delx * (qj*b1 + b2*pjdotr) - b1*muj.x;
            zdiy = dely * (qj*b1 + b2*pjdotr) - b1*muj.y;
            zdiz = delz * (qj*b1 + b2*pjdotr) - b1*muj.z;

            if (factor_coul < (numtyp)1.0) {
              fdx *= factor_coul;
              fdy *= factor_coul;
              fdz *= factor_coul;
              zdix *= factor_coul;
              zdiy *= factor_coul;
              zdiz *= factor_coul;
            }
          } else {
            fdx = fdy = fdz = (numtyp)0.0;
            zdix = zdiy = zdiz = (numtyp)0.0;
          }

          // Terms built on (erfc-1), weighted by (1-factor_coul)
          if (factor_coul < (numtyp)1.0) {
            d0 = (_erfc - (numtyp)1.0) * rinv;
            d1 = (d0 + pre1*expm2) * r2inv;
            d2 = ((numtyp)3.0*d1 + pre2*expm2) * r2inv;
            d3 = ((numtyp)5.0*d2 + pre3*expm2) * r2inv;

            g0d1_g1d2_g2d3 = g0*d1 + g1*d2 + g2*d3;
            fax = delx * g0d1_g1d2_g2d3 -
              d1 * (qtmp*muj.x - qj*mui.x) +
              d2 * (pjdotr*mui.x + pidotr*muj.x);
            fay = dely * g0d1_g1d2_g2d3 -
              d1 * (qtmp*muj.y - qj*mui.y) +
              d2 * (pjdotr*mui.y + pidotr*muj.y);
            faz = delz * g0d1_g1d2_g2d3 -
              d1 * (qtmp*muj.z - qj*mui.z) +
              d2 * (pjdotr*mui.z + pidotr*muj.z);

            zaix = delx * (qj*d1 + d2*pjdotr) - d1*muj.x;
            zaiy = dely * (qj*d1 + d2*pjdotr) - d1*muj.y;
            zaiz = delz * (qj*d1 + d2*pjdotr) - d1*muj.z;

            if (factor_coul > (numtyp)0.0) {
              facm1 = (numtyp)1.0 - factor_coul;
              fax *= facm1;
              fay *= facm1;
              faz *= facm1;
              zaix *= facm1;
              zaiy *= facm1;
              zaiz *= facm1;
            }
          } else {
            fax = fay = faz = (numtyp)0.0;
            zaix = zaiy = zaiz = (numtyp)0.0;
          }

          forcecoul.x = fdx + fax;
          forcecoul.y = fdy + fay;
          forcecoul.z = fdz + faz;

          // Torque on i: mu_i x (zdi + zai)
          ticoul.x = mui.y*(zdiz + zaiz) - mui.z*(zdiy + zaiy);
          ticoul.y = mui.z*(zdix + zaix) - mui.x*(zdiz + zaiz);
          ticoul.z = mui.x*(zdiy + zaiy) - mui.y*(zdix + zaix);

        } else {
          forcecoul.x = forcecoul.y = forcecoul.z = (numtyp)0.0;
          ticoul.x = ticoul.y = ticoul.z = (numtyp)0.0;
        }

        force.x = qqrd2e*forcecoul.x + delx*force_lj;
        force.y = qqrd2e*forcecoul.y + dely*force_lj;
        force.z = qqrd2e*forcecoul.z + delz*force_lj;
        f.x+=force.x;
        f.y+=force.y;
        f.z+=force.z;
        tor.x+=qqrd2e*ticoul.x;
        tor.y+=qqrd2e*ticoul.y;
        tor.z+=qqrd2e*ticoul.z;

        if (eflag>0) {
          acctyp e = (acctyp)0.0;
          if (rsq < cut_coulsq && factor_coul > (numtyp)0.0) {
            e = qqrd2e*(b0*g0 + b1*g1 + b2*g2);
            if (factor_coul < (numtyp)1.0) {
              // Apply the special-bond weighting to this pair's energy only;
              // scaling the accumulator e_coul here would also rescale the
              // contributions of all previously visited neighbors.
              e *= factor_coul;
              e += ((numtyp)1.0-factor_coul) * qqrd2e * (d0*g0 + d1*g1 + d2*g2);
            }
          }
          e_coul += e;

          if (rsq < lj1[mtype].z) {
            e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
            energy+=factor_lj*(e-lj3[mtype].z);
          }
        }
        if (vflag>0) {
          virial[0] += delx*force.x;
          virial[1] += dely*force.y;
          virial[2] += delz*force.z;
          virial[3] += delx*force.y;
          virial[4] += delx*force.z;
          virial[5] += dely*force.z;
        }
      }

    } // for nbor
    store_answers_tq(f,tor,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                     vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
85
lib/gpu/lal_dipole_long_lj.h
Normal file
85
lib/gpu/lal_dipole_long_lj.h
Normal file
@ -0,0 +1,85 @@
|
||||
/***************************************************************************
|
||||
dipole_long_lj.h
|
||||
-------------------
|
||||
Trung Dac Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the lj/cut/dipole/long pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_DIPOLE_LONG_LJ_H
|
||||
#define LAL_DIPOLE_LONG_LJ_H
|
||||
|
||||
#include "lal_base_dipole.h"
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class DipoleLongLJ : public BaseDipole<numtyp, acctyp> {
|
||||
public:
|
||||
DipoleLongLJ();
|
||||
~DipoleLongLJ();
|
||||
|
||||
/// Clear any previous data and set up for a new LAMMPS run
|
||||
/** \param max_nbors initial number of rows in the neighbor matrix
|
||||
* \param cell_size cutoff + skin
|
||||
* \param gpu_split fraction of particles handled by device
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(const int ntypes, double **host_cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **host_offset, double *host_special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen, double **host_cut_ljsq,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
||||
/// Returns memory usage on device per atom
|
||||
int bytes_per_atom(const int max_nbors) const;
|
||||
|
||||
/// Total host memory used by library for pair style
|
||||
double host_memory_usage() const;
|
||||
|
||||
// --------------------------- TYPE DATA --------------------------
|
||||
|
||||
/// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq_vdw, lj1.w = cutsq_coul
|
||||
UCL_D_Vec<numtyp4> lj1;
|
||||
/// lj3.x = lj3, lj3.y = lj4, lj3.z = offset
|
||||
UCL_D_Vec<numtyp4> lj3;
|
||||
/// cutsq
|
||||
UCL_D_Vec<numtyp> cutsq;
|
||||
/// Special LJ values [0-3] and Special Coul values [4-7]
|
||||
UCL_D_Vec<numtyp> sp_lj;
|
||||
|
||||
/// If atom type constants fit in shared memory, use fast kernels
|
||||
bool shared_types;
|
||||
|
||||
/// Number of atom types
|
||||
int _lj_types;
|
||||
|
||||
numtyp _cut_coulsq, _qqrd2e, _g_ewald;
|
||||
|
||||
private:
|
||||
bool _allocated;
|
||||
void loop(const bool _eflag, const bool _vflag);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
129
lib/gpu/lal_dipole_long_lj_ext.cpp
Normal file
129
lib/gpu/lal_dipole_long_lj_ext.cpp
Normal file
@ -0,0 +1,129 @@
|
||||
/***************************************************************************
|
||||
dipole_long_lj_ext.cpp
|
||||
-------------------
|
||||
Trung Dac Nguyen (ORNL)
|
||||
|
||||
Functions for LAMMPS access to lj/cut/dipole/long acceleration routines.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : nguyentd@ornl.gov
|
||||
***************************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
#include "lal_dipole_long_lj.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace LAMMPS_AL;
|
||||
|
||||
static DipoleLongLJ<PRECISION,ACC_PRECISION> DPLJMF;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialize the lj/cut/dipole/long accelerator object (DPLJMF).
//
// World rank 0 calls init() first (the progress message says it also
// compiles), then the remaining ranks initialize one "slot" at a time,
// separated by gpu_barrier() calls.
//
//   gpu_mode   (out) receives DPLJMF.device->gpu_mode()
//   max_nbors  initial number of rows in the neighbor matrix; forwarded to
//              DipoleLongLJ::init()
//   screen     if non-null and this is replica 0, progress text is printed
//
// Returns the status from init() on this rank (0 on success). Ranks that
// never call init() return 0. On success estimate_gpu_overhead() is invoked
// before returning.
int dplj_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                  double **host_lj2, double **host_lj3, double **host_lj4,
                  double **offset, double *special_lj, const int inum,
                  const int nall, const int max_nbors, const int maxspecial,
                  const double cell_size, int &gpu_mode, FILE *screen,
                  double **host_cut_ljsq, const double host_cut_coulsq,
                  double *host_special_coul, const double qqrd2e,
                  const double g_ewald) {
  DPLJMF.clear();
  gpu_mode=DPLJMF.device->gpu_mode();
  double gpu_split=DPLJMF.device->particle_split();
  int first_gpu=DPLJMF.device->first_device();
  int last_gpu=DPLJMF.device->last_device();
  int world_me=DPLJMF.device->world_me();
  int gpu_rank=DPLJMF.device->gpu_rank();
  int procs_per_gpu=DPLJMF.device->procs_per_gpu();

  DPLJMF.device->init_message(screen,"lj/cut/dipole/long",first_gpu,last_gpu);

  // Only replica 0 with a valid output stream reports progress
  bool message=false;
  if (DPLJMF.device->replica_me()==0 && screen)
    message=true;

  if (message) {
    fprintf(screen,"Initializing Device and compiling on process 0...");
    fflush(screen);
  }

  // Honor the caller-supplied max_nbors instead of a hard-coded row count,
  // so the neighbor matrix is sized the way the caller requested.
  int init_ok=0;
  if (world_me==0)
    init_ok=DPLJMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3,
                        host_lj4, offset, special_lj, inum, nall, max_nbors,
                        maxspecial, cell_size, gpu_split, screen, host_cut_ljsq,
                        host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);

  DPLJMF.device->world_barrier();
  if (message)
    fprintf(screen,"Done.\n");

  // Remaining ranks initialize in turn, one gpu_rank per iteration
  for (int i=0; i<procs_per_gpu; i++) {
    if (message) {
      if (last_gpu-first_gpu==0)
        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
      else
        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
                last_gpu,i);
      fflush(screen);
    }
    if (gpu_rank==i && world_me!=0)
      init_ok=DPLJMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
                          offset, special_lj, inum, nall, max_nbors, maxspecial,
                          cell_size, gpu_split, screen, host_cut_ljsq,
                          host_cut_coulsq, host_special_coul, qqrd2e, g_ewald);

    DPLJMF.device->gpu_barrier();
    if (message)
      fprintf(screen,"Done.\n");
  }
  if (message)
    fprintf(screen,"\n");

  if (init_ok==0)
    DPLJMF.estimate_gpu_overhead();
  return init_ok;
}
|
||||
|
||||
// Release the data held by the file-scope lj/cut/dipole/long accelerator
// object by delegating to its clear() method.
void dplj_gpu_clear()
{
  DPLJMF.clear();
}
|
||||
|
||||
int** dplj_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double **host_mu,
|
||||
double *boxlo, double *prd) {
|
||||
return DPLJMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||
host_q, host_mu, boxlo, prd);
|
||||
}
|
||||
|
||||
// Thin forwarding wrapper: passes every argument, in order, to the
// DPLJMF.compute() overload that takes a host-provided neighbor list
// (ilist / numj / firstneigh). Nothing is returned.
void dplj_gpu_compute(const int ago, const int inum_full, const int nall,
                      double **host_x, int *host_type, int *ilist,
                      int *numj, int **firstneigh,
                      const bool eflag, const bool vflag,
                      const bool eatom, const bool vatom,
                      int &host_start, const double cpu_time,
                      bool &success, double *host_q, double **host_mu,
                      const int nlocal, double *boxlo, double *prd)
{
  DPLJMF.compute(ago, inum_full, nall,
                 host_x, host_type,
                 ilist, numj, firstneigh,
                 eflag, vflag, eatom, vatom,
                 host_start, cpu_time, success,
                 host_q, host_mu,
                 nlocal, boxlo, prd);
}
|
||||
|
||||
double dplj_gpu_bytes() {
|
||||
return DPLJMF.host_memory_usage();
|
||||
}
|
||||
|
||||
|
||||
183
lib/gpu/lal_lj_expand_coul_long.cpp
Normal file
183
lib/gpu/lal_lj_expand_coul_long.cpp
Normal file
@ -0,0 +1,183 @@
|
||||
/***************************************************************************
|
||||
lj_expand_coul_long.cpp
|
||||
--------------------------
|
||||
Trung Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the lj/expand/coul/long pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#if defined(USE_OPENCL)
|
||||
#include "lj_expand_coul_long_cl.h"
|
||||
#elif defined(USE_CUDART)
|
||||
const char *lj_expand_coul_long=0;
|
||||
#else
|
||||
#include "lj_expand_coul_long_cubin.h"
|
||||
#endif
|
||||
|
||||
#include "lal_lj_expand_coul_long.h"
|
||||
#include <cassert>
|
||||
using namespace LAMMPS_AL;
|
||||
#define LJExpandCoulLongT LJExpandCoulLong<numtyp, acctyp>
|
||||
|
||||
extern Device<PRECISION,ACC_PRECISION> device;
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJExpandCoulLongT::LJExpandCoulLong() : BaseCharge<numtyp,acctyp>(),
|
||||
_allocated(false) {
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
LJExpandCoulLongT::~LJExpandCoulLong() {
|
||||
clear();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int LJExpandCoulLongT::bytes_per_atom(const int max_nbors) const {
|
||||
return this->bytes_per_atom_atomic(max_nbors);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int LJExpandCoulLongT::init(const int ntypes,
|
||||
double **host_cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset, double **host_shift,
|
||||
double *host_special_lj, const int nlocal,
|
||||
const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *_screen,
|
||||
double **host_cut_ljsq, const double host_cut_coulsq,
|
||||
double *host_special_coul, const double qqrd2e,
|
||||
const double g_ewald) {
|
||||
int success;
|
||||
success=this->init_atomic(nlocal,nall,max_nbors,maxspecial,cell_size,gpu_split,
|
||||
_screen,lj_expand_coul_long,"k_lj_expand_coul_long");
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
// If atom type constants fit in shared memory use fast kernel
|
||||
int lj_types=ntypes;
|
||||
shared_types=false;
|
||||
int max_shared_types=this->device->max_shared_types();
|
||||
if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
|
||||
lj_types=max_shared_types;
|
||||
shared_types=true;
|
||||
}
|
||||
_lj_types=lj_types;
|
||||
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(lj_types*lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<lj_types*lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
lj1.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,lj1,host_write,host_lj1,host_lj2,
|
||||
host_cutsq, host_cut_ljsq);
|
||||
|
||||
lj3.alloc(lj_types*lj_types,*(this->ucl_device),UCL_READ_ONLY);
|
||||
this->atom->type_pack4(ntypes,lj_types,lj3,host_write,host_lj3,host_lj4,
|
||||
host_offset,host_shift);
|
||||
|
||||
sp_lj.alloc(8,*(this->ucl_device),UCL_READ_ONLY);
|
||||
for (int i=0; i<4; i++) {
|
||||
host_write[i]=host_special_lj[i];
|
||||
host_write[i+4]=host_special_coul[i];
|
||||
}
|
||||
ucl_copy(sp_lj,host_write,8,false);
|
||||
|
||||
_cut_coulsq=host_cut_coulsq;
|
||||
_qqrd2e=qqrd2e;
|
||||
_g_ewald=g_ewald;
|
||||
|
||||
_allocated=true;
|
||||
this->_max_bytes=lj1.row_bytes()+lj3.row_bytes()+sp_lj.row_bytes();
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJExpandCoulLongT::reinit(const int ntypes, double **host_cutsq, double **host_lj1,
|
||||
double **host_lj2, double **host_lj3, double **host_lj4,
|
||||
double **host_offset, double **host_shift, double **host_cut_ljsq) {
|
||||
// Allocate a host write buffer for data initialization
|
||||
UCL_H_Vec<numtyp> host_write(_lj_types*_lj_types*32,*(this->ucl_device),
|
||||
UCL_WRITE_ONLY);
|
||||
|
||||
for (int i=0; i<_lj_types*_lj_types; i++)
|
||||
host_write[i]=0.0;
|
||||
|
||||
this->atom->type_pack4(ntypes,_lj_types,lj1,host_write,host_lj1,host_lj2,
|
||||
host_cutsq, host_cut_ljsq);
|
||||
this->atom->type_pack4(ntypes,_lj_types,lj3,host_write,host_lj3,host_lj4,
|
||||
host_offset,host_shift);
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void LJExpandCoulLongT::clear() {
|
||||
if (!_allocated)
|
||||
return;
|
||||
_allocated=false;
|
||||
|
||||
lj1.clear();
|
||||
lj3.clear();
|
||||
sp_lj.clear();
|
||||
this->clear_atomic();
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double LJExpandCoulLongT::host_memory_usage() const {
|
||||
return this->host_memory_usage_atomic()+sizeof(LJExpandCoulLong<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Calculate energies, forces, and virials
|
||||
// ---------------------------------------------------------------------------
|
||||
template <class numtyp, class acctyp>
|
||||
void LJExpandCoulLongT::loop(const bool _eflag, const bool _vflag) {
|
||||
// Compute the block size and grid size to keep all cores busy
|
||||
const int BX=this->block_size();
|
||||
int eflag, vflag;
|
||||
if (_eflag)
|
||||
eflag=1;
|
||||
else
|
||||
eflag=0;
|
||||
|
||||
if (_vflag)
|
||||
vflag=1;
|
||||
else
|
||||
vflag=0;
|
||||
|
||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||
(BX/this->_threads_per_atom)));
|
||||
|
||||
int ainum=this->ans->inum();
|
||||
int nbor_pitch=this->nbor->nbor_pitch();
|
||||
this->time_pair.start();
|
||||
if (shared_types) {
|
||||
this->k_pair_fast.set_size(GX,BX);
|
||||
this->k_pair_fast.run(&this->atom->x, &lj1, &lj3, &sp_lj,
|
||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||
&this->ans->force, &this->ans->engv, &eflag,
|
||||
&vflag, &ainum, &nbor_pitch, &this->atom->q,
|
||||
&_cut_coulsq, &_qqrd2e, &_g_ewald,
|
||||
&this->_threads_per_atom);
|
||||
} else {
|
||||
this->k_pair.set_size(GX,BX);
|
||||
this->k_pair.run(&this->atom->x, &lj1, &lj3,
|
||||
&_lj_types, &sp_lj, &this->nbor->dev_nbor,
|
||||
&this->_nbor_data->begin(), &this->ans->force,
|
||||
&this->ans->engv, &eflag, &vflag, &ainum,
|
||||
&nbor_pitch, &this->atom->q, &_cut_coulsq,
|
||||
&_qqrd2e, &_g_ewald, &this->_threads_per_atom);
|
||||
}
|
||||
this->time_pair.stop();
|
||||
}
|
||||
|
||||
template class LJExpandCoulLong<PRECISION,ACC_PRECISION>;
|
||||
272
lib/gpu/lal_lj_expand_coul_long.cu
Normal file
272
lib/gpu/lal_lj_expand_coul_long.cu
Normal file
@ -0,0 +1,272 @@
|
||||
// **************************************************************************
|
||||
// lj_expand_coul_long.cu
|
||||
// -------------------
|
||||
// Trung Nguyen (Northwestern)
|
||||
//
|
||||
// Device code for acceleration of the lj/expand/coul/long pair style
|
||||
//
|
||||
// __________________________________________________________________________
|
||||
// This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
// __________________________________________________________________________
|
||||
//
|
||||
// begin :
|
||||
// email : ndactrung@gmail.com
|
||||
// ***************************************************************************/
|
||||
|
||||
#ifdef NV_KERNEL
|
||||
|
||||
#include "lal_aux_fun1.h"
|
||||
#ifndef _DOUBLE_DOUBLE
|
||||
texture<float4> pos_tex;
|
||||
texture<float> q_tex;
|
||||
#else
|
||||
texture<int4,1> pos_tex;
|
||||
texture<int2> q_tex;
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define pos_tex x_
|
||||
#define q_tex q_
|
||||
#endif
|
||||
|
||||
// General lj/expand/coul/long pair kernel (coefficients read from the
// lj1 / lj3 arrays passed in, indexed by itype*lj_types+jtype).
//
//   lj1[m] = (lj1, lj2, cutsq, cut_ljsq)   lj3[m] = (lj3, lj4, offset, shift)
//   sp_lj_in[0..3] special LJ factors, sp_lj_in[4..7] special Coulomb factors
//
// For every neighbor inside lj1.z the kernel accumulates the shifted-LJ
// force (distance r - shift, active inside lj1.w) and the real-space
// Coulomb force (active inside cut_coulsq, using the polynomial stored in
// _erfc), plus energies when eflag>0 and the six virial terms when vflag>0.
// Results are handed to store_answers_q().
__kernel void k_lj_expand_coul_long(const __global numtyp4 *restrict x_,
                                    const __global numtyp4 *restrict lj1,
                                    const __global numtyp4 *restrict lj3,
                                    const int lj_types,
                                    const __global numtyp *restrict sp_lj_in,
                                    const __global int *dev_nbor,
                                    const __global int *dev_packed,
                                    __global acctyp4 *restrict ans,
                                    __global acctyp *restrict engv,
                                    const int eflag, const int vflag,
                                    const int inum, const int nbor_pitch,
                                    const __global numtyp *restrict q_,
                                    const numtyp cut_coulsq,
                                    const numtyp qqrd2e,
                                    const numtyp g_ewald,
                                    const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Local copy of the eight special-bond factors
  __local numtyp sp_lj[8];
  for (int k=0; k<8; k++)
    sp_lj[k]=sp_lj_in[k];

  // Per-thread accumulators
  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0;
  f.y=(acctyp)0;
  f.z=(acctyp)0;
  acctyp virial[6];
  for (int k=0; k<6; k++)
    virial[k]=(acctyp)0;

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    int itype=ix.w;   // atom type is carried in the w component

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // Special-bond bits are decoded before they are masked off of j
      numtyp factor_lj = sp_lj[sbmask(j)];
      numtyp factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int jtype=jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      int mtype=itype*lj_types+jtype;
      if (rsq<lj1[mtype].z) {
        numtyp r6inv, prefactor, _erfc;
        numtyp force_lj = (numtyp)0.0;
        numtyp forcecoul = (numtyp)0.0;
        numtyp r2inv=ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);

        // LJ part, evaluated at the shifted distance r - lj3.w
        if (rsq < lj1[mtype].w) {
          numtyp rshift = r - lj3[mtype].w;
          numtyp rshiftsq = rshift*rshift;
          numtyp rshift2inv = ucl_recip(rshiftsq);
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          force_lj = r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
          force_lj *= factor_lj/rshift/r;
        }

        // Real-space Coulomb part
        if (rsq < cut_coulsq) {
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
          fetch(prefactor,j,q_tex);
          prefactor *= qqrd2e * qtmp/r;
          forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
        }

        numtyp force = force_lj + forcecoul*r2inv;

        f.x+=delx*force;
        f.y+=dely*force;
        f.z+=delz*force;

        if (eflag>0) {
          if (rsq < cut_coulsq)
            e_coul += prefactor*(_erfc-factor_coul);
          if (rsq < lj1[mtype].w) {
            numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
            energy+=factor_lj*(e-lj3[mtype].z);
          }
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
          virial[1] += dely*dely*force;
          virial[2] += delz*delz*force;
          virial[3] += delx*dely*force;
          virial[4] += delx*delz*force;
          virial[5] += dely*delz*force;
        }
      }

    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
// "Fast" lj/expand/coul/long pair kernel: same arithmetic as the general
// kernel, but the coefficient tables are first copied into __local arrays
// of MAX_SHARED_TYPES*MAX_SHARED_TYPES entries and indexed with a row
// stride of MAX_SHARED_TYPES.
//
//   lj1[m] = (lj1, lj2, cutsq, cut_ljsq)   lj3[m] = (lj3, lj4, offset, shift)
//   sp_lj_in[0..3] special LJ factors, sp_lj_in[4..7] special Coulomb factors
__kernel void k_lj_expand_coul_long_fast(const __global numtyp4 *restrict x_,
                                         const __global numtyp4 *restrict lj1_in,
                                         const __global numtyp4 *restrict lj3_in,
                                         const __global numtyp *restrict sp_lj_in,
                                         const __global int *dev_nbor,
                                         const __global int *dev_packed,
                                         __global acctyp4 *restrict ans,
                                         __global acctyp *restrict engv,
                                         const int eflag, const int vflag,
                                         const int inum, const int nbor_pitch,
                                         const __global numtyp *restrict q_,
                                         const numtyp cut_coulsq,
                                         const numtyp qqrd2e,
                                         const numtyp g_ewald,
                                         const int t_per_atom) {
  int tid, ii, offset;
  atom_info(t_per_atom,ii,tid,offset);

  // Threads with a low enough tid each stage one table entry; the barrier
  // below makes the staged data visible before it is read.
  __local numtyp4 lj1[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp4 lj3[MAX_SHARED_TYPES*MAX_SHARED_TYPES];
  __local numtyp sp_lj[8];
  if (tid<8)
    sp_lj[tid]=sp_lj_in[tid];
  if (tid<MAX_SHARED_TYPES*MAX_SHARED_TYPES) {
    lj1[tid]=lj1_in[tid];
    lj3[tid]=lj3_in[tid];
  }

  // Per-thread accumulators
  acctyp energy=(acctyp)0;
  acctyp e_coul=(acctyp)0;
  acctyp4 f;
  f.x=(acctyp)0;
  f.y=(acctyp)0;
  f.z=(acctyp)0;
  acctyp virial[6];
  for (int k=0; k<6; k++)
    virial[k]=(acctyp)0;

  __syncthreads();

  if (ii<inum) {
    int nbor, nbor_end;
    int i, numj;
    __local int n_stride;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
    numtyp qtmp; fetch(qtmp,i,q_tex);
    int iw=ix.w;
    // Row offset of atom i's type in the local tables
    int itype=fast_mul((int)MAX_SHARED_TYPES,iw);

    for ( ; nbor<nbor_end; nbor+=n_stride) {
      int j=dev_packed[nbor];

      // Special-bond bits are decoded before they are masked off of j
      numtyp factor_lj = sp_lj[sbmask(j)];
      numtyp factor_coul = (numtyp)1.0-sp_lj[sbmask(j)+4];
      j &= NEIGHMASK;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
      int mtype=itype+jx.w;

      // Compute r12
      numtyp delx = ix.x-jx.x;
      numtyp dely = ix.y-jx.y;
      numtyp delz = ix.z-jx.z;
      numtyp rsq = delx*delx+dely*dely+delz*delz;

      if (rsq<lj1[mtype].z) {
        numtyp r6inv, prefactor, _erfc;
        numtyp force_lj = (numtyp)0.0;
        numtyp forcecoul = (numtyp)0.0;
        numtyp r2inv=ucl_recip(rsq);
        numtyp r = ucl_sqrt(rsq);

        // LJ part, evaluated at the shifted distance r - lj3.w
        if (rsq < lj1[mtype].w) {
          numtyp rshift = r - lj3[mtype].w;
          numtyp rshiftsq = rshift*rshift;
          numtyp rshift2inv = ucl_recip(rshiftsq);
          r6inv = rshift2inv*rshift2inv*rshift2inv;
          force_lj = r6inv*(lj1[mtype].x*r6inv-lj1[mtype].y);
          force_lj *= factor_lj/rshift/r;
        }

        // Real-space Coulomb part
        if (rsq < cut_coulsq) {
          numtyp grij = g_ewald * r;
          numtyp expm2 = ucl_exp(-grij*grij);
          numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*grij);
          _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
          fetch(prefactor,j,q_tex);
          prefactor *= qqrd2e * qtmp/r;
          forcecoul = prefactor * (_erfc + EWALD_F*grij*expm2-factor_coul);
        }

        numtyp force = force_lj + forcecoul*r2inv;

        f.x+=delx*force;
        f.y+=dely*force;
        f.z+=delz*force;

        if (eflag>0) {
          if (rsq < cut_coulsq)
            e_coul += prefactor*(_erfc-factor_coul);
          if (rsq < lj1[mtype].w) {
            numtyp e=r6inv*(lj3[mtype].x*r6inv-lj3[mtype].y);
            energy+=factor_lj*(e-lj3[mtype].z);
          }
        }
        if (vflag>0) {
          virial[0] += delx*delx*force;
          virial[1] += dely*dely*force;
          virial[2] += delz*delz*force;
          virial[3] += delx*dely*force;
          virial[4] += delx*delz*force;
          virial[5] += dely*delz*force;
        }
      }

    } // for nbor
    store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,offset,eflag,
                    vflag,ans,engv);
  } // if ii
}
|
||||
|
||||
88
lib/gpu/lal_lj_expand_coul_long.h
Normal file
88
lib/gpu/lal_lj_expand_coul_long.h
Normal file
@ -0,0 +1,88 @@
|
||||
/***************************************************************************
|
||||
lj_expand_coul_long.h
|
||||
-------------------
|
||||
Trung Nguyen (Northwestern)
|
||||
|
||||
Class for acceleration of the lj/expand/coul/long pair style.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#ifndef LAL_LJ_EXPAND_COUL_LONG_H
|
||||
#define LAL_LJ_EXPAND_COUL_LONG_H
|
||||
|
||||
#include "lal_base_charge.h"
|
||||
|
||||
namespace LAMMPS_AL {
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
class LJExpandCoulLong : public BaseCharge<numtyp, acctyp> {
|
||||
public:
|
||||
LJExpandCoulLong();
|
||||
~LJExpandCoulLong();
|
||||
|
||||
/// Clear any previous data and set up for a new LAMMPS run
|
||||
/** \param max_nbors initial number of rows in the neighbor matrix
|
||||
* \param cell_size cutoff + skin
|
||||
* \param gpu_split fraction of particles handled by device
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(const int ntypes, double **host_cutsq,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset, double **host_shift, double *host_special_lj,
|
||||
const int nlocal, const int nall, const int max_nbors,
|
||||
const int maxspecial, const double cell_size,
|
||||
const double gpu_split, FILE *screen, double **host_cut_ljsq,
|
||||
const double host_cut_coulsq, double *host_special_coul,
|
||||
const double qqrd2e, const double g_ewald);
|
||||
|
||||
/// Send updated coeffs from host to device (to be compatible with fix adapt)
|
||||
void reinit(const int ntypes, double **host_cutsq,
|
||||
double **host_lj1, double **host_lj2, double **host_lj3,
|
||||
double **host_lj4, double **host_offset, double **host_shift, double **host_cut_ljsq);
|
||||
|
||||
/// Clear all host and device data
|
||||
/** \note This is called at the beginning of the init() routine **/
|
||||
void clear();
|
||||
|
||||
/// Returns memory usage on device per atom
|
||||
int bytes_per_atom(const int max_nbors) const;
|
||||
|
||||
/// Total host memory used by library for pair style
|
||||
double host_memory_usage() const;
|
||||
|
||||
// --------------------------- TYPE DATA --------------------------
|
||||
|
||||
/// lj1.x = lj1, lj1.y = lj2, lj1.z = cutsq, lj1.w = cutsq_vdw
|
||||
UCL_D_Vec<numtyp4> lj1;
|
||||
/// lj3.x = lj3, lj3.y = lj4, lj3.z = offset, lj3.w = shift
|
||||
UCL_D_Vec<numtyp4> lj3;
|
||||
/// Special LJ values [0-3] and Special Coul values [4-7]
|
||||
UCL_D_Vec<numtyp> sp_lj;
|
||||
|
||||
/// If atom type constants fit in shared memory, use fast kernels
|
||||
bool shared_types;
|
||||
|
||||
/// Number of atom types
|
||||
int _lj_types;
|
||||
|
||||
numtyp _cut_coulsq, _qqrd2e, _g_ewald;
|
||||
|
||||
private:
|
||||
bool _allocated;
|
||||
void loop(const bool _eflag, const bool _vflag);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
152
lib/gpu/lal_lj_expand_coul_long_ext.cpp
Normal file
152
lib/gpu/lal_lj_expand_coul_long_ext.cpp
Normal file
@ -0,0 +1,152 @@
|
||||
/***************************************************************************
|
||||
lj_expand_coul_long_ext.cpp
|
||||
------------------------
|
||||
Trung Nguyen (Northwestern)
|
||||
|
||||
Functions for LAMMPS access to lj/expand/coul/long acceleration routines.
|
||||
|
||||
__________________________________________________________________________
|
||||
This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
|
||||
__________________________________________________________________________
|
||||
|
||||
begin :
|
||||
email : ndactrung@gmail.com
|
||||
***************************************************************************/
|
||||
|
||||
#include <iostream>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
|
||||
#include "lal_lj_expand_coul_long.h"
|
||||
|
||||
using namespace std;
|
||||
using namespace LAMMPS_AL;
|
||||
|
||||
static LJExpandCoulLong<PRECISION,ACC_PRECISION> LJECLMF;
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Allocate memory on host and device and copy constants to device
|
||||
// ---------------------------------------------------------------------------
|
||||
// Initialize the lj/expand/coul/long accelerator object (LJECLMF).
//
// World rank 0 calls init() first (the progress message says it also
// compiles), then the remaining ranks initialize one "slot" at a time,
// separated by gpu_barrier() calls.
//
//   gpu_mode   (out) receives LJECLMF.device->gpu_mode()
//   max_nbors  initial number of rows in the neighbor matrix; forwarded to
//              LJExpandCoulLong::init()
//   screen     if non-null and this is replica 0, progress text is printed
//
// Returns the status from init() on this rank (0 on success). Ranks that
// never call init() return 0. On success estimate_gpu_overhead() is invoked
// before returning.
int ljecl_gpu_init(const int ntypes, double **cutsq, double **host_lj1,
                   double **host_lj2, double **host_lj3, double **host_lj4,
                   double **offset, double **shift, double *special_lj,
                   const int inum, const int nall, const int max_nbors,
                   const int maxspecial, const double cell_size,
                   int &gpu_mode, FILE *screen,
                   double **host_cut_ljsq, double host_cut_coulsq,
                   double *host_special_coul, const double qqrd2e,
                   const double g_ewald) {
  LJECLMF.clear();
  gpu_mode=LJECLMF.device->gpu_mode();
  double gpu_split=LJECLMF.device->particle_split();
  int first_gpu=LJECLMF.device->first_device();
  int last_gpu=LJECLMF.device->last_device();
  int world_me=LJECLMF.device->world_me();
  int gpu_rank=LJECLMF.device->gpu_rank();
  int procs_per_gpu=LJECLMF.device->procs_per_gpu();

  LJECLMF.device->init_message(screen,"lj/expand/coul/long",first_gpu,last_gpu);

  // Only replica 0 with a valid output stream reports progress
  bool message=false;
  if (LJECLMF.device->replica_me()==0 && screen)
    message=true;

  if (message) {
    fprintf(screen,"Initializing Device and compiling on process 0...");
    fflush(screen);
  }

  // Honor the caller-supplied max_nbors instead of a hard-coded row count,
  // so the neighbor matrix is sized the way the caller requested.
  int init_ok=0;
  if (world_me==0)
    init_ok=LJECLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
                         offset, shift, special_lj, inum, nall, max_nbors,
                         maxspecial, cell_size, gpu_split, screen,
                         host_cut_ljsq, host_cut_coulsq, host_special_coul,
                         qqrd2e, g_ewald);

  LJECLMF.device->world_barrier();
  if (message)
    fprintf(screen,"Done.\n");

  // Remaining ranks initialize in turn, one gpu_rank per iteration
  for (int i=0; i<procs_per_gpu; i++) {
    if (message) {
      if (last_gpu-first_gpu==0)
        fprintf(screen,"Initializing Device %d on core %d...",first_gpu,i);
      else
        fprintf(screen,"Initializing Devices %d-%d on core %d...",first_gpu,
                last_gpu,i);
      fflush(screen);
    }
    if (gpu_rank==i && world_me!=0)
      init_ok=LJECLMF.init(ntypes, cutsq, host_lj1, host_lj2, host_lj3,
                           host_lj4, offset, shift, special_lj, inum, nall,
                           max_nbors, maxspecial, cell_size, gpu_split, screen,
                           host_cut_ljsq, host_cut_coulsq, host_special_coul,
                           qqrd2e, g_ewald);

    LJECLMF.device->gpu_barrier();
    if (message)
      fprintf(screen,"Done.\n");
  }
  if (message)
    fprintf(screen,"\n");

  if (init_ok==0)
    LJECLMF.estimate_gpu_overhead();
  return init_ok;
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy updated coeffs from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
// Push updated pair coefficients to the device by calling
// LJECLMF.reinit(). The call order mirrors initialization: world rank 0
// goes first, followed by a world barrier, then the other ranks take
// turns by gpu_rank with a gpu_barrier() after every turn.
void ljecl_gpu_reinit(const int ntypes, double **cutsq, double **host_lj1,
                      double **host_lj2, double **host_lj3, double **host_lj4,
                      double **offset, double **shift, double **host_cut_ljsq)
{
  const int world_me=LJECLMF.device->world_me();
  const int gpu_rank=LJECLMF.device->gpu_rank();
  const int procs_per_gpu=LJECLMF.device->procs_per_gpu();

  if (world_me==0) {
    LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
                   offset, shift, host_cut_ljsq);
  }
  LJECLMF.device->world_barrier();

  for (int turn=0; turn<procs_per_gpu; turn++) {
    const bool my_turn=(gpu_rank==turn && world_me!=0);
    if (my_turn) {
      LJECLMF.reinit(ntypes, cutsq, host_lj1, host_lj2, host_lj3, host_lj4,
                     offset, shift, host_cut_ljsq);
    }
    LJECLMF.device->gpu_barrier();
  }
}
|
||||
|
||||
// Forward to LJECLMF.clear(); takes no arguments and returns nothing.
void ljecl_gpu_clear()
{
  LJECLMF.clear();
}
|
||||
|
||||
int** ljecl_gpu_compute_n(const int ago, const int inum_full,
|
||||
const int nall, double **host_x, int *host_type,
|
||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||
tagint **special, const bool eflag, const bool vflag,
|
||||
const bool eatom, const bool vatom, int &host_start,
|
||||
int **ilist, int **jnum, const double cpu_time,
|
||||
bool &success, double *host_q, double *boxlo,
|
||||
double *prd) {
|
||||
return LJECLMF.compute(ago, inum_full, nall, host_x, host_type, sublo,
|
||||
subhi, tag, nspecial, special, eflag, vflag, eatom,
|
||||
vatom, host_start, ilist, jnum, cpu_time, success,
|
||||
host_q, boxlo, prd);
|
||||
}
|
||||
|
||||
// Forwarding wrapper around the LJECLMF.compute() overload that takes
// caller-supplied neighbor data (ilist, numj, firstneigh). All arguments are
// passed through unchanged; nothing is returned.
void ljecl_gpu_compute(const int ago, const int inum_full, const int nall,
                       double **host_x, int *host_type, int *ilist, int *numj,
                       int **firstneigh, const bool eflag, const bool vflag,
                       const bool eatom, const bool vatom, int &host_start,
                       const double cpu_time, bool &success, double *host_q,
                       const int nlocal, double *boxlo, double *prd)
{
  LJECLMF.compute(ago, inum_full, nall,
                  host_x, host_type,
                  ilist, numj, firstneigh,
                  eflag, vflag, eatom, vatom,
                  host_start, cpu_time, success,
                  host_q, nlocal, boxlo, prd);
}
|
||||
|
||||
double ljecl_gpu_bytes() {
|
||||
return LJECLMF.host_memory_usage();
|
||||
}
|
||||
|
||||
|
||||
@ -118,24 +118,24 @@ __kernel void transpose(__global tagint *restrict out,
|
||||
const __global tagint *restrict in,
|
||||
int columns_in, int rows_in)
|
||||
{
|
||||
__local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
__local tagint block[BLOCK_CELL_2D][BLOCK_CELL_2D+1];
|
||||
|
||||
unsigned ti=THREAD_ID_X;
|
||||
unsigned tj=THREAD_ID_Y;
|
||||
unsigned bi=BLOCK_ID_X;
|
||||
unsigned bj=BLOCK_ID_Y;
|
||||
unsigned ti=THREAD_ID_X;
|
||||
unsigned tj=THREAD_ID_Y;
|
||||
unsigned bi=BLOCK_ID_X;
|
||||
unsigned bj=BLOCK_ID_Y;
|
||||
|
||||
unsigned i=bi*BLOCK_CELL_2D+ti;
|
||||
unsigned j=bj*BLOCK_CELL_2D+tj;
|
||||
if ((i<columns_in) && (j<rows_in))
|
||||
block[tj][ti]=in[j*columns_in+i];
|
||||
unsigned i=bi*BLOCK_CELL_2D+ti;
|
||||
unsigned j=bj*BLOCK_CELL_2D+tj;
|
||||
if ((i<columns_in) && (j<rows_in))
|
||||
block[tj][ti]=in[j*columns_in+i];
|
||||
|
||||
__syncthreads();
|
||||
__syncthreads();
|
||||
|
||||
i=bj*BLOCK_CELL_2D+ti;
|
||||
j=bi*BLOCK_CELL_2D+tj;
|
||||
if ((i<rows_in) && (j<columns_in))
|
||||
out[j*rows_in+i] = block[ti][tj];
|
||||
i=bj*BLOCK_CELL_2D+ti;
|
||||
j=bi*BLOCK_CELL_2D+tj;
|
||||
if ((i<rows_in) && (j<columns_in))
|
||||
out[j*rows_in+i] = block[ti][tj];
|
||||
}
|
||||
|
||||
__kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
||||
@ -191,7 +191,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
||||
nbor_list[pid_i]=pid_i;
|
||||
} else {
|
||||
stride=0;
|
||||
neigh_counts=host_numj+pid_i-inum;
|
||||
neigh_counts=host_numj+pid_i-inum;
|
||||
neigh_list=host_nbor_list+(pid_i-inum)*neigh_bin_size;
|
||||
}
|
||||
|
||||
@ -232,7 +232,7 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
||||
diff.z = atom_i.z - pos_sh[j].z;
|
||||
|
||||
r2 = diff.x*diff.x + diff.y*diff.y + diff.z*diff.z;
|
||||
if (r2 < cell_size*cell_size && r2 > 1e-5) {
|
||||
if (r2 < cell_size*cell_size && pid_j != pid_i) { // && r2 > 1e-5
|
||||
cnt++;
|
||||
if (cnt <= neigh_bin_size) {
|
||||
*neigh_list = pid_j;
|
||||
@ -243,8 +243,8 @@ __kernel void calc_neigh_list_cell(const __global numtyp4 *restrict x_,
|
||||
}
|
||||
}
|
||||
}
|
||||
__syncthreads();
|
||||
} // for (k)
|
||||
__syncthreads();
|
||||
} // for (k)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -21,7 +21,7 @@ Syntax from lib dir: python Install.py -b -v version -a kim-name
|
||||
specify one or more options, order does not matter
|
||||
|
||||
-v = version of KIM API library to use
|
||||
default = kim-api-v1.9.4 (current as of Apr 2018)
|
||||
default = kim-api-v1.9.5 (current as of May 2018)
|
||||
-b = download and build base KIM API library with example Models
|
||||
this will delete any previous installation in the current folder
|
||||
-n = do NOT download and build base KIM API library.
|
||||
@ -109,7 +109,7 @@ nargs = len(args)
|
||||
if nargs == 0: error()
|
||||
|
||||
thisdir = os.environ['PWD']
|
||||
version = "kim-api-v1.9.4"
|
||||
version = "kim-api-v1.9.5"
|
||||
|
||||
buildflag = False
|
||||
everythingflag = False
|
||||
|
||||
@ -1,5 +1,58 @@
|
||||
# Change Log
|
||||
|
||||
## [2.7.00](https://github.com/kokkos/kokkos/tree/2.7.00) (2018-05-24)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.6.00...2.7.00)
|
||||
|
||||
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.7**
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Deprecate team\_size auto adjusting to maximal value possible [\#1618](https://github.com/kokkos/kokkos/issues/1618)
|
||||
- DynamicView - remove restrictions to std::is\_trivial types and value\_type is power of two [\#1586](https://github.com/kokkos/kokkos/issues/1586)
|
||||
- Kokkos::StaticCrsGraph does not propagate memory traits \(e.g., Unmanaged\) [\#1581](https://github.com/kokkos/kokkos/issues/1581)
|
||||
- Adding ETI for DeepCopy / ViewFill etc. [\#1578](https://github.com/kokkos/kokkos/issues/1578)
|
||||
- Deprecate all the left over KOKKOS\_HAVE\_ Macros and Kokkos\_OldMacros.hpp [\#1572](https://github.com/kokkos/kokkos/issues/1572)
|
||||
- Error if Kokkos\_ARCH set in CMake [\#1555](https://github.com/kokkos/kokkos/issues/1555)
|
||||
- Deprecate ExecSpace::initialize / ExecSpace::finalize [\#1532](https://github.com/kokkos/kokkos/issues/1532)
|
||||
- New API for TeamPolicy property setting [\#1531](https://github.com/kokkos/kokkos/issues/1531)
|
||||
- clang 6.0 + cuda debug out-of-memory test failure [\#1521](https://github.com/kokkos/kokkos/issues/1521)
|
||||
- Cuda UniqueToken interface not consistent with other backends [\#1505](https://github.com/kokkos/kokkos/issues/1505)
|
||||
- Move Reducers out of Experimental namespace [\#1494](https://github.com/kokkos/kokkos/issues/1494)
|
||||
- Provide scope guard for initialize/finalize [\#1479](https://github.com/kokkos/kokkos/issues/1479)
|
||||
- Check Kokkos::is\_initialized in SharedAllocationRecord dtor [\#1465](https://github.com/kokkos/kokkos/issues/1465)
|
||||
- Remove static list of allocations [\#1464](https://github.com/kokkos/kokkos/issues/1464)
|
||||
- Makefiles: Support single compile/link line use case [\#1402](https://github.com/kokkos/kokkos/issues/1402)
|
||||
- ThreadVectorRange with a range [\#1400](https://github.com/kokkos/kokkos/issues/1400)
|
||||
- Exclusive scan + last value API [\#1358](https://github.com/kokkos/kokkos/issues/1358)
|
||||
- Install kokkos\_generated\_settings.cmake [\#1348](https://github.com/kokkos/kokkos/issues/1348)
|
||||
- Kokkos arrays \(not views!\) don't do bounds checking in debug mode [\#1342](https://github.com/kokkos/kokkos/issues/1342)
|
||||
- Expose round-robin GPU assignment outside of initialize\(int, char\*\*\) [\#1318](https://github.com/kokkos/kokkos/issues/1318)
|
||||
- DynamicView misses use\_count and label function [\#1298](https://github.com/kokkos/kokkos/issues/1298)
|
||||
- View constructor should check arguments [\#1286](https://github.com/kokkos/kokkos/issues/1286)
|
||||
- False Positive on Oversubscription Warning [\#1207](https://github.com/kokkos/kokkos/issues/1207)
|
||||
- Allow \(require\) execution space for 1st arg of VerifyExecutionCanAccessMemorySpace [\#1192](https://github.com/kokkos/kokkos/issues/1192)
|
||||
- ROCm: Add ROCmHostPinnedSpace [\#958](https://github.com/kokkos/kokkos/issues/958)
|
||||
- power of two functions [\#656](https://github.com/kokkos/kokkos/issues/656)
|
||||
- CUDA 8 has 64bit \_\_shfl [\#361](https://github.com/kokkos/kokkos/issues/361)
|
||||
- Add TriBITS/CMake configure information about node types [\#243](https://github.com/kokkos/kokkos/issues/243)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- CUDA atomic\_fetch\_sub for doubles is hitting CAS instead of intrinsic [\#1624](https://github.com/kokkos/kokkos/issues/1624)
|
||||
- Bug: use of ballot on Volta [\#1612](https://github.com/kokkos/kokkos/issues/1612)
|
||||
- Kokkos::deep\_copy memory access failures [\#1583](https://github.com/kokkos/kokkos/issues/1583)
|
||||
- g++ -std option doubly set for cmake project [\#1548](https://github.com/kokkos/kokkos/issues/1548)
|
||||
- ViewFill for 1D Views of larger 32bit entries fails [\#1541](https://github.com/kokkos/kokkos/issues/1541)
|
||||
- CUDA Volta another warpsync bug [\#1520](https://github.com/kokkos/kokkos/issues/1520)
|
||||
- triple\_nested\_parallelism fails with KOKKOS\_DEBUG and CUDA [\#1513](https://github.com/kokkos/kokkos/issues/1513)
|
||||
- Jenkins errors in Kokkos\_SharedAlloc.cpp with debug build [\#1511](https://github.com/kokkos/kokkos/issues/1511)
|
||||
- Kokkos::Sort out-of-bounds with empty bins [\#1504](https://github.com/kokkos/kokkos/issues/1504)
|
||||
- Get rid of deprecated functions inside Kokkos [\#1484](https://github.com/kokkos/kokkos/issues/1484)
|
||||
- get\_work\_partition casts int64\_t to int, causing a seg fault [\#1481](https://github.com/kokkos/kokkos/issues/1481)
|
||||
- NVCC bug with \_\_device\_\_ on defaulted function [\#1470](https://github.com/kokkos/kokkos/issues/1470)
|
||||
- CMake example broken with CUDA backend [\#1468](https://github.com/kokkos/kokkos/issues/1468)
|
||||
|
||||
|
||||
## [2.6.00](https://github.com/kokkos/kokkos/tree/2.6.00) (2018-03-07)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.5.00...2.6.00)
|
||||
|
||||
|
||||
@ -44,6 +44,7 @@ IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
"${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings")
|
||||
endif()
|
||||
include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake)
|
||||
install(FILES ${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake DESTINATION lib/cmake/Kokkos)
|
||||
string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}")
|
||||
string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}")
|
||||
string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}")
|
||||
|
||||
@ -1,7 +1,9 @@
|
||||
# Default settings common options.
|
||||
|
||||
#LAMMPS specific settings:
|
||||
KOKKOS_PATH=../../lib/kokkos
|
||||
ifndef KOKKOS_PATH
|
||||
KOKKOS_PATH=../../lib/kokkos
|
||||
endif
|
||||
CXXFLAGS=$(CCFLAGS)
|
||||
|
||||
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
|
||||
@ -21,8 +23,10 @@ KOKKOS_DEBUG ?= "no"
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
# Options: c++11,c++1z
|
||||
KOKKOS_CXX_STANDARD ?= "c++11"
|
||||
# Options: aggressive_vectorization,disable_profiling,disable_deprecated_code
|
||||
# Options: aggressive_vectorization,disable_profiling,disable_deprecated_code,enable_large_mem_tests
|
||||
KOKKOS_OPTIONS ?= ""
|
||||
# Option for setting ETI path
|
||||
KOKKOS_ETI_PATH ?= ${KOKKOS_PATH}/core/src/eti
|
||||
|
||||
# Default settings specific options.
|
||||
# Options: force_uvm,use_ldg,rdc,enable_lambda
|
||||
@ -51,10 +55,12 @@ KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),
|
||||
KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
|
||||
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
|
||||
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
|
||||
KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_large_mem_tests)
|
||||
KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg)
|
||||
KOKKOS_INTERNAL_CUDA_USE_UVM := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),force_uvm)
|
||||
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),rdc)
|
||||
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),enable_lambda)
|
||||
KOKKOS_INTERNAL_ENABLE_ETI := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_eti)
|
||||
|
||||
|
||||
# Check for Kokkos Host Execution Spaces one of which must be on.
|
||||
@ -78,7 +84,12 @@ KOKKOS_INTERNAL_USE_OPENMPTARGET := $(call kokkos_has_string,$(KOKKOS_DEVICES),O
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
|
||||
CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
|
||||
ifeq ($(origin CUDA_PATH), undefined)
|
||||
CUDA_PATH = $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
|
||||
endif
|
||||
ifeq ($(CUDA_PATH),)
|
||||
CUDA_PATH = $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
|
||||
endif
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
|
||||
endif
|
||||
|
||||
@ -116,7 +127,7 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.')
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
|
||||
@ -323,12 +334,13 @@ endif
|
||||
|
||||
# Generating the list of Flags.
|
||||
|
||||
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
|
||||
#CPPFLAGS is now unused
|
||||
KOKKOS_CPPFLAGS =
|
||||
KOKKOS_CXXFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src -I$(KOKKOS_ETI_PATH)
|
||||
KOKKOS_TPL_INCLUDE_DIRS =
|
||||
KOKKOS_TPL_LIBRARY_DIRS =
|
||||
KOKKOS_TPL_LIBRARY_NAMES =
|
||||
|
||||
KOKKOS_CXXFLAGS =
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_WARNINGS)
|
||||
endif
|
||||
@ -336,6 +348,8 @@ endif
|
||||
KOKKOS_LIBS = -ldl
|
||||
KOKKOS_TPL_LIBRARY_NAMES += dl
|
||||
KOKKOS_LDFLAGS = -L$(shell pwd)
|
||||
# CXXLDFLAGS is used together with CXXFLAGS in a combined compile/link command
|
||||
KOKKOS_CXXLDFLAGS = -L$(shell pwd)
|
||||
KOKKOS_LINK_FLAGS =
|
||||
KOKKOS_SRC =
|
||||
KOKKOS_HEADERS =
|
||||
@ -362,7 +376,7 @@ tmp := $(call kokkos_append_header,'\#endif')
|
||||
tmp := $(call kokkos_append_header,"/* Execution Spaces */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CUDA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
@ -374,19 +388,19 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_HAVE_OPENMP')
|
||||
tmp := $(call kokkos_append_header,'\#define KOKKOS_ENABLE_OPENMP')
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_PTHREAD")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_THREADS")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_QTHREADS")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_QTHREADS")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_SERIAL")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_SERIAL")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
|
||||
@ -422,13 +436,13 @@ endif
|
||||
tmp := $(call kokkos_append_header,"/* General Settings */")
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX11")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_CXX1Z")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX11")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CXX1Z")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
@ -437,9 +451,9 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
|
||||
endif
|
||||
|
||||
KOKKOS_CXXFLAGS += -g
|
||||
KOKKOS_LDFLAGS += -g -ldl
|
||||
KOKKOS_LDFLAGS += -g
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_DEBUG")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG")
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK")
|
||||
endif
|
||||
@ -451,14 +465,15 @@ endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
ifneq ($(HWLOC_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_CXXFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
KOKKOS_CXXLDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib
|
||||
endif
|
||||
KOKKOS_LIBS += -lhwloc
|
||||
KOKKOS_TPL_LIBRARY_NAMES += hwloc
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HWLOC")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HWLOC")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
@ -469,14 +484,15 @@ endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
ifneq ($(MEMKIND_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_CXXFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_CXXLDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib
|
||||
endif
|
||||
KOKKOS_LIBS += -lmemkind -lnuma
|
||||
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HBWSPACE")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_HBWSPACE")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
|
||||
@ -486,6 +502,13 @@ endif
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_ETI")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_LARGE_MEM_TESTS), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_LARGE_MEM_TESTS")
|
||||
endif
|
||||
|
||||
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
|
||||
|
||||
@ -497,27 +520,35 @@ tmp := $(call kokkos_append_header,"/* Cuda Settings */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LDG_INTRINSIC")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_UVM")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_UVM")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
|
||||
KOKKOS_CXXFLAGS += --relocatable-device-code=true
|
||||
KOKKOS_LDFLAGS += --relocatable-device-code=true
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -ge 90; echo $$?),0)
|
||||
# This diagnostic is just plain wrong in CUDA 9
|
||||
# See https://github.com/kokkos/kokkos/issues/1470
|
||||
KOKKOS_CXXFLAGS += -Xcudafe --diag_suppress=esa_on_defaulted_function_ignored
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
|
||||
KOKKOS_CXXFLAGS += -expt-extended-lambda
|
||||
else
|
||||
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
|
||||
@ -525,12 +556,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_USE_LAMBDA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_CUDA_LAMBDA")
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_CUDA_CLANG_WORKAROUND")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_IMPL_CUDA_CLANG_WORKAROUND")
|
||||
endif
|
||||
endif
|
||||
|
||||
@ -907,10 +938,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
|
||||
KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
|
||||
KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
|
||||
KOKKOS_CXXLDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
|
||||
KOKKOS_TPL_LIBRARY_NAMES += hc_am m
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
|
||||
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/ROCm/*.cpp)
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
|
||||
endif
|
||||
|
||||
@ -937,10 +972,14 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Cuda/*.cpp)
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
ifneq ($(CUDA_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
|
||||
KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
||||
KOKKOS_CXXLDFLAGS += -L$(CUDA_PATH)/lib64
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
@ -964,6 +1003,9 @@ endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/OpenMP/*.cpp)
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
@ -978,6 +1020,9 @@ endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Threads/*.cpp)
|
||||
endif
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||
KOKKOS_LIBS += -lpthread
|
||||
KOKKOS_TPL_LIBRARY_NAMES += pthread
|
||||
@ -987,8 +1032,9 @@ ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
|
||||
ifneq ($(QTHREADS_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
|
||||
KOKKOS_CXXFLAGS += -I$(QTHREADS_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
|
||||
KOKKOS_CXXLDFLAGS += -L$(QTHREADS_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64
|
||||
endif
|
||||
@ -1011,6 +1057,11 @@ endif
|
||||
|
||||
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
|
||||
# device to avoid a link warning.
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_ETI_PATH)/Serial/*.cpp)
|
||||
endif
|
||||
endif
|
||||
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
|
||||
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC))
|
||||
|
||||
@ -31,6 +31,12 @@ Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_
|
||||
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/Serial/Makefile.eti_Serial
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
Kokkos_Cuda_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Impl.cpp
|
||||
@ -40,6 +46,9 @@ Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
|
||||
Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/Cuda/Makefile.eti_Cuda
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
@ -51,6 +60,9 @@ Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_RO
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
|
||||
Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/ROCm/Makefile.eti_ROCm
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
@ -58,6 +70,9 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
|
||||
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/Threads/Makefile.eti_Threads
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
@ -72,6 +87,9 @@ Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokko
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
|
||||
Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
|
||||
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_ETI), 1)
|
||||
include $(KOKKOS_ETI_PATH)/OpenMP/Makefile.eti_OpenMP
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
|
||||
@ -702,7 +702,11 @@ namespace Kokkos {
|
||||
}
|
||||
// Construct a pool from a seed: resets num_states_ to zero, then calls
// init() with the seed and the device's maximum hardware thread count.
// The deprecated accessor name is used only while
// KOKKOS_ENABLE_DEPRECATED_CODE is defined; otherwise the impl_ variant.
Random_XorShift64_Pool(uint64_t seed) {
  num_states_ = 0;
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
  const auto thread_count = DeviceType::max_hardware_threads();
#else
  const auto thread_count = DeviceType::impl_max_hardware_threads();
#endif
  init(seed, thread_count);
}
|
||||
|
||||
Random_XorShift64_Pool(const Random_XorShift64_Pool& src):
|
||||
@ -751,7 +755,11 @@ namespace Kokkos {
|
||||
|
||||
// Return a Random_XorShift64 generator built from the pool entry selected by
// the calling hardware thread's id: state_(i) supplies the generator state
// and i is passed along as its index.
// The deprecated accessor name is used only while
// KOKKOS_ENABLE_DEPRECATED_CODE is defined; otherwise the impl_ variant.
KOKKOS_INLINE_FUNCTION
Random_XorShift64<DeviceType> get_state() const {
  // The stray second ';' (an empty statement) has been dropped in both branches.
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
  const int i = DeviceType::hardware_thread_id();
#else
  const int i = DeviceType::impl_hardware_thread_id();
#endif
  return Random_XorShift64<DeviceType>(state_(i),i);
}
|
||||
|
||||
@ -957,7 +965,11 @@ namespace Kokkos {
|
||||
inline
|
||||
Random_XorShift1024_Pool(uint64_t seed){
|
||||
num_states_ = 0;
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
init(seed,DeviceType::max_hardware_threads());
|
||||
#else
|
||||
init(seed,DeviceType::impl_max_hardware_threads());
|
||||
#endif
|
||||
}
|
||||
|
||||
Random_XorShift1024_Pool(const Random_XorShift1024_Pool& src):
|
||||
@ -1012,7 +1024,11 @@ namespace Kokkos {
|
||||
|
||||
// Return a Random_XorShift1024 generator for the calling hardware thread:
// the generator is constructed from state_, p_(i) and i, where i is the
// hardware thread id reported by DeviceType.
// The deprecated accessor name is used only while
// KOKKOS_ENABLE_DEPRECATED_CODE is defined; otherwise the impl_ variant.
KOKKOS_INLINE_FUNCTION
Random_XorShift1024<DeviceType> get_state() const {
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
  const int i = DeviceType::hardware_thread_id();
#else
  const int i = DeviceType::impl_hardware_thread_id();
#endif
  return Random_XorShift1024<DeviceType>(state_,p_(i),i);
}  // no trailing ';' — a member function definition does not take one
|
||||
|
||||
|
||||
@ -288,6 +288,7 @@ public:
|
||||
Kokkos::abort("BinSort::sort: values range length != permutation vector length");
|
||||
}
|
||||
|
||||
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE
|
||||
scratch_view_type
|
||||
sorted_values("Scratch",
|
||||
len,
|
||||
@ -298,6 +299,18 @@ public:
|
||||
values.extent(5),
|
||||
values.extent(6),
|
||||
values.extent(7));
|
||||
#else
|
||||
scratch_view_type
|
||||
sorted_values("Scratch",
|
||||
values.rank_dynamic > 0 ? len : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
values.rank_dynamic > 1 ? values.extent(1) : KOKKOS_IMPL_CTOR_DEFAULT_ARG ,
|
||||
values.rank_dynamic > 2 ? values.extent(2) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
values.rank_dynamic > 3 ? values.extent(3) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
values.rank_dynamic > 4 ? values.extent(4) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
values.rank_dynamic > 5 ? values.extent(5) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
values.rank_dynamic > 6 ? values.extent(6) : KOKKOS_IMPL_CTOR_DEFAULT_ARG,
|
||||
values.rank_dynamic > 7 ? values.extent(7) : KOKKOS_IMPL_CTOR_DEFAULT_ARG);
|
||||
#endif
|
||||
|
||||
{
|
||||
copy_permute_functor< scratch_view_type /* DstViewType */
|
||||
@ -362,8 +375,10 @@ public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const bin_sort_bins_tag& tag, const int&i ) const {
|
||||
auto bin_size = bin_count_const(i);
|
||||
if (bin_size <= 1) return;
|
||||
int upper_bound = bin_offsets(i)+bin_size;
|
||||
bool sorted = false;
|
||||
int upper_bound = bin_offsets(i)+bin_count_const(i);
|
||||
while(!sorted) {
|
||||
sorted = true;
|
||||
int old_idx = sort_order(bin_offsets(i));
|
||||
@ -501,7 +516,7 @@ bool try_std_sort(ViewType view) {
|
||||
|
||||
template<class ViewType>
|
||||
struct min_max_functor {
|
||||
typedef Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar;
|
||||
typedef Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar;
|
||||
|
||||
ViewType view;
|
||||
min_max_functor(const ViewType& view_):view(view_) {}
|
||||
@ -523,8 +538,8 @@ void sort( ViewType const & view , bool const always_use_kokkos_sort = false)
|
||||
}
|
||||
typedef BinOp1D<ViewType> CompType;
|
||||
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
parallel_reduce("Kokkos::Sort::FindExtent",Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.extent(0)),
|
||||
Impl::min_max_functor<ViewType>(view),reducer);
|
||||
if(result.min_val == result.max_val) return;
|
||||
@ -542,8 +557,8 @@ void sort( ViewType view
|
||||
typedef Kokkos::RangePolicy<typename ViewType::execution_space> range_policy ;
|
||||
typedef BinOp1D<ViewType> CompType;
|
||||
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
Kokkos::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
|
||||
parallel_reduce("Kokkos::Sort::FindExtent", range_policy( begin , end )
|
||||
, Impl::min_max_functor<ViewType>(view),reducer );
|
||||
|
||||
@ -76,7 +76,11 @@ IF(KOKKOS_SEPARATE_LIBS)
|
||||
)
|
||||
|
||||
foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES)
|
||||
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
|
||||
if ("${lib}" STREQUAL "cuda")
|
||||
set(LIB_cuda "-lcuda")
|
||||
else()
|
||||
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
|
||||
endif()
|
||||
target_link_libraries(kokkoscore PUBLIC ${LIB_${lib}})
|
||||
endforeach()
|
||||
|
||||
@ -154,7 +158,11 @@ ELSE()
|
||||
)
|
||||
|
||||
foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES)
|
||||
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
|
||||
if ("${lib}" STREQUAL "cuda")
|
||||
set(LIB_cuda "-lcuda")
|
||||
else()
|
||||
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
|
||||
endif()
|
||||
target_link_libraries(kokkos PUBLIC ${LIB_${lib}})
|
||||
endforeach()
|
||||
|
||||
|
||||
@ -31,6 +31,7 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
|
||||
Profiling_Load_Print
|
||||
Aggressive_Vectorization
|
||||
Deprecated_Code
|
||||
Explicit_Instantiation
|
||||
)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
@ -40,6 +41,7 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
|
||||
foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
|
||||
string(TOUPPER ${opt} OPT )
|
||||
IF(DEFINED Kokkos_ENABLE_${opt})
|
||||
MESSAGE("Kokkos_ENABLE_${opt} is defined!")
|
||||
IF(DEFINED KOKKOS_ENABLE_${OPT})
|
||||
IF(NOT ("${KOKKOS_ENABLE_${OPT}}" STREQUAL "${Kokkos_ENABLE_${opt}}"))
|
||||
IF(DEFINED KOKKOS_ENABLE_${OPT}_INTERNAL)
|
||||
@ -57,18 +59,16 @@ foreach(opt ${KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST})
|
||||
ENDIF()
|
||||
ELSE()
|
||||
SET(KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT ${Kokkos_ENABLE_${opt}})
|
||||
MESSAGE("set KOKKOS_INTERNAL_ENABLE_${OPT}_DEFAULT!")
|
||||
ENDIF()
|
||||
ENDIF()
|
||||
endforeach()
|
||||
|
||||
IF(DEFINED Kokkos_ARCH)
|
||||
MESSAGE(FATAL_ERROR "Defined Kokkos_ARCH, use KOKKOS_ARCH instead!")
|
||||
ENDIF()
|
||||
IF(DEFINED Kokkos_Arch)
|
||||
IF(DEFINED KOKKOS_ARCH)
|
||||
IF(NOT (${KOKKOS_ARCH} STREQUAL "${Kokkos_Arch}"))
|
||||
MESSAGE(FATAL_ERROR "Defined both Kokkos_Arch and KOKKOS_ARCH and they differ!")
|
||||
ENDIF()
|
||||
ELSE()
|
||||
SET(KOKKOS_ARCH ${Kokkos_Arch})
|
||||
ENDIF()
|
||||
MESSAGE(FATAL_ERROR "Defined Kokkos_Arch, use KOKKOS_ARCH instead!")
|
||||
ENDIF()
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
@ -103,6 +103,8 @@ list(APPEND KOKKOS_ARCH_LIST
|
||||
Maxwell53 # (GPU) NVIDIA Maxwell generation CC 5.3
|
||||
Pascal60 # (GPU) NVIDIA Pascal generation CC 6.0
|
||||
Pascal61 # (GPU) NVIDIA Pascal generation CC 6.1
|
||||
Volta70 # (GPU) NVIDIA Volta generation CC 7.0
|
||||
Volta72 # (GPU) NVIDIA Volta generation CC 7.2
|
||||
)
|
||||
|
||||
# List of possible device architectures.
|
||||
@ -267,6 +269,8 @@ set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_P
|
||||
set_kokkos_default_default(DEPRECATED_CODE ON)
|
||||
set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.")
|
||||
|
||||
set_kokkos_default_default(EXPLICIT_INSTANTIATION ON)
|
||||
set(KOKKOS_ENABLE_EXPLICIT_INSTANTIATION ${KOKKOS_INTERNAL_ENABLE_EXPLICIT_INSTANTIATION_DEFAULT} CACHE BOOL "Enable explicit template instantiation.")
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
#------------------------------- KOKKOS_USE_TPLS -------------------------------
|
||||
|
||||
@ -74,6 +74,9 @@ endif()
|
||||
if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT})
|
||||
list(APPEND KOKKOS_OPTIONSl enable_profile_load_print)
|
||||
endif()
|
||||
if(${KOKKOS_ENABLE_EXPLICIT_INSTANTIATION})
|
||||
list(APPEND KOKKOS_OPTIONSl enable_eti)
|
||||
endif()
|
||||
# List needs to be comma-delimitted
|
||||
string(REPLACE ";" "," KOKKOS_GMAKE_OPTIONS "${KOKKOS_OPTIONSl}")
|
||||
|
||||
@ -158,6 +161,19 @@ if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"")
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_STANDARD)
|
||||
if (CMAKE_CXX_STANDARD STREQUAL "98")
|
||||
message(FATAL_ERROR "Kokkos requires C++11 or newer!")
|
||||
endif()
|
||||
set(KOKKOS_CXX_STANDARD "c++${CMAKE_CXX_STANDARD}")
|
||||
if (CMAKE_CXX_EXTENSIONS)
|
||||
if (CMAKE_CXX_COMPILER_ID STREQUAL "GNU")
|
||||
set(KOKKOS_CXX_STANDARD "gnu++${CMAKE_CXX_STANDARD}")
|
||||
endif()
|
||||
endif()
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "KOKKOS_CXX_STANDARD=\"${KOKKOS_CXX_STANDARD}\"")
|
||||
endif()
|
||||
|
||||
# Final form that gets passed to make
|
||||
set(KOKKOS_SETTINGS env ${KOKKOS_SETTINGS})
|
||||
|
||||
|
||||
@ -300,7 +300,9 @@ FUNCTION(TRIBITS_ADD_EXECUTABLE EXE_NAME)
|
||||
ENDIF()
|
||||
ENDFUNCTION()
|
||||
|
||||
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
|
||||
IF(NOT TARGET check)
|
||||
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
|
||||
ENDIF()
|
||||
|
||||
FUNCTION(TRIBITS_ADD_TEST)
|
||||
ENDFUNCTION()
|
||||
|
||||
@ -22,30 +22,38 @@ if [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name
|
||||
if [[ "$HOSTNAME" == n* ]]; then # Warning: very generic name
|
||||
if [[ "$PROCESSOR" = "aarch64" ]]; then
|
||||
MACHINE=sullivan
|
||||
module load git
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
if [[ "$HOSTNAME" == node* ]]; then # Warning: very generic name
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
MACHINE=shepard
|
||||
module load git
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ apollo ]]; then
|
||||
if [[ "$HOSTNAME" == apollo\.* ]]; then
|
||||
MACHINE=apollo
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ sullivan ]]; then
|
||||
if [[ "$HOSTNAME" == sullivan ]]; then
|
||||
MACHINE=sullivan
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" == mayer\.* ]]; then
|
||||
MACHINE=mayer
|
||||
# module load git
|
||||
fi
|
||||
if [[ "$HOSTNAME" == cn* ]]; then # Warning: very generic name
|
||||
MACHINE=mayer
|
||||
fi
|
||||
|
||||
if [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
MACHINE=sems
|
||||
@ -83,7 +91,7 @@ CUSTOM_BUILD_LIST=""
|
||||
QTHREADS_PATH=""
|
||||
DRYRUN=False
|
||||
BUILD_ONLY=False
|
||||
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
|
||||
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
TEST_SCRIPT=False
|
||||
SKIP_HWLOC=False
|
||||
SPOT_CHECK=False
|
||||
@ -142,6 +150,9 @@ do
|
||||
--with-cuda-options*)
|
||||
KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
|
||||
;;
|
||||
--with-options*)
|
||||
KOKKOS_OPTIONS="--with-options=enable_large_mem_tests,${key#*=}"
|
||||
;;
|
||||
--cxxflags-extra*)
|
||||
CXX_FLAGS_EXTRA="${key#*=}"
|
||||
;;
|
||||
@ -247,7 +258,7 @@ elif [ "$MACHINE" = "white" ]; then
|
||||
ARCH_FLAG="--arch=Power8,Kepler37"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "bowman" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
@ -268,7 +279,7 @@ elif [ "$MACHINE" = "bowman" ]; then
|
||||
ARCH_FLAG="--arch=KNL"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "sullivan" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
@ -284,7 +295,24 @@ elif [ "$MACHINE" = "sullivan" ]; then
|
||||
ARCH_FLAG="--arch=ARMv8-ThunderX"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "mayer" ]; then
|
||||
SKIP_HWLOC=True
|
||||
export SLURM_TASKS_PER_NODE=96
|
||||
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
ARM_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/7.2.0 $BASE_MODULE_LIST $ARM_GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"arm/1.4.0 $ARM_MODULE_LIST $ARM_GCC_BUILD_LIST armclang++ $CLANG_WARNING_FLAGS")
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=ARMv8-TX2"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "shepard" ]; then
|
||||
source /etc/profile.d/modules.sh
|
||||
@ -303,7 +331,7 @@ elif [ "$MACHINE" = "shepard" ]; then
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=HSW"
|
||||
fi
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
elif [ "$MACHINE" = "apollo" ]; then
|
||||
source /projects/sems/modulefiles/utils/sems-modules-init.sh
|
||||
@ -331,7 +359,7 @@ elif [ "$MACHINE" = "apollo" ]; then
|
||||
if [ "$SPOT_CHECK" = "True" ]; then
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/4.8.4 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/5.3.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
|
||||
@ -358,7 +386,7 @@ elif [ "$MACHINE" = "apollo" ]; then
|
||||
ARCH_FLAG="--arch=SNB,Volta70"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=1
|
||||
|
||||
else
|
||||
echo "Unhandled machine $MACHINE" >&2
|
||||
@ -627,6 +655,11 @@ single_build_and_test() {
|
||||
if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
|
||||
local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
|
||||
fi
|
||||
if [[ "$KOKKOS_OPTIONS" != "" ]]; then
|
||||
local extra_args="$extra_args $KOKKOS_OPTIONS"
|
||||
else
|
||||
local extra_args="$extra_args --with-options=enable_large_mem_tests"
|
||||
fi
|
||||
|
||||
echo " Starting job $desc"
|
||||
|
||||
@ -642,7 +675,7 @@ single_build_and_test() {
|
||||
else
|
||||
run_cmd ${KOKKOS_PATH}/generate_makefile.bash --with-devices=$build $ARCH_FLAG --compiler=$(which $compiler_exe) --cxxflags=\"$cxxflags\" --ldflags=\"$ldflags\" $extra_args &>> ${desc}.configure.log || { report_and_log_test_result 1 ${desc} configure && return 0; }
|
||||
local -i build_start_time=$(date +%s)
|
||||
run_cmd make -j 32 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
|
||||
run_cmd make -j 48 build-test >& ${desc}.build.log || { report_and_log_test_result 1 ${desc} build && return 0; }
|
||||
local -i build_end_time=$(date +%s)
|
||||
comment="build_time=$(($build_end_time-$build_start_time))"
|
||||
|
||||
@ -682,6 +715,9 @@ run_in_background() {
|
||||
if [[ "$compiler" == cuda* ]]; then
|
||||
num_jobs=1
|
||||
fi
|
||||
if [[ "$compiler" == clang ]]; then
|
||||
num_jobs=1
|
||||
fi
|
||||
# fi
|
||||
wait_for_jobs $num_jobs
|
||||
|
||||
|
||||
@ -70,13 +70,12 @@ protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
std::cout << std::setprecision(5) << std::scientific;
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||
Kokkos::InitArguments args(-1, -1, 0);
|
||||
Kokkos::initialize(args);
|
||||
}
|
||||
static void TearDownTestCase()
|
||||
{
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
Kokkos::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user