Compare commits
203 Commits
patch_5Jun
...
patch_18Ju
| Author | SHA1 | Date | |
|---|---|---|---|
| bb11344424 | |||
| e754fb80a4 | |||
| 98fbaef406 | |||
| d98c105d34 | |||
| 961dcfc261 | |||
| 81cdce9b04 | |||
| 995b6b31a2 | |||
| e9666f585f | |||
| f4f4a7c850 | |||
| 5fc3081a55 | |||
| a39a35af20 | |||
| 1452d3d25b | |||
| 2dea4eb0e9 | |||
| 5d73b0790f | |||
| 93fd33aad9 | |||
| 6536399fc3 | |||
| 2f29bd29f4 | |||
| b1458ceebf | |||
| 92615bda3a | |||
| 4c0cd0a019 | |||
| fd93c27dcb | |||
| 4178c5167b | |||
| fd20a0606a | |||
| 75e2981bda | |||
| 98d9a9a4d2 | |||
| e73b34a5b1 | |||
| ac57b41b4d | |||
| 43e3c1520b | |||
| e7116c8680 | |||
| 4d7d3a5d53 | |||
| c1550ba29b | |||
| 5fb505ca8c | |||
| 67a1a63f5f | |||
| 65b87fa278 | |||
| c5c03230cb | |||
| be5d3d6a19 | |||
| f8e257d219 | |||
| 3f523ea906 | |||
| a973700295 | |||
| daa53e3008 | |||
| 7a33d1e328 | |||
| 0559e155f2 | |||
| 0b25d2feef | |||
| 1c1491eebf | |||
| 400751f30f | |||
| 4fbf96a353 | |||
| 1dc8bb163d | |||
| bb0240d8eb | |||
| fd592d510f | |||
| dbafb92dd5 | |||
| 83060f0902 | |||
| fd55d7d367 | |||
| 3d5db63381 | |||
| 42b0cb5e3e | |||
| 4cee333c07 | |||
| 7f4c611e21 | |||
| fe29572737 | |||
| 9421466f57 | |||
| 8d985e53f4 | |||
| e72ac92a7f | |||
| 3e2f3a8058 | |||
| 72b295d7f4 | |||
| fa76472135 | |||
| b368b11d60 | |||
| 591e6836fd | |||
| 4ac100fe69 | |||
| 4ecd81f81b | |||
| 203c6d122b | |||
| e788d32678 | |||
| ae59ffe83e | |||
| 99a8d3c2ed | |||
| ca1445788e | |||
| 0908bd7aaf | |||
| 239dfe163c | |||
| cea1bd9cd9 | |||
| 31dc5dbb51 | |||
| 9e3dc26599 | |||
| 8da75c2040 | |||
| 85999fc4a7 | |||
| 73fa8d4055 | |||
| bd237a05b8 | |||
| 4941f11ad4 | |||
| afab5ef303 | |||
| a7c02e699e | |||
| 8fed39d726 | |||
| b9e10d55e2 | |||
| 1f1a0f95b2 | |||
| 61e9dc4c8d | |||
| b53df3dd63 | |||
| 56e3b1d1f4 | |||
| fde7e2de3c | |||
| e2391edce6 | |||
| f7026491f1 | |||
| 40a2f275c2 | |||
| e549f911f7 | |||
| 34dca6dc79 | |||
| 2ebc40deb3 | |||
| e9b4ab7363 | |||
| 8e43a45925 | |||
| 92f078cfff | |||
| 708052dc81 | |||
| 960a975e2a | |||
| 3b7c15a8b2 | |||
| 76900b8325 | |||
| b88158fc3b | |||
| 3b60686827 | |||
| 35be1724e3 | |||
| d717101e94 | |||
| e44c877738 | |||
| c2a200fe85 | |||
| eea67bf3bf | |||
| 439e7da03f | |||
| ed7c09ac81 | |||
| 0ee1daa46d | |||
| 2fbc4f504d | |||
| 95ab056576 | |||
| a0bc619550 | |||
| 82b50706bd | |||
| e90eed9120 | |||
| 5f2f7d1575 | |||
| 3e60ec9be1 | |||
| dc8b43a95f | |||
| 601746b565 | |||
| fbb78e7b78 | |||
| 803e0631c5 | |||
| 51a6bfd579 | |||
| 5b71b3fc57 | |||
| e2ed9330b7 | |||
| 41872e37e6 | |||
| 0c0b106924 | |||
| 98d9c45ad9 | |||
| 084bb3c35b | |||
| 31789ad03b | |||
| fd168068a1 | |||
| a0cc6b5b59 | |||
| f2d881470d | |||
| e13c661f77 | |||
| a1f421cd54 | |||
| 6d84bd6138 | |||
| cb6b498127 | |||
| a243be2dc9 | |||
| a696b26301 | |||
| 4a4297591e | |||
| 1f43efc111 | |||
| c55009a0ac | |||
| e4e2249b63 | |||
| cf35ebe5fa | |||
| 073f003470 | |||
| 618547b72e | |||
| a01bce46bb | |||
| 82be3ee32c | |||
| 16b17f812c | |||
| 22b7e12baf | |||
| b8d3c9e01b | |||
| 149a57f3ae | |||
| 7f2c81657b | |||
| f2ef02b6d9 | |||
| c3adfcbc15 | |||
| 744a8215dd | |||
| 1f44dc2498 | |||
| d1e751d717 | |||
| 08273c40d7 | |||
| e2d28f5160 | |||
| b50ef59a19 | |||
| 36836598b1 | |||
| 8c4baac3f1 | |||
| 179026dd44 | |||
| 71a6227240 | |||
| aecef752e8 | |||
| e422e886de | |||
| 069853fd10 | |||
| 84773f9d1d | |||
| ff7276e494 | |||
| 553714b0bb | |||
| 5d8e3c6cb4 | |||
| 29073f7d0e | |||
| 38c6c1907b | |||
| 85a14ebcb8 | |||
| 0a02097e20 | |||
| a72f3cfb58 | |||
| 58905525bf | |||
| 7a2d326103 | |||
| ddd5e61254 | |||
| d66a1ac054 | |||
| 9727fdc473 | |||
| d5fe8857cc | |||
| a745a0aed0 | |||
| 19aaf294e5 | |||
| 6b4303c405 | |||
| 53a779067e | |||
| cce9fe4a34 | |||
| 407392f6bf | |||
| cdde878d60 | |||
| b9e33e631f | |||
| 82a5346ab1 | |||
| a76457ef22 | |||
| e6b5112ddc | |||
| 16911adcea | |||
| cf1d421e10 | |||
| 8d79db03d3 | |||
| 5e287033f7 | |||
| e1ab38439b | |||
| 062c1a04fc |
1199
cmake/CMakeLists.txt
1199
cmake/CMakeLists.txt
File diff suppressed because it is too large
Load Diff
28
cmake/Modules/CodeCoverage.cmake
Normal file
28
cmake/Modules/CodeCoverage.cmake
Normal file
@ -0,0 +1,28 @@
|
||||
###############################################################################
|
||||
# Coverage
|
||||
#
|
||||
# Requires latest gcovr (for GCC 8.1 support):#
|
||||
# pip install git+https://github.com/gcovr/gcovr.git
|
||||
###############################################################################
|
||||
if(ENABLE_COVERAGE)
|
||||
find_program(GCOVR_BINARY gcovr)
|
||||
find_package_handle_standard_args(GCOVR DEFAULT_MSG GCOVR_BINARY)
|
||||
|
||||
if(GCOVR_FOUND)
|
||||
get_filename_component(ABSOLUTE_LAMMPS_SOURCE_DIR ${LAMMPS_SOURCE_DIR} ABSOLUTE)
|
||||
|
||||
add_custom_target(
|
||||
gen_coverage_xml
|
||||
COMMAND ${GCOVR_BINARY} -s -x -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.xml
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "Generating XML Coverage Report..."
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
gen_coverage_html
|
||||
COMMAND ${GCOVR_BINARY} -s --html --html-details -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.html
|
||||
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
|
||||
COMMENT "Generating HTML Coverage Report..."
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
59
cmake/Modules/Documentation.cmake
Normal file
59
cmake/Modules/Documentation.cmake
Normal file
@ -0,0 +1,59 @@
|
||||
###############################################################################
|
||||
# Build documentation
|
||||
###############################################################################
|
||||
option(BUILD_DOC "Build LAMMPS documentation" OFF)
|
||||
if(BUILD_DOC)
|
||||
include(ProcessorCount)
|
||||
ProcessorCount(NPROCS)
|
||||
find_package(PythonInterp 3 REQUIRED)
|
||||
|
||||
set(VIRTUALENV ${PYTHON_EXECUTABLE} -m virtualenv)
|
||||
|
||||
file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.txt)
|
||||
file(GLOB PDF_EXTRA_SOURCES ${LAMMPS_DOC_DIR}/src/lammps_commands*.txt ${LAMMPS_DOC_DIR}/src/lammps_support.txt ${LAMMPS_DOC_DIR}/src/lammps_tutorials.txt)
|
||||
list(REMOVE_ITEM DOC_SOURCES ${PDF_EXTRA_SOURCES})
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT docenv
|
||||
COMMAND ${VIRTUALENV} docenv
|
||||
)
|
||||
|
||||
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT requirements.txt
|
||||
DEPENDS docenv
|
||||
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt
|
||||
COMMAND ${DOCENV_BINARY_DIR}/pip install -r requirements.txt --upgrade
|
||||
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
|
||||
)
|
||||
|
||||
set(RST_FILES "")
|
||||
set(RST_DIR ${CMAKE_BINARY_DIR}/rst)
|
||||
file(MAKE_DIRECTORY ${RST_DIR})
|
||||
foreach(TXT_FILE ${DOC_SOURCES})
|
||||
get_filename_component(FILENAME ${TXT_FILE} NAME_WE)
|
||||
set(RST_FILE ${RST_DIR}/${FILENAME}.rst)
|
||||
list(APPEND RST_FILES ${RST_FILE})
|
||||
add_custom_command(
|
||||
OUTPUT ${RST_FILE}
|
||||
DEPENDS requirements.txt docenv ${TXT_FILE}
|
||||
COMMAND ${DOCENV_BINARY_DIR}/txt2rst -o ${RST_DIR} ${TXT_FILE}
|
||||
)
|
||||
endforeach()
|
||||
|
||||
add_custom_command(
|
||||
OUTPUT html
|
||||
DEPENDS ${RST_FILES}
|
||||
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src ${RST_DIR}
|
||||
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -j ${NPROCS} -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${RST_DIR} html
|
||||
)
|
||||
|
||||
add_custom_target(
|
||||
doc ALL
|
||||
DEPENDS html
|
||||
SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES}
|
||||
)
|
||||
|
||||
install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
|
||||
endif()
|
||||
71
cmake/Modules/LAMMPSUtils.cmake
Normal file
71
cmake/Modules/LAMMPSUtils.cmake
Normal file
@ -0,0 +1,71 @@
|
||||
# Utility functions
|
||||
function(list_to_bulletpoints result)
|
||||
list(REMOVE_AT ARGV 0)
|
||||
set(temp "")
|
||||
foreach(item ${ARGV})
|
||||
set(temp "${temp}* ${item}\n")
|
||||
endforeach()
|
||||
set(${result} "${temp}" PARENT_SCOPE)
|
||||
endfunction(list_to_bulletpoints)
|
||||
|
||||
function(validate_option name values)
|
||||
string(TOLOWER ${${name}} needle_lower)
|
||||
string(TOUPPER ${${name}} needle_upper)
|
||||
list(FIND ${values} ${needle_lower} IDX_LOWER)
|
||||
list(FIND ${values} ${needle_upper} IDX_UPPER)
|
||||
if(${IDX_LOWER} LESS 0 AND ${IDX_UPPER} LESS 0)
|
||||
list_to_bulletpoints(POSSIBLE_VALUE_LIST ${${values}})
|
||||
message(FATAL_ERROR "\n########################################################################\n"
|
||||
"Invalid value '${${name}}' for option ${name}\n"
|
||||
"\n"
|
||||
"Possible values are:\n"
|
||||
"${POSSIBLE_VALUE_LIST}"
|
||||
"########################################################################")
|
||||
endif()
|
||||
endfunction(validate_option)
|
||||
|
||||
function(get_lammps_version version_header variable)
|
||||
file(READ ${version_header} line)
|
||||
set(MONTHS x Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)
|
||||
string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\1" day "${line}")
|
||||
string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\2" month "${line}")
|
||||
string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\3" year "${line}")
|
||||
string(STRIP ${day} day)
|
||||
string(STRIP ${month} month)
|
||||
string(STRIP ${year} year)
|
||||
list(FIND MONTHS "${month}" month)
|
||||
string(LENGTH ${day} day_length)
|
||||
string(LENGTH ${month} month_length)
|
||||
if(day_length EQUAL 1)
|
||||
set(day "0${day}")
|
||||
endif()
|
||||
if(month_length EQUAL 1)
|
||||
set(month "0${month}")
|
||||
endif()
|
||||
set(${variable} "${year}${month}${day}" PARENT_SCOPE)
|
||||
endfunction()
|
||||
|
||||
function(check_for_autogen_files source_dir)
|
||||
message(STATUS "Running check for auto-generated files from make-based build system")
|
||||
file(GLOB SRC_AUTOGEN_FILES ${source_dir}/style_*.h)
|
||||
file(GLOB SRC_AUTOGEN_PACKAGES ${source_dir}/packages_*.h)
|
||||
list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h)
|
||||
foreach(_SRC ${SRC_AUTOGEN_FILES})
|
||||
get_filename_component(FILENAME "${_SRC}" NAME)
|
||||
if(EXISTS ${source_dir}/${FILENAME})
|
||||
message(FATAL_ERROR "\n########################################################################\n"
|
||||
"Found header file(s) generated by the make-based build system\n"
|
||||
"\n"
|
||||
"Please run\n"
|
||||
"make -C ${source_dir} purge\n"
|
||||
"to remove\n"
|
||||
"########################################################################")
|
||||
endif()
|
||||
endforeach()
|
||||
endfunction()
|
||||
|
||||
macro(pkg_depends PKG1 PKG2)
|
||||
if(PKG_${PKG1} AND NOT (PKG_${PKG2} OR BUILD_${PKG2}))
|
||||
message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}")
|
||||
endif()
|
||||
endmacro()
|
||||
5
cmake/Modules/Packages/COMPRESS.cmake
Normal file
5
cmake/Modules/Packages/COMPRESS.cmake
Normal file
@ -0,0 +1,5 @@
|
||||
if(PKG_COMPRESS)
|
||||
find_package(ZLIB REQUIRED)
|
||||
include_directories(${ZLIB_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${ZLIB_LIBRARIES})
|
||||
endif()
|
||||
13
cmake/Modules/Packages/CORESHELL.cmake
Normal file
13
cmake/Modules/Packages/CORESHELL.cmake
Normal file
@ -0,0 +1,13 @@
|
||||
if(PKG_CORESHELL)
|
||||
set(CORESHELL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/CORESHELL)
|
||||
set(CORESHELL_SOURCES)
|
||||
set_property(GLOBAL PROPERTY "CORESHELL_SOURCES" "${CORESHELL_SOURCES}")
|
||||
|
||||
# detects styles which have a CORESHELL version
|
||||
RegisterStylesExt(${CORESHELL_SOURCES_DIR} cs CORESHELL_SOURCES)
|
||||
|
||||
get_property(CORESHELL_SOURCES GLOBAL PROPERTY CORESHELL_SOURCES)
|
||||
|
||||
list(APPEND LIB_SOURCES ${CORESHELL_SOURCES})
|
||||
include_directories(${CORESHELL_SOURCES_DIR})
|
||||
endif()
|
||||
194
cmake/Modules/Packages/GPU.cmake
Normal file
194
cmake/Modules/Packages/GPU.cmake
Normal file
@ -0,0 +1,194 @@
|
||||
if(PKG_GPU)
|
||||
if (CMAKE_VERSION VERSION_LESS "3.1")
|
||||
message(FATAL_ERROR "For the GPU package you need at least cmake-3.1")
|
||||
endif()
|
||||
set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
|
||||
set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h
|
||||
${GPU_SOURCES_DIR}/fix_gpu.h
|
||||
${GPU_SOURCES_DIR}/fix_gpu.cpp)
|
||||
|
||||
set(GPU_API "opencl" CACHE STRING "API used by GPU package")
|
||||
set(GPU_API_VALUES opencl cuda)
|
||||
set_property(CACHE GPU_API PROPERTY STRINGS ${GPU_API_VALUES})
|
||||
validate_option(GPU_API GPU_API_VALUES)
|
||||
string(TOUPPER ${GPU_API} GPU_API)
|
||||
|
||||
set(GPU_PREC "mixed" CACHE STRING "LAMMPS GPU precision")
|
||||
set(GPU_PREC_VALUES double mixed single)
|
||||
set_property(CACHE GPU_PREC PROPERTY STRINGS ${GPU_PREC_VALUES})
|
||||
validate_option(GPU_PREC GPU_PREC_VALUES)
|
||||
string(TOUPPER ${GPU_PREC} GPU_PREC)
|
||||
|
||||
if(GPU_PREC STREQUAL "DOUBLE")
|
||||
set(GPU_PREC_SETTING "DOUBLE_DOUBLE")
|
||||
elseif(GPU_PREC STREQUAL "MIXED")
|
||||
set(GPU_PREC_SETTING "SINGLE_DOUBLE")
|
||||
elseif(GPU_PREC STREQUAL "SINGLE")
|
||||
set(GPU_PREC_SETTING "SINGLE_SINGLE")
|
||||
endif()
|
||||
|
||||
file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp)
|
||||
file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu)
|
||||
|
||||
if(GPU_API STREQUAL "CUDA")
|
||||
find_package(CUDA REQUIRED)
|
||||
find_program(BIN2C bin2c)
|
||||
if(NOT BIN2C)
|
||||
message(FATAL_ERROR "Could not find bin2c, use -DBIN2C=/path/to/bin2c to help cmake finding it.")
|
||||
endif()
|
||||
option(CUDPP_OPT "Enable CUDPP_OPT" ON)
|
||||
option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF)
|
||||
if(CUDA_MPS_SUPPORT)
|
||||
set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY")
|
||||
endif()
|
||||
|
||||
set(GPU_ARCH "sm_30" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)")
|
||||
|
||||
file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
|
||||
list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)
|
||||
|
||||
cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu)
|
||||
|
||||
if(CUDPP_OPT)
|
||||
cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
|
||||
file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cpp)
|
||||
file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cu)
|
||||
endif()
|
||||
|
||||
# build arch/gencode commands for nvcc based on CUDA toolkit version and use choice
|
||||
# --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
|
||||
set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH} ")
|
||||
# Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
|
||||
if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
|
||||
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_20,code=[sm_20,compute_20] ")
|
||||
endif()
|
||||
# Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
|
||||
if(CUDA_VERSION VERSION_GREATER "4.9")
|
||||
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ")
|
||||
endif()
|
||||
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
|
||||
if(CUDA_VERSION VERSION_GREATER "5.9")
|
||||
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] ")
|
||||
endif()
|
||||
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
|
||||
if(CUDA_VERSION VERSION_GREATER "7.9")
|
||||
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] ")
|
||||
endif()
|
||||
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
|
||||
if(CUDA_VERSION VERSION_GREATER "8.9")
|
||||
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_70,code=[sm_70,compute_70] ")
|
||||
endif()
|
||||
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
|
||||
if(CUDA_VERSION VERSION_GREATER "9.9")
|
||||
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_75,code=[sm_75,compute_75] ")
|
||||
endif()
|
||||
|
||||
cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS
|
||||
-DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING})
|
||||
|
||||
cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC}
|
||||
-DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING})
|
||||
|
||||
foreach(CU_OBJ ${GPU_GEN_OBJS})
|
||||
get_filename_component(CU_NAME ${CU_OBJ} NAME_WE)
|
||||
string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}")
|
||||
add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
|
||||
COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
|
||||
DEPENDS ${CU_OBJ}
|
||||
COMMENT "Generating ${CU_NAME}_cubin.h")
|
||||
list(APPEND GPU_LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h)
|
||||
endforeach()
|
||||
set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h")
|
||||
|
||||
|
||||
add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS})
|
||||
target_link_libraries(gpu ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
|
||||
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS})
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT ${GPU_CUDA_MPS_FLAGS})
|
||||
if(CUDPP_OPT)
|
||||
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
|
||||
target_compile_definitions(gpu PRIVATE -DUSE_CUDPP)
|
||||
endif()
|
||||
|
||||
list(APPEND LAMMPS_LINK_LIBS gpu)
|
||||
|
||||
add_executable(nvc_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
|
||||
target_compile_definitions(nvc_get_devices PRIVATE -DUCL_CUDADR)
|
||||
target_link_libraries(nvc_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
|
||||
target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
|
||||
|
||||
|
||||
elseif(GPU_API STREQUAL "OPENCL")
|
||||
find_package(OpenCL REQUIRED)
|
||||
set(OCL_TUNE "generic" CACHE STRING "OpenCL Device Tuning")
|
||||
set(OCL_TUNE_VALUES intel fermi kepler cypress generic)
|
||||
set_property(CACHE OCL_TUNE PROPERTY STRINGS ${OCL_TUNE_VALUES})
|
||||
validate_option(OCL_TUNE OCL_TUNE_VALUES)
|
||||
string(TOUPPER ${OCL_TUNE} OCL_TUNE)
|
||||
|
||||
include(OpenCLUtils)
|
||||
set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
|
||||
|
||||
file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu)
|
||||
list(REMOVE_ITEM GPU_LIB_CU
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
|
||||
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
|
||||
)
|
||||
|
||||
foreach(GPU_KERNEL ${GPU_LIB_CU})
|
||||
get_filename_component(basename ${GPU_KERNEL} NAME_WE)
|
||||
string(SUBSTRING ${basename} 4 -1 KERNEL_NAME)
|
||||
GenerateOpenCLHeader(${KERNEL_NAME} ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h ${OCL_COMMON_HEADERS} ${GPU_KERNEL})
|
||||
list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h)
|
||||
endforeach()
|
||||
|
||||
GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu)
|
||||
GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
|
||||
GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu)
|
||||
GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu)
|
||||
GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
|
||||
GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
|
||||
GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
|
||||
|
||||
list(APPEND GPU_LIB_SOURCES
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
|
||||
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
|
||||
)
|
||||
|
||||
add_library(gpu STATIC ${GPU_LIB_SOURCES})
|
||||
target_link_libraries(gpu ${OpenCL_LIBRARIES})
|
||||
target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS})
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
|
||||
target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)
|
||||
|
||||
list(APPEND LAMMPS_LINK_LIBS gpu)
|
||||
|
||||
add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
|
||||
target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL)
|
||||
target_link_libraries(ocl_get_devices PRIVATE ${OpenCL_LIBRARIES})
|
||||
target_include_directories(ocl_get_devices PRIVATE ${OpenCL_INCLUDE_DIRS})
|
||||
endif()
|
||||
|
||||
# GPU package
|
||||
FindStyleHeaders(${GPU_SOURCES_DIR} FIX_CLASS fix_ FIX)
|
||||
|
||||
set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}")
|
||||
|
||||
# detects styles which have GPU version
|
||||
RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)
|
||||
|
||||
get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES)
|
||||
|
||||
list(APPEND LIB_SOURCES ${GPU_SOURCES})
|
||||
include_directories(${GPU_SOURCES_DIR})
|
||||
endif()
|
||||
42
cmake/Modules/Packages/KIM.cmake
Normal file
42
cmake/Modules/Packages/KIM.cmake
Normal file
@ -0,0 +1,42 @@
|
||||
if(PKG_KIM)
|
||||
find_package(CURL)
|
||||
if(CURL_FOUND)
|
||||
include_directories(${CURL_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${CURL_LIBRARIES})
|
||||
add_definitions(-DLMP_KIM_CURL)
|
||||
endif()
|
||||
find_package(KIM-API QUIET)
|
||||
if(KIM-API_FOUND)
|
||||
set(DOWNLOAD_KIM_DEFAULT OFF)
|
||||
else()
|
||||
set(DOWNLOAD_KIM_DEFAULT ON)
|
||||
endif()
|
||||
option(DOWNLOAD_KIM "Download KIM-API from OpenKIM instead of using an already installed one" ${DOWNLOAD_KIM_DEFAULT})
|
||||
if(DOWNLOAD_KIM)
|
||||
if(CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
message(FATAL_ERROR "Cannot build downloaded KIM-API library with Ninja build tool")
|
||||
endif()
|
||||
message(STATUS "KIM-API download requested - we will build our own")
|
||||
enable_language(C)
|
||||
enable_language(Fortran)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(kim_build
|
||||
URL https://s3.openkim.org/kim-api/kim-api-2.0.2.txz
|
||||
URL_MD5 537d9c0abd30f85b875ebb584f9143fa
|
||||
BINARY_DIR build
|
||||
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
|
||||
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
|
||||
-DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER}
|
||||
-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
|
||||
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
|
||||
)
|
||||
ExternalProject_get_property(kim_build INSTALL_DIR)
|
||||
set(KIM-API_INCLUDE_DIRS ${INSTALL_DIR}/include/kim-api)
|
||||
set(KIM-API_LDFLAGS ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libkim-api${CMAKE_SHARED_LIBRARY_SUFFIX})
|
||||
list(APPEND LAMMPS_DEPS kim_build)
|
||||
else()
|
||||
find_package(KIM-API REQUIRED)
|
||||
endif()
|
||||
list(APPEND LAMMPS_LINK_LIBS "${KIM-API_LDFLAGS}")
|
||||
include_directories(${KIM-API_INCLUDE_DIRS})
|
||||
endif()
|
||||
53
cmake/Modules/Packages/KOKKOS.cmake
Normal file
53
cmake/Modules/Packages/KOKKOS.cmake
Normal file
@ -0,0 +1,53 @@
|
||||
if(PKG_KOKKOS)
|
||||
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
|
||||
set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos)
|
||||
add_definitions(-DLMP_KOKKOS)
|
||||
add_subdirectory(${LAMMPS_LIB_KOKKOS_SRC_DIR} ${LAMMPS_LIB_KOKKOS_BIN_DIR})
|
||||
|
||||
set(Kokkos_INCLUDE_DIRS ${LAMMPS_LIB_KOKKOS_SRC_DIR}/core/src
|
||||
${LAMMPS_LIB_KOKKOS_SRC_DIR}/containers/src
|
||||
${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src
|
||||
${LAMMPS_LIB_KOKKOS_BIN_DIR})
|
||||
include_directories(${Kokkos_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS kokkos)
|
||||
|
||||
set(KOKKOS_PKG_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/KOKKOS)
|
||||
set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/fix_nh_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp
|
||||
${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp)
|
||||
|
||||
if(PKG_KSPACE)
|
||||
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp)
|
||||
endif()
|
||||
|
||||
set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}")
|
||||
|
||||
# detects styles which have KOKKOS version
|
||||
RegisterStylesExt(${KOKKOS_PKG_SOURCES_DIR} kokkos KOKKOS_PKG_SOURCES)
|
||||
|
||||
# register kokkos-only styles
|
||||
RegisterNBinStyle(${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.h)
|
||||
RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.h)
|
||||
|
||||
if(PKG_USER-DPD)
|
||||
get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES)
|
||||
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.cpp)
|
||||
RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.h)
|
||||
set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}")
|
||||
endif()
|
||||
|
||||
get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES)
|
||||
|
||||
list(APPEND LIB_SOURCES ${KOKKOS_PKG_SOURCES})
|
||||
include_directories(${KOKKOS_PKG_SOURCES_DIR})
|
||||
endif()
|
||||
38
cmake/Modules/Packages/KSPACE.cmake
Normal file
38
cmake/Modules/Packages/KSPACE.cmake
Normal file
@ -0,0 +1,38 @@
|
||||
if(PKG_KSPACE)
|
||||
option(FFT_SINGLE "Use single precision FFT instead of double" OFF)
|
||||
set(FFTW "FFTW3")
|
||||
if(FFT_SINGLE)
|
||||
set(FFTW "FFTW3F")
|
||||
add_definitions(-DFFT_SINGLE)
|
||||
endif()
|
||||
find_package(${FFTW} QUIET)
|
||||
if(${FFTW}_FOUND)
|
||||
set(FFT "${FFTW}" CACHE STRING "FFT library for KSPACE package")
|
||||
else()
|
||||
set(FFT "KISS" CACHE STRING "FFT library for KSPACE package")
|
||||
endif()
|
||||
set(FFT_VALUES KISS ${FFTW} MKL)
|
||||
set_property(CACHE FFT PROPERTY STRINGS ${FFT_VALUES})
|
||||
validate_option(FFT FFT_VALUES)
|
||||
string(TOUPPER ${FFT} FFT)
|
||||
if(NOT FFT STREQUAL "KISS")
|
||||
find_package(${FFT} REQUIRED)
|
||||
if(NOT FFT STREQUAL "FFTW3F")
|
||||
add_definitions(-DFFT_FFTW)
|
||||
else()
|
||||
add_definitions(-DFFT_${FFT})
|
||||
endif()
|
||||
include_directories(${${FFT}_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${${FFT}_LIBRARIES})
|
||||
else()
|
||||
add_definitions(-DFFT_KISS)
|
||||
endif()
|
||||
set(FFT_PACK "array" CACHE STRING "Optimization for FFT")
|
||||
set(FFT_PACK_VALUES array pointer memcpy)
|
||||
set_property(CACHE FFT_PACK PROPERTY STRINGS ${FFT_PACK_VALUES})
|
||||
validate_option(FFT_PACK FFT_PACK_VALUES)
|
||||
if(NOT FFT_PACK STREQUAL "array")
|
||||
string(TOUPPER ${FFT_PACK} FFT_PACK)
|
||||
add_definitions(-DFFT_PACK_${FFT_PACK})
|
||||
endif()
|
||||
endif()
|
||||
38
cmake/Modules/Packages/LATTE.cmake
Normal file
38
cmake/Modules/Packages/LATTE.cmake
Normal file
@ -0,0 +1,38 @@
|
||||
if(PKG_LATTE)
|
||||
enable_language(Fortran)
|
||||
find_package(LATTE)
|
||||
if(LATTE_FOUND)
|
||||
set(DOWNLOAD_LATTE_DEFAULT OFF)
|
||||
else()
|
||||
set(DOWNLOAD_LATTE_DEFAULT ON)
|
||||
endif()
|
||||
option(DOWNLOAD_LATTE "Download the LATTE library instead of using an already installed one" ${DOWNLOAD_LATTE_DEFAULT})
|
||||
if(DOWNLOAD_LATTE)
|
||||
if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR
|
||||
message(FATAL_ERROR "For downlading LATTE you need at least cmake-3.7")
|
||||
endif()
|
||||
if(CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
message(FATAL_ERROR "Cannot build downloaded LATTE library with Ninja build tool")
|
||||
endif()
|
||||
message(STATUS "LATTE download requested - we will build our own")
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(latte_build
|
||||
URL https://github.com/lanl/LATTE/archive/v1.2.1.tar.gz
|
||||
URL_MD5 85ac414fdada2d04619c8f936344df14
|
||||
SOURCE_SUBDIR cmake
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC}
|
||||
-DBLAS_LIBRARIES=${BLAS_LIBRARIES} -DLAPACK_LIBRARIES=${LAPACK_LIBRARIES}
|
||||
-DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -DCMAKE_Fortran_FLAGS=${CMAKE_Fortran_FLAGS}
|
||||
-DCMAKE_Fortran_FLAGS_${BTYPE}=${CMAKE_Fortran_FLAGS_${BTYPE}} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
|
||||
)
|
||||
ExternalProject_get_property(latte_build INSTALL_DIR)
|
||||
set(LATTE_LIBRARIES ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/liblatte.a)
|
||||
list(APPEND LAMMPS_DEPS latte_build)
|
||||
else()
|
||||
find_package(LATTE)
|
||||
if(NOT LATTE_FOUND)
|
||||
message(FATAL_ERROR "LATTE library not found, help CMake to find it by setting LATTE_LIBRARY, or set DOWNLOAD_LATTE=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND LAMMPS_LINK_LIBS ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
29
cmake/Modules/Packages/MESSAGE.cmake
Normal file
29
cmake/Modules/Packages/MESSAGE.cmake
Normal file
@ -0,0 +1,29 @@
|
||||
if(PKG_MESSAGE)
|
||||
option(MESSAGE_ZMQ "Use ZeroMQ in MESSAGE package" OFF)
|
||||
file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.F
|
||||
${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.c
|
||||
${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.cpp)
|
||||
|
||||
add_library(cslib STATIC ${cslib_SOURCES})
|
||||
if(BUILD_MPI)
|
||||
target_compile_definitions(cslib PRIVATE -DMPI_YES)
|
||||
set_target_properties(cslib PROPERTIES OUTPUT_NAME "csmpi")
|
||||
else()
|
||||
target_compile_definitions(cslib PRIVATE -DMPI_NO)
|
||||
target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_MPI)
|
||||
set_target_properties(cslib PROPERTIES OUTPUT_NAME "csnompi")
|
||||
endif()
|
||||
|
||||
if(MESSAGE_ZMQ)
|
||||
target_compile_definitions(cslib PRIVATE -DZMQ_YES)
|
||||
find_package(ZMQ REQUIRED)
|
||||
target_include_directories(cslib PRIVATE ${ZMQ_INCLUDE_DIRS})
|
||||
target_link_libraries(cslib PUBLIC ${ZMQ_LIBRARIES})
|
||||
else()
|
||||
target_compile_definitions(cslib PRIVATE -DZMQ_NO)
|
||||
target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_ZMQ)
|
||||
endif()
|
||||
|
||||
list(APPEND LAMMPS_LINK_LIBS cslib)
|
||||
include_directories(${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src)
|
||||
endif()
|
||||
45
cmake/Modules/Packages/MSCG.cmake
Normal file
45
cmake/Modules/Packages/MSCG.cmake
Normal file
@ -0,0 +1,45 @@
|
||||
if(PKG_MSCG)
|
||||
find_package(GSL REQUIRED)
|
||||
find_package(MSCG QUIET)
|
||||
if(MSGC_FOUND)
|
||||
set(DOWNLOAD_MSCG_DEFAULT OFF)
|
||||
else()
|
||||
set(DOWNLOAD_MSCG_DEFAULT ON)
|
||||
endif()
|
||||
option(DOWNLOAD_MSCG "Download MSCG library instead of using an already installed one)" ${DOWNLOAD_MSCG_DEFAULT})
|
||||
if(DOWNLOAD_MSCG)
|
||||
if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR
|
||||
message(FATAL_ERROR "For downlading MSCG you need at least cmake-3.7")
|
||||
endif()
|
||||
if(CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
message(FATAL_ERROR "Cannot build downloaded MSCG library with Ninja build tool")
|
||||
endif()
|
||||
include(ExternalProject)
|
||||
if(NOT LAPACK_FOUND)
|
||||
set(EXTRA_MSCG_OPTS "-DLAPACK_LIBRARIES=${CMAKE_CURRENT_BINARY_DIR}/liblinalg.a")
|
||||
endif()
|
||||
ExternalProject_Add(mscg_build
|
||||
URL https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz
|
||||
URL_MD5 8c45e269ee13f60b303edd7823866a91
|
||||
SOURCE_SUBDIR src/CMake
|
||||
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC} ${EXTRA_MSCG_OPTS}
|
||||
BUILD_COMMAND make mscg INSTALL_COMMAND ""
|
||||
)
|
||||
ExternalProject_get_property(mscg_build BINARY_DIR)
|
||||
set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a)
|
||||
ExternalProject_get_property(mscg_build SOURCE_DIR)
|
||||
set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src)
|
||||
list(APPEND LAMMPS_DEPS mscg_build)
|
||||
if(NOT LAPACK_FOUND)
|
||||
file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS})
|
||||
add_dependencies(mscg_build linalg)
|
||||
endif()
|
||||
else()
|
||||
find_package(MSCG)
|
||||
if(NOT MSCG_FOUND)
|
||||
message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
list(APPEND LAMMPS_LINK_LIBS ${MSCG_LIBRARIES} ${GSL_LIBRARIES} ${LAPACK_LIBRARIES})
|
||||
include_directories(${MSCG_INCLUDE_DIRS})
|
||||
endif()
|
||||
13
cmake/Modules/Packages/OPT.cmake
Normal file
13
cmake/Modules/Packages/OPT.cmake
Normal file
@ -0,0 +1,13 @@
|
||||
if(PKG_OPT)
|
||||
set(OPT_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/OPT)
|
||||
set(OPT_SOURCES)
|
||||
set_property(GLOBAL PROPERTY "OPT_SOURCES" "${OPT_SOURCES}")
|
||||
|
||||
# detects styles which have OPT version
|
||||
RegisterStylesExt(${OPT_SOURCES_DIR} opt OPT_SOURCES)
|
||||
|
||||
get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES)
|
||||
|
||||
list(APPEND LIB_SOURCES ${OPT_SOURCES})
|
||||
include_directories(${OPT_SOURCES_DIR})
|
||||
endif()
|
||||
6
cmake/Modules/Packages/PYTHON.cmake
Normal file
6
cmake/Modules/Packages/PYTHON.cmake
Normal file
@ -0,0 +1,6 @@
|
||||
if(PKG_PYTHON)
|
||||
find_package(PythonLibs REQUIRED)
|
||||
add_definitions(-DLMP_PYTHON)
|
||||
include_directories(${PYTHON_INCLUDE_DIR})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${PYTHON_LIBRARY})
|
||||
endif()
|
||||
20
cmake/Modules/Packages/QEQ.cmake
Normal file
20
cmake/Modules/Packages/QEQ.cmake
Normal file
@ -0,0 +1,20 @@
|
||||
# Fix qeq/fire requires MANYBODY (i.e. COMB and COMB3) to be installed
|
||||
if(PKG_QEQ)
|
||||
set(QEQ_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/QEQ)
|
||||
file(GLOB QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix*.h)
|
||||
file(GLOB QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix*.cpp)
|
||||
|
||||
if(NOT PKG_MANYBODY)
|
||||
list(REMOVE_ITEM QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix_qeq_fire.h)
|
||||
list(REMOVE_ITEM QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix_qeq_fire.cpp)
|
||||
endif()
|
||||
set_property(GLOBAL PROPERTY "QEQ_SOURCES" "${QEQ_SOURCES}")
|
||||
|
||||
foreach(MY_HEADER ${QEQ_HEADERS})
|
||||
AddStyleHeader(${MY_HEADER} FIX)
|
||||
endforeach()
|
||||
|
||||
get_property(QEQ_SOURCES GLOBAL PROPERTY QEQ_SOURCES)
|
||||
list(APPEND LIB_SOURCES ${QEQ_SOURCES})
|
||||
include_directories(${QEQ_SOURCES_DIR})
|
||||
endif()
|
||||
8
cmake/Modules/Packages/USER-H5MD.cmake
Normal file
8
cmake/Modules/Packages/USER-H5MD.cmake
Normal file
@ -0,0 +1,8 @@
|
||||
if(PKG_USER-H5MD)
|
||||
enable_language(C)
|
||||
|
||||
find_package(HDF5 REQUIRED)
|
||||
target_link_libraries(h5md ${HDF5_LIBRARIES})
|
||||
target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS})
|
||||
include_directories(${HDF5_INCLUDE_DIRS})
|
||||
endif()
|
||||
118
cmake/Modules/Packages/USER-INTEL.cmake
Normal file
118
cmake/Modules/Packages/USER-INTEL.cmake
Normal file
@ -0,0 +1,118 @@
|
||||
if(PKG_USER-INTEL)
|
||||
check_include_file_cxx(immintrin.h FOUND_IMMINTRIN)
|
||||
if(NOT FOUND_IMMINTRIN)
|
||||
message(FATAL_ERROR "immintrin.h header not found, Intel package won't work without it")
|
||||
endif()
|
||||
|
||||
add_definitions(-DLMP_USER_INTEL)
|
||||
|
||||
set(INTEL_ARCH "cpu" CACHE STRING "Architectures used by USER-INTEL (cpu or knl)")
|
||||
set(INTEL_ARCH_VALUES cpu knl)
|
||||
set_property(CACHE INTEL_ARCH PROPERTY STRINGS ${INTEL_ARCH_VALUES})
|
||||
validate_option(INTEL_ARCH INTEL_ARCH_VALUES)
|
||||
string(TOUPPER ${INTEL_ARCH} INTEL_ARCH)
|
||||
|
||||
find_package(Threads QUIET)
|
||||
if(Threads_FOUND)
|
||||
set(INTEL_LRT_MODE "threads" CACHE STRING "Long-range threads mode (none, threads, or c++11)")
|
||||
else()
|
||||
set(INTEL_LRT_MODE "none" CACHE STRING "Long-range threads mode (none, threads, or c++11)")
|
||||
endif()
|
||||
set(INTEL_LRT_VALUES none threads c++11)
|
||||
set_property(CACHE INTEL_LRT_MODE PROPERTY STRINGS ${INTEL_LRT_VALUES})
|
||||
validate_option(INTEL_LRT_MODE INTEL_LRT_VALUES)
|
||||
string(TOUPPER ${INTEL_LRT_MODE} INTEL_LRT_MODE)
|
||||
if(INTEL_LRT_MODE STREQUAL "THREADS")
|
||||
if(Threads_FOUND)
|
||||
add_definitions(-DLMP_INTEL_USELRT)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${CMAKE_THREAD_LIBS_INIT})
|
||||
else()
|
||||
message(FATAL_ERROR "Must have working threads library for Long-range thread support")
|
||||
endif()
|
||||
endif()
|
||||
if(INTEL_LRT_MODE STREQUAL "C++11")
|
||||
add_definitions(-DLMP_INTEL_USERLRT -DLMP_INTEL_LRT11)
|
||||
endif()
|
||||
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
|
||||
message(FATAL_ERROR "USER-INTEL needs at least a 2016 Intel compiler, found ${CMAKE_CXX_COMPILER_VERSION}")
|
||||
endif()
|
||||
else()
|
||||
message(WARNING "USER-INTEL gives best performance with Intel compilers")
|
||||
endif()
|
||||
|
||||
find_package(TBB QUIET)
|
||||
if(TBB_FOUND)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${TBB_MALLOC_LIBRARIES})
|
||||
else()
|
||||
add_definitions(-DLMP_INTEL_NO_TBB)
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
message(WARNING "USER-INTEL with Intel compilers should use TBB malloc libraries")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
find_package(MKL QUIET)
|
||||
if(MKL_FOUND)
|
||||
add_definitions(-DLMP_USE_MKL_RNG)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
|
||||
else()
|
||||
message(STATUS "Pair style dpd/intel will be faster with MKL libraries")
|
||||
endif()
|
||||
|
||||
if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "64") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "128") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "256"))
|
||||
message(FATAL_ERROR "USER-INTEL only supports memory alignment of 64, 128 or 256 on this platform")
|
||||
endif()
|
||||
|
||||
if(INTEL_ARCH STREQUAL "KNL")
|
||||
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
message(FATAL_ERROR "Must use Intel compiler with USER-INTEL for KNL architecture")
|
||||
endif()
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -xHost -qopenmp -qoffload")
|
||||
set(MIC_OPTIONS "-qoffload-option,mic,compiler,\"-fp-model fast=2 -mGLOB_default_function_attrs=\\\"gather_scatter_loop_unroll=4\\\"\"")
|
||||
add_compile_options(-xMIC-AVX512 -qoffload -fno-alias -ansi-alias -restrict -qoverride-limits ${MIC_OPTIONS})
|
||||
add_definitions(-DLMP_INTEL_OFFLOAD)
|
||||
else()
|
||||
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
|
||||
if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xCOMMON-AVX512")
|
||||
else()
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost")
|
||||
endif()
|
||||
include(CheckCXXCompilerFlag)
|
||||
foreach(_FLAG -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -qopt-zmm-usage=high -qno-offload -fno-alias -ansi-alias -restrict)
|
||||
check_cxx_compiler_flag("${__FLAG}" COMPILER_SUPPORTS${_FLAG})
|
||||
if(COMPILER_SUPPORTS${_FLAG})
|
||||
add_compile_options(${_FLAG})
|
||||
endif()
|
||||
endforeach()
|
||||
else()
|
||||
add_compile_options(-O3 -ffast-math)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# collect sources
|
||||
set(USER-INTEL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-INTEL)
|
||||
set(USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/fix_intel.cpp
|
||||
${USER-INTEL_SOURCES_DIR}/fix_nh_intel.cpp
|
||||
${USER-INTEL_SOURCES_DIR}/intel_buffers.cpp
|
||||
${USER-INTEL_SOURCES_DIR}/nbin_intel.cpp
|
||||
${USER-INTEL_SOURCES_DIR}/npair_intel.cpp)
|
||||
|
||||
set_property(GLOBAL PROPERTY "USER-INTEL_SOURCES" "${USER-INTEL_SOURCES}")
|
||||
|
||||
# detect styles which have a USER-INTEL version
|
||||
RegisterStylesExt(${USER-INTEL_SOURCES_DIR} intel USER-INTEL_SOURCES)
|
||||
RegisterNBinStyle(${USER-INTEL_SOURCES_DIR}/nbin_intel.h)
|
||||
RegisterNPairStyle(${USER-INTEL_SOURCES_DIR}/npair_intel.h)
|
||||
RegisterFixStyle(${USER-INTEL_SOURCES_DIR}/fix_intel.h)
|
||||
|
||||
get_property(USER-INTEL_SOURCES GLOBAL PROPERTY USER-INTEL_SOURCES)
|
||||
if(PKG_KSPACE)
|
||||
list(APPEND USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.cpp)
|
||||
RegisterIntegrateStyle(${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.h)
|
||||
endif()
|
||||
|
||||
list(APPEND LIB_SOURCES ${USER-INTEL_SOURCES})
|
||||
include_directories(${USER-INTEL_SOURCES_DIR})
|
||||
endif()
|
||||
10
cmake/Modules/Packages/USER-MOLFILE.cmake
Normal file
10
cmake/Modules/Packages/USER-MOLFILE.cmake
Normal file
@ -0,0 +1,10 @@
|
||||
if(PKG_USER-MOLFILE)
|
||||
set(MOLFILE_INCLUDE_DIRS "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE STRING "Path to VMD molfile plugin headers")
|
||||
add_library(molfile INTERFACE)
|
||||
target_include_directories(molfile INTERFACE ${MOLFILE_INCLUDE_DIRS})
|
||||
# no need to link with -ldl on windows
|
||||
if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
|
||||
target_link_libraries(molfile INTERFACE ${CMAKE_DL_LIBS})
|
||||
endif()
|
||||
list(APPEND LAMMPS_LINK_LIBS molfile)
|
||||
endif()
|
||||
6
cmake/Modules/Packages/USER-NETCDF.cmake
Normal file
6
cmake/Modules/Packages/USER-NETCDF.cmake
Normal file
@ -0,0 +1,6 @@
|
||||
if(PKG_USER-NETCDF)
|
||||
find_package(NetCDF REQUIRED)
|
||||
include_directories(${NETCDF_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARIES})
|
||||
add_definitions(-DLMP_HAS_NETCDF -DNC_64BIT_DATA=0x0020)
|
||||
endif()
|
||||
42
cmake/Modules/Packages/USER-OMP.cmake
Normal file
42
cmake/Modules/Packages/USER-OMP.cmake
Normal file
@ -0,0 +1,42 @@
|
||||
if(PKG_USER-OMP)
|
||||
set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP)
|
||||
set(USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/thr_data.cpp
|
||||
${USER-OMP_SOURCES_DIR}/thr_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/fix_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/domain_omp.cpp)
|
||||
add_definitions(-DLMP_USER_OMP)
|
||||
set_property(GLOBAL PROPERTY "OMP_SOURCES" "${USER-OMP_SOURCES}")
|
||||
|
||||
# detects styles which have USER-OMP version
|
||||
RegisterStylesExt(${USER-OMP_SOURCES_DIR} omp OMP_SOURCES)
|
||||
RegisterFixStyle(${USER-OMP_SOURCES_DIR}/fix_omp.h)
|
||||
|
||||
get_property(USER-OMP_SOURCES GLOBAL PROPERTY OMP_SOURCES)
|
||||
|
||||
# manually add package dependent source files from USER-OMP that do not provide styles
|
||||
|
||||
if(PKG_ASPHERE)
|
||||
list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_nh_asphere_omp.cpp)
|
||||
endif()
|
||||
|
||||
if(PKG_RIGID)
|
||||
list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_rigid_nh_omp.cpp)
|
||||
endif()
|
||||
|
||||
if(PKG_USER-REAXC)
|
||||
list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/reaxc_bond_orders_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_hydrogen_bonds_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_nonbonded_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_bonds_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_init_md_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_torsion_angles_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_forces_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_multi_body_omp.cpp
|
||||
${USER-OMP_SOURCES_DIR}/reaxc_valence_angles_omp.cpp)
|
||||
endif()
|
||||
|
||||
list(APPEND LIB_SOURCES ${USER-OMP_SOURCES})
|
||||
include_directories(${USER-OMP_SOURCES_DIR})
|
||||
endif()
|
||||
79
cmake/Modules/Packages/USER-PLUMED.cmake
Normal file
79
cmake/Modules/Packages/USER-PLUMED.cmake
Normal file
@ -0,0 +1,79 @@
|
||||
if(PKG_USER-PLUMED)
|
||||
find_package(GSL REQUIRED)
|
||||
set(PLUMED_MODE "static" CACHE STRING "Linkage mode for Plumed2 library")
|
||||
set(PLUMED_MODE_VALUES static shared runtime)
|
||||
set_property(CACHE PLUMED_MODE PROPERTY STRINGS ${PLUMED_MODE_VALUES})
|
||||
validate_option(PLUMED_MODE PLUMED_MODE_VALUES)
|
||||
string(TOUPPER ${PLUMED_MODE} PLUMED_MODE)
|
||||
|
||||
find_package(PkgConfig QUIET)
|
||||
set(DOWNLOAD_PLUMED_DEFAULT ON)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(PLUMED QUIET plumed)
|
||||
if(PLUMED_FOUND)
|
||||
set(DOWNLOAD_PLUMED_DEFAULT OFF)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
option(DOWNLOAD_PLUMED "Download Plumed package instead of using an already installed one" ${DOWNLOAD_PLUMED_DEFAULT})
|
||||
if(DOWNLOAD_PLUMED)
|
||||
if(CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
message(FATAL_ERROR "Cannot build downloaded Plumed library with Ninja build tool")
|
||||
endif()
|
||||
if(BUILD_MPI)
|
||||
set(PLUMED_CONFIG_MPI "--enable-mpi")
|
||||
set(PLUMED_CONFIG_CC ${CMAKE_MPI_C_COMPILER})
|
||||
set(PLUMED_CONFIG_CXX ${CMAKE_MPI_CXX_COMPILER})
|
||||
else()
|
||||
set(PLUMED_CONFIG_MPI "--disable-mpi")
|
||||
set(PLUMED_CONFIG_CC ${CMAKE_C_COMPILER})
|
||||
set(PLUMED_CONFIG_CXX ${CMAKE_CXX_COMPILER})
|
||||
endif()
|
||||
if(BUILD_OMP)
|
||||
set(PLUMED_CONFIG_OMP "--enable-openmp")
|
||||
else()
|
||||
set(PLUMED_CONFIG_OMP "--disable-openmp")
|
||||
endif()
|
||||
message(STATUS "PLUMED download requested - we will build our own")
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(plumed_build
|
||||
URL https://github.com/plumed/plumed2/releases/download/v2.5.1/plumed-src-2.5.1.tgz
|
||||
URL_MD5 c2a7b519e32197a120cdf47e0f194f81
|
||||
BUILD_IN_SOURCE 1
|
||||
CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
|
||||
${CONFIGURE_REQUEST_PIC}
|
||||
--enable-modules=all
|
||||
${PLUMED_CONFIG_MPI}
|
||||
${PLUMED_CONFIG_OMP}
|
||||
CXX=${PLUMED_CONFIG_CXX}
|
||||
CC=${PLUMED_CONFIG_CC}
|
||||
)
|
||||
ExternalProject_get_property(plumed_build INSTALL_DIR)
|
||||
set(PLUMED_INSTALL_DIR ${INSTALL_DIR})
|
||||
list(APPEND LAMMPS_DEPS plumed_build)
|
||||
if(PLUMED_MODE STREQUAL "STATIC")
|
||||
add_definitions(-D__PLUMED_WRAPPER_CXX=1)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${GSL_LIBRARIES} ${LAPACK_LIBRARIES} ${CMAKE_DL_LIBS})
|
||||
elseif(PLUMED_MODE STREQUAL "SHARED")
|
||||
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.so ${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so ${CMAKE_DL_LIBS})
|
||||
elseif(PLUMED_MODE STREQUAL "RUNTIME")
|
||||
add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumedWrapper.a -rdynamic ${CMAKE_DL_LIBS})
|
||||
endif()
|
||||
set(PLUMED_INCLUDE_DIRS "${PLUMED_INSTALL_DIR}/include")
|
||||
else()
|
||||
find_package(PkgConfig REQUIRED)
|
||||
pkg_check_modules(PLUMED REQUIRED plumed)
|
||||
if(PLUMED_MODE STREQUAL "STATIC")
|
||||
add_definitions(-D__PLUMED_WRAPPER_CXX=1)
|
||||
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.static)
|
||||
elseif(PLUMED_MODE STREQUAL "SHARED")
|
||||
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.shared)
|
||||
elseif(PLUMED_MODE STREQUAL "RUNTIME")
|
||||
add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel.so)
|
||||
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.runtime)
|
||||
endif()
|
||||
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_LOAD})
|
||||
endif()
|
||||
include_directories(${PLUMED_INCLUDE_DIRS})
|
||||
endif()
|
||||
9
cmake/Modules/Packages/USER-QMMM.cmake
Normal file
9
cmake/Modules/Packages/USER-QMMM.cmake
Normal file
@ -0,0 +1,9 @@
|
||||
if(PKG_USER-QMMM)
|
||||
enable_language(Fortran)
|
||||
enable_language(C)
|
||||
|
||||
message(WARNING "Building QMMM with CMake is still experimental")
|
||||
find_package(QE REQUIRED)
|
||||
include_directories(${QE_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${QE_LIBRARIES})
|
||||
endif()
|
||||
5
cmake/Modules/Packages/USER-QUIP.cmake
Normal file
5
cmake/Modules/Packages/USER-QUIP.cmake
Normal file
@ -0,0 +1,5 @@
|
||||
if(PKG_USER-QUIP)
|
||||
enable_language(Fortran)
|
||||
find_package(QUIP REQUIRED)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${LAPACK_LIBRARIES})
|
||||
endif()
|
||||
62
cmake/Modules/Packages/USER-SCAFACOS.cmake
Normal file
62
cmake/Modules/Packages/USER-SCAFACOS.cmake
Normal file
@ -0,0 +1,62 @@
|
||||
if(PKG_USER-SCAFACOS)
|
||||
enable_language(Fortran)
|
||||
enable_language(C)
|
||||
|
||||
find_package(GSL REQUIRED)
|
||||
find_package(PkgConfig QUIET)
|
||||
set(DOWNLOAD_SCAFACOS_DEFAULT ON)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(SCAFACOS QUIET scafacos)
|
||||
if(SCAFACOS_FOUND)
|
||||
set(DOWNLOAD_SCAFACOS_DEFAULT OFF)
|
||||
endif()
|
||||
endif()
|
||||
option(DOWNLOAD_SCAFACOS "Download ScaFaCoS library instead of using an already installed one" ${DOWNLOAD_SCAFACOS_DEFAULT})
|
||||
if(DOWNLOAD_SCAFACOS)
|
||||
if(CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
message(FATAL_ERROR "Cannot build downloaded ScaFaCoS library with Ninja build tool")
|
||||
endif()
|
||||
message(STATUS "ScaFaCoS download requested - we will build our own")
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(scafacos_build
|
||||
URL https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz
|
||||
URL_MD5 bd46d74e3296bd8a444d731bb10c1738
|
||||
CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR> --disable-doc
|
||||
--enable-fcs-solvers=fmm,p2nfft,direct,ewald,p3m
|
||||
--with-internal-fftw --with-internal-pfft
|
||||
--with-internal-pnfft ${CONFIGURE_REQUEST_PIC}
|
||||
FC=${CMAKE_MPI_Fortran_COMPILER}
|
||||
CXX=${CMAKE_MPI_CXX_COMPILER}
|
||||
CC=${CMAKE_MPI_C_COMPILER}
|
||||
F77=
|
||||
)
|
||||
ExternalProject_get_property(scafacos_build INSTALL_DIR)
|
||||
set(SCAFACOS_BUILD_DIR ${INSTALL_DIR})
|
||||
set(SCAFACOS_INCLUDE_DIRS ${SCAFACOS_BUILD_DIR}/include)
|
||||
list(APPEND LAMMPS_DEPS scafacos_build)
|
||||
# list and order from pkg_config file of ScaFaCoS build
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_direct.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_ewald.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fmm.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p2nfft.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p3m.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${GSL_LIBRARIES})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_near.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_gridsort.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_resort.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_redist.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_common.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pnfft.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pfft.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3_mpi.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3.a)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${MPI_Fortran_LIBRARIES})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${MPI_C_LIBRARIES})
|
||||
else()
|
||||
find_package(PkgConfig REQUIRED)
|
||||
pkg_check_modules(SCAFACOS REQUIRED scafacos)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_LDFLAGS})
|
||||
endif()
|
||||
include_directories(${SCAFACOS_INCLUDE_DIRS})
|
||||
endif()
|
||||
13
cmake/Modules/Packages/USER-SDPD.cmake
Normal file
13
cmake/Modules/Packages/USER-SDPD.cmake
Normal file
@ -0,0 +1,13 @@
|
||||
# Fix rigid/meso requires RIGID to be installed
|
||||
if(PKG_USER-SDPD)
|
||||
set(USER-SDPD_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-SDPD)
|
||||
|
||||
get_property(hlist GLOBAL PROPERTY FIX)
|
||||
if(NOT PKG_RIGID)
|
||||
list(REMOVE_ITEM hlist ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.h)
|
||||
list(REMOVE_ITEM LIB_SOURCES ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.cpp)
|
||||
endif()
|
||||
set_property(GLOBAL PROPERTY FIX "${hlist}")
|
||||
|
||||
include_directories(${USER-SDPD_SOURCES_DIR})
|
||||
endif()
|
||||
28
cmake/Modules/Packages/USER-SMD.cmake
Normal file
28
cmake/Modules/Packages/USER-SMD.cmake
Normal file
@ -0,0 +1,28 @@
|
||||
if(PKG_USER-SMD)
|
||||
find_package(Eigen3 NO_MODULE)
|
||||
if(EIGEN3_FOUND)
|
||||
set(DOWNLOAD_EIGEN3_DEFAULT OFF)
|
||||
else()
|
||||
set(DOWNLOAD_EIGEN3_DEFAULT ON)
|
||||
endif()
|
||||
option(DOWNLOAD_EIGEN3 "Download Eigen3 instead of using an already installed one)" ${DOWNLOAD_EIGEN3_DEFAULT})
|
||||
if(DOWNLOAD_EIGEN3)
|
||||
message(STATUS "Eigen3 download requested - we will build our own")
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(Eigen3_build
|
||||
URL http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz
|
||||
URL_MD5 f2a417d083fe8ca4b8ed2bc613d20f07
|
||||
CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND ""
|
||||
)
|
||||
ExternalProject_get_property(Eigen3_build SOURCE_DIR)
|
||||
set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR})
|
||||
list(APPEND LAMMPS_DEPS Eigen3_build)
|
||||
else()
|
||||
find_package(Eigen3 NO_MODULE)
|
||||
mark_as_advanced(Eigen3_DIR)
|
||||
if(NOT EIGEN3_FOUND)
|
||||
message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_EIGEN3=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
include_directories(${EIGEN3_INCLUDE_DIR})
|
||||
endif()
|
||||
6
cmake/Modules/Packages/USER-VTK.cmake
Normal file
6
cmake/Modules/Packages/USER-VTK.cmake
Normal file
@ -0,0 +1,6 @@
|
||||
if(PKG_USER-VTK)
|
||||
find_package(VTK REQUIRED NO_MODULE)
|
||||
include(${VTK_USE_FILE})
|
||||
add_definitions(-DLAMMPS_VTK)
|
||||
list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES})
|
||||
endif()
|
||||
41
cmake/Modules/Packages/VORONOI.cmake
Normal file
41
cmake/Modules/Packages/VORONOI.cmake
Normal file
@ -0,0 +1,41 @@
|
||||
if(PKG_VORONOI)
|
||||
find_package(VORO)
|
||||
if(VORO_FOUND)
|
||||
set(DOWNLOAD_VORO_DEFAULT OFF)
|
||||
else()
|
||||
set(DOWNLOAD_VORO_DEFAULT ON)
|
||||
endif()
|
||||
option(DOWNLOAD_VORO "Download and compile the Voro++ library instead of using an already installed one" ${DOWNLOAD_VORO_DEFAULT})
|
||||
if(DOWNLOAD_VORO)
|
||||
if(CMAKE_GENERATOR STREQUAL "Ninja")
|
||||
message(FATAL_ERROR "Cannot build downloaded Voro++ library with Ninja build tool")
|
||||
endif()
|
||||
message(STATUS "Voro++ download requested - we will build our own")
|
||||
include(ExternalProject)
|
||||
|
||||
if(BUILD_SHARED_LIBS)
|
||||
set(VORO_BUILD_CFLAGS "${CMAKE_SHARED_LIBRARY_CXX_FLAGS} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}")
|
||||
else()
|
||||
set(VORO_BUILD_CFLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}")
|
||||
endif()
|
||||
string(APPEND VORO_BUILD_CFLAGS ${CMAKE_CXX_FLAGS})
|
||||
set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS})
|
||||
|
||||
ExternalProject_Add(voro_build
|
||||
URL https://download.lammps.org/thirdparty/voro++-0.4.6.tar.gz
|
||||
URL_MD5 2338b824c3b7b25590e18e8df5d68af9
|
||||
CONFIGURE_COMMAND "" BUILD_COMMAND make ${VORO_BUILD_OPTIONS} BUILD_IN_SOURCE 1 INSTALL_COMMAND ""
|
||||
)
|
||||
ExternalProject_get_property(voro_build SOURCE_DIR)
|
||||
set(VORO_LIBRARIES ${SOURCE_DIR}/src/libvoro++.a)
|
||||
set(VORO_INCLUDE_DIRS ${SOURCE_DIR}/src)
|
||||
list(APPEND LAMMPS_DEPS voro_build)
|
||||
else()
|
||||
find_package(VORO)
|
||||
if(NOT VORO_FOUND)
|
||||
message(FATAL_ERROR "Voro++ library not found. Help CMake to find it by setting VORO_LIBRARY and VORO_INCLUDE_DIR, or set DOWNLOAD_VORO=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
include_directories(${VORO_INCLUDE_DIRS})
|
||||
list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES})
|
||||
endif()
|
||||
52
cmake/Modules/Testing.cmake
Normal file
52
cmake/Modules/Testing.cmake
Normal file
@ -0,0 +1,52 @@
|
||||
###############################################################################
|
||||
# Testing
|
||||
###############################################################################
|
||||
option(ENABLE_TESTING "Enable testing" OFF)
|
||||
if(ENABLE_TESTING AND BUILD_EXE)
|
||||
enable_testing()
|
||||
option(LAMMPS_TESTING_SOURCE_DIR "Location of lammps-testing source directory" "")
|
||||
option(LAMMPS_TESTING_GIT_TAG "Git tag of lammps-testing" "master")
|
||||
mark_as_advanced(LAMMPS_TESTING_SOURCE_DIR LAMMPS_TESTING_GIT_TAG)
|
||||
|
||||
if (CMAKE_VERSION VERSION_GREATER "3.10.3" AND NOT LAMMPS_TESTING_SOURCE_DIR)
|
||||
include(FetchContent)
|
||||
|
||||
FetchContent_Declare(lammps-testing
|
||||
GIT_REPOSITORY https://github.com/lammps/lammps-testing.git
|
||||
GIT_TAG ${LAMMPS_TESTING_GIT_TAG}
|
||||
)
|
||||
|
||||
FetchContent_GetProperties(lammps-testing)
|
||||
if(NOT lammps-testing_POPULATED)
|
||||
message(STATUS "Downloading tests...")
|
||||
FetchContent_Populate(lammps-testing)
|
||||
endif()
|
||||
|
||||
set(LAMMPS_TESTING_SOURCE_DIR ${lammps-testing_SOURCE_DIR})
|
||||
elseif(NOT LAMMPS_TESTING_SOURCE_DIR)
|
||||
message(WARNING "Full test-suite requires CMake >= 3.11 or copy of\n"
|
||||
"https://github.com/lammps/lammps-testing in LAMMPS_TESTING_SOURCE_DIR")
|
||||
endif()
|
||||
|
||||
add_test(ShowHelp ${CMAKE_BINARY_DIR}/${LAMMPS_BINARY} -help)
|
||||
|
||||
if(EXISTS ${LAMMPS_TESTING_SOURCE_DIR})
|
||||
message(STATUS "Running test discovery...")
|
||||
|
||||
file(GLOB_RECURSE TEST_SCRIPTS ${LAMMPS_TESTING_SOURCE_DIR}/tests/core/*/in.*)
|
||||
foreach(script_path ${TEST_SCRIPTS})
|
||||
get_filename_component(TEST_NAME ${script_path} EXT)
|
||||
get_filename_component(SCRIPT_NAME ${script_path} NAME)
|
||||
get_filename_component(PARENT_DIR ${script_path} DIRECTORY)
|
||||
string(SUBSTRING ${TEST_NAME} 1 -1 TEST_NAME)
|
||||
string(REPLACE "-" "_" TEST_NAME ${TEST_NAME})
|
||||
string(REPLACE "+" "_" TEST_NAME ${TEST_NAME})
|
||||
set(TEST_NAME "test_core_${TEST_NAME}_serial")
|
||||
add_test(${TEST_NAME} ${CMAKE_BINARY_DIR}/${LAMMPS_BINARY} -in ${SCRIPT_NAME})
|
||||
set_tests_properties(${TEST_NAME} PROPERTIES WORKING_DIRECTORY ${PARENT_DIR})
|
||||
endforeach()
|
||||
list(LENGTH TEST_SCRIPTS NUM_TESTS)
|
||||
|
||||
message(STATUS "Found ${NUM_TESTS} tests.")
|
||||
endif()
|
||||
endif()
|
||||
@ -1,4 +1,4 @@
|
||||
.TH LAMMPS "5 June 2019" "2019-06-05"
|
||||
.TH LAMMPS "18 June 2019" "2019-06-18"
|
||||
.SH NAME
|
||||
.B LAMMPS
|
||||
\- Molecular Dynamics Simulator.
|
||||
|
||||
@ -32,10 +32,18 @@ cmake \[options ...\] ../cmake # configuration with (command-line) cmake
|
||||
make # compilation :pre
|
||||
|
||||
The cmake command will detect available features, enable selected
|
||||
packages and options, and will generate the build environment. The make
|
||||
command will then compile and link LAMMPS, producing (by default) an
|
||||
executable called "lmp" and a library called "liblammps.a" in the
|
||||
"build" folder.
|
||||
packages and options, and will generate the build environment. By default
|
||||
this build environment will be created for "Unix Makefiles" on most
|
||||
platforms and particularly on Linux. However, alternate build tools
|
||||
(e.g. Ninja) and support files for Integrated Development Environments
|
||||
(IDE) like Eclipse, CodeBlocks, or Kate can be generated, too. This is
|
||||
selected via the "-G" command line flag. For the rest of the documentation
|
||||
we will assume that the build environment is generated for makefiles
|
||||
and thus the make command will be used to compile and link LAMMPS as
|
||||
indicated above, producing (by default) an executable called "lmp" and
|
||||
a library called "liblammps.a" in the "build" folder. When generating
|
||||
a build environment for the "Ninja" build tool, the build command would
|
||||
be "ninja" instead of "make".
|
||||
|
||||
If your machine has multiple CPU cores (most do these days), using a
|
||||
command like "make -jN" (with N being the number of available local
|
||||
|
||||
@ -28,8 +28,12 @@ OPT.
|
||||
|
||||
"none"_bond_none.html,
|
||||
"zero"_bond_zero.html,
|
||||
"hybrid"_bond_hybrid.html :tb(c=3,ea=c)
|
||||
|
||||
"hybrid"_bond_hybrid.html,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
"class2 (ko)"_bond_class2.html,
|
||||
"fene (iko)"_bond_fene.html,
|
||||
"fene/expand (o)"_bond_fene_expand.html,
|
||||
@ -56,8 +60,12 @@ OPT.
|
||||
|
||||
"none"_angle_none.html,
|
||||
"zero"_angle_zero.html,
|
||||
"hybrid"_angle_hybrid.html :tb(c=3,ea=c)
|
||||
|
||||
"hybrid"_angle_hybrid.html,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
"charmm (iko)"_angle_charmm.html,
|
||||
"class2 (ko)"_angle_class2.html,
|
||||
"class2/p6"_angle_class2.html,
|
||||
@ -89,8 +97,12 @@ OPT.
|
||||
|
||||
"none"_dihedral_none.html,
|
||||
"zero"_dihedral_zero.html,
|
||||
"hybrid"_dihedral_hybrid.html :tb(c=3,ea=c)
|
||||
|
||||
"hybrid"_dihedral_hybrid.html,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
"charmm (iko)"_dihedral_charmm.html,
|
||||
"charmmfsw"_dihedral_charmm.html,
|
||||
"class2 (ko)"_dihedral_class2.html,
|
||||
@ -117,8 +129,12 @@ OPT.
|
||||
|
||||
"none"_improper_none.html,
|
||||
"zero"_improper_zero.html,
|
||||
"hybrid"_improper_hybrid.html :tb(c=3,ea=c)
|
||||
|
||||
"hybrid"_improper_hybrid.html,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
"class2 (ko)"_improper_class2.html,
|
||||
"cossq (o)"_improper_cossq.html,
|
||||
"cvff (io)"_improper_cvff.html,
|
||||
|
||||
@ -27,8 +27,11 @@ OPT.
|
||||
"none"_pair_none.html,
|
||||
"zero"_pair_zero.html,
|
||||
"hybrid (k)"_pair_hybrid.html,
|
||||
"hybrid/overlay (k)"_pair_hybrid.html :tb(c=4,ea=c)
|
||||
|
||||
"hybrid/overlay (k)"_pair_hybrid.html,
|
||||
,
|
||||
,
|
||||
,
|
||||
,
|
||||
"adp (o)"_pair_adp.html,
|
||||
"agni (o)"_pair_agni.html,
|
||||
"airebo (io)"_pair_airebo.html,
|
||||
|
||||
BIN
doc/src/Eqs/pair_spin_dipole.jpg
Normal file
BIN
doc/src/Eqs/pair_spin_dipole.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 38 KiB |
42
doc/src/Eqs/pair_spin_dipole.tex
Normal file
42
doc/src/Eqs/pair_spin_dipole.tex
Normal file
@ -0,0 +1,42 @@
|
||||
\documentclass[preview]{standalone}
|
||||
\usepackage{varwidth}
|
||||
\usepackage[utf8x]{inputenc}
|
||||
\usepackage{amsmath,amssymb,graphics,bm,setspace}
|
||||
|
||||
\begin{document}
|
||||
\begin{varwidth}{50in}
|
||||
\begin{equation}
|
||||
\mathcal{H}_{\rm long}=
|
||||
-\frac{\mu_{0} \left( \mu_B\right)^2}{4\pi}
|
||||
\sum_{i,j,i\neq j}^{N}
|
||||
\frac{g_i g_j}{r_{ij}^3}
|
||||
\Big(3
|
||||
\left(\bm{e}_{ij}\cdot \bm{s}_{i}\right)
|
||||
\left(\bm{e}_{ij}\cdot \bm{s}_{j}\right)
|
||||
-\bm{s}_i\cdot\bm{s}_j \Big)
|
||||
\nonumber
|
||||
\end{equation}
|
||||
\begin{equation}
|
||||
\bm{\omega}_i =
|
||||
\frac{\mu_0 (\mu_B)^2}{4\pi\hbar}\sum_{j}
|
||||
\frac{g_i g_j}{r_{ij}^3}
|
||||
\, \Big(
|
||||
3\,(\bm{e}_{ij}\cdot\bm{s}_{j})\bm{e}_{ij}
|
||||
-\bm{s}_{j} \Big) \nonumber
|
||||
\end{equation}
|
||||
\begin{equation}
|
||||
\bm{F}_i =
|
||||
\frac{3\, \mu_0 (\mu_B)^2}{4\pi} \sum_j
|
||||
\frac{g_i g_j}{r_{ij}^4}
|
||||
\Big[\big( (\bm{s}_i\cdot\bm{s}_j)
|
||||
-5(\bm{e}_{ij}\cdot\bm{s}_i)
|
||||
(\bm{e}_{ij}\cdot\bm{s}_j)\big) \bm{e}_{ij}+
|
||||
\big(
|
||||
(\bm{e}_{ij}\cdot\bm{s}_i)\bm{s}_j+
|
||||
(\bm{e}_{ij}\cdot\bm{s}_j)\bm{s}_i
|
||||
\big)
|
||||
\Big]
|
||||
\nonumber
|
||||
\end{equation}
|
||||
\end{varwidth}
|
||||
\end{document}
|
||||
BIN
doc/src/Eqs/pair_spin_long_range.jpg
Normal file
BIN
doc/src/Eqs/pair_spin_long_range.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 12 KiB |
20
doc/src/Eqs/pair_spin_long_range.tex
Normal file
20
doc/src/Eqs/pair_spin_long_range.tex
Normal file
@ -0,0 +1,20 @@
|
||||
\documentclass[preview]{standalone}
|
||||
\usepackage{varwidth}
|
||||
\usepackage[utf8x]{inputenc}
|
||||
\usepackage{amsmath,amssymb,graphics,bm,setspace}
|
||||
|
||||
\begin{document}
|
||||
\begin{varwidth}{50in}
|
||||
\begin{equation}
|
||||
\mathcal{H}_{\rm long}=
|
||||
-\frac{\mu_{0} \left( \mu_B\right)^2}{4\pi}
|
||||
\sum_{i,j,i\neq j}^{N}
|
||||
\frac{g_i g_j}{r_{ij}^3}
|
||||
\Big(3
|
||||
\left(\bm{e}_{ij}\cdot \bm{s}_{i}\right)
|
||||
\left(\bm{e}_{ij}\cdot \bm{s}_{j}\right)
|
||||
-\bm{s}_i\cdot\bm{s}_j \Big)
|
||||
\nonumber
|
||||
\end{equation}
|
||||
\end{varwidth}
|
||||
\end{document}
|
||||
BIN
doc/src/Eqs/pair_spin_long_range_force.jpg
Normal file
BIN
doc/src/Eqs/pair_spin_long_range_force.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 16 KiB |
23
doc/src/Eqs/pair_spin_long_range_force.tex
Normal file
23
doc/src/Eqs/pair_spin_long_range_force.tex
Normal file
@ -0,0 +1,23 @@
|
||||
\documentclass[preview]{standalone}
|
||||
\usepackage{varwidth}
|
||||
\usepackage[utf8x]{inputenc}
|
||||
\usepackage{amsmath,amssymb,graphics,bm,setspace}
|
||||
|
||||
\begin{document}
|
||||
\begin{varwidth}{50in}
|
||||
\begin{equation}
|
||||
\bm{F}_i =
|
||||
\frac{\mu_0 (\mu_B)^2}{4\pi} \sum_j
|
||||
\frac{g_i g_j}{r_{ij}^4}
|
||||
\Big[\big( (\bm{s}_i\cdot\bm{s}_j)
|
||||
-5(\bm{e}_{ij}\cdot\bm{s}_i)
|
||||
(\bm{e}_{ij}\cdot\bm{s}_j)\big) \bm{e}_{ij}+
|
||||
\big(
|
||||
(\bm{e}_{ij}\cdot\bm{s}_i)\bm{s}_j+
|
||||
(\bm{e}_{ij}\cdot\bm{s}_j)\bm{s}_i
|
||||
\big)
|
||||
\Big]
|
||||
\nonumber
|
||||
\end{equation}
|
||||
\end{varwidth}
|
||||
\end{document}
|
||||
BIN
doc/src/Eqs/pair_spin_long_range_magforce.jpg
Normal file
BIN
doc/src/Eqs/pair_spin_long_range_magforce.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 9.2 KiB |
17
doc/src/Eqs/pair_spin_long_range_magforce.tex
Normal file
17
doc/src/Eqs/pair_spin_long_range_magforce.tex
Normal file
@ -0,0 +1,17 @@
|
||||
\documentclass[preview]{standalone}
|
||||
\usepackage{varwidth}
|
||||
\usepackage[utf8x]{inputenc}
|
||||
\usepackage{amsmath,amssymb,graphics,bm,setspace}
|
||||
|
||||
\begin{document}
|
||||
\begin{varwidth}{50in}
|
||||
\begin{equation}
|
||||
\bm{\omega}_i =
|
||||
\frac{\mu_0 (\mu_B)^2}{4\pi\hbar}\sum_{j}
|
||||
\frac{g_i g_j}{r_{ij}^3}
|
||||
\, \Big(
|
||||
3\,(\bm{e}_{ij}\cdot\bm{s}_{j})\bm{e}_{ij}
|
||||
-\bm{s}_{j} \Big) \nonumber
|
||||
\end{equation}
|
||||
\end{varwidth}
|
||||
\end{document}
|
||||
@ -636,12 +636,12 @@ Please ensure reaction map files are properly formatted. :dd
|
||||
|
||||
{Bond/react: Atom affected by reaction too close to template edge} :dt
|
||||
|
||||
This means an atom which changes type during the reaction is too close
|
||||
to an 'edge' atom defined in the superimpose file. This could cause
|
||||
incorrect assignment of bonds, angle, etc. Generally, this means you
|
||||
must include more atoms in your templates, such that there are at
|
||||
least two atoms between each atom involved in the reaction and an edge
|
||||
atom. :dd
|
||||
This means an atom which changes type or connectivity during the
|
||||
reaction is too close to an 'edge' atom defined in the superimpose
|
||||
file. This could cause incorrect assignment of bonds, angle, etc.
|
||||
Generally, this means you must include more atoms in your templates,
|
||||
such that there are at least two atoms between each atom involved in
|
||||
the reaction and an edge atom. :dd
|
||||
|
||||
{Bond/react: Fix bond/react needs ghost atoms from farther away} :dt
|
||||
|
||||
@ -2202,10 +2202,6 @@ Self-explanatory. :dd
|
||||
|
||||
This is a current restriction in LAMMPS. :dd
|
||||
|
||||
{Cannot use pair hybrid with GPU neighbor list builds} :dt
|
||||
|
||||
Neighbor list builds must be done on the CPU for this pair style. :dd
|
||||
|
||||
{Cannot use pair tail corrections with 2d simulations} :dt
|
||||
|
||||
The correction factors are only currently defined for 3d systems. :dd
|
||||
@ -5523,10 +5519,6 @@ Self-explanatory. :dd
|
||||
For this pair style, you cannot run part of the force calculation on
|
||||
the host. See the package command. :dd
|
||||
|
||||
{GPU split param must be positive for hybrid pair styles} :dt
|
||||
|
||||
See the package gpu command. :dd
|
||||
|
||||
{GPUs are requested but Kokkos has not been compiled for CUDA} :dt
|
||||
|
||||
Re-compile Kokkos with CUDA support to use GPUs. :dd
|
||||
@ -7097,6 +7089,18 @@ Self-explanatory. :dd
|
||||
|
||||
One or more GPUs must be used when Kokkos is compiled for CUDA. :dd
|
||||
|
||||
{Kspace_modify mesh parameter must be all zero or all positive} :dt
|
||||
|
||||
Valid kspace mesh parameters are >0. The code will try to auto-detect
|
||||
suitable values when all three mesh sizes are set to zero (the default). :dd
|
||||
|
||||
{Kspace_modify mesh/disp parameter must be all zero or all positive} :dt
|
||||
|
||||
Valid kspace mesh/disp parameters are >0. The code will try to auto-detect
|
||||
suitable values when all three mesh sizes are set to zero [and]
|
||||
the required accuracy via {force/disp/real} as well as
|
||||
{force/disp/kspace} is set. :dd
|
||||
|
||||
{Kspace style does not support compute group/group} :dt
|
||||
|
||||
Self-explanatory. :dd
|
||||
|
||||
@ -82,10 +82,14 @@ bond/angle/dihedral. LAMMPS computes this by taking the maximum bond
|
||||
length, multiplying by the number of bonds in the interaction (e.g. 3
|
||||
for a dihedral) and adding a small amount of stretch. :dd
|
||||
|
||||
{Bond/react: An atom in 'react #%d' changes bond connectivity but not atom type} :dt
|
||||
{Bond/react: Atom affected by reaction too close to template edge} :dt
|
||||
|
||||
You may want to double-check that all atom types are properly assigned
|
||||
in the post-reaction template. :dd
|
||||
This means an atom which changes type or connectivity during the
|
||||
reaction is too close to an 'edge' atom defined in the superimpose
|
||||
file. This could cause incorrect assignment of bonds, angle, etc.
|
||||
Generally, this means you must include more atoms in your templates,
|
||||
such that there are at least two atoms between each atom involved in
|
||||
the reaction and an edge atom. :dd
|
||||
|
||||
{Both groups in compute group/group have a net charge; the Kspace boundary correction to energy will be non-zero} :dt
|
||||
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
<!-- HTML_ONLY -->
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="5 Jun 2019 version">
|
||||
<META NAME="docnumber" CONTENT="18 Jun 2019 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -21,7 +21,7 @@
|
||||
:line
|
||||
|
||||
LAMMPS Documentation :c,h1
|
||||
5 Jun 2019 version :c,h2
|
||||
18 Jun 2019 version :c,h2
|
||||
|
||||
"What is a LAMMPS version?"_Manual_version.html
|
||||
|
||||
|
||||
@ -104,7 +104,7 @@ code (with a performance penalty due to having data transfers between
|
||||
host and GPU). :ulb,l
|
||||
|
||||
The GPU package requires neighbor lists to be built on the CPU when using
|
||||
exclusion lists, hybrid pair styles, or a triclinic simulation box. :l
|
||||
exclusion lists, or a triclinic simulation box. :l
|
||||
|
||||
The GPU package can be compiled for CUDA or OpenCL and thus supports
|
||||
both, Nvidia and AMD GPUs well. On Nvidia hardware, using CUDA is typically
|
||||
|
||||
@ -46,7 +46,7 @@ software version 7.5 or later must be installed on your system. See
|
||||
the discussion for the "GPU package"_Speed_gpu.html for details of how
|
||||
to check and do this.
|
||||
|
||||
NOTE: Kokkos with CUDA currently implicitly assumes, that the MPI
|
||||
NOTE: Kokkos with CUDA currently implicitly assumes that the MPI
|
||||
library is CUDA-aware and has support for GPU-direct. This is not
|
||||
always the case, especially when using pre-compiled MPI libraries
|
||||
provided by a Linux distribution. This is not a problem when using
|
||||
@ -207,19 +207,21 @@ supports.
|
||||
|
||||
[Running on GPUs:]
|
||||
|
||||
Use the "-k" "command-line switch"_Run_options.html to
|
||||
specify the number of GPUs per node. Typically the -np setting of the
|
||||
mpirun command should set the number of MPI tasks/node to be equal to
|
||||
the number of physical GPUs on the node. You can assign multiple MPI
|
||||
tasks to the same GPU with the KOKKOS package, but this is usually
|
||||
only faster if significant portions of the input script have not
|
||||
been ported to use Kokkos. Using CUDA MPS is recommended in this
|
||||
scenario. Using a CUDA-aware MPI library with support for GPU-direct
|
||||
is highly recommended. GPU-direct use can be avoided by using
|
||||
"-pk kokkos gpu/direct no"_package.html.
|
||||
As above for multi-core CPUs (and no GPU), if N is the number of
|
||||
physical cores/node, then the number of MPI tasks/node should not
|
||||
exceed N.
|
||||
Use the "-k" "command-line switch"_Run_options.html to specify the
|
||||
number of GPUs per node. Typically the -np setting of the mpirun command
|
||||
should set the number of MPI tasks/node to be equal to the number of
|
||||
physical GPUs on the node. You can assign multiple MPI tasks to the same
|
||||
GPU with the KOKKOS package, but this is usually only faster if some
|
||||
portions of the input script have not been ported to use Kokkos. In this
|
||||
case, also packing/unpacking communication buffers on the host may give
|
||||
speedup (see the KOKKOS "package"_package.html command). Using CUDA MPS
|
||||
is recommended in this scenario.
|
||||
|
||||
Using a CUDA-aware MPI library with
|
||||
support for GPU-direct is highly recommended. GPU-direct use can be
|
||||
avoided by using "-pk kokkos gpu/direct no"_package.html. As above for
|
||||
multi-core CPUs (and no GPU), if N is the number of physical cores/node,
|
||||
then the number of MPI tasks/node should not exceed N.
|
||||
|
||||
-k on g Ng :pre
|
||||
|
||||
|
||||
@ -24,12 +24,7 @@ twojmax = band limit for bispectrum components (non-negative integer) :l
|
||||
R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l
|
||||
w_1, w_2,... = list of neighbor weights, one for each type :l
|
||||
zero or more keyword/value pairs may be appended :l
|
||||
keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l
|
||||
{diagonal} value = {0} or {1} or {2} or {3}
|
||||
{0} = all j1, j2, j <= twojmax, j2 <= j1
|
||||
{1} = subset satisfying j1 == j2
|
||||
{2} = subset satisfying j1 == j2 == j3
|
||||
{3} = subset satisfying j2 <= j1 <= j
|
||||
keyword = {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l
|
||||
{rmin0} value = parameter in distance to angle conversion (distance units)
|
||||
{switchflag} value = {0} or {1}
|
||||
{0} = do not use switching function
|
||||
@ -44,7 +39,7 @@ keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag
|
||||
|
||||
[Examples:]
|
||||
|
||||
compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 diagonal 3 rmin0 0.0
|
||||
compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 rmin0 0.0
|
||||
compute db all sna/atom 1.4 0.95 6 2.0 1.0
|
||||
compute vb all sna/atom 1.4 0.95 6 2.0 1.0 :pre
|
||||
|
||||
@ -151,7 +146,7 @@ The argument {rfac0} and the optional keyword {rmin0} define the
|
||||
linear mapping from radial distance to polar angle {theta0} on the
|
||||
3-sphere.
|
||||
|
||||
The argument {twojmax} and the keyword {diagonal} define which
|
||||
The argument {twojmax} defines which
|
||||
bispectrum components are generated. See section below on output for a
|
||||
detailed explanation of the number of bispectrum components and the
|
||||
ordered in which they are listed.
|
||||
@ -192,23 +187,18 @@ command that includes all pairs in the neighbor list.
|
||||
Compute {sna/atom} calculates a per-atom array, each column
|
||||
corresponding to a particular bispectrum component. The total number
|
||||
of columns and the identity of the bispectrum component contained in
|
||||
each column depend on the values of {twojmax} and {diagonal}, as
|
||||
each column depend of the value of {twojmax}, as
|
||||
described by the following piece of python code:
|
||||
|
||||
for j1 in range(0,twojmax+1):
|
||||
if(diagonal==2):
|
||||
print j1/2.,j1/2.,j1/2.
|
||||
elif(diagonal==1):
|
||||
for j in range(0,min(twojmax,2*j1)+1,2):
|
||||
print j1/2.,j1/2.,j/2.
|
||||
elif(diagonal==0):
|
||||
for j2 in range(0,j1+1):
|
||||
for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
|
||||
print j1/2.,j2/2.,j/2.
|
||||
elif(diagonal==3):
|
||||
for j2 in range(0,j1+1):
|
||||
for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
|
||||
if (j>=j1): print j1/2.,j2/2.,j/2. :pre
|
||||
for j2 in range(0,j1+1):
|
||||
for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
|
||||
if (j>=j1): print j1/2.,j2/2.,j/2. :pre
|
||||
|
||||
NOTE: the {diagonal} keyword allowing other possible choices
|
||||
for the number of bispectrum components was removed in 2019,
|
||||
since all potentials use the value of 3, corresponding to the
|
||||
above set of bispectrum components.
|
||||
|
||||
Compute {snad/atom} evaluates a per-atom array. The columns are
|
||||
arranged into {ntypes} blocks, listed in order of atom type {I}. Each
|
||||
@ -259,7 +249,7 @@ package"_Build_package.html doc page for more info.
|
||||
|
||||
[Default:]
|
||||
|
||||
The optional keyword defaults are {diagonal} = 0, {rmin0} = 0,
|
||||
The optional keyword defaults are {rmin0} = 0,
|
||||
{switchflag} = 1, {bzeroflag} = 1, {quadraticflag} = 0,
|
||||
|
||||
:line
|
||||
|
||||
@ -88,6 +88,8 @@ potentials only include the pair potential portion of the EAM
|
||||
interaction when used by this compute, not the embedding term. Also
|
||||
bonded or Kspace interactions do not contribute to this compute.
|
||||
|
||||
The computes in this package are not compatible with dynamic groups.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
{compute group/group}_compute_group_group.html, {compute
|
||||
|
||||
@ -392,7 +392,8 @@ boundaries can be set using "boundary"_boundary.html (the slab
|
||||
approximation in not needed). The {slab} keyword is not currently
|
||||
supported by Ewald or PPPM when using a triclinic simulation cell. The
|
||||
slab correction has also been extended to point dipole interactions
|
||||
"(Klapp)"_#Klapp in "kspace_style"_kspace_style.html {ewald/disp}.
|
||||
"(Klapp)"_#Klapp in "kspace_style"_kspace_style.html {ewald/disp},
|
||||
{ewald/dipole}, and {pppm/dipole}.
|
||||
|
||||
NOTE: If you wish to apply an electric field in the Z-direction, in
|
||||
conjunction with the {slab} keyword, you should do it by adding
|
||||
|
||||
@ -20,6 +20,10 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
|
||||
accuracy = desired relative error in forces
|
||||
{ewald/omp} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{ewald/dipole} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{ewald/dipole/spin} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{pppm} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{pppm/cg} values = accuracy (smallq)
|
||||
@ -47,6 +51,10 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
|
||||
accuracy = desired relative error in forces
|
||||
{pppm/stagger} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{pppm/dipole} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{pppm/dipole/spin} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{msm} value = accuracy
|
||||
accuracy = desired relative error in forces
|
||||
{msm/cg} value = accuracy (smallq)
|
||||
@ -105,9 +113,15 @@ The {ewald/disp} style adds a long-range dispersion sum option for
|
||||
but in a more efficient manner than the {ewald} style. The 1/r^6
|
||||
capability means that Lennard-Jones or Buckingham potentials can be
|
||||
used without a cutoff, i.e. they become full long-range potentials.
|
||||
The {ewald/disp} style can also be used with point-dipoles
|
||||
"(Toukmaji)"_#Toukmaji and is currently the only kspace solver in
|
||||
LAMMPS with this capability.
|
||||
The {ewald/disp} style can also be used with point-dipoles, see
|
||||
"(Toukmaji)"_#Toukmaji.
|
||||
|
||||
The {ewald/dipole} style adds long-range standard Ewald summations
|
||||
for dipole-dipole interactions, see "(Toukmaji)"_#Toukmaji.
|
||||
|
||||
The {ewald/dipole/spin} style adds long-range standard Ewald
|
||||
summations for magnetic dipole-dipole interactions between
|
||||
magnetic spins.
|
||||
|
||||
:line
|
||||
|
||||
@ -128,6 +142,12 @@ The optional {smallq} argument defines the cutoff for the absolute
|
||||
charge value which determines whether a particle is considered charged
|
||||
or not. Its default value is 1.0e-5.
|
||||
|
||||
The {pppm/dipole} style invokes a particle-particle particle-mesh solver
|
||||
for dipole-dipole interactions, following the method of "(Cerda)"_#Cerda2008.
|
||||
|
||||
The {pppm/dipole/spin} style invokes a particle-particle particle-mesh solver
|
||||
for magnetic dipole-dipole interactions between magnetic spins.
|
||||
|
||||
The {pppm/tip4p} style is identical to the {pppm} style except that it
|
||||
adds a charge at the massless 4th site in each TIP4P water molecule.
|
||||
It should be used with "pair styles"_pair_style.html with a
|
||||
@ -317,7 +337,10 @@ using ideas from chapter 3 of "(Hardy)"_#Hardy2006, with equation 3.197
|
||||
of particular note. When using {msm} with non-periodic boundary
|
||||
conditions, it is expected that the error estimation will be too
|
||||
pessimistic. RMS force errors for dipoles when using {ewald/disp}
|
||||
are estimated using equations 33 and 46 of "(Wang)"_#Wang.
|
||||
or {ewald/dipole} are estimated using equations 33 and 46 of
|
||||
"(Wang)"_#Wang. The RMS force errors for {pppm/dipole} are estimated
|
||||
using the equations in "(Cerda)"_#Cerda2008.
|
||||
|
||||
|
||||
See the "kspace_modify"_kspace_modify.html command for additional
|
||||
options of the K-space solvers that can be set, including a {force}
|
||||
@ -464,6 +487,9 @@ Illinois at Urbana-Champaign, (2006).
|
||||
:link(Sutmann2013)
|
||||
[(Sutmann)] Sutmann, Arnold, Fahrenberger, et. al., Physical review / E 88(6), 063308 (2013)
|
||||
|
||||
:link(Cerda2008)
|
||||
[(Cerda)] Cerda, Ballenegger, Lenz, Holm, J Chem Phys 129, 234104 (2008)
|
||||
|
||||
:link(Who2012)
|
||||
[(Who)] Who, Author2, Author3, J of Long Range Solvers, 35, 164-177
|
||||
(2012).
|
||||
|
||||
@ -64,13 +64,16 @@ args = arguments specific to the style :l
|
||||
{no_affinity} values = none
|
||||
{kokkos} args = keyword value ...
|
||||
zero or more keyword/value pairs may be appended
|
||||
keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct}
|
||||
keywords = {neigh} or {neigh/qeq} or {neigh/thread} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct}
|
||||
{neigh} value = {full} or {half}
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
{neigh/qeq} value = {full} or {half}
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
{neigh/thread} value = {off} or {on}
|
||||
off = thread only over atoms
|
||||
on = thread over both atoms and neighbors
|
||||
{newton} = {off} or {on}
|
||||
off = set Newton pairwise and bonded flags off
|
||||
on = set Newton pairwise and bonded flags on
|
||||
@ -173,12 +176,10 @@ computation will be built. If {neigh} is {yes}, which is the default,
|
||||
neighbor list building is performed on the GPU. If {neigh} is {no},
|
||||
neighbor list building is performed on the CPU. GPU neighbor list
|
||||
building currently cannot be used with a triclinic box. GPU neighbor
|
||||
list calculation currently cannot be used with
|
||||
"hybrid"_pair_hybrid.html pair styles. GPU neighbor lists are not
|
||||
compatible with commands that are not GPU-enabled. When a non-GPU
|
||||
enabled command requires a neighbor list, it will also be built on the
|
||||
CPU. In these cases, it will typically be more efficient to only use
|
||||
CPU neighbor list builds.
|
||||
lists are not compatible with commands that are not GPU-enabled. When
|
||||
a non-GPU enabled command requires a neighbor list, it will also be
|
||||
built on the CPU. In these cases, it will typically be more efficient
|
||||
to only use CPU neighbor list builds.
|
||||
|
||||
The {newton} keyword sets the Newton flags for pairwise (not bonded)
|
||||
interactions to {off} or {on}, the same as the "newton"_newton.html
|
||||
@ -442,7 +443,19 @@ running on CPUs, a {half} neighbor list is the default because it are
|
||||
often faster, just as it is for non-accelerated pair styles. Similarly,
|
||||
the {neigh/qeq} keyword determines how neighbor lists are built for "fix
|
||||
qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of
|
||||
{neigh/qeq} will match {neigh}.
|
||||
{neigh/qeq} will match {neigh}.
|
||||
|
||||
If the {neigh/thread} keyword is set to {off}, then the KOKKOS package
|
||||
threads only over atoms. However, for small systems, this may not expose
|
||||
enough parallelism to keep a GPU busy. When this keyword is set to {on},
|
||||
the KOKKOS package threads over both atoms and neighbors of atoms. When
|
||||
using {neigh/thread} {on}, a full neighbor list must also be used. Using
|
||||
{neigh/thread} {on} may be slower for large systems, so this this option
|
||||
is turned on by default only when there are 16K atoms or less owned by
|
||||
an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
|
||||
potentials support this keyword yet, and only thread over atoms. Many
|
||||
simple pair-wise potentials such as Lennard-Jones do support threading
|
||||
over both atoms and neighbors.
|
||||
|
||||
The {newton} keyword sets the Newton flags for pairwise and bonded
|
||||
interactions to {off} or {on}, the same as the "newton"_newton.html
|
||||
@ -475,10 +488,10 @@ are rebuilt. The data is only for atoms that migrate to new processors.
|
||||
"Forward" communication happens every timestep. "Reverse" communication
|
||||
happens every timestep if the {newton} option is on. The data is for
|
||||
atom coordinates and any other atom properties that needs to be updated
|
||||
for ghost atoms owned by each processor.
|
||||
for ghost atoms owned by each processor.
|
||||
|
||||
The {comm} keyword is simply a short-cut to set the same value for both
|
||||
the {comm/exchange} and {comm/forward} and {comm/reverse} keywords.
|
||||
the {comm/exchange} and {comm/forward} and {comm/reverse} keywords.
|
||||
|
||||
The value options for all 3 keywords are {no} or {host} or {device}. A
|
||||
value of {no} means to use the standard non-KOKKOS method of
|
||||
@ -486,26 +499,26 @@ packing/unpacking data for the communication. A value of {host} means to
|
||||
use the host, typically a multi-core CPU, and perform the
|
||||
packing/unpacking in parallel with threads. A value of {device} means to
|
||||
use the device, typically a GPU, to perform the packing/unpacking
|
||||
operation.
|
||||
operation.
|
||||
|
||||
The optimal choice for these keywords depends on the input script and
|
||||
the hardware used. The {no} value is useful for verifying that the
|
||||
Kokkos-based {host} and {device} values are working correctly. It is the
|
||||
default when running on CPUs since it is usually the fastest.
|
||||
default when running on CPUs since it is usually the fastest.
|
||||
|
||||
When running on CPUs or Xeon Phi, the {host} and {device} values work
|
||||
identically. When using GPUs, the {device} value is the default since it
|
||||
will typically be optimal if all of your styles used in your input
|
||||
script are supported by the KOKKOS package. In this case data can stay
|
||||
on the GPU for many timesteps without being moved between the host and
|
||||
GPU, if you use the {device} value. This requires that your MPI is able
|
||||
to access GPU memory directly. Currently that is true for OpenMPI 1.8
|
||||
(or later versions), Mvapich2 1.9 (or later), and CrayMPI. If your
|
||||
script uses styles (e.g. fixes) which are not yet supported by the
|
||||
KOKKOS package, then data has to be move between the host and device
|
||||
anyway, so it is typically faster to let the host handle communication,
|
||||
by using the {host} value. Using {host} instead of {no} will enable use
|
||||
of multiple threads to pack/unpack communicated data.
|
||||
GPU, if you use the {device} value. If your script uses styles (e.g.
|
||||
fixes) which are not yet supported by the KOKKOS package, then data has
|
||||
to be move between the host and device anyway, so it is typically faster
|
||||
to let the host handle communication, by using the {host} value. Using
|
||||
{host} instead of {no} will enable use of multiple threads to
|
||||
pack/unpack communicated data. When running small systems on a GPU,
|
||||
performing the exchange pack/unpack on the host CPU can give speedup
|
||||
since it reduces the number of CUDA kernel launches.
|
||||
|
||||
The {gpu/direct} keyword chooses whether GPU-direct will be used. When
|
||||
this keyword is set to {on}, buffers in GPU memory are passed directly
|
||||
@ -518,7 +531,8 @@ the {gpu/direct} keyword is automatically set to {off} by default. When
|
||||
the {gpu/direct} keyword is set to {off} while any of the {comm}
|
||||
keywords are set to {device}, the value for these {comm} keywords will
|
||||
be automatically changed to {host}. This setting has no effect if not
|
||||
running on GPUs.
|
||||
running on GPUs. GPU-direct is available for OpenMPI 1.8 (or later
|
||||
versions), Mvapich2 1.9 (or later), and CrayMPI.
|
||||
|
||||
:line
|
||||
|
||||
@ -630,11 +644,12 @@ neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
|
||||
value, comm = device, gpu/direct = on. When LAMMPS can safely detect
|
||||
that GPU-direct is not available, the default value of gpu/direct
|
||||
becomes "off". For CPUs or Xeon Phis, the option defaults are neigh =
|
||||
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. These
|
||||
settings are made automatically by the required "-k on" "command-line
|
||||
switch"_Run_options.html. You can change them by using the package
|
||||
kokkos command in your input script or via the "-pk kokkos command-line
|
||||
switch"_Run_options.html.
|
||||
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The
|
||||
option neigh/thread = on when there are 16K atoms or less on an MPI
|
||||
rank, otherwise it is "off". These settings are made automatically by
|
||||
the required "-k on" "command-line switch"_Run_options.html. You can
|
||||
change them by using the package kokkos command in your input script or
|
||||
via the "-pk kokkos command-line switch"_Run_options.html.
|
||||
|
||||
For the OMP package, the default is Nthreads = 0 and the option
|
||||
defaults are neigh = yes. These settings are made automatically if
|
||||
|
||||
@ -155,9 +155,12 @@ All of the lj/class2 pair styles write their information to "binary
|
||||
restart files"_restart.html, so pair_style and pair_coeff commands do
|
||||
not need to be specified in an input script that reads a restart file.
|
||||
|
||||
All of the lj/class2 pair styles can only be used via the {pair}
|
||||
keyword of the "run_style respa"_run_style.html command. They do not
|
||||
support the {inner}, {middle}, {outer} keywords.
|
||||
Only the {lj/class2} pair style support the use of the
|
||||
{inner}, {middle}, and {outer} keywords of the "run_style
|
||||
respa"_run_style.html command, meaning the pairwise forces can be
|
||||
partitioned by distance at different levels of the rRESPA hierarchy.
|
||||
The other styles only support the {pair} keyword of run_style respa.
|
||||
See the "run_style"_run_style.html command for details.
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
|
||||
@ -38,7 +38,7 @@ where {B_k^i} is the {k}-th bispectrum component of atom {i},
|
||||
and {beta_k^alpha_i} is the corresponding linear coefficient
|
||||
that depends on {alpha_i}, the SNAP element of atom {i}. The
|
||||
number of bispectrum components used and their definitions
|
||||
depend on the values of {twojmax} and {diagonalstyle}
|
||||
depend on the value of {twojmax}
|
||||
defined in the SNAP parameter file described below.
|
||||
The bispectrum calculation is described in more detail
|
||||
in "compute sna/atom"_compute_sna_atom.html.
|
||||
@ -125,14 +125,13 @@ This line is followed by {ncoeff} coefficients, one per line.
|
||||
The SNAP parameter file can contain blank and comment lines (start
|
||||
with #) anywhere. Each non-blank non-comment line must contain one
|
||||
keyword/value pair. The required keywords are {rcutfac} and
|
||||
{twojmax}. Optional keywords are {rfac0}, {rmin0}, {diagonalstyle},
|
||||
{twojmax}. Optional keywords are {rfac0}, {rmin0},
|
||||
{switchflag}, and {bzeroflag}.
|
||||
|
||||
The default values for these keywords are
|
||||
|
||||
{rfac0} = 0.99363
|
||||
{rmin0} = 0.0
|
||||
{diagonalstyle} = 3
|
||||
{switchflag} = 0
|
||||
{bzeroflag} = 1
|
||||
{quadraticflag} = 1 :ul
|
||||
@ -144,6 +143,9 @@ If {quadraticflag} is set to 1, then the SNAP energy expression includes the qua
|
||||
The SNAP element file should contain {K}({K}+1)/2 additional coefficients
|
||||
for each element, the upper-triangular elements of alpha.
|
||||
|
||||
NOTE: The previously used {diagonalstyle} keyword was removed in 2019,
|
||||
since all known SNAP potentials use the default value of 3.
|
||||
|
||||
:line
|
||||
|
||||
[Mixing, shift, table, tail correction, restart, rRESPA info]:
|
||||
|
||||
89
doc/src/pair_spin_dipole.txt
Normal file
89
doc/src/pair_spin_dipole.txt
Normal file
@ -0,0 +1,89 @@
|
||||
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
|
||||
|
||||
:link(lws,http://lammps.sandia.gov)
|
||||
:link(ld,Manual.html)
|
||||
:link(lc,Commands_all.html)
|
||||
|
||||
:line
|
||||
|
||||
pair_style spin/dipole/cut command :h3
|
||||
pair_style spin/dipole/long command :h3
|
||||
|
||||
[Syntax:]
|
||||
|
||||
pair_style spin/dipole/cut cutoff
|
||||
pair_style spin/dipole/long cutoff :pre
|
||||
|
||||
cutoff = global cutoff for magnetic dipole energy and forces
|
||||
(optional) (distance units) :ulb,l
|
||||
:ule
|
||||
|
||||
[Examples:]
|
||||
|
||||
pair_style spin/dipole/cut 10.0
|
||||
pair_coeff * * 10.0
|
||||
pair_coeff 2 3 8.0 :pre
|
||||
|
||||
pair_style spin/dipole/long 9.0
|
||||
pair_coeff * * 1.0 1.0
|
||||
pair_coeff 2 3 1.0 1.0 2.5 4.0 scale 0.5
|
||||
pair_coeff 2 3 1.0 1.0 2.5 4.0 :pre
|
||||
|
||||
[Description:]
|
||||
|
||||
Style {spin/dipole/cut} computes a short-range dipole-dipole
|
||||
interaction between pairs of magnetic particles that each
|
||||
have a magnetic spin.
|
||||
The magnetic dipole-dipole interactions are computed by the
|
||||
following formulas for the magnetic energy, magnetic precession
|
||||
vector omega and mechanical force between particles I and J.
|
||||
|
||||
:c,image(Eqs/pair_spin_dipole.jpg)
|
||||
|
||||
where si and sj are the spin on two magnetic particles,
|
||||
r is their separation distance, and the vector e = (Ri - Rj)/|Ri - Rj|
|
||||
is the direction vector between the two particles.
|
||||
|
||||
Style {spin/dipole/long} computes long-range magnetic dipole-dipole
|
||||
interaction.
|
||||
A "kspace_style"_kspace_style.html must be defined to
|
||||
use this pair style. Currently, "kspace_style
|
||||
ewald/dipole/spin"_kspace_style.html and "kspace_style
|
||||
pppm/dipole/spin"_kspace_style.html support long-range magnetic
|
||||
dipole-dipole interactions.
|
||||
|
||||
:line
|
||||
|
||||
The "pair_modify"_pair_modify.html table option is not relevant
|
||||
for this pair style.
|
||||
|
||||
This pair style does not support the "pair_modify"_pair_modify.html
|
||||
tail option for adding long-range tail corrections to energy and
|
||||
pressure.
|
||||
|
||||
This pair style writes its information to "binary restart
|
||||
files"_restart.html, so pair_style and pair_coeff commands do not need
|
||||
to be specified in an input script that reads a restart file.
|
||||
|
||||
[Restrictions:]
|
||||
|
||||
The {spin/dipole/cut} and {spin/dipole/long} styles are part of
|
||||
the SPIN package. They are only enabled if LAMMPS was built with that
|
||||
package. See the "Build package"_Build_package.html doc page for more
|
||||
info.
|
||||
|
||||
Using dipole/spin pair styles with {electron} "units"_units.html is not
|
||||
currently supported.
|
||||
|
||||
[Related commands:]
|
||||
|
||||
"pair_coeff"_pair_coeff.html, "kspace_style"_kspace_style.html
|
||||
"fix nve/spin"_fix_nve_spin.html
|
||||
|
||||
[Default:] none
|
||||
|
||||
:line
|
||||
|
||||
:link(Allen2)
|
||||
[(Allen)] Allen and Tildesley, Computer Simulation of Liquids,
|
||||
Clarendon Press, Oxford, 1987.
|
||||
@ -52,7 +52,7 @@ style = {delete} or {index} or {loop} or {world} or {universe} or {uloop} or {st
|
||||
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x),
|
||||
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x)
|
||||
ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z),
|
||||
stride(x,y,z), stride2(x,y,z,a,b,c),
|
||||
logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c),
|
||||
vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)
|
||||
group functions = count(group), mass(group), charge(group),
|
||||
xcm(group,dim), vcm(group,dim), fcm(group,dim),
|
||||
@ -459,8 +459,8 @@ Math functions: sqrt(x), exp(x), ln(x), log(x), abs(x), \
|
||||
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), \
|
||||
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), \
|
||||
ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), \
|
||||
stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), \
|
||||
swiggle(x,y,z), cwiggle(x,y,z)
|
||||
logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c), \
|
||||
vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)
|
||||
Group functions: count(ID), mass(ID), charge(ID), xcm(ID,dim), \
|
||||
vcm(ID,dim), fcm(ID,dim), bound(ID,dir), \
|
||||
gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), \
|
||||
@ -670,6 +670,16 @@ sequence of output timesteps:
|
||||
|
||||
100,150,200,...950,1000,1500,2000,...9500,10000,15000,etc :pre
|
||||
|
||||
The logfreq3(x,y,z) function generates y points between x and z (inclusive),
|
||||
that are separated by a multiplicative ratio: (z/x)^(1/(y-1)). Constraints
|
||||
are: x,z > 0, y > 1, z-x >= y-1. For eg., if logfreq3(10,25,1000) is used in
|
||||
a variable by the "fix print"_fix_print.html command, then the interval
|
||||
between 10 and 1000 is divided into 24 parts with a multiplicative
|
||||
separation of ~1.21, and it will generate the following sequence of output
|
||||
timesteps:
|
||||
|
||||
10, 13, 15, 18, 22, 27, 32,...384, 465, 563, 682, 826, 1000 :pre
|
||||
|
||||
The stride(x,y,z) function uses the current timestep to generate a new
|
||||
timestep. X,y >= 0 and z > 0 and x <= y are required. The generated
|
||||
timesteps increase in increments of z, from x to y, i.e. it generates
|
||||
|
||||
@ -1405,6 +1405,7 @@ Lenart
|
||||
lennard
|
||||
Lennard
|
||||
Lenosky
|
||||
Lenz
|
||||
Lett
|
||||
Leuven
|
||||
Leven
|
||||
|
||||
@ -25,16 +25,18 @@ velocity all create 100 4928459 rot yes dist gaussian
|
||||
|
||||
#pair_style hybrid/overlay eam/alloy spin/exchange 4.0 spin/neel 4.0
|
||||
pair_style hybrid/overlay eam/alloy spin/exchange 4.0
|
||||
pair_coeff * * eam/alloy Co_PurjaPun_2012.eam.alloy Co
|
||||
pair_coeff * * spin/exchange exchange 4.0 0.3593 1.135028015e-05 1.064568567
|
||||
pair_coeff * * eam/alloy ../examples/SPIN/cobalt_hcp/Co_PurjaPun_2012.eam.alloy Co
|
||||
#pair_coeff * * eam/alloy Co_PurjaPun_2012.eam.alloy Co
|
||||
pair_coeff * * spin/exchange exchange 4.0 -0.3593 1.135028015e-05 1.064568567
|
||||
#pair_coeff * * spin/neel neel 4.0 0.0048 0.234 1.168 2.6905 0.705 0.652
|
||||
|
||||
neighbor 0.1 bin
|
||||
neigh_modify every 10 check yes delay 20
|
||||
|
||||
#fix 1 all precession/spin zeeman 1.0 0.0 0.0 1.0
|
||||
fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0
|
||||
fix 2 all langevin/spin 0.0 0.0 21
|
||||
fix 1 all precession/spin anisotropy 0.01 0.0 0.0 1.0
|
||||
#fix 2 all langevin/spin 0.0 0.0 21
|
||||
fix 2 all langevin/spin 0.0 0.1 21
|
||||
fix 3 all nve/spin lattice yes
|
||||
|
||||
timestep 0.0001
|
||||
|
||||
1
examples/SPIN/dipole_spin/Fe_Mishin2006.eam.alloy
Symbolic link
1
examples/SPIN/dipole_spin/Fe_Mishin2006.eam.alloy
Symbolic link
@ -0,0 +1 @@
|
||||
../iron/Fe_Mishin2006.eam.alloy
|
||||
@ -0,0 +1,5 @@
|
||||
2.4824 0.01948336
|
||||
2.8665 0.01109
|
||||
4.0538 -0.0002176
|
||||
4.753 -0.001714
|
||||
4.965 -0.001986
|
||||
@ -0,0 +1,32 @@
|
||||
#Program fitting the exchange interaction
|
||||
#Model curve: Bethe-Slater function
|
||||
import numpy as np, pylab, tkinter
|
||||
import matplotlib.pyplot as plt
|
||||
from scipy.optimize import curve_fit
|
||||
from decimal import *
|
||||
|
||||
print("Loop begin")
|
||||
|
||||
#Definition of the Bethe-Slater function
|
||||
def func(x,a,b,c):
|
||||
return 4*a*((x/c)**2)*(1-b*(x/c)**2)*np.exp(-(x/c)**2)
|
||||
|
||||
#Exchange coeff table (data to fit)
|
||||
rdata, Jdata = np.loadtxt('exchange_bcc_iron.dat', usecols=(0,1), unpack=True)
|
||||
plt.plot(rdata, Jdata, 'b-', label='data')
|
||||
|
||||
#Perform the fit
|
||||
popt, pcov = curve_fit(func, rdata, Jdata, bounds=(0, [500.,5.,5.]))
|
||||
plt.plot(rdata, func(rdata, *popt), 'r--', label='fit')
|
||||
|
||||
#Print the fitted params
|
||||
print("Parameters: a={:.10} (in meV), b={:.10} (adim), c={:.10} (in Ang)".format(*popt))
|
||||
|
||||
#Ploting the result
|
||||
plt.xlabel('r_ij')
|
||||
pylab.xlim([0,6.5])
|
||||
plt.ylabel('J_ij')
|
||||
plt.legend()
|
||||
plt.show()
|
||||
|
||||
print("Loop end")
|
||||
19
examples/SPIN/dipole_spin/fe_dd.dat
Normal file
19
examples/SPIN/dipole_spin/fe_dd.dat
Normal file
@ -0,0 +1,19 @@
|
||||
6 8
|
||||
Optimal parameter set
|
||||
1 4.100199340884814 F
|
||||
2 1.565647547483517 F
|
||||
1 0.9332056681088162 T 3.000000000000000
|
||||
2 -1.162558782567700 T 2.866666666666670
|
||||
3 -0.3502026949249225 T 2.733333333333330
|
||||
4 0.4287820835430028 T 2.600000000000000
|
||||
5 4.907925057809273 T 2.400000000000000
|
||||
6 -5.307049068415304 T 2.300000000000000
|
||||
1 -0.1960674387419232 F 4.100000000000000
|
||||
2 0.3687525935422963 F 3.800000000000000
|
||||
3 -1.505333614924853 F 3.500000000000000
|
||||
4 4.948907078156191 T 3.200000000000000
|
||||
5 -4.894613262753399 T 2.900000000000000
|
||||
6 3.468897724782442 T 2.600000000000000
|
||||
7 -1.792218099820337 T 2.400000000000000
|
||||
8 80.22069592246987 T 2.300000000000000
|
||||
|
||||
59
examples/SPIN/dipole_spin/in.spin.iron_dipole_cut
Normal file
59
examples/SPIN/dipole_spin/in.spin.iron_dipole_cut
Normal file
@ -0,0 +1,59 @@
|
||||
# bcc iron in a 3d periodic box
|
||||
|
||||
clear
|
||||
units metal
|
||||
atom_style spin
|
||||
|
||||
dimension 3
|
||||
boundary p p p
|
||||
|
||||
# necessary for the serial algorithm (sametag)
|
||||
atom_modify map array
|
||||
|
||||
lattice bcc 2.8665
|
||||
region box block 0.0 5.0 0.0 5.0 0.0 5.0
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
# setting mass, mag. moments, and interactions for bcc iron
|
||||
|
||||
mass 1 55.845
|
||||
set group all spin 2.2 -1.0 0.0 0.0
|
||||
velocity all create 100 4928459 rot yes dist gaussian
|
||||
|
||||
pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/cut 8.0
|
||||
pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe
|
||||
pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841
|
||||
pair_coeff * * spin/dipole/cut 8.0
|
||||
|
||||
neighbor 0.1 bin
|
||||
neigh_modify every 10 check yes delay 20
|
||||
|
||||
fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
|
||||
fix_modify 1 energy yes
|
||||
fix 2 all langevin/spin 0.0 0.0 21
|
||||
|
||||
fix 3 all nve/spin lattice yes
|
||||
timestep 0.0001
|
||||
|
||||
# compute and output options
|
||||
|
||||
compute out_mag all spin
|
||||
compute out_pe all pe
|
||||
compute out_ke all ke
|
||||
compute out_temp all temp
|
||||
|
||||
variable magx equal c_out_mag[1]
|
||||
variable magy equal c_out_mag[2]
|
||||
variable magz equal c_out_mag[3]
|
||||
variable magnorm equal c_out_mag[4]
|
||||
variable emag equal c_out_mag[5]
|
||||
variable tmag equal c_out_mag[6]
|
||||
|
||||
thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal
|
||||
thermo 50
|
||||
|
||||
compute outsp all property/atom spx spy spz sp fmx fmy fmz
|
||||
dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3]
|
||||
|
||||
run 2000
|
||||
61
examples/SPIN/dipole_spin/in.spin.iron_dipole_ewald
Normal file
61
examples/SPIN/dipole_spin/in.spin.iron_dipole_ewald
Normal file
@ -0,0 +1,61 @@
|
||||
# bcc iron in a 3d periodic box
|
||||
|
||||
clear
|
||||
units metal
|
||||
atom_style spin
|
||||
|
||||
dimension 3
|
||||
boundary p p p
|
||||
|
||||
# necessary for the serial algorithm (sametag)
|
||||
atom_modify map array
|
||||
|
||||
lattice bcc 2.8665
|
||||
region box block 0.0 5.0 0.0 5.0 0.0 5.0
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
# setting mass, mag. moments, and interactions for bcc iron
|
||||
|
||||
mass 1 55.845
|
||||
set group all spin 2.2 -1.0 0.0 0.0
|
||||
velocity all create 100 4928459 rot yes dist gaussian
|
||||
|
||||
pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/long 8.0
|
||||
pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe
|
||||
pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841
|
||||
pair_coeff * * spin/dipole/long 8.0
|
||||
|
||||
neighbor 0.1 bin
|
||||
neigh_modify every 10 check yes delay 20
|
||||
|
||||
kspace_style ewald/dipole/spin 1.0e-4
|
||||
|
||||
fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
|
||||
fix_modify 1 energy yes
|
||||
fix 2 all langevin/spin 0.0 0.0 21
|
||||
|
||||
fix 3 all nve/spin lattice yes
|
||||
timestep 0.0001
|
||||
|
||||
# compute and output options
|
||||
|
||||
compute out_mag all spin
|
||||
compute out_pe all pe
|
||||
compute out_ke all ke
|
||||
compute out_temp all temp
|
||||
|
||||
variable magx equal c_out_mag[1]
|
||||
variable magy equal c_out_mag[2]
|
||||
variable magz equal c_out_mag[3]
|
||||
variable magnorm equal c_out_mag[4]
|
||||
variable emag equal c_out_mag[5]
|
||||
variable tmag equal c_out_mag[6]
|
||||
|
||||
thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal
|
||||
thermo 50
|
||||
|
||||
compute outsp all property/atom spx spy spz sp fmx fmy fmz
|
||||
dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3]
|
||||
|
||||
run 2000
|
||||
62
examples/SPIN/dipole_spin/in.spin.iron_dipole_pppm
Normal file
62
examples/SPIN/dipole_spin/in.spin.iron_dipole_pppm
Normal file
@ -0,0 +1,62 @@
|
||||
# bcc iron in a 3d periodic box
|
||||
|
||||
clear
|
||||
units metal
|
||||
atom_style spin
|
||||
|
||||
dimension 3
|
||||
boundary p p p
|
||||
|
||||
# necessary for the serial algorithm (sametag)
|
||||
atom_modify map array
|
||||
|
||||
lattice bcc 2.8665
|
||||
region box block 0.0 5.0 0.0 5.0 0.0 5.0
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
# setting mass, mag. moments, and interactions for bcc iron
|
||||
|
||||
mass 1 55.845
|
||||
set group all spin 2.2 -1.0 0.0 0.0
|
||||
velocity all create 100 4928459 rot yes dist gaussian
|
||||
|
||||
pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/long 8.0
|
||||
pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe
|
||||
pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841
|
||||
pair_coeff * * spin/dipole/long 8.0
|
||||
|
||||
neighbor 0.1 bin
|
||||
neigh_modify every 10 check yes delay 20
|
||||
|
||||
kspace_style pppm/dipole/spin 1.0e-4
|
||||
kspace_modify compute yes
|
||||
|
||||
fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
|
||||
fix_modify 1 energy yes
|
||||
fix 2 all langevin/spin 0.0 0.0 21
|
||||
|
||||
fix 3 all nve/spin lattice yes
|
||||
timestep 0.0001
|
||||
|
||||
# compute and output options
|
||||
|
||||
compute out_mag all spin
|
||||
compute out_pe all pe
|
||||
compute out_ke all ke
|
||||
compute out_temp all temp
|
||||
|
||||
variable magx equal c_out_mag[1]
|
||||
variable magy equal c_out_mag[2]
|
||||
variable magz equal c_out_mag[3]
|
||||
variable magnorm equal c_out_mag[4]
|
||||
variable emag equal c_out_mag[5]
|
||||
variable tmag equal c_out_mag[6]
|
||||
|
||||
thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal
|
||||
thermo 50
|
||||
|
||||
compute outsp all property/atom spx spy spz sp fmx fmy fmz
|
||||
dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3]
|
||||
|
||||
run 2000
|
||||
@ -19,7 +19,8 @@ create_atoms 1 box
|
||||
|
||||
mass 1 55.845
|
||||
|
||||
set group all spin/random 31 2.2
|
||||
#set group all spin/random 31 2.2
|
||||
set group all spin 2.2 0.0 0.0 1.0
|
||||
velocity all create 100 4928459 rot yes dist gaussian
|
||||
|
||||
pair_style hybrid/overlay eam/alloy spin/exchange 3.5
|
||||
|
||||
1
examples/USER/misc/kolmogorov_crespi_full/CH.rebo
Symbolic link
1
examples/USER/misc/kolmogorov_crespi_full/CH.rebo
Symbolic link
@ -0,0 +1 @@
|
||||
../../../../potentials/CH.rebo
|
||||
@ -18,7 +18,7 @@ group adsorbate type 2
|
||||
######################## Potential defition ########################
|
||||
pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0
|
||||
####################################################################
|
||||
pair_coeff * * rebo CH.airebo NULL C # chemical
|
||||
pair_coeff * * rebo CH.rebo NULL C # chemical
|
||||
pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range
|
||||
####################################################################
|
||||
# Neighbor update settings
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
LAMMPS (5 Jun 2019)
|
||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:88)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Initialization
|
||||
units metal
|
||||
@ -21,6 +22,8 @@ read_data Bi_gr_AB_stack_2L_noH_300K.data
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
special bonds CPU = 0.000353813 secs
|
||||
read_data CPU = 0.0043292 secs
|
||||
mass 1 12.0107 # carbon mass (g/mole) | membrane
|
||||
mass 2 12.0107 # carbon mass (g/mole) | adsorbate
|
||||
# Separate atom groups
|
||||
@ -32,8 +35,8 @@ group adsorbate type 2
|
||||
######################## Potential defition ########################
|
||||
pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0
|
||||
####################################################################
|
||||
pair_coeff * * rebo CH.airebo NULL C # chemical
|
||||
Reading potential file CH.airebo with DATE: 2011-10-25
|
||||
pair_coeff * * rebo CH.rebo NULL C # chemical
|
||||
Reading potential file CH.rebo with DATE: 2018-7-3
|
||||
pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range
|
||||
####################################################################
|
||||
# Neighbor update settings
|
||||
@ -92,32 +95,32 @@ Neighbor list info ...
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 16.96 | 16.96 | 16.96 Mbytes
|
||||
Step TotEng PotEng KinEng v_REBO v_KC Temp v_adsxcom v_adsycom v_adszcom v_adsvxcom v_adsvycom v_adsvzcom
|
||||
0 -5025.3867722725 -5040.0767391239 14.6899668514 -5011.2636297759 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
|
||||
100 -5025.3962433293 -5041.3829775585 15.9867342292 -5012.5109377234 -28.8720398351 91.0071804888 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069014
|
||||
200 -5025.3942568861 -5041.9638220670 16.5695651809 -5012.7804299195 -29.1833921475 94.3250439654 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265577
|
||||
300 -5025.3919463074 -5040.9705419367 15.5785956293 -5012.0510295102 -28.9195124265 88.6837826830 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745908
|
||||
400 -5025.3965376948 -5041.6929964127 16.2964587179 -5012.6418090677 -29.0511873450 92.7703393702 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704496
|
||||
500 -5025.4050172900 -5042.1712310053 16.7662137153 -5013.1850218645 -28.9862091408 95.4444989087 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
|
||||
600 -5025.3985715734 -5041.2158947893 15.8173232159 -5012.4875319345 -28.7283628548 90.0427797270 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001093
|
||||
700 -5025.3997561572 -5041.6276721306 16.2279159734 -5012.7093581188 -28.9183140118 92.3801482386 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311564
|
||||
800 -5025.3967603736 -5042.3401685987 16.9434082251 -5013.3044877099 -29.0356808888 96.4532085367 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492925
|
||||
900 -5025.3984542801 -5042.2820667481 16.8836124680 -5013.4066841442 -28.8753826039 96.1128111061 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648638
|
||||
1000 -5025.3988185618 -5041.9160822433 16.5172636815 -5012.8147737982 -29.1013084450 94.0273088606 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
|
||||
Loop time of 156.142 on 1 procs for 1000 steps with 1360 atoms
|
||||
0 -5025.3867727863 -5040.0767396377 14.6899668514 -5011.2636302897 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
|
||||
100 -5025.3962438431 -5041.3829780735 15.9867342304 -5012.5109382383 -28.8720398352 91.0071804956 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069015
|
||||
200 -5025.3942574000 -5041.9638225847 16.5695651847 -5012.7804304371 -29.1833921476 94.3250439874 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265578
|
||||
300 -5025.3919468212 -5040.9705424499 15.5785956286 -5012.0510300232 -28.9195124266 88.6837826792 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745907
|
||||
400 -5025.3965382086 -5041.6929969192 16.2964587107 -5012.6418095739 -29.0511873454 92.7703393292 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704497
|
||||
500 -5025.4050178038 -5042.1712315208 16.7662137170 -5013.1850223792 -28.9862091417 95.4444989189 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
|
||||
600 -5025.3985720873 -5041.2158953052 15.8173232179 -5012.4875324499 -28.7283628553 90.0427797386 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001092
|
||||
700 -5025.3997566711 -5041.6276726420 16.2279159709 -5012.7093586298 -28.9183140122 92.3801482242 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311565
|
||||
800 -5025.3967608874 -5042.3401691104 16.9434082230 -5013.3044882226 -29.0356808878 96.4532085250 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492926
|
||||
900 -5025.3984547937 -5042.2820672614 16.8836124676 -5013.4066846579 -28.8753826035 96.1128111040 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648640
|
||||
1000 -5025.3988190757 -5041.9160827657 16.5172636900 -5012.8147743212 -29.1013084444 94.0273089090 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
|
||||
Loop time of 103.724 on 1 procs for 1000 steps with 1360 atoms
|
||||
|
||||
Performance: 0.553 ns/day, 43.373 hours/ns, 6.404 timesteps/s
|
||||
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
Performance: 0.833 ns/day, 28.812 hours/ns, 9.641 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 155.99 | 155.99 | 155.99 | 0.0 | 99.90
|
||||
Bond | 0.00075769 | 0.00075769 | 0.00075769 | 0.0 | 0.00
|
||||
Pair | 103.59 | 103.59 | 103.59 | 0.0 | 99.87
|
||||
Bond | 0.00022388 | 0.00022388 | 0.00022388 | 0.0 | 0.00
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.084217 | 0.084217 | 0.084217 | 0.0 | 0.05
|
||||
Output | 0.0016122 | 0.0016122 | 0.0016122 | 0.0 | 0.00
|
||||
Modify | 0.034797 | 0.034797 | 0.034797 | 0.0 | 0.02
|
||||
Other | | 0.02838 | | | 0.02
|
||||
Comm | 0.082476 | 0.082476 | 0.082476 | 0.0 | 0.08
|
||||
Output | 0.0010884 | 0.0010884 | 0.0010884 | 0.0 | 0.00
|
||||
Modify | 0.032938 | 0.032938 | 0.032938 | 0.0 | 0.03
|
||||
Other | | 0.01749 | | | 0.02
|
||||
|
||||
Nlocal: 1360 ave 1360 max 1360 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -133,4 +136,4 @@ Ave neighs/atom = 195.004
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:02:36
|
||||
Total wall time: 0:01:43
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
LAMMPS (5 Jun 2019)
|
||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:88)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Initialization
|
||||
units metal
|
||||
@ -21,6 +22,8 @@ read_data Bi_gr_AB_stack_2L_noH_300K.data
|
||||
0 = max # of 1-3 neighbors
|
||||
0 = max # of 1-4 neighbors
|
||||
1 = max # of special neighbors
|
||||
special bonds CPU = 0.000187874 secs
|
||||
read_data CPU = 0.00234103 secs
|
||||
mass 1 12.0107 # carbon mass (g/mole) | membrane
|
||||
mass 2 12.0107 # carbon mass (g/mole) | adsorbate
|
||||
# Separate atom groups
|
||||
@ -32,8 +35,8 @@ group adsorbate type 2
|
||||
######################## Potential defition ########################
|
||||
pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0
|
||||
####################################################################
|
||||
pair_coeff * * rebo CH.airebo NULL C # chemical
|
||||
Reading potential file CH.airebo with DATE: 2011-10-25
|
||||
pair_coeff * * rebo CH.rebo NULL C # chemical
|
||||
Reading potential file CH.rebo with DATE: 2018-7-3
|
||||
pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range
|
||||
####################################################################
|
||||
# Neighbor update settings
|
||||
@ -92,32 +95,32 @@ Neighbor list info ...
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 11.13 | 11.13 | 11.13 Mbytes
|
||||
Step TotEng PotEng KinEng v_REBO v_KC Temp v_adsxcom v_adsycom v_adszcom v_adsvxcom v_adsvycom v_adsvzcom
|
||||
0 -5025.3867722725 -5040.0767391239 14.6899668514 -5011.2636297759 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
|
||||
100 -5025.3962433293 -5041.3829775585 15.9867342292 -5012.5109377234 -28.8720398351 91.0071804888 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069014
|
||||
200 -5025.3942568861 -5041.9638220670 16.5695651809 -5012.7804299195 -29.1833921475 94.3250439654 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265577
|
||||
300 -5025.3919463074 -5040.9705419367 15.5785956293 -5012.0510295103 -28.9195124265 88.6837826830 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745908
|
||||
400 -5025.3965376948 -5041.6929964127 16.2964587179 -5012.6418090677 -29.0511873450 92.7703393702 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704496
|
||||
500 -5025.4050172900 -5042.1712310053 16.7662137153 -5013.1850218645 -28.9862091408 95.4444989088 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
|
||||
600 -5025.3985715734 -5041.2158947893 15.8173232159 -5012.4875319345 -28.7283628548 90.0427797270 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001093
|
||||
700 -5025.3997561572 -5041.6276721306 16.2279159734 -5012.7093581188 -28.9183140118 92.3801482386 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311564
|
||||
800 -5025.3967603736 -5042.3401685987 16.9434082251 -5013.3044877099 -29.0356808888 96.4532085367 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492925
|
||||
900 -5025.3984542801 -5042.2820667481 16.8836124680 -5013.4066841442 -28.8753826039 96.1128111061 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648638
|
||||
1000 -5025.3988185618 -5041.9160822433 16.5172636815 -5012.8147737983 -29.1013084450 94.0273088606 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
|
||||
Loop time of 42.5422 on 4 procs for 1000 steps with 1360 atoms
|
||||
0 -5025.3867727863 -5040.0767396377 14.6899668514 -5011.2636302897 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
|
||||
100 -5025.3962438431 -5041.3829780735 15.9867342304 -5012.5109382383 -28.8720398352 91.0071804956 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069015
|
||||
200 -5025.3942574000 -5041.9638225847 16.5695651847 -5012.7804304371 -29.1833921476 94.3250439874 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265578
|
||||
300 -5025.3919468212 -5040.9705424499 15.5785956286 -5012.0510300232 -28.9195124266 88.6837826792 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745907
|
||||
400 -5025.3965382086 -5041.6929969192 16.2964587107 -5012.6418095739 -29.0511873454 92.7703393291 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704497
|
||||
500 -5025.4050178038 -5042.1712315208 16.7662137170 -5013.1850223792 -28.9862091417 95.4444989189 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
|
||||
600 -5025.3985720873 -5041.2158953052 15.8173232179 -5012.4875324499 -28.7283628553 90.0427797386 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001092
|
||||
700 -5025.3997566711 -5041.6276726420 16.2279159709 -5012.7093586298 -28.9183140122 92.3801482242 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311565
|
||||
800 -5025.3967608874 -5042.3401691104 16.9434082230 -5013.3044882226 -29.0356808878 96.4532085250 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492926
|
||||
900 -5025.3984547938 -5042.2820672614 16.8836124676 -5013.4066846579 -28.8753826035 96.1128111040 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648640
|
||||
1000 -5025.3988190757 -5041.9160827657 16.5172636900 -5012.8147743212 -29.1013084444 94.0273089090 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
|
||||
Loop time of 33.7338 on 4 procs for 1000 steps with 1360 atoms
|
||||
|
||||
Performance: 2.031 ns/day, 11.817 hours/ns, 23.506 timesteps/s
|
||||
98.9% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
Performance: 2.561 ns/day, 9.370 hours/ns, 29.644 timesteps/s
|
||||
94.1% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 39.928 | 40.992 | 42.377 | 15.8 | 96.36
|
||||
Bond | 0.0003643 | 0.00043392 | 0.00048113 | 0.0 | 0.00
|
||||
Pair | 30.833 | 31.356 | 32.18 | 9.1 | 92.95
|
||||
Bond | 0.00026059 | 0.00029182 | 0.00031185 | 0.0 | 0.00
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.12253 | 1.5076 | 2.5698 | 82.1 | 3.54
|
||||
Output | 0.0012577 | 0.0013637 | 0.0016453 | 0.4 | 0.00
|
||||
Modify | 0.010833 | 0.012247 | 0.013317 | 0.9 | 0.03
|
||||
Other | | 0.02864 | | | 0.07
|
||||
Comm | 1.443 | 2.2722 | 2.8091 | 34.3 | 6.74
|
||||
Output | 0.00068855 | 0.00095087 | 0.0017185 | 0.0 | 0.00
|
||||
Modify | 0.010187 | 0.011709 | 0.015284 | 1.9 | 0.03
|
||||
Other | | 0.09241 | | | 0.27
|
||||
|
||||
Nlocal: 340 ave 344 max 334 min
|
||||
Histogram: 1 0 0 0 0 0 1 0 1 1
|
||||
@ -133,4 +136,4 @@ Ave neighs/atom = 195.004
|
||||
Ave special neighs/atom = 0
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
Total wall time: 0:00:42
|
||||
Total wall time: 0:00:33
|
||||
@ -64,9 +64,12 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
|
||||
} else
|
||||
_nbor_data=&(nbor->dev_nbor);
|
||||
|
||||
int success=device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor,
|
||||
maxspecial,_gpu_host,max_nbors,cell_size,false,
|
||||
_threads_per_atom);
|
||||
int success=device->init(*ans,false,false,nlocal,nall,maxspecial);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -65,9 +65,12 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
|
||||
} else
|
||||
_nbor_data=&(nbor->dev_nbor);
|
||||
|
||||
int success=device->init(*ans,true,false,nlocal,host_nlocal,nall,nbor,
|
||||
maxspecial,_gpu_host,max_nbors,cell_size,false,
|
||||
_threads_per_atom);
|
||||
int success=device->init(*ans,true,false,nlocal,nall,maxspecial);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -66,9 +66,12 @@ int BaseDipoleT::init_atomic(const int nlocal, const int nall,
|
||||
} else
|
||||
_nbor_data=&(nbor->dev_nbor);
|
||||
|
||||
int success=device->init(*ans,true,true,nlocal,host_nlocal,nall,nbor,
|
||||
maxspecial,_gpu_host,max_nbors,cell_size,false,
|
||||
_threads_per_atom);
|
||||
int success=device->init(*ans,true,true,nlocal,nall,maxspecial);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -65,9 +65,13 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
|
||||
} else
|
||||
_nbor_data=&(nbor->dev_nbor);
|
||||
|
||||
int success=device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor,
|
||||
maxspecial,_gpu_host,max_nbors,cell_size,false,
|
||||
_threads_per_atom,true);
|
||||
int success=device->init(*ans,false,false,nlocal,nall,maxspecial,true);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -71,12 +71,15 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
|
||||
|
||||
_threads_per_atom=device->threads_per_atom();
|
||||
|
||||
int success=device->init(*ans,false,true,nlocal,host_nlocal,nall,nbor,
|
||||
maxspecial,_gpu_host,max_nbors,cell_size,true,
|
||||
1);
|
||||
int success=device->init(*ans,false,true,nlocal,nall,maxspecial);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,true,1);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
ucl_device=device->gpu;
|
||||
atom=&device->atom;
|
||||
|
||||
|
||||
@ -78,9 +78,12 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
|
||||
if (_threads_per_atom*_threads_per_atom>device->warp_size())
|
||||
return -10;
|
||||
|
||||
int success=device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor,
|
||||
maxspecial,_gpu_host,max_nbors,cell_size,false,
|
||||
_threads_per_atom);
|
||||
int success=device->init(*ans,false,false,nlocal,nall,maxspecial);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
|
||||
max_nbors,cell_size,false,_threads_per_atom);
|
||||
if (success!=0)
|
||||
return success;
|
||||
|
||||
|
||||
@ -246,11 +246,8 @@ int DeviceT::set_ocl_params(char *ocl_vendor) {
|
||||
template <class numtyp, class acctyp>
|
||||
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
const bool rot, const int nlocal,
|
||||
const int host_nlocal, const int nall,
|
||||
Neighbor *nbor, const int maxspecial,
|
||||
const int gpu_host, const int max_nbors,
|
||||
const double cell_size, const bool pre_cut,
|
||||
const int threads_per_atom, const bool vel) {
|
||||
const int nall, const int maxspecial,
|
||||
const bool vel) {
|
||||
if (!_device_init)
|
||||
return -1;
|
||||
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
|
||||
@ -301,16 +298,6 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
|
||||
if (!ans.init(ef_nlocal,charge,rot,*gpu))
|
||||
return -3;
|
||||
|
||||
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
||||
*gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
|
||||
_block_cell_id, _block_nbor_build, threads_per_atom,
|
||||
_warp_size, _time_device, compile_string()))
|
||||
return -3;
|
||||
if (_cell_size<0.0)
|
||||
nbor->cell_size(cell_size,cell_size);
|
||||
else
|
||||
nbor->cell_size(_cell_size,cell_size);
|
||||
|
||||
_init_count++;
|
||||
return 0;
|
||||
}
|
||||
@ -338,6 +325,39 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
|
||||
const int host_nlocal, const int nall,
|
||||
const int maxspecial, const int gpu_host,
|
||||
const int max_nbors, const double cell_size,
|
||||
const bool pre_cut, const int threads_per_atom) {
|
||||
int ef_nlocal=nlocal;
|
||||
if (_particle_split<1.0 && _particle_split>0.0)
|
||||
ef_nlocal=static_cast<int>(_particle_split*nlocal);
|
||||
|
||||
int gpu_nbor=0;
|
||||
if (_gpu_mode==Device<numtyp,acctyp>::GPU_NEIGH)
|
||||
gpu_nbor=1;
|
||||
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
|
||||
gpu_nbor=2;
|
||||
#ifndef USE_CUDPP
|
||||
if (gpu_nbor==1)
|
||||
gpu_nbor=2;
|
||||
#endif
|
||||
|
||||
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
|
||||
*gpu,gpu_nbor,gpu_host,pre_cut,_block_cell_2d,
|
||||
_block_cell_id, _block_nbor_build, threads_per_atom,
|
||||
_warp_size, _time_device, compile_string()))
|
||||
return -3;
|
||||
if (_cell_size<0.0)
|
||||
nbor->cell_size(cell_size,cell_size);
|
||||
else
|
||||
nbor->cell_size(_cell_size,cell_size);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void DeviceT::set_single_precompute
|
||||
(PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {
|
||||
@ -614,7 +634,7 @@ void DeviceT::output_kspace_times(UCL_Timer &time_in,
|
||||
if (screen && times[6]>0.0) {
|
||||
fprintf(screen,"\n\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
fprintf(screen," Device Time Info (average): ");
|
||||
fprintf(screen," Device Time Info (average) for kspace: ");
|
||||
fprintf(screen,"\n-------------------------------------");
|
||||
fprintf(screen,"--------------------------------\n");
|
||||
|
||||
|
||||
@ -53,11 +53,43 @@ class Device {
|
||||
const int t_per_atom, const double cell_size,
|
||||
char *vendor_string, const int block_pair);
|
||||
|
||||
/// Initialize the device for Atom and Neighbor storage
|
||||
/** \param rot True if quaternions need to be stored
|
||||
/// Initialize the device for Atom storage
|
||||
/** \param charge True if charges need to be stored
|
||||
* \param rot True if quaternions need to be stored
|
||||
* \param nlocal Total number of local particles to allocate memory for
|
||||
* \param nall Total number of local+ghost particles
|
||||
* \param maxspecial Maximum mumber of special bonded atoms per atom
|
||||
* \param vel True if velocities need to be stored
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(Answer<numtyp,acctyp> &ans, const bool charge, const bool rot,
|
||||
const int nlocal, const int nall, const int maxspecial,
|
||||
const bool vel=false);
|
||||
|
||||
/// Initialize the device for Atom storage only
|
||||
/** \param nlocal Total number of local particles to allocate memory for
|
||||
* \param nall Total number of local+ghost particles
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
|
||||
|
||||
/// Initialize the neighbor list storage
|
||||
/** \param charge True if charges need to be stored
|
||||
* \param rot True if quaternions need to be stored
|
||||
* \param nlocal Total number of local particles to allocate memory for
|
||||
* \param host_nlocal Initial number of host particles to allocate memory for
|
||||
* \param nall Total number of local+ghost particles
|
||||
* \param maxspecial Maximum mumber of special bonded atoms per atom
|
||||
* \param gpu_host 0 if host will not perform force calculations,
|
||||
* 1 if gpu_nbor is true, and host needs a half nbor list,
|
||||
* 2 if gpu_nbor is true, and host needs a full nbor list
|
||||
@ -73,23 +105,11 @@ class Device {
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(Answer<numtyp,acctyp> &a, const bool charge, const bool rot,
|
||||
const int nlocal, const int host_nlocal, const int nall,
|
||||
Neighbor *nbor, const int maxspecial, const int gpu_host,
|
||||
const int max_nbors, const double cell_size, const bool pre_cut,
|
||||
const int threads_per_atom, const bool vel=false);
|
||||
|
||||
/// Initialize the device for Atom storage only
|
||||
/** \param nlocal Total number of local particles to allocate memory for
|
||||
* \param nall Total number of local+ghost particles
|
||||
*
|
||||
* Returns:
|
||||
* - 0 if successfull
|
||||
* - -1 if fix gpu not found
|
||||
* - -3 if there is an out of memory error
|
||||
* - -4 if the GPU library was not compiled for GPU
|
||||
* - -5 Double precision is not supported on card **/
|
||||
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
|
||||
int init_nbor(Neighbor *nbor, const int nlocal,
|
||||
const int host_nlocal, const int nall,
|
||||
const int maxspecial, const int gpu_host,
|
||||
const int max_nbors, const double cell_size,
|
||||
const bool pre_cut, const int threads_per_atom);
|
||||
|
||||
/// Output a message for pair_style acceleration with device stats
|
||||
void init_message(FILE *screen, const char *name,
|
||||
@ -173,7 +193,7 @@ class Device {
|
||||
/// Return host memory usage in bytes
|
||||
double host_memory_usage() const;
|
||||
|
||||
/// Return the number of procs sharing a device (size of device commincator)
|
||||
/// Return the number of procs sharing a device (size of device communicator)
|
||||
inline int procs_per_gpu() const { return _procs_per_gpu; }
|
||||
/// Return the number of threads per proc
|
||||
inline int num_threads() const { return _nthreads; }
|
||||
@ -260,12 +280,12 @@ class Device {
|
||||
/// Atom Data
|
||||
Atom<numtyp,acctyp> atom;
|
||||
|
||||
// --------------------------- NBOR DATA ----------------------------
|
||||
// --------------------------- NBOR SHARED KERNELS ----------------
|
||||
|
||||
/// Neighbor Data
|
||||
/// Shared kernels for neighbor lists
|
||||
NeighborShared _neighbor_shared;
|
||||
|
||||
// ------------------------ LONG RANGE DATA -------------------------
|
||||
// ------------------------ LONG RANGE DATA -----------------------
|
||||
|
||||
// Long Range Data
|
||||
int _long_range_precompute;
|
||||
|
||||
@ -10,6 +10,5 @@ twojmax 6
|
||||
|
||||
rfac0 0.99363
|
||||
rmin0 0
|
||||
diagonalstyle 3
|
||||
bzeroflag 0
|
||||
quadraticflag 0
|
||||
|
||||
@ -8,6 +8,5 @@ twojmax 8
|
||||
|
||||
rfac0 0.99363
|
||||
rmin0 0
|
||||
diagonalstyle 3
|
||||
bzeroflag 0
|
||||
quadraticflag 0
|
||||
|
||||
12
src/.gitignore
vendored
12
src/.gitignore
vendored
@ -167,6 +167,10 @@
|
||||
/pair_spin.h
|
||||
/pair_spin_dmi.cpp
|
||||
/pair_spin_dmi.h
|
||||
/pair_spin_dipole_cut.cpp
|
||||
/pair_spin_dipole_cut.h
|
||||
/pair_spin_dipole_long.cpp
|
||||
/pair_spin_dipole_long.h
|
||||
/pair_spin_exchange.cpp
|
||||
/pair_spin_exchange.h
|
||||
/pair_spin_magelec.cpp
|
||||
@ -428,6 +432,10 @@
|
||||
/ewald.h
|
||||
/ewald_cg.cpp
|
||||
/ewald_cg.h
|
||||
/ewald_dipole.cpp
|
||||
/ewald_dipole.h
|
||||
/ewald_dipole_spin.cpp
|
||||
/ewald_dipole_spin.h
|
||||
/ewald_disp.cpp
|
||||
/ewald_disp.h
|
||||
/ewald_n.cpp
|
||||
@ -1027,6 +1035,10 @@
|
||||
/pppm.h
|
||||
/pppm_cg.cpp
|
||||
/pppm_cg.h
|
||||
/pppm_dipole.cpp
|
||||
/pppm_dipole.h
|
||||
/pppm_dipole_spin.cpp
|
||||
/pppm_dipole_spin.h
|
||||
/pppm_disp.cpp
|
||||
/pppm_disp.h
|
||||
/pppm_disp_tip4p.cpp
|
||||
|
||||
@ -2,12 +2,10 @@
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -19,7 +17,12 @@
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "neighbor.h"
|
||||
#include "neigh_list.h"
|
||||
#include "neigh_request.h"
|
||||
#include "update.h"
|
||||
#include "integrate.h"
|
||||
#include "respa.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "error.h"
|
||||
@ -31,6 +34,7 @@ using namespace MathConst;
|
||||
|
||||
PairLJClass2::PairLJClass2(LAMMPS *lmp) : Pair(lmp)
|
||||
{
|
||||
respa_enable = 1;
|
||||
writedata = 1;
|
||||
}
|
||||
|
||||
@ -133,6 +137,270 @@ void PairLJClass2::compute(int eflag, int vflag)
|
||||
if (vflag_fdotr) virial_fdotr_compute();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
*/
|
||||
|
||||
void PairLJClass2::compute_inner()
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
|
||||
double rsq,rinv,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
double *special_lj = force->special_lj;
|
||||
int newton_pair = force->newton_pair;
|
||||
|
||||
inum = list->inum_inner;
|
||||
ilist = list->ilist_inner;
|
||||
numneigh = list->numneigh_inner;
|
||||
firstneigh = list->firstneigh_inner;
|
||||
|
||||
double cut_out_on = cut_respa[0];
|
||||
double cut_out_off = cut_respa[1];
|
||||
|
||||
double cut_out_diff = cut_out_off - cut_out_on;
|
||||
double cut_out_on_sq = cut_out_on*cut_out_on;
|
||||
double cut_out_off_sq = cut_out_off*cut_out_off;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_lj = special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq < cut_out_off_sq) {
|
||||
r2inv = 1.0/rsq;
|
||||
rinv = sqrt(r2inv);
|
||||
r3inv = r2inv*rinv;
|
||||
r6inv = r3inv*r3inv;
|
||||
jtype = type[j];
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
if (rsq > cut_out_on_sq) {
|
||||
rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
|
||||
fpair *= 1.0 - rsw*rsw*(3.0 - 2.0*rsw);
|
||||
}
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
if (newton_pair || j < nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2::compute_middle()
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
|
||||
double rsq,rinv,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
double *special_lj = force->special_lj;
|
||||
int newton_pair = force->newton_pair;
|
||||
|
||||
inum = list->inum_middle;
|
||||
ilist = list->ilist_middle;
|
||||
numneigh = list->numneigh_middle;
|
||||
firstneigh = list->firstneigh_middle;
|
||||
|
||||
double cut_in_off = cut_respa[0];
|
||||
double cut_in_on = cut_respa[1];
|
||||
double cut_out_on = cut_respa[2];
|
||||
double cut_out_off = cut_respa[3];
|
||||
|
||||
double cut_in_diff = cut_in_on - cut_in_off;
|
||||
double cut_out_diff = cut_out_off - cut_out_on;
|
||||
double cut_in_off_sq = cut_in_off*cut_in_off;
|
||||
double cut_in_on_sq = cut_in_on*cut_in_on;
|
||||
double cut_out_on_sq = cut_out_on*cut_out_on;
|
||||
double cut_out_off_sq = cut_out_off*cut_out_off;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_lj = special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
|
||||
r2inv = 1.0/rsq;
|
||||
rinv = sqrt(r2inv);
|
||||
r3inv = r2inv*rinv;
|
||||
r6inv = r3inv*r3inv;
|
||||
jtype = type[j];
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
if (rsq < cut_in_on_sq) {
|
||||
rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
|
||||
fpair *= rsw*rsw*(3.0 - 2.0*rsw);
|
||||
}
|
||||
if (rsq > cut_out_on_sq) {
|
||||
rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
|
||||
fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
|
||||
}
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
if (newton_pair || j < nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2::compute_outer(int eflag, int vflag)
|
||||
{
|
||||
int i,j,ii,jj,inum,jnum,itype,jtype;
|
||||
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
|
||||
double rsq,rinv,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
|
||||
int *ilist,*jlist,*numneigh,**firstneigh;
|
||||
|
||||
evdwl = 0.0;
|
||||
ev_init(eflag,vflag);
|
||||
|
||||
double **x = atom->x;
|
||||
double **f = atom->f;
|
||||
int *type = atom->type;
|
||||
int nlocal = atom->nlocal;
|
||||
double *special_lj = force->special_lj;
|
||||
int newton_pair = force->newton_pair;
|
||||
|
||||
inum = list->inum;
|
||||
ilist = list->ilist;
|
||||
numneigh = list->numneigh;
|
||||
firstneigh = list->firstneigh;
|
||||
|
||||
double cut_in_off = cut_respa[2];
|
||||
double cut_in_on = cut_respa[3];
|
||||
|
||||
double cut_in_diff = cut_in_on - cut_in_off;
|
||||
double cut_in_off_sq = cut_in_off*cut_in_off;
|
||||
double cut_in_on_sq = cut_in_on*cut_in_on;
|
||||
|
||||
// loop over neighbors of my atoms
|
||||
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
xtmp = x[i][0];
|
||||
ytmp = x[i][1];
|
||||
ztmp = x[i][2];
|
||||
itype = type[i];
|
||||
jlist = firstneigh[i];
|
||||
jnum = numneigh[i];
|
||||
|
||||
for (jj = 0; jj < jnum; jj++) {
|
||||
j = jlist[jj];
|
||||
factor_lj = special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
|
||||
delx = xtmp - x[j][0];
|
||||
dely = ytmp - x[j][1];
|
||||
delz = ztmp - x[j][2];
|
||||
rsq = delx*delx + dely*dely + delz*delz;
|
||||
jtype = type[j];
|
||||
|
||||
if (rsq < cutsq[itype][jtype]) {
|
||||
if (rsq > cut_in_off_sq) {
|
||||
r2inv = 1.0/rsq;
|
||||
rinv = sqrt(r2inv);
|
||||
r3inv = r2inv*rinv;
|
||||
r6inv = r3inv*r3inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
if (rsq < cut_in_on_sq) {
|
||||
rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
|
||||
fpair *= rsw*rsw*(3.0 - 2.0*rsw);
|
||||
}
|
||||
|
||||
f[i][0] += delx*fpair;
|
||||
f[i][1] += dely*fpair;
|
||||
f[i][2] += delz*fpair;
|
||||
if (newton_pair || j < nlocal) {
|
||||
f[j][0] -= delx*fpair;
|
||||
f[j][1] -= dely*fpair;
|
||||
f[j][2] -= delz*fpair;
|
||||
}
|
||||
}
|
||||
|
||||
if (eflag) {
|
||||
r2inv = 1.0/rsq;
|
||||
rinv = sqrt(r2inv);
|
||||
r3inv = r2inv*rinv;
|
||||
r6inv = r3inv*r3inv;
|
||||
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
|
||||
offset[itype][jtype];
|
||||
evdwl *= factor_lj;
|
||||
}
|
||||
|
||||
if (vflag) {
|
||||
if (rsq <= cut_in_off_sq) {
|
||||
r2inv = 1.0/rsq;
|
||||
rinv = sqrt(r2inv);
|
||||
r3inv = r2inv*rinv;
|
||||
r6inv = r3inv*r3inv;
|
||||
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
} else if (rsq < cut_in_on_sq)
|
||||
fpair = factor_lj*forcelj*r2inv;
|
||||
}
|
||||
|
||||
if (evflag) ev_tally(i,j,nlocal,newton_pair,
|
||||
evdwl,0.0,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
/* ----------------------------------------------------------------------
|
||||
allocate all arrays
|
||||
------------------------------------------------------------------------- */
|
||||
@ -212,6 +480,38 @@ void PairLJClass2::coeff(int narg, char **arg)
|
||||
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init specific to this pair style
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void PairLJClass2::init_style()
|
||||
{
|
||||
// request regular or rRESPA neighbor list
|
||||
|
||||
int irequest;
|
||||
int respa = 0;
|
||||
|
||||
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
|
||||
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
|
||||
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
|
||||
}
|
||||
|
||||
irequest = neighbor->request(this,instance_me);
|
||||
|
||||
if (respa >= 1) {
|
||||
neighbor->requests[irequest]->respaouter = 1;
|
||||
neighbor->requests[irequest]->respainner = 1;
|
||||
}
|
||||
if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
|
||||
|
||||
// set rRESPA cutoffs
|
||||
|
||||
if (strstr(update->integrate_style,"respa") &&
|
||||
((Respa *) update->integrate)->level_inner >= 0)
|
||||
cut_respa = ((Respa *) update->integrate)->cutoff;
|
||||
else cut_respa = NULL;
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
init for one type pair i,j and corresponding j,i
|
||||
------------------------------------------------------------------------- */
|
||||
@ -246,6 +546,11 @@ double PairLJClass2::init_one(int i, int j)
|
||||
lj4[j][i] = lj4[i][j];
|
||||
offset[j][i] = offset[i][j];
|
||||
|
||||
// check interior rRESPA cutoff
|
||||
|
||||
if (cut_respa && cut[i][j] < cut_respa[3])
|
||||
error->all(FLERR,"Pair cutoff < Respa interior cutoff");
|
||||
|
||||
// compute I,J contribution to long-range tail correction
|
||||
// count total # of atoms of type I and J via Allreduce
|
||||
|
||||
|
||||
@ -2,12 +2,10 @@
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -31,6 +29,7 @@ class PairLJClass2 : public Pair {
|
||||
virtual void compute(int, int);
|
||||
virtual void settings(int, char **);
|
||||
void coeff(int, char **);
|
||||
void init_style();
|
||||
virtual double init_one(int, int);
|
||||
void write_restart(FILE *);
|
||||
void read_restart(FILE *);
|
||||
@ -41,11 +40,16 @@ class PairLJClass2 : public Pair {
|
||||
double single(int, int, int, int, double, double, double, double &);
|
||||
void *extract(const char *, int &);
|
||||
|
||||
void compute_inner();
|
||||
void compute_middle();
|
||||
void compute_outer(int, int);
|
||||
|
||||
protected:
|
||||
double cut_global;
|
||||
double **cut;
|
||||
double **epsilon,**sigma;
|
||||
double **lj1,**lj2,**lj3,**lj4,**offset;
|
||||
double *cut_respa;
|
||||
|
||||
virtual void allocate();
|
||||
};
|
||||
@ -56,15 +60,13 @@ class PairLJClass2 : public Pair {
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Illegal ... command
|
||||
|
||||
Self-explanatory. Check the input script syntax and compare to the
|
||||
documentation for the command. You can use -echo screen as a
|
||||
command-line option when running LAMMPS to see the offending line.
|
||||
|
||||
E: Incorrect args for pair coefficients
|
||||
|
||||
Self-explanatory. Check the input script or data file.
|
||||
|
||||
E: Pair cutoff < Respa interior cutoff
|
||||
One or more pairwise cutoffs are too short to use with the specified
|
||||
rRESPA cutoffs.
|
||||
*/
|
||||
|
||||
@ -44,7 +44,7 @@ using namespace MathConst;
|
||||
PairLJCutDipoleLong::PairLJCutDipoleLong(LAMMPS *lmp) : Pair(lmp)
|
||||
{
|
||||
single_enable = 0;
|
||||
ewaldflag = dipoleflag = 1;
|
||||
ewaldflag = pppmflag = dipoleflag = 1;
|
||||
respa_enable = 0;
|
||||
}
|
||||
|
||||
|
||||
@ -1,9 +1,8 @@
|
||||
This package implements GPU optimizations of various LAMMPS styles.
|
||||
|
||||
Section 5.3.1 on the manual gives details of what hardware and Cuda
|
||||
Section 3.7 of the manual gives details of what hardware and Cuda
|
||||
software is required on your system, and full details on how to build
|
||||
and use this package. See the KOKKOS package, which also has
|
||||
GPU-enabled styles.
|
||||
and use this package. The KOKKOS package also has GPU-enabled styles.
|
||||
|
||||
This package uses an external library provided in lib/gpu which must
|
||||
be compiled before making LAMMPS. See the lib/gpu/README file and the
|
||||
|
||||
@ -219,17 +219,6 @@ void FixGPU::init()
|
||||
error->all(FLERR,"GPU package does not (yet) work with "
|
||||
"atom_style template");
|
||||
|
||||
// hybrid cannot be used with force/neigh option
|
||||
|
||||
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
|
||||
if (force->pair_match("^hybrid",0) != NULL)
|
||||
error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
|
||||
|
||||
if (_particle_split < 0)
|
||||
if (force->pair_match("^hybrid",0) != NULL)
|
||||
error->all(FLERR,"GPU split param must be positive "
|
||||
"for hybrid pair styles");
|
||||
|
||||
// neighbor list builds on the GPU with triclinic box is not yet supported
|
||||
|
||||
if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&
|
||||
|
||||
@ -65,14 +65,6 @@ E: GPU package does not (yet) work with atom_style template
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Cannot use pair hybrid with GPU neighbor list builds
|
||||
|
||||
Neighbor list builds must be done on the CPU for this pair style.
|
||||
|
||||
E: GPU split param must be positive for hybrid pair styles
|
||||
|
||||
See the package gpu command.
|
||||
|
||||
E: Cannot use package gpu neigh yes with triclinic box
|
||||
|
||||
This is a current restriction in LAMMPS.
|
||||
|
||||
@ -22,6 +22,7 @@
|
||||
#include "memory_kokkos.h"
|
||||
#include "error.h"
|
||||
#include "kokkos.h"
|
||||
#include "atom_masks.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
@ -270,8 +271,10 @@ int AtomKokkos::add_custom(const char *name, int flag)
|
||||
int n = strlen(name) + 1;
|
||||
dname[index] = new char[n];
|
||||
strcpy(dname[index],name);
|
||||
this->sync(Device,DVECTOR_MASK);
|
||||
memoryKK->grow_kokkos(k_dvector,dvector,ndvector,nmax,
|
||||
"atom:dvector");
|
||||
this->modified(Device,DVECTOR_MASK);
|
||||
}
|
||||
|
||||
return index;
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -59,14 +59,15 @@ AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
||||
|
||||
void AtomVecAngleKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -98,7 +99,7 @@ void AtomVecAngleKokkos::grow(int n)
|
||||
"atom:angle_atom3");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
@ -282,7 +283,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
||||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
@ -309,7 +310,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
@ -397,8 +398,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
||||
const int nfirst, const int &pbc_flag,
|
||||
const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
|
||||
@ -429,8 +430,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
|
||||
@ -493,13 +494,13 @@ struct AtomVecAngleKokkos_UnpackComm {
|
||||
void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
@ -642,7 +643,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
@ -659,7 +660,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
|
||||
void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
modified(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -960,9 +961,9 @@ struct AtomVecAngleKokkos_UnpackBorder {
|
||||
void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
@ -984,7 +985,7 @@ void AtomVecAngleKokkos::unpack_border(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -1010,7 +1011,7 @@ void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -1412,7 +1413,7 @@ int AtomVecAngleKokkos::unpack_exchange(double *buf)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | SPECIAL_MASK);
|
||||
|
||||
@ -1487,7 +1488,7 @@ int AtomVecAngleKokkos::size_restart()
|
||||
|
||||
int AtomVecAngleKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | SPECIAL_MASK);
|
||||
|
||||
@ -1541,7 +1542,7 @@ int AtomVecAngleKokkos::unpack_restart(double *buf)
|
||||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | SPECIAL_MASK);
|
||||
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -55,14 +55,15 @@ AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
||||
|
||||
void AtomVecAtomicKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -74,7 +75,7 @@ void AtomVecAtomicKokkos::grow(int n)
|
||||
memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
@ -393,9 +394,9 @@ struct AtomVecAtomicKokkos_UnpackBorder {
|
||||
|
||||
void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -415,7 +416,7 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -440,7 +441,7 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -668,7 +669,7 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK);
|
||||
|
||||
int m = 1;
|
||||
@ -720,7 +721,7 @@ int AtomVecAtomicKokkos::size_restart()
|
||||
|
||||
int AtomVecAtomicKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK );
|
||||
|
||||
int m = 1;
|
||||
@ -755,7 +756,7 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf)
|
||||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK );
|
||||
|
||||
int m = 1;
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -58,14 +58,15 @@ AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
||||
|
||||
void AtomVecBondKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -84,7 +85,7 @@ void AtomVecBondKokkos::grow(int n)
|
||||
memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atomKK->nextra_grow; iextra++)
|
||||
@ -468,9 +469,9 @@ struct AtomVecBondKokkos_UnpackBorder {
|
||||
void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
@ -492,7 +493,7 @@ void AtomVecBondKokkos::unpack_border(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -518,7 +519,7 @@ void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -866,7 +867,7 @@ int AtomVecBondKokkos::unpack_exchange(double *buf)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
|
||||
|
||||
int k;
|
||||
@ -934,7 +935,7 @@ int AtomVecBondKokkos::size_restart()
|
||||
|
||||
int AtomVecBondKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
|
||||
int m = 1;
|
||||
buf[m++] = h_x(i,0);
|
||||
@ -978,7 +979,7 @@ int AtomVecBondKokkos::unpack_restart(double *buf)
|
||||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
|
||||
int m = 1;
|
||||
h_x(nlocal,0) = buf[m++];
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -58,14 +58,15 @@ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
||||
|
||||
void AtomVecChargeKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -79,7 +80,7 @@ void AtomVecChargeKokkos::grow(int n)
|
||||
memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
@ -494,7 +495,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -510,7 +511,7 @@ void AtomVecChargeKokkos::unpack_border(int n, int first, double *buf)
|
||||
if (i == nmax) {
|
||||
grow(0);
|
||||
}
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -536,7 +537,7 @@ void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -797,7 +798,7 @@ int AtomVecChargeKokkos::unpack_exchange(double *buf)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK);
|
||||
|
||||
int m = 1;
|
||||
@ -850,7 +851,7 @@ int AtomVecChargeKokkos::size_restart()
|
||||
|
||||
int AtomVecChargeKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK);
|
||||
|
||||
int m = 1;
|
||||
@ -888,7 +889,7 @@ int AtomVecChargeKokkos::unpack_restart(double *buf)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK);
|
||||
|
||||
int m = 1;
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -60,14 +60,15 @@ AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
||||
|
||||
void AtomVecDPDKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -93,7 +94,7 @@ void AtomVecDPDKokkos::grow(int n)
|
||||
modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -158,7 +159,7 @@ void AtomVecDPDKokkos::grow_reset()
|
||||
|
||||
void AtomVecDPDKokkos::copy(int i, int j, int delflag)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCG_MASK | UCGNEW_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
@ -184,7 +185,7 @@ void AtomVecDPDKokkos::copy(int i, int j, int delflag)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCG_MASK | UCGNEW_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
@ -268,7 +269,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
||||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,
|
||||
@ -303,7 +304,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,
|
||||
@ -410,8 +411,8 @@ struct AtomVecDPDKokkos_PackCommSelf {
|
||||
int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
||||
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,
|
||||
@ -446,8 +447,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,
|
||||
@ -528,15 +529,15 @@ struct AtomVecDPDKokkos_UnpackComm {
|
||||
void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
struct AtomVecDPDKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
struct AtomVecDPDKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,first);
|
||||
@ -552,7 +553,7 @@ int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf,
|
||||
int i,j,m;
|
||||
double dx,dy,dz;
|
||||
|
||||
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
@ -598,7 +599,7 @@ int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf,
|
||||
int i,j,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
@ -685,7 +686,7 @@ void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf)
|
||||
h_uChem[i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -709,7 +710,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
h_uChem[i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -717,7 +718,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
@ -734,8 +735,8 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
|
||||
void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
sync(Host,F_MASK);
|
||||
modified(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
}
|
||||
|
||||
int m = 0;
|
||||
@ -819,7 +820,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
|
||||
{
|
||||
X_FLOAT dx,dy,dz;
|
||||
|
||||
sync(space,ALL_MASK);
|
||||
atomKK->sync(space,ALL_MASK);
|
||||
|
||||
if (pbc_flag != 0) {
|
||||
if (domain->triclinic == 0) {
|
||||
@ -876,7 +877,7 @@ int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf,
|
||||
int i,j,m;
|
||||
double dx,dy,dz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
@ -937,7 +938,7 @@ int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf,
|
||||
int i,j,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
@ -1032,7 +1033,7 @@ int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
int i,j,m;
|
||||
|
||||
sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK);
|
||||
|
||||
m = 0;
|
||||
@ -1052,7 +1053,7 @@ int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
int i,j,m;
|
||||
|
||||
sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
m = 0;
|
||||
@ -1127,11 +1128,11 @@ struct AtomVecDPDKokkos_UnpackBorder {
|
||||
|
||||
void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
|
||||
if(space==Host) {
|
||||
@ -1179,7 +1180,7 @@ void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf)
|
||||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
|
||||
}
|
||||
@ -1217,7 +1218,7 @@ void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
|
||||
}
|
||||
@ -1237,7 +1238,7 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf)
|
||||
h_uChem(i) = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK );
|
||||
|
||||
return m;
|
||||
@ -1260,7 +1261,7 @@ int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf)
|
||||
h_uCGnew(i) = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
|
||||
|
||||
return m;
|
||||
@ -1384,7 +1385,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
|
||||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
@ -1402,7 +1403,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
|
||||
|
||||
int AtomVecDPDKokkos::pack_exchange(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
@ -1518,7 +1519,7 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
|
||||
modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
@ -1556,7 +1557,7 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf)
|
||||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(nlocal,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
@ -1593,7 +1594,7 @@ int AtomVecDPDKokkos::size_restart()
|
||||
|
||||
int AtomVecDPDKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
|
||||
@ -1658,7 +1659,7 @@ int AtomVecDPDKokkos::unpack_restart(double *buf)
|
||||
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
|
||||
UCG_MASK | UCGNEW_MASK |
|
||||
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -58,14 +58,15 @@ AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
||||
|
||||
void AtomVecFullKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -123,7 +124,7 @@ void AtomVecFullKokkos::grow(int n)
|
||||
atomKK->improper_per_atom,"atom:improper_atom4");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
@ -608,9 +609,9 @@ struct AtomVecFullKokkos_UnpackBorder {
|
||||
void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
|
||||
@ -632,7 +633,7 @@ void AtomVecFullKokkos::unpack_border(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -659,7 +660,7 @@ void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -1204,7 +1205,7 @@ int AtomVecFullKokkos::unpack_exchange(double *buf)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
@ -1297,7 +1298,7 @@ int AtomVecFullKokkos::size_restart()
|
||||
|
||||
int AtomVecFullKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
@ -1370,10 +1371,10 @@ int AtomVecFullKokkos::unpack_restart(double *buf)
|
||||
if (atom->nextra_store)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
||||
@ -307,7 +307,7 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int
|
||||
int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz;
|
||||
@ -351,7 +351,7 @@ int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf,
|
||||
int AtomVecHybridKokkos::pack_comm_vel(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
@ -463,7 +463,7 @@ void AtomVecHybridKokkos::unpack_comm(int n, int first, double *buf)
|
||||
h_x(i,2) = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
@ -500,7 +500,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
}
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
@ -512,7 +512,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
|
||||
int AtomVecHybridKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int i,k,m,last;
|
||||
|
||||
@ -546,7 +546,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
h_f(j,2) += buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
@ -559,7 +559,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz;
|
||||
@ -613,7 +613,7 @@ int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf,
|
||||
int AtomVecHybridKokkos::pack_border_vel(int n, int *list, double *buf,
|
||||
int pbc_flag, int *pbc)
|
||||
{
|
||||
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
int i,j,k,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
int omega_flag = atom->omega_flag;
|
||||
@ -741,7 +741,7 @@ void AtomVecHybridKokkos::unpack_border(int n, int first, double *buf)
|
||||
h_mask[i] = (int) ubuf(buf[m++]).i;
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
@ -787,7 +787,7 @@ void AtomVecHybridKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
}
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
// unpack sub-style contributions as contiguous chunks
|
||||
|
||||
@ -969,7 +969,7 @@ void AtomVecHybridKokkos::create_atom(int itype, double *coord)
|
||||
|
||||
void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **values)
|
||||
{
|
||||
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
@ -1000,7 +1000,7 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val
|
||||
h_angmom(nlocal,2) = 0.0;
|
||||
}
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
|
||||
|
||||
// each sub-style parses sub-style specific values
|
||||
|
||||
@ -1017,13 +1017,13 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val
|
||||
|
||||
void AtomVecHybridKokkos::data_vel(int m, char **values)
|
||||
{
|
||||
sync(Host,V_MASK);
|
||||
atomKK->sync(Host,V_MASK);
|
||||
|
||||
h_v(m,0) = atof(values[0]);
|
||||
h_v(m,1) = atof(values[1]);
|
||||
h_v(m,2) = atof(values[2]);
|
||||
|
||||
modified(Host,V_MASK);
|
||||
atomKK->modified(Host,V_MASK);
|
||||
|
||||
// each sub-style parses sub-style specific values
|
||||
|
||||
@ -1038,7 +1038,7 @@ void AtomVecHybridKokkos::data_vel(int m, char **values)
|
||||
|
||||
void AtomVecHybridKokkos::pack_data(double **buf)
|
||||
{
|
||||
sync(Host,TAG_MASK|TYPE_MASK|X_MASK);
|
||||
atomKK->sync(Host,TAG_MASK|TYPE_MASK|X_MASK);
|
||||
|
||||
int k,m;
|
||||
|
||||
@ -1089,7 +1089,7 @@ void AtomVecHybridKokkos::write_data(FILE *fp, int n, double **buf)
|
||||
|
||||
void AtomVecHybridKokkos::pack_vel(double **buf)
|
||||
{
|
||||
sync(Host,V_MASK);
|
||||
atomKK->sync(Host,V_MASK);
|
||||
|
||||
int k,m;
|
||||
|
||||
|
||||
@ -267,6 +267,114 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
|
||||
return n*3;
|
||||
}
|
||||
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType,int TRICLINIC>
|
||||
struct AtomVecKokkos_PackCommSelfFused {
|
||||
typedef DeviceType device_type;
|
||||
|
||||
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
|
||||
typename ArrayTypes<DeviceType>::t_x_array _xw;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
|
||||
typename ArrayTypes<DeviceType>::t_int_2d_const _pbc;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _pbc_flag;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _firstrecv;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _sendnum_scan;
|
||||
typename ArrayTypes<DeviceType>::t_int_1d_const _g2l;
|
||||
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
|
||||
|
||||
AtomVecKokkos_PackCommSelfFused(
|
||||
const typename DAT::tdual_x_array &x,
|
||||
const typename DAT::tdual_int_2d &list,
|
||||
const typename DAT::tdual_int_2d &pbc,
|
||||
const typename DAT::tdual_int_1d &pbc_flag,
|
||||
const typename DAT::tdual_int_1d &firstrecv,
|
||||
const typename DAT::tdual_int_1d &sendnum_scan,
|
||||
const typename DAT::tdual_int_1d &g2l,
|
||||
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
|
||||
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz):
|
||||
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
|
||||
_list(list.view<DeviceType>()),
|
||||
_pbc(pbc.view<DeviceType>()),
|
||||
_pbc_flag(pbc_flag.view<DeviceType>()),
|
||||
_firstrecv(firstrecv.view<DeviceType>()),
|
||||
_sendnum_scan(sendnum_scan.view<DeviceType>()),
|
||||
_g2l(g2l.view<DeviceType>()),
|
||||
_xprd(xprd),_yprd(yprd),_zprd(zprd),
|
||||
_xy(xy),_xz(xz),_yz(yz) {};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& ii) const {
|
||||
|
||||
int iswap = 0;
|
||||
while (ii >= _sendnum_scan[iswap]) iswap++;
|
||||
int i = ii;
|
||||
if (iswap > 0)
|
||||
i = ii - _sendnum_scan[iswap-1];
|
||||
|
||||
const int _nfirst = _firstrecv[iswap];
|
||||
const int nlocal = _firstrecv[0];
|
||||
|
||||
int j = _list(iswap,i);
|
||||
if (j >= nlocal)
|
||||
j = _g2l(j-nlocal);
|
||||
|
||||
if (_pbc_flag(ii) == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0);
|
||||
_xw(i+_nfirst,1) = _x(j,1);
|
||||
_xw(i+_nfirst,2) = _x(j,2);
|
||||
} else {
|
||||
if (TRICLINIC == 0) {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
} else {
|
||||
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz;
|
||||
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz;
|
||||
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan,
|
||||
const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc,
|
||||
const DAT::tdual_int_1d &g2l) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
}
|
||||
return n*3;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template<class DeviceType>
|
||||
|
||||
@ -51,6 +51,14 @@ class AtomVecKokkos : public AtomVec {
|
||||
const int & iswap, const int nfirst,
|
||||
const int &pbc_flag, const int pbc[]);
|
||||
|
||||
virtual int
|
||||
pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list,
|
||||
const DAT::tdual_int_1d &sendnum_scan,
|
||||
const DAT::tdual_int_1d &firstrecv,
|
||||
const DAT::tdual_int_1d &pbc_flag,
|
||||
const DAT::tdual_int_2d &pbc,
|
||||
const DAT::tdual_int_1d &g2l);
|
||||
|
||||
virtual int
|
||||
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
|
||||
const int & iswap, const DAT::tdual_xfloat_2d &buf,
|
||||
|
||||
@ -24,7 +24,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -58,14 +58,15 @@ AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
|
||||
|
||||
void AtomVecMolecularKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atomKK->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -121,7 +122,7 @@ void AtomVecMolecularKokkos::grow(int n)
|
||||
atomKK->improper_per_atom,"atom:improper_atom4");
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
if (atom->nextra_grow)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
@ -361,7 +362,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
||||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
|
||||
@ -388,7 +389,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
|
||||
@ -477,8 +478,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
||||
const int nfirst, const int &pbc_flag,
|
||||
const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1>
|
||||
@ -505,8 +506,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1>
|
||||
@ -565,13 +566,13 @@ struct AtomVecMolecularKokkos_UnpackComm {
|
||||
void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
@ -714,7 +715,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK);
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
@ -731,7 +732,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
|
||||
void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
modified(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -1032,9 +1033,9 @@ struct AtomVecMolecularKokkos_UnpackBorder {
|
||||
void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,
|
||||
ExecutionSpace space) {
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
@ -1056,7 +1057,7 @@ void AtomVecMolecularKokkos::unpack_border(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -1082,7 +1083,7 @@ void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
last = first + n;
|
||||
for (i = first; i < last; i++) {
|
||||
if (i == nmax) grow(0);
|
||||
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
h_x(i,0) = buf[m++];
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
@ -1615,7 +1616,7 @@ int AtomVecMolecularKokkos::unpack_exchange(double *buf)
|
||||
{
|
||||
int nlocal = atom->nlocal;
|
||||
if (nlocal == nmax) grow(0);
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
@ -1707,7 +1708,7 @@ int AtomVecMolecularKokkos::size_restart()
|
||||
|
||||
int AtomVecMolecularKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
@ -1780,7 +1781,7 @@ int AtomVecMolecularKokkos::unpack_restart(double *buf)
|
||||
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
|
||||
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
|
||||
|
||||
|
||||
@ -30,7 +30,7 @@
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define DELTA 10000
|
||||
#define DELTA 10
|
||||
|
||||
static const double MY_PI = 3.14159265358979323846; // pi
|
||||
|
||||
@ -93,14 +93,15 @@ void AtomVecSphereKokkos::init()
|
||||
|
||||
void AtomVecSphereKokkos::grow(int n)
|
||||
{
|
||||
if (n == 0) nmax += DELTA;
|
||||
int step = MAX(DELTA,nmax*0.01);
|
||||
if (n == 0) nmax += step;
|
||||
else nmax = n;
|
||||
atom->nmax = nmax;
|
||||
if (nmax < 0 || nmax > MAXSMALLINT)
|
||||
error->one(FLERR,"Per-processor system is too big");
|
||||
|
||||
sync(Device,ALL_MASK);
|
||||
modified(Device,ALL_MASK);
|
||||
atomKK->sync(Device,ALL_MASK);
|
||||
atomKK->modified(Device,ALL_MASK);
|
||||
|
||||
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
|
||||
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
|
||||
@ -120,7 +121,7 @@ void AtomVecSphereKokkos::grow(int n)
|
||||
modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
|
||||
|
||||
grow_reset();
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -172,7 +173,7 @@ void AtomVecSphereKokkos::grow_reset()
|
||||
|
||||
void AtomVecSphereKokkos::copy(int i, int j, int delflag)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | RADIUS_MASK |
|
||||
RMASS_MASK | OMEGA_MASK);
|
||||
|
||||
@ -197,7 +198,7 @@ void AtomVecSphereKokkos::copy(int i, int j, int delflag)
|
||||
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
|
||||
modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | RADIUS_MASK |
|
||||
RMASS_MASK | OMEGA_MASK);
|
||||
}
|
||||
@ -277,7 +278,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
||||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
|
||||
@ -316,7 +317,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
|
||||
@ -464,7 +465,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
const int* const pbc)
|
||||
{
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if(pbc_flag) {
|
||||
if(deform_vremap) {
|
||||
if(domain->triclinic) {
|
||||
@ -595,7 +596,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if(pbc_flag) {
|
||||
if(deform_vremap) {
|
||||
if(domain->triclinic) {
|
||||
@ -795,8 +796,8 @@ int AtomVecSphereKokkos::pack_comm_self(
|
||||
if (radvary == 0)
|
||||
return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
|
||||
if(commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
|
||||
@ -835,8 +836,8 @@ int AtomVecSphereKokkos::pack_comm_self(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
|
||||
@ -926,14 +927,14 @@ void AtomVecSphereKokkos::unpack_comm_kokkos(
|
||||
return;
|
||||
}
|
||||
if(commKK->forward_comm_on_host) {
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
struct AtomVecSphereKokkos_UnpackComm<LMPDeviceType> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -998,7 +999,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
|
||||
const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
|
||||
atomKK->k_x,
|
||||
@ -1015,7 +1016,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_UnpackCommVel<LMPDeviceType,0> f(
|
||||
atomKK->k_x,
|
||||
@ -1044,7 +1045,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf,
|
||||
|
||||
if (radvary == 0) {
|
||||
// Not sure if we need to call sync for X here
|
||||
sync(Host,X_MASK);
|
||||
atomKK->sync(Host,X_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
@ -1071,7 +1072,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
@ -1115,7 +1116,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf,
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
if (radvary == 0) {
|
||||
sync(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
@ -1178,7 +1179,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf,
|
||||
}
|
||||
}
|
||||
} else {
|
||||
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
for (i = 0; i < n; i++) {
|
||||
@ -1257,7 +1258,7 @@ int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
if (radvary == 0) return 0;
|
||||
|
||||
sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -1280,7 +1281,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf)
|
||||
h_x(i,1) = buf[m++];
|
||||
h_x(i,2) = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
} else {
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
@ -1291,7 +1292,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf)
|
||||
h_radius[i] = buf[m++];
|
||||
h_rmass[i] = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1313,7 +1314,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
h_omega(i,1) = buf[m++];
|
||||
h_omega(i,2) = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK);
|
||||
} else {
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
@ -1330,7 +1331,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
h_omega(i,1) = buf[m++];
|
||||
h_omega(i,2) = buf[m++];
|
||||
}
|
||||
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
@ -1346,7 +1347,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
|
||||
h_radius[i] = buf[m++];
|
||||
h_rmass[i] = buf[m++];
|
||||
}
|
||||
modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
return m;
|
||||
}
|
||||
|
||||
@ -1355,7 +1356,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
|
||||
int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,F_MASK|TORQUE_MASK);
|
||||
atomKK->sync(Host,F_MASK|TORQUE_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
@ -1375,7 +1376,7 @@ int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
|
||||
int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
sync(Host,TORQUE_MASK);
|
||||
atomKK->sync(Host,TORQUE_MASK);
|
||||
|
||||
int m = 0;
|
||||
const int last = first + n;
|
||||
@ -1392,7 +1393,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
|
||||
void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
modified(Host,F_MASK|TORQUE_MASK);
|
||||
atomKK->modified(Host,F_MASK|TORQUE_MASK);
|
||||
}
|
||||
|
||||
int m = 0;
|
||||
@ -1412,7 +1413,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
modified(Host,TORQUE_MASK);
|
||||
atomKK->modified(Host,TORQUE_MASK);
|
||||
}
|
||||
|
||||
int m = 0;
|
||||
@ -1492,7 +1493,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
|
||||
X_FLOAT dx,dy,dz;
|
||||
|
||||
// This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
|
||||
sync(space,ALL_MASK);
|
||||
atomKK->sync(space,ALL_MASK);
|
||||
|
||||
if (pbc_flag != 0) {
|
||||
if (domain->triclinic == 0) {
|
||||
@ -1549,7 +1550,7 @@ int AtomVecSphereKokkos::pack_border(
|
||||
int i,j,m;
|
||||
double dx,dy,dz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
@ -1686,7 +1687,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
|
||||
X_FLOAT dvx=0,dvy=0,dvz=0;
|
||||
|
||||
// This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
|
||||
sync(space,ALL_MASK);
|
||||
atomKK->sync(space,ALL_MASK);
|
||||
|
||||
if (pbc_flag != 0) {
|
||||
if (domain->triclinic == 0) {
|
||||
@ -1776,7 +1777,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf,
|
||||
int i,j,m;
|
||||
double dx,dy,dz,dvx,dvy,dvz;
|
||||
|
||||
sync(Host,ALL_MASK);
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
|
||||
m = 0;
|
||||
if (pbc_flag == 0) {
|
||||
@ -1866,7 +1867,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf,
|
||||
|
||||
int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->sync(Host,RADIUS_MASK|RMASS_MASK);
|
||||
|
||||
int m = 0;
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -1942,7 +1943,7 @@ void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
RADIUS_MASK|RMASS_MASK);
|
||||
}
|
||||
|
||||
@ -1969,7 +1970,7 @@ void AtomVecSphereKokkos::unpack_border(int n, int first, double *buf)
|
||||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
}
|
||||
|
||||
|
||||
@ -2052,7 +2053,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
|
||||
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
}
|
||||
|
||||
@ -2085,7 +2086,7 @@ void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf)
|
||||
m += modify->fix[atom->extra_border[iextra]]->
|
||||
unpack_border(n,first,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -2098,7 +2099,7 @@ int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf)
|
||||
h_radius[i] = buf[m++];
|
||||
h_rmass[i] = buf[m++];
|
||||
}
|
||||
modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,RADIUS_MASK|RMASS_MASK);
|
||||
return m;
|
||||
}
|
||||
|
||||
@ -2218,7 +2219,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
||||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
@ -2239,7 +2240,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
||||
|
||||
int AtomVecSphereKokkos::pack_exchange(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
@ -2354,7 +2355,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
|
||||
k_count.sync<LMPHostType>();
|
||||
}
|
||||
|
||||
modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
@ -2391,7 +2392,7 @@ int AtomVecSphereKokkos::unpack_exchange(double *buf)
|
||||
m += modify->fix[atom->extra_grow[iextra]]->
|
||||
unpack_exchange(nlocal,&buf[m]);
|
||||
|
||||
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
@ -2427,7 +2428,7 @@ int AtomVecSphereKokkos::size_restart()
|
||||
|
||||
int AtomVecSphereKokkos::pack_restart(int i, double *buf)
|
||||
{
|
||||
sync(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->sync(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | V_MASK |
|
||||
RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
|
||||
|
||||
@ -2494,7 +2495,7 @@ int AtomVecSphereKokkos::unpack_restart(double *buf)
|
||||
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
|
||||
}
|
||||
|
||||
modified(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
atomKK->modified(Host,X_MASK | TAG_MASK | TYPE_MASK |
|
||||
MASK_MASK | IMAGE_MASK | V_MASK |
|
||||
RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
|
||||
|
||||
@ -2616,14 +2617,14 @@ int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values)
|
||||
|
||||
void AtomVecSphereKokkos::data_vel(int m, char **values)
|
||||
{
|
||||
sync(Host,V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,V_MASK|OMEGA_MASK);
|
||||
h_v(m,0) = atof(values[0]);
|
||||
h_v(m,1) = atof(values[1]);
|
||||
h_v(m,2) = atof(values[2]);
|
||||
h_omega(m,0) = atof(values[3]);
|
||||
h_omega(m,1) = atof(values[4]);
|
||||
h_omega(m,2) = atof(values[5]);
|
||||
modified(Host,V_MASK|OMEGA_MASK);
|
||||
atomKK->modified(Host,V_MASK|OMEGA_MASK);
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -2632,11 +2633,11 @@ void AtomVecSphereKokkos::data_vel(int m, char **values)
|
||||
|
||||
int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values)
|
||||
{
|
||||
sync(Host,OMEGA_MASK);
|
||||
atomKK->sync(Host,OMEGA_MASK);
|
||||
omega[m][0] = atof(values[0]);
|
||||
omega[m][1] = atof(values[1]);
|
||||
omega[m][2] = atof(values[2]);
|
||||
modified(Host,OMEGA_MASK);
|
||||
atomKK->modified(Host,OMEGA_MASK);
|
||||
return 3;
|
||||
}
|
||||
|
||||
@ -2711,7 +2712,7 @@ int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf)
|
||||
|
||||
void AtomVecSphereKokkos::pack_vel(double **buf)
|
||||
{
|
||||
sync(Host,TAG_MASK|V_MASK|OMEGA_MASK);
|
||||
atomKK->sync(Host,TAG_MASK|V_MASK|OMEGA_MASK);
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
@ -2731,7 +2732,7 @@ void AtomVecSphereKokkos::pack_vel(double **buf)
|
||||
|
||||
int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf)
|
||||
{
|
||||
sync(Host,OMEGA_MASK);
|
||||
atomKK->sync(Host,OMEGA_MASK);
|
||||
|
||||
buf[0] = h_omega(i,0);
|
||||
buf[1] = h_omega(i,1);
|
||||
|
||||
@ -57,10 +57,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
|
||||
memory->destroy(buf_recv);
|
||||
buf_recv = NULL;
|
||||
|
||||
k_exchange_sendlist = DAT::
|
||||
tdual_int_1d("comm:k_exchange_sendlist",100);
|
||||
k_exchange_copylist = DAT::
|
||||
tdual_int_1d("comm:k_exchange_copylist",100);
|
||||
k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100);
|
||||
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
|
||||
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
|
||||
k_count = DAT::tdual_int_scalar("comm:k_count");
|
||||
k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
|
||||
|
||||
@ -187,71 +186,80 @@ void CommKokkos::forward_comm_device(int dummy)
|
||||
k_sendlist.sync<DeviceType>();
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
|
||||
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
if (sendproc[iswap] != me) {
|
||||
if (comm_x_only) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
buf = atomKK->k_x.view<DeviceType>().data() +
|
||||
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
|
||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),
|
||||
n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
if (comm->nprocs == 1 && !ghost_velocity) {
|
||||
k_swap.sync<DeviceType>();
|
||||
k_swap2.sync<DeviceType>();
|
||||
k_pbc.sync<DeviceType>();
|
||||
n = avec->pack_comm_self_fused(totalsend,k_sendlist,k_sendnum_scan,
|
||||
k_firstrecv,k_pbc_flag,k_pbc,k_g2l);
|
||||
} else {
|
||||
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
||||
space,X_MASK);
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
if (sendproc[iswap] != me) {
|
||||
if (comm_x_only) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
buf = atomKK->k_x.view<DeviceType>().data() +
|
||||
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
|
||||
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
|
||||
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),
|
||||
n,MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
|
||||
space,X_MASK);
|
||||
}
|
||||
} else if (ghost_velocity) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n)
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else if (ghost_velocity) {
|
||||
if (size_forward_recv[iswap]) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
}
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n) {
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
}
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
if (size_forward_recv[iswap])
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
|
||||
size_forward_recv[iswap],MPI_DOUBLE,
|
||||
recvproc[iswap],0,world,&request);
|
||||
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
if (n)
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
|
||||
MPI_DOUBLE,sendproc[iswap],0,world);
|
||||
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
if (!ghost_velocity) {
|
||||
if (sendnum[iswap])
|
||||
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
|
||||
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
|
||||
DeviceType::fence();
|
||||
if (!ghost_velocity) {
|
||||
if (sendnum[iswap])
|
||||
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
|
||||
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
|
||||
k_buf_send,pbc_flag[iswap],pbc[iswap]);
|
||||
DeviceType::fence();
|
||||
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
|
||||
DeviceType::fence();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -496,9 +504,8 @@ void CommKokkos::exchange()
|
||||
}
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
|
||||
CommBrick::exchange();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -565,146 +572,149 @@ void CommKokkos::exchange_device()
|
||||
atom->nghost = 0;
|
||||
atom->avec->clear_bonus();
|
||||
|
||||
// subbox bounds for orthogonal or triclinic
|
||||
if (comm->nprocs > 1) { // otherwise no-op
|
||||
|
||||
if (triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
// subbox bounds for orthogonal or triclinic
|
||||
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
if (triclinic == 0) {
|
||||
sublo = domain->sublo;
|
||||
subhi = domain->subhi;
|
||||
} else {
|
||||
sublo = domain->sublo_lamda;
|
||||
subhi = domain->subhi_lamda;
|
||||
}
|
||||
|
||||
// loop over dimensions
|
||||
for (int dim = 0; dim < 3; dim++) {
|
||||
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
// when atom is deleted, fill it in with last atom
|
||||
// loop over dimensions
|
||||
for (int dim = 0; dim < 3; dim++) {
|
||||
|
||||
x = atom->x;
|
||||
lo = sublo[dim];
|
||||
hi = subhi[dim];
|
||||
nlocal = atom->nlocal;
|
||||
i = nsend = 0;
|
||||
// fill buffer with atoms leaving my box, using < and >=
|
||||
// when atom is deleted, fill it in with last atom
|
||||
|
||||
if (true) {
|
||||
if (k_sendflag.h_view.extent(0)<nlocal) k_sendflag.resize(nlocal);
|
||||
k_sendflag.sync<DeviceType>();
|
||||
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
|
||||
while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_count.h_view() = 0;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<DeviceType>();
|
||||
x = atom->x;
|
||||
lo = sublo[dim];
|
||||
hi = subhi[dim];
|
||||
nlocal = atom->nlocal;
|
||||
i = nsend = 0;
|
||||
|
||||
BuildExchangeListFunctor<DeviceType>
|
||||
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_sendflag.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
if (true) {
|
||||
if (k_sendflag.h_view.extent(0)<nlocal) k_sendflag.resize(nlocal);
|
||||
k_sendflag.sync<DeviceType>();
|
||||
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
|
||||
while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_count.h_view() = 0;
|
||||
k_count.modify<LMPHostType>();
|
||||
k_count.sync<DeviceType>();
|
||||
|
||||
k_count.sync<LMPHostType>();
|
||||
if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_exchange_sendlist.resize(k_count.h_view()*1.1);
|
||||
k_exchange_copylist.resize(k_count.h_view()*1.1);
|
||||
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
|
||||
BuildExchangeListFunctor<DeviceType>
|
||||
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
|
||||
nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nlocal,f);
|
||||
k_exchange_sendlist.modify<DeviceType>();
|
||||
k_sendflag.modify<DeviceType>();
|
||||
k_count.modify<DeviceType>();
|
||||
|
||||
k_count.sync<LMPHostType>();
|
||||
if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
|
||||
k_exchange_lists.resize(2,k_count.h_view()*1.1);
|
||||
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
|
||||
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
|
||||
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
|
||||
}
|
||||
}
|
||||
|
||||
k_exchange_lists.sync<LMPHostType>();
|
||||
k_sendflag.sync<LMPHostType>();
|
||||
|
||||
int sendpos = nlocal-1;
|
||||
nlocal -= k_count.h_view();
|
||||
for(int i = 0; i < k_count.h_view(); i++) {
|
||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||
k_exchange_copylist.h_view(i) = sendpos;
|
||||
sendpos--;
|
||||
} else
|
||||
k_exchange_copylist.h_view(i) = -1;
|
||||
}
|
||||
|
||||
k_exchange_copylist.modify<LMPHostType>();
|
||||
k_exchange_copylist.sync<DeviceType>();
|
||||
nsend = k_count.h_view();
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend =
|
||||
avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
|
||||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space,
|
||||
dim,lo,hi);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
while (i < nlocal) {
|
||||
if (x[i][dim] < lo || x[i][dim] >= hi) {
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend += avec->pack_exchange(i,&buf_send[nsend]);
|
||||
avec->copy(nlocal-1,i,1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
}
|
||||
k_exchange_copylist.sync<LMPHostType>();
|
||||
k_exchange_sendlist.sync<LMPHostType>();
|
||||
k_sendflag.sync<LMPHostType>();
|
||||
atom->nlocal = nlocal;
|
||||
|
||||
int sendpos = nlocal-1;
|
||||
nlocal -= k_count.h_view();
|
||||
for(int i = 0; i < k_count.h_view(); i++) {
|
||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||
k_exchange_copylist.h_view(i) = sendpos;
|
||||
sendpos--;
|
||||
} else
|
||||
k_exchange_copylist.h_view(i) = -1;
|
||||
}
|
||||
// send/recv atoms in both directions
|
||||
// if 1 proc in dimension, no send/recv, set recv buf to send buf
|
||||
// if 2 procs in dimension, single send/recv
|
||||
// if more than 2 procs in dimension, send/recv to both neighbors
|
||||
|
||||
k_exchange_copylist.modify<LMPHostType>();
|
||||
k_exchange_copylist.sync<DeviceType>();
|
||||
nsend = k_count.h_view();
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend =
|
||||
avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
|
||||
k_exchange_sendlist,k_exchange_copylist,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space,
|
||||
dim,lo,hi);
|
||||
DeviceType::fence();
|
||||
} else {
|
||||
while (i < nlocal) {
|
||||
if (x[i][dim] < lo || x[i][dim] >= hi) {
|
||||
if (nsend > maxsend) grow_send_kokkos(nsend,1);
|
||||
nsend += avec->pack_exchange(i,&buf_send[nsend]);
|
||||
avec->copy(nlocal-1,i,1);
|
||||
nlocal--;
|
||||
} else i++;
|
||||
}
|
||||
}
|
||||
atom->nlocal = nlocal;
|
||||
if (procgrid[dim] == 1) {
|
||||
nrecv = nsend;
|
||||
if (nrecv) {
|
||||
atom->nlocal=avec->
|
||||
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
|
||||
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv = nrecv1;
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
|
||||
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv += nrecv2;
|
||||
}
|
||||
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
|
||||
|
||||
// send/recv atoms in both directions
|
||||
// if 1 proc in dimension, no send/recv, set recv buf to send buf
|
||||
// if 2 procs in dimension, single send/recv
|
||||
// if more than 2 procs in dimension, send/recv to both neighbors
|
||||
|
||||
if (procgrid[dim] == 1) {
|
||||
nrecv = nsend;
|
||||
if (nrecv) {
|
||||
atom->nlocal=avec->
|
||||
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
} else {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
|
||||
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv = nrecv1;
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
|
||||
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE);
|
||||
nrecv += nrecv2;
|
||||
}
|
||||
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
|
||||
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nrecv1,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][0],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nrecv1,
|
||||
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nrecv1,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,world);
|
||||
MPI_DOUBLE,procneigh[dim][0],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
|
||||
if (procgrid[dim] > 2) {
|
||||
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nrecv1,
|
||||
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
|
||||
world,&request);
|
||||
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
|
||||
MPI_DOUBLE,procneigh[dim][1],0,world);
|
||||
MPI_Wait(&request,MPI_STATUS_IGNORE);
|
||||
}
|
||||
|
||||
if (nrecv) {
|
||||
atom->nlocal = avec->
|
||||
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
}
|
||||
|
||||
if (nrecv) {
|
||||
atom->nlocal = avec->
|
||||
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
|
||||
ExecutionSpaceFromDevice<DeviceType>::space);
|
||||
DeviceType::fence();
|
||||
}
|
||||
// check incoming atoms to see if they are in my box
|
||||
// if so, add to my list
|
||||
|
||||
}
|
||||
|
||||
// check incoming atoms to see if they are in my box
|
||||
// if so, add to my list
|
||||
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
}
|
||||
|
||||
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
|
||||
|
||||
if (atom->firstgroupname) {
|
||||
/* this is not yet implemented with Kokkos */
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
@ -742,14 +752,15 @@ void CommKokkos::borders()
|
||||
if (!exchange_comm_classic) {
|
||||
if (exchange_comm_on_host) borders_device<LMPHostType>();
|
||||
else borders_device<LMPDeviceType>();
|
||||
return;
|
||||
} else {
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
CommBrick::borders();
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
}
|
||||
|
||||
atomKK->sync(Host,ALL_MASK);
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::borders();
|
||||
k_sendlist.modify<LMPHostType>();
|
||||
atomKK->modified(Host,ALL_MASK);
|
||||
if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_classic)
|
||||
copy_swap_info();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -817,7 +828,6 @@ void CommKokkos::borders_device() {
|
||||
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
|
||||
|
||||
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
k_sendlist.sync<DeviceType>();
|
||||
atomKK->sync(exec_space,ALL_MASK);
|
||||
|
||||
// do swaps over all 3 dimensions
|
||||
@ -1037,6 +1047,69 @@ void CommKokkos::borders_device() {
|
||||
atom->map_set();
|
||||
}
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
copy swap info
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void CommKokkos::copy_swap_info()
|
||||
{
|
||||
if (nswap > k_swap.extent(1)) {
|
||||
k_swap = DAT::tdual_int_2d("comm:swap",2,nswap);
|
||||
k_firstrecv = Kokkos::subview(k_swap,0,Kokkos::ALL);
|
||||
k_sendnum_scan = Kokkos::subview(k_swap,1,Kokkos::ALL);
|
||||
}
|
||||
int scan = 0;
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
scan += sendnum[iswap];
|
||||
k_sendnum_scan.h_view[iswap] = scan;
|
||||
k_firstrecv.h_view[iswap] = firstrecv[iswap];
|
||||
}
|
||||
totalsend = scan;
|
||||
|
||||
// create map of ghost to local atom id
|
||||
// store periodic boundary transform from local to ghost
|
||||
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
|
||||
if (totalsend > k_pbc.extent(0)) {
|
||||
k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6);
|
||||
k_swap2 = DAT::tdual_int_2d("comm:swap2",2,totalsend);
|
||||
k_pbc_flag = Kokkos::subview(k_swap2,0,Kokkos::ALL);
|
||||
k_g2l = Kokkos::subview(k_swap2,1,Kokkos::ALL);
|
||||
}
|
||||
|
||||
for (int iswap = 0; iswap < nswap; iswap++) {
|
||||
for (int i = 0; i < sendnum[iswap]; i++) {
|
||||
int source = sendlist[iswap][i] - atom->nlocal;
|
||||
int dest = firstrecv[iswap] + i - atom->nlocal;
|
||||
k_pbc_flag.h_view(dest) = pbc_flag[iswap];
|
||||
k_pbc.h_view(dest,0) = pbc[iswap][0];
|
||||
k_pbc.h_view(dest,1) = pbc[iswap][1];
|
||||
k_pbc.h_view(dest,2) = pbc[iswap][2];
|
||||
k_pbc.h_view(dest,3) = pbc[iswap][3];
|
||||
k_pbc.h_view(dest,4) = pbc[iswap][4];
|
||||
k_pbc.h_view(dest,5) = pbc[iswap][5];
|
||||
k_g2l.h_view(dest) = atom->nlocal + source;
|
||||
|
||||
if (source >= 0) {
|
||||
k_pbc_flag.h_view(dest) = k_pbc_flag.h_view(dest) || k_pbc_flag.h_view(source);
|
||||
k_pbc.h_view(dest,0) += k_pbc.h_view(source,0);
|
||||
k_pbc.h_view(dest,1) += k_pbc.h_view(source,1);
|
||||
k_pbc.h_view(dest,2) += k_pbc.h_view(source,2);
|
||||
k_pbc.h_view(dest,3) += k_pbc.h_view(source,3);
|
||||
k_pbc.h_view(dest,4) += k_pbc.h_view(source,4);
|
||||
k_pbc.h_view(dest,5) += k_pbc.h_view(source,5);
|
||||
k_g2l.h_view(dest) = k_g2l.h_view(source);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
k_swap.modify<LMPHostType>();
|
||||
k_swap2.modify<LMPHostType>();
|
||||
k_pbc.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
|
||||
if flag = 1, realloc
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user