Merge branch 'master' into acks2_release
@@ -1,6 +1,6 @@
 message(STATUS "Downloading and building OpenCL loader library")
-set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2021.06.30.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
-set(OPENCL_LOADER_MD5 "f9e55dd550cfbf77f46507adf7cb8fd2" CACHE STRING "MD5 checksum of OpenCL loader tarball")
+set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2021.09.18.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
+set(OPENCL_LOADER_MD5 "3b3882627964bd02e5c3b02065daac3c" CACHE STRING "MD5 checksum of OpenCL loader tarball")
 mark_as_advanced(OPENCL_LOADER_URL)
 mark_as_advanced(OPENCL_LOADER_MD5)

@@ -71,6 +71,11 @@ if(GPU_API STREQUAL "CUDA")
 # build arch/gencode commands for nvcc based on CUDA toolkit version and use choice
 # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
 set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")

+# apply the following to build "fat" CUDA binaries only for known CUDA toolkits
+if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
+message(WARNING "Untested CUDA Toolkit version. Use at your own risk")
+else()
 # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
 if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
 string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20] ")

@@ -107,8 +112,6 @@ if(GPU_API STREQUAL "CUDA")
 if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
 string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
 endif()
-if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
-message(WARNING "Unsupported CUDA version. Use at your own risk.")
 endif()

 cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC}
@@ -214,13 +217,20 @@ elseif(GPU_API STREQUAL "OPENCL")
 elseif(GPU_API STREQUAL "HIP")
 if(NOT DEFINED HIP_PATH)
 if(NOT DEFINED ENV{HIP_PATH})
-set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed")
+set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation")
 else()
-set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed")
+set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation")
 endif()
 endif()
-set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
-find_package(HIP REQUIRED)
+if(NOT DEFINED ROCM_PATH)
+if(NOT DEFINED ENV{ROCM_PATH})
+set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to ROCm installation")
+else()
+set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to ROCm installation")
+endif()
+endif()
+list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} ${ROCM_PATH})
+find_package(hip REQUIRED)
 option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON)

 if(NOT DEFINED HIP_PLATFORM)
@@ -322,10 +332,11 @@ elseif(GPU_API STREQUAL "HIP")

 set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp")

-hip_add_library(gpu STATIC ${GPU_LIB_SOURCES})
+add_library(gpu STATIC ${GPU_LIB_SOURCES})
 target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu)
 target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
 target_compile_definitions(gpu PRIVATE -DUSE_HIP)
+target_link_libraries(gpu PRIVATE hip::host)

 if(HIP_USE_DEVICE_SORT)
 # add hipCUB

@@ -374,8 +385,9 @@ elseif(GPU_API STREQUAL "HIP")
 endif()
 endif()

-hip_add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
+add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
 target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)
+target_link_libraries(hip_get_devices hip::host)

 if(HIP_PLATFORM STREQUAL "nvcc")
 target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)
@@ -1,6 +1,8 @@
 ########################################################################
 # As of version 3.3.0 Kokkos requires C++14
+if(CMAKE_CXX_STANDARD LESS 14)
 set(CMAKE_CXX_STANDARD 14)
+endif()
 ########################################################################
 # consistency checks and Kokkos options/settings required by LAMMPS
 if(Kokkos_ENABLE_CUDA)
@@ -19,6 +19,14 @@ if(DOWNLOAD_LATTE)
 set(LATTE_MD5 "820e73a457ced178c08c71389a385de7" CACHE STRING "MD5 checksum of LATTE tarball")
 mark_as_advanced(LATTE_URL)
 mark_as_advanced(LATTE_MD5)
+
+# CMake cannot pass BLAS or LAPACK library variable to external project if they are a list
+list(LENGTH BLAS_LIBRARIES NUM_BLAS)
+list(LENGTH LAPACK_LIBRARIES NUM_LAPACK)
+if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1))
+message(FATAL_ERROR "Cannot compile downloaded LATTE library due to a technical limitation")
+endif()
+
 include(ExternalProject)
 ExternalProject_Add(latte_build
 URL ${LATTE_URL}
@@ -45,12 +45,12 @@ if(DOWNLOAD_N2P2)
 # get path to MPI include directory when cross-compiling to windows
 if((CMAKE_SYSTEM_NAME STREQUAL Windows) AND CMAKE_CROSSCOMPILING)
 get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
-set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1")
+set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
 set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
 endif()
 if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
 get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
-set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1")
+set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
 set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
 endif()
 endif()
@@ -69,6 +69,12 @@ if(DOWNLOAD_N2P2)
 # echo final flag for debugging
 message(STATUS "N2P2 BUILD OPTIONS: ${N2P2_BUILD_OPTIONS}")

+# must have "sed" command to compile n2p2 library (for now)
+find_program(HAVE_SED sed)
+if(NOT HAVE_SED)
+message(FATAL_ERROR "Must have 'sed' program installed to compile 'n2p2' library for ML-HDNNP package")
+endif()
+
 # download compile n2p2 library. much patch MPI calls in LAMMPS interface to accommodate MPI-2 (e.g. for cross-compiling)
 include(ExternalProject)
 ExternalProject_Add(n2p2_build
@@ -38,7 +38,7 @@ if(DOWNLOAD_QUIP)
 set(temp "${temp}HAVE_LOCAL_E_MIX=0\nHAVE_QC=0\nHAVE_GAP=1\nHAVE_DESCRIPTORS_NONCOMMERCIAL=1\n")
 set(temp "${temp}HAVE_TURBOGAP=0\nHAVE_QR=1\nHAVE_THIRDPARTY=0\nHAVE_FX=0\nHAVE_SCME=0\nHAVE_MTP=0\n")
 set(temp "${temp}HAVE_MBD=0\nHAVE_TTM_NF=0\nHAVE_CH4=0\nHAVE_NETCDF4=0\nHAVE_MDCORE=0\nHAVE_ASAP=0\n")
-set(temp "${temp}HAVE_CGAL=0\nHAVE_METIS=0\nHAVE_LMTO_TBE=0\n")
+set(temp "${temp}HAVE_CGAL=0\nHAVE_METIS=0\nHAVE_LMTO_TBE=0\nHAVE_SCALAPACK=0\n")
 file(WRITE ${CMAKE_BINARY_DIR}/quip.config "${temp}")

 message(STATUS "QUIP download via git requested - we will build our own")

@@ -50,7 +50,7 @@ if(DOWNLOAD_QUIP)
 GIT_TAG origin/public
 GIT_SHALLOW YES
 GIT_PROGRESS YES
-PATCH_COMMAND cp ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps
+PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps
 CONFIGURE_COMMAND env QUIP_ARCH=lammps make config
 BUILD_COMMAND env QUIP_ARCH=lammps make libquip
 INSTALL_COMMAND ""
@@ -12,6 +12,13 @@ if(DOWNLOAD_MSCG)
 mark_as_advanced(MSCG_URL)
 mark_as_advanced(MSCG_MD5)

+# CMake cannot pass BLAS or LAPACK library variable to external project if they are a list
+list(LENGTH BLAS_LIBRARIES NUM_BLAS)
+list(LENGTH LAPACK_LIBRARIES NUM_LAPACK)
+if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1))
+message(FATAL_ERROR "Cannot compile downloaded MSCG library due to a technical limitation")
+endif()
+
 include(ExternalProject)
 ExternalProject_Add(mscg_build
 URL ${MSCG_URL}
@@ -23,6 +23,11 @@ if(DOWNLOAD_SCAFACOS)
 file(DOWNLOAD ${LAMMPS_THIRDPARTY_URL}/scafacos-1.0.1-fix.diff ${CMAKE_CURRENT_BINARY_DIR}/scafacos-1.0.1.fix.diff
 EXPECTED_HASH MD5=4baa1333bb28fcce102d505e1992d032)

+find_program(HAVE_PATCH patch)
+if(NOT HAVE_PATCH)
+message(FATAL_ERROR "The 'patch' program is required to build the ScaFaCoS library")
+endif()
+
 include(ExternalProject)
 ExternalProject_Add(scafacos_build
 URL ${SCAFACOS_URL}
@@ -26,6 +26,11 @@ if(DOWNLOAD_VORO)
 set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS})
 endif()

+find_program(HAVE_PATCH patch)
+if(NOT HAVE_PATCH)
+message(FATAL_ERROR "The 'patch' program is required to build the voro++ library")
+endif()
+
 ExternalProject_Add(voro_build
 URL ${VORO_URL}
 URL_MD5 ${VORO_MD5}
@@ -1,7 +1,28 @@
 [
-{ include: [ "<bits/types/struct_rusage.h>", private, "<sys/resource.h>", public ] },
-{ include: [ "<bits/exception.h>", public, "<exception>", public ] },
 { include: [ "@<Eigen/.*>", private, "<Eigen/Eigen>", public ] },
 { include: [ "@<gtest/.*>", private, "\"gtest/gtest.h\"", public ] },
 { include: [ "@<gmock/.*>", private, "\"gmock/gmock.h\"", public ] },
+{ include: [ "@<gmock/.*>", private, "\"gmock/gmock.h\"", public ] },
+{ include: [ "@<(cell|c_loops|container).hh>", private, "<voro++.hh>", public ] },
+{ include: [ "@\"atom_vec_.*.h\"", public, "\"style_atom.h\"", public ] },
+{ include: [ "@\"body_.*.h\"", public, "\"style_body.h\"", public ] },
+{ include: [ "@\"compute_.*.h\"", public, "\"style_compute.h\"", public ] },
+{ include: [ "@\"fix_.*.h\"", public, "\"style_fix.h\"", public ] },
+{ include: [ "@\"dump_.*.h\"", public, "\"style_dump.h\"", public ] },
+{ include: [ "@\"min_.*.h\"", public, "\"style_minimize.h\"", public ] },
+{ include: [ "@\"reader_.*.h\"", public, "\"style_reader.h\"", public ] },
+{ include: [ "@\"region_.*.h\"", public, "\"style_region.h\"", public ] },
+{ include: [ "@\"pair_.*.h\"", public, "\"style_pair.h\"", public ] },
+{ include: [ "@\"angle_.*.h\"", public, "\"style_angle.h\"", public ] },
+{ include: [ "@\"bond_.*.h\"", public, "\"style_bond.h\"", public ] },
+{ include: [ "@\"dihedral_.*.h\"", public, "\"style_dihedral.h\"", public ] },
+{ include: [ "@\"improper_.*.h\"", public, "\"style_improper.h\"", public ] },
+{ include: [ "@\"kspace_.*.h\"", public, "\"style_kspace.h\"", public ] },
+{ include: [ "@\"nbin_.*.h\"", public, "\"style_nbin.h\"", public ] },
+{ include: [ "@\"npair_.*.h\"", public, "\"style_npair.h\"", public ] },
+{ include: [ "@\"nstenci_.*.h\"", public, "\"style_nstencil.h\"", public ] },
+{ include: [ "@\"ntopo_.*.h\"", public, "\"style_ntopo.h\"", public ] },
+{ include: [ "<float.h>", public, "<cfloat>", public ] },
+{ include: [ "<limits.h>", public, "<climits>", public ] },
+{ include: [ "<bits/types/struct_tm.h>", private, "<ctime>", public ] },
 ]
cmake/presets/hip_amd.cmake (new file, 30 lines)

# preset that will enable hip (clang/clang++) with support for MPI and OpenMP (on Linux boxes)

# prefer flang over gfortran, if available
find_program(CLANG_FORTRAN NAMES flang gfortran f95)
set(ENV{OMPI_FC} ${CLANG_FORTRAN})

set(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "" FORCE)
set(CMAKE_C_COMPILER "hipcc" CACHE STRING "" FORCE)
set(CMAKE_Fortran_COMPILER ${CLANG_FORTRAN} CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_DEBUG "-Wall -Wextra -g -std=f2003" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)

set(MPI_CXX "hipcc" CACHE STRING "" FORCE)
set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)

unset(HAVE_OMP_H_INCLUDE CACHE)
set(OpenMP_C "hipcc" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" FORCE)
set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_CXX "hipcc" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libomp.so" CACHE PATH "" FORCE)
@@ -24,6 +24,7 @@ set(ALL_PACKAGES
 DRUDE
 EFF
 EXTRA-COMPUTE
+EXTRA-DUMP
 EXTRA-FIX
 EXTRA-MOLECULE
 EXTRA-PAIR
@@ -1,4 +1,4 @@
-.TH LAMMPS "31 August 2021" "2021-08-31"
+.TH LAMMPS "29 September 2021" "2021-09-29"
 .SH NAME
 .B LAMMPS
 \- Molecular Dynamics Simulator.
@@ -58,13 +58,16 @@ Report missing and unneeded '#include' statements (CMake only)
 The conventions for how and when to use and order include statements in
 LAMMPS are documented in :doc:`Modify_style`. To assist with following
 these conventions one can use the `Include What You Use tool <https://include-what-you-use.org/>`_.
-This is still under development and for large and complex projects like LAMMPS
+This tool is still under development and for large and complex projects like LAMMPS
 there are some false positives, so suggested changes need to be verified manually.
-It is recommended to use at least version 0.14, which has much fewer incorrect
-reports than earlier versions.
+It is recommended to use at least version 0.16, which has much fewer incorrect
+reports than earlier versions. To install the IWYU toolkit, you need to have
+the clang compiler **and** its development package installed. Download the IWYU
+version that matches the version of the clang compiler, configure, build, and
+install it.

-The necessary steps to generate the report can be enabled via a
-CMake variable:
+The necessary steps to generate the report can be enabled via a CMake variable
+during CMake configuration.

 .. code-block:: bash

@@ -71,7 +71,8 @@ LAMMPS can use them if they are available on your system.

 -D FFTW3_INCLUDE_DIR=path # path to FFTW3 include files
 -D FFTW3_LIBRARY=path # path to FFTW3 libraries
--D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries
+-D FFTW3_OMP_LIBRARY=path # path to FFTW3 OpenMP wrapper libraries
+-D FFT_FFTW_THREADS=on # enable using OpenMP threaded FFTW3 libraries
 -D MKL_INCLUDE_DIR=path # ditto for Intel MKL library
 -D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries
 -D MKL_LIBRARY=path # path to MKL libraries
@@ -11,6 +11,7 @@ of time and requests from the LAMMPS user community.
 :maxdepth: 1

 Developer_org
+Developer_parallel
 Developer_flow
 Developer_write
 Developer_notes
doc/src/Developer_par_comm.rst (new file, 120 lines)

Communication
^^^^^^^^^^^^^

Following the partitioning scheme in use, all per-atom data is distributed across the MPI processes, which allows LAMMPS to handle very large systems provided it uses a correspondingly large number of MPI processes. The per-atom data (atom IDs, positions, velocities, types, etc.) is distributed along with the atoms. To be able to compute the short-range interactions, MPI processes need not only access to data of atoms they "own" but also information about atoms from neighboring sub-domains, in LAMMPS referred to as "ghost" atoms. These are copies of atoms storing required per-atom data for up to the communication cutoff distance. The green dashed-line boxes in the :ref:`domain-decomposition` figure illustrate the extended ghost-atom sub-domain for one processor.

This approach is also used to implement periodic boundary conditions: atoms that lie within the cutoff distance across a periodic boundary are also stored as ghost atoms and taken from the periodic replication of the sub-domain, which may be the same sub-domain, e.g. if running in serial. As a consequence of this, force computation in LAMMPS is not subject to minimum image conventions and thus cutoffs may be larger than half the simulation domain.

.. _ghost-atom-comm:
.. figure:: img/ghost-comm.png
   :align: center

   ghost atom communication

   This figure shows the ghost atom communication patterns between sub-domains for "brick" (left) and "tiled" (right) communication styles for 2d simulations. The numbers indicate MPI process ranks. Here the sub-domains are drawn spatially separated for clarity. The dashed-line box is the extended sub-domain of processor 0 which includes its ghost atoms. The red- and blue-shaded boxes are the regions of communicated ghost atoms.

Efficient communication patterns are needed to update the "ghost" atom data, since that needs to be done at every MD time step or minimization step. The diagrams of the :ref:`ghost-atom-comm` figure illustrate how ghost atom communication is performed in two stages for a 2d simulation (three in 3d) for both a regular and an irregular partitioning of the simulation box. For the regular case (left), atoms are exchanged first in the *x*-direction, then in *y*, with four neighbors in the grid of processor sub-domains.

In the *x* stage, processor ranks 1 and 2 send owned atoms in their red-shaded regions to rank 0 (and vice versa). Then in the *y* stage, ranks 3 and 4 send atoms in their blue-shaded regions to rank 0, which includes ghost atoms they received in the *x* stage. Rank 0 thus acquires all its ghost atoms; atoms in the solid blue corner regions are communicated twice before rank 0 receives them.

For the irregular case (right) the two stages are similar, but a processor can have more than one neighbor in each direction. In the *x* stage, MPI ranks 1, 2, and 3 send owned atoms in their red-shaded regions to rank 0 (and vice versa). These include only atoms between the lower and upper *y*-boundary of rank 0's sub-domain. In the *y* stage, ranks 4, 5, and 6 send atoms in their blue-shaded regions to rank 0. This may include ghost atoms they received in the *x* stage, but only if they are needed by rank 0 to fill its extended ghost atom regions in the +/-*y* directions (blue rectangles). Thus in this case, ranks 5 and 6 do not include ghost atoms they received from each other (in the *x* stage) in the atoms they send to rank 0. The key point is that while the pattern of communication is more complex in the irregular partitioning case, it can still proceed in two stages (three in 3d) via atom exchanges with only neighboring processors.

When attributes of owned atoms are sent to neighboring processors to become attributes of their ghost atoms, LAMMPS calls this a "forward" communication. On timesteps when atoms migrate to new owning processors and neighbor lists are rebuilt, each processor creates a list of its owned atoms which are ghost atoms in each of its neighbor processors. These lists are used to pack per-atom coordinates (for example) into message buffers in subsequent steps until the next reneighboring.

A "reverse" communication is when computed ghost atom attributes are sent back to the processor that owns the atom. This is used (for example) to sum partial forces on ghost atoms to the complete force on owned atoms. The order of the two stages described in the :ref:`ghost-atom-comm` figure is inverted and the same lists of atoms are used to pack and unpack message buffers with per-atom forces. When a received buffer is unpacked, the ghost forces are summed to owned atom forces. As in forward communication, forces on atoms in the four blue corners of the diagrams are sent, received, and summed twice (once at each stage) before owning processors have the full force.

These two operations are used in many places within LAMMPS aside from the exchange of coordinates and forces, for example by manybody potentials to share intermediate per-atom values, or by rigid-body integrators to enable each atom in a body to access body properties. Here are additional details about how these communication operations are performed in LAMMPS:

- When exchanging data with different processors, forward and reverse communication is done using ``MPI_Send()`` and ``MPI_Irecv()`` calls. If a processor is "exchanging" atoms with itself, only the pack and unpack operations are performed, e.g. to create ghost atoms across periodic boundaries when running on a single processor.

- For forward communication of owned atom coordinates, periodic box lengths are added and subtracted when the receiving processor is across a periodic boundary from the sender. There is then no need to apply a minimum image convention when calculating distances between atom pairs when building neighbor lists or computing forces.

- The cutoff distance for exchanging ghost atoms is typically equal to the neighbor cutoff. But it can also be chosen to be longer if needed, e.g. half the diameter of a rigid body composed of multiple atoms or over 3x the length of a stretched bond for dihedral interactions. It can also exceed the periodic box size. For the regular communication pattern (left), if the cutoff distance extends beyond a neighbor processor's sub-domain, then multiple exchanges are performed in the same direction. Each exchange is with the same neighbor processor, but buffers are packed/unpacked using a different list of atoms. For forward communication, in the first exchange a processor sends only owned atoms. In subsequent exchanges, it sends ghost atoms received in previous exchanges. For the irregular pattern (right), overlaps of a processor's extended ghost-atom sub-domain with all other processors in each dimension are detected.
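To make the pack/exchange/unpack pattern described above concrete, here is a minimal sketch of a single forward-communication swap. It is not taken from the LAMMPS sources: it assumes flat coordinate storage, a single partner rank per swap, and a precomputed send list, and it omits all buffer-size negotiation.

.. code-block:: c++

   // forward_comm_sketch.cpp -- minimal illustration, not LAMMPS code
   #include <mpi.h>
   #include <vector>

   // One forward-communication swap: pack coordinates of the listed owned
   // atoms (applying the periodic shift for this swap), exchange buffers with
   // a partner rank, and unpack the received values as ghost-atom coordinates.
   void forward_comm_swap(std::vector<double> &x,            // flat coordinates, 3 per atom
                          const std::vector<int> &sendlist,  // owned atoms to send
                          const double shift[3],             // periodic box shift for this swap
                          int partner, int me, int nrecv,    // partner rank, own rank, atoms expected
                          int firstghost,                    // index where received ghosts are stored
                          MPI_Comm comm)
   {
     std::vector<double> sendbuf, recvbuf(3 * (size_t)nrecv);
     sendbuf.reserve(3 * sendlist.size());
     for (int i : sendlist) {                                // pack with periodic shift applied
       sendbuf.push_back(x[3*i + 0] + shift[0]);
       sendbuf.push_back(x[3*i + 1] + shift[1]);
       sendbuf.push_back(x[3*i + 2] + shift[2]);
     }

     if (partner != me) {                                    // exchange with the partner rank
       MPI_Request request;
       MPI_Irecv(recvbuf.data(), 3 * nrecv, MPI_DOUBLE, partner, 0, comm, &request);
       MPI_Send(sendbuf.data(), (int)sendbuf.size(), MPI_DOUBLE, partner, 0, comm);
       MPI_Wait(&request, MPI_STATUS_IGNORE);
     } else {                                                // "exchange" with self: copy only
       recvbuf = sendbuf;
     }

     if (x.size() < 3 * (size_t)(firstghost + nrecv))        // make room for the ghost atoms
       x.resize(3 * (size_t)(firstghost + nrecv));
     for (int j = 0; j < nrecv; ++j)                         // unpack into the ghost section
       for (int d = 0; d < 3; ++d)
         x[3*(firstghost + j) + d] = recvbuf[3*j + d];
   }

A reverse communication traverses the same swaps in the opposite order: force contributions stored on the ghost atoms are packed, sent back to the owning rank, and summed into (rather than copied over) the owned-atom forces.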
doc/src/Developer_par_long.rst (new file, 188 lines)

Long-range interactions
^^^^^^^^^^^^^^^^^^^^^^^

For charged systems, LAMMPS can compute long-range Coulombic interactions via the FFT-based particle-particle/particle-mesh (PPPM) method implemented in :doc:`kspace style pppm and its variants <kspace_style>`. For that, Coulombic interactions are partitioned into short- and long-range components. The short-range portion is computed in real space as a loop over pairs of charges within a cutoff distance, using neighbor lists. The long-range portion is computed in reciprocal space using a kspace style. For the PPPM implementation the simulation cell is overlaid with a regular FFT grid in 3d. It proceeds in several stages (a sketch of the first stage follows this list):

a) each atom's point charge is interpolated to nearby FFT grid points,
b) a forward 3d FFT is performed,
c) a convolution operation is performed in reciprocal space,
d) one or more inverse 3d FFTs are performed, and
e) electric field values from grid points near each atom are interpolated to compute its forces.
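As an illustration of stage a), here is a simplified "cloud-in-cell" (order-2) charge assignment, which spreads each point charge with linear weights onto the 8 surrounding grid points. This is not the LAMMPS implementation (PPPM uses higher-order, user-selectable stencils and distributed grid storage), and it assumes atoms lie inside the box, but the structure of the interpolation loop is the same.

.. code-block:: c++

   // charge_assignment_sketch.cpp -- illustrative only, not LAMMPS code
   #include <cmath>
   #include <vector>

   struct Grid {
     int nx, ny, nz;                 // global FFT grid size
     double lo[3], h[3];             // box origin and grid spacing per dimension
     std::vector<double> rho;        // charge density, size nx*ny*nz
     double &at(int i, int j, int k) { return rho[(k*ny + j)*nx + i]; }
   };

   // Spread charge q at position x onto the grid with linear (order-2) weights.
   void assign_charge(Grid &g, const double x[3], double q)
   {
     int idx[3]; double frac[3];
     for (int d = 0; d < 3; ++d) {
       double s = (x[d] - g.lo[d]) / g.h[d];   // position in grid units
       idx[d] = (int)std::floor(s);
       frac[d] = s - idx[d];                   // fractional offset within the cell
     }
     int n[3] = {g.nx, g.ny, g.nz};
     double cellvol = g.h[0] * g.h[1] * g.h[2];
     for (int dz = 0; dz < 2; ++dz)
       for (int dy = 0; dy < 2; ++dy)
         for (int dx = 0; dx < 2; ++dx) {
           double w = (dx ? frac[0] : 1.0 - frac[0]) *
                      (dy ? frac[1] : 1.0 - frac[1]) *
                      (dz ? frac[2] : 1.0 - frac[2]);
           // wrap indices periodically onto the global grid
           int i = (idx[0] + dx) % n[0];
           int j = (idx[1] + dy) % n[1];
           int k = (idx[2] + dz) % n[2];
           g.at(i, j, k) += w * q / cellvol;   // accumulate charge density
         }
   }

In the parallel code each processor only stores the grid points overlapping its sub-domain plus a few ghost layers; charge deposited on ghost points is summed back to the owning processor by a reverse grid communication, as described next.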

For any of the spatial-decomposition partitioning schemes each processor owns the brick-shaped portion of FFT grid points contained within its sub-domain. The two interpolation operations use a stencil of grid points surrounding each atom. To accommodate the stencil size, each processor also stores a few layers of ghost grid points surrounding its brick. Forward and reverse communication of grid point values is performed similar to the corresponding :doc:`atom data communication <Developer_par_comm>`. In this case, electric field values on owned grid points are sent to neighboring processors to become ghost point values. Likewise charge values on ghost points are sent and summed to values on owned points.

For triclinic simulation boxes, the FFT grid planes are parallel to the box faces, but the mapping of charge and electric field values to/from grid points is done in reduced coordinates where the tilted box is conceptually a unit cube, so that the stencil and FFT operations are unchanged. However the FFT grid size required for a given accuracy is larger for triclinic domains than it is for orthogonal boxes.

.. _fft-parallel:
.. figure:: img/fft-decomp-parallel.png
   :align: center

   parallel FFT in PPPM

   Stages of a parallel FFT for a simulation domain overlaid with an 8x8x8 3d FFT grid, partitioned across 64 processors. Within each of the 4 diagrams, grid cells of the same color are owned by a single processor; for simplicity only cells owned by 4 or 8 of the 64 processors are colored. The two images on the left illustrate brick-to-pencil communication. The two images on the right illustrate pencil-to-pencil communication, which in this case transposes the *y* and *z* dimensions of the grid.

Parallel 3d FFTs require substantial communication relative to their computational cost. A 3d FFT is implemented by a series of 1d FFTs along the *x-*, *y-*, and *z-*\ direction of the FFT grid. Thus the FFT grid cannot be decomposed like atoms into 3 dimensions for parallel processing of the FFTs but only in 1 (as planes) or 2 (as pencils) dimensions, and in between the steps the grid needs to be transposed so that the FFT grid portion "owned" by each MPI process is complete in the direction of the 1d FFTs it has to perform. LAMMPS uses the pencil-decomposition algorithm as shown in the :ref:`fft-parallel` figure.

Initially (far left), each processor owns a brick of same-color grid cells (actually grid points) contained within its sub-domain. A brick-to-pencil communication operation converts this layout to 1d pencils in the *x*-dimension (center left). Again, cells of the same color are owned by the same processor. Each processor can then compute a 1d FFT on each pencil of data it wholly owns using a call to the configured FFT library. A pencil-to-pencil communication then converts this layout to pencils in the *y* dimension (center right), which effectively transposes the *x* and *y* dimensions of the grid, followed by 1d FFTs in *y*. A final transpose of pencils from *y* to *z* (far right) followed by 1d FFTs in *z* completes the forward FFT. The data is left in a *z*-pencil layout for the convolution operation. One or more inverse FFTs then perform the sequence of 1d FFTs and communication steps in reverse order; the final layout of resulting grid values is the same as the initial brick layout.

Each communication operation within the FFT (brick-to-pencil or pencil-to-pencil or pencil-to-brick) converts one tiling of the 3d grid to another, where a tiling in this context means an assignment of a small brick-shaped subset of grid points to each processor, the union of which comprises the entire grid. The parallel `fftMPI library <https://lammps.github.io/fftmpi/>`_ written for LAMMPS allows arbitrary definitions of the tiling so that an irregular partitioning of the simulation domain can use it directly. Transforming data from one tiling to another is implemented in `fftMPI` using point-to-point communication, where each processor sends data to a few other processors, since each tile in the initial tiling overlaps with a handful of tiles in the final tiling.

The transformations could also be done using collective communication across all *P* processors with a single call to ``MPI_Alltoall()``, but this is typically much slower. However, for the specialized brick and pencil tiling illustrated in the :ref:`fft-parallel` figure, collective communication across the entire MPI communicator is not required. In the example an :math:`8^3` grid with 512 grid cells is partitioned across 64 processors; each processor owns a 2x2x2 3d brick of grid cells. The initial brick-to-pencil communication (upper left to upper right) only requires collective communication within subgroups of 4 processors, as illustrated by the 4 colors. More generally, a brick-to-pencil communication can be performed by partitioning *P* processors into :math:`P^{\frac{2}{3}}` subgroups of :math:`P^{\frac{1}{3}}` processors each. Each subgroup performs collective communication only within its subgroup. Similarly, pencil-to-pencil communication can be performed by partitioning *P* processors into :math:`P^{\frac{1}{2}}` subgroups of :math:`P^{\frac{1}{2}}` processors each. This is illustrated in the figure for the :math:`y \Rightarrow z` communication (center). An eight-processor subgroup owns the front *yz* plane of data and performs collective communication within the subgroup to transpose from a *y*-pencil to *z*-pencil layout.
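The subgroup idea can be sketched with plain MPI calls. The following is an illustration only (it is not the fftMPI API): the subgroup size of 4 matches the 64-processor example above, and the data chunks are placeholders for the grid values that a real transpose would pack per destination rank.

.. code-block:: c++

   // subgroup_alltoall_sketch.cpp -- illustrative only, not the fftMPI API
   #include <mpi.h>
   #include <vector>

   int main(int argc, char **argv)
   {
     MPI_Init(&argc, &argv);
     int me, nprocs;
     MPI_Comm_rank(MPI_COMM_WORLD, &me);
     MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

     // P = 64 ranks, brick-to-pencil exchanges happen inside subgroups of
     // P^(1/3) = 4 ranks: ranks with the same "color" form one subgroup.
     const int group_size = 4;
     int color = me / group_size;
     MPI_Comm subcomm;
     MPI_Comm_split(MPI_COMM_WORLD, color, me, &subcomm);

     // Each rank contributes one chunk per member of its subgroup; the
     // collective exchange is now confined to group_size ranks instead of P.
     const int chunk = 8;   // grid values per destination rank (placeholder)
     std::vector<double> sendbuf(group_size * chunk, (double)me);
     std::vector<double> recvbuf(group_size * chunk);
     MPI_Alltoall(sendbuf.data(), chunk, MPI_DOUBLE,
                  recvbuf.data(), chunk, MPI_DOUBLE, subcomm);

     MPI_Comm_free(&subcomm);
     MPI_Finalize();
     return 0;
   }

A real transpose would, in addition, reorder the received grid data into the new pencil layout while unpacking the buffers.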

LAMMPS invokes point-to-point communication by default, but also provides the option of partitioned collective communication when using the :doc:`kspace_modify collective yes <kspace_modify>` command to switch to that mode. In the latter case, the code detects the size of the disjoint subgroups and partitions the single *P*-size communicator into multiple smaller communicators, each of which invokes collective communication. Testing on a large IBM Blue Gene/Q machine at Argonne National Labs showed a significant improvement in FFT performance for large processor counts; partitioned collective communication was faster than point-to-point communication or global collective communication involving all *P* processors.

Here are some additional details about FFTs for long-range and related grid/particle operations that LAMMPS supports:

- The fftMPI library allows each grid dimension to be a multiple of small prime factors (2,3,5), and allows any number of processors to perform the FFT. The resulting brick and pencil decompositions are thus not always as well-aligned, but the size of the subgroups of processors for the two modes of communication (brick/pencil and pencil/pencil) still scales as :math:`O(P^{\frac{1}{3}})` and :math:`O(P^{\frac{1}{2}})`.

- For efficiency in performing 1d FFTs, the grid transpose operations illustrated in the :ref:`fft-parallel` figure also involve reordering the 3d data so that a different dimension is contiguous in memory. This reordering can be done during the packing or unpacking of buffers for MPI communication.

- For large systems and particularly a large number of MPI processes, the dominant cost for parallel FFTs is often the communication, not the computation of 1d FFTs, even though the latter scales as :math:`N \log(N)` in the number of grid points *N* per grid direction. This is due to the fact that only a 2d decomposition into pencils is possible while atom data (and their corresponding short-range force and energy computations) can be decomposed efficiently in 3d.

  This can be addressed by reducing the number of MPI processes involved in the MPI communication by using :doc:`hybrid MPI + OpenMP parallelization <Speed_omp>`. This will use OpenMP parallelization inside the MPI domains and, while that may have a lower parallel efficiency, it reduces the communication overhead.

  As an alternative it is also possible to start a :ref:`multi-partition <partition>` calculation and then use the :doc:`verlet/split integrator <run_style>` to perform the PPPM computation on a dedicated, separate partition of MPI processes. This uses an integer "1:*p*" mapping of *p* sub-domains of the atom decomposition to one sub-domain of the FFT grid decomposition, where pairwise non-bonded and bonded forces and energies are computed on the larger partition and the PPPM kspace computation concurrently on the smaller partition.

- LAMMPS also implements PPPM-based solvers for other long-range interactions, dipole and dispersion (Lennard-Jones), which can be used in conjunction with long-range Coulombics for point charges.

- LAMMPS implements a ``GridComm`` class which overlays the simulation domain with a regular grid, partitions it across processors in a manner consistent with processor sub-domains, and provides methods for forward and reverse communication of owned and ghost grid point values. It is used for PPPM as an FFT grid (as outlined above) and also for the MSM algorithm, which uses a cascade of grid sizes from fine to coarse to compute long-range Coulombic forces. The ``GridComm`` class is also useful for models where continuum fields interact with particles. For example, the two-temperature model (TTM) defines heat transfer between atoms (particles) and electrons (continuum gas) where spatial variations in the electron temperature are computed by finite differences of a discretized heat equation on a regular grid. The :doc:`fix ttm/grid <fix_ttm>` command uses the ``GridComm`` class internally to perform its grid operations on a distributed grid instead of the original :doc:`fix ttm <fix_ttm>`, which uses a replicated grid.
doc/src/Developer_par_neigh.rst (new file, 159 lines)

Neighbor lists
^^^^^^^^^^^^^^

To compute forces efficiently, each processor creates a Verlet-style neighbor list which enumerates all pairs of atoms *i,j* (*i* = owned, *j* = owned or ghost) with separation less than the applicable neighbor list cutoff distance. In LAMMPS the neighbor lists are stored in a multiple-page data structure; each page is a contiguous chunk of memory which stores vectors of neighbor atoms *j* for many *i* atoms. This allows pages to be incrementally allocated or deallocated in blocks as needed. Neighbor lists typically consume the most memory of any data structure in LAMMPS. The neighbor list is rebuilt (from scratch) once every few timesteps, then used repeatedly each step for force or other computations. The neighbor cutoff distance is :math:`R_n = R_f + \Delta_s`, where :math:`R_f` is the (largest) force cutoff defined by the interatomic potential for computing short-range pairwise or manybody forces and :math:`\Delta_s` is a "skin" distance that allows the list to be used for multiple steps assuming that atoms do not move very far between consecutive time steps. Typically the code triggers reneighboring when any atom has moved half the skin distance since the last reneighboring; this and other options of the neighbor list rebuild can be adjusted with the :doc:`neigh_modify <neigh_modify>` command.

On steps when reneighboring is performed, atoms which have moved outside their owning processor's sub-domain are first migrated to new processors via communication. Periodic boundary conditions are also (only) enforced on these steps to ensure each atom is re-assigned to the correct processor. After migration, the atoms owned by each processor are stored in a contiguous vector. Periodically each processor spatially sorts owned atoms within its vector to reorder it for improved cache efficiency in force computations and neighbor list building. For that, atoms are spatially binned and then reordered so that atoms in the same bin are adjacent in the vector. Atom sorting can be disabled or its settings modified with the :doc:`atom_modify <atom_modify>` command.

.. _neighbor-stencil:
.. figure:: img/neigh-stencil.png
   :align: center

   neighbor list stencils

   A 2d simulation sub-domain (thick black line) and the corresponding ghost atom cutoff region (dashed blue line) for both orthogonal (left) and triclinic (right) domains. A regular grid of neighbor bins (thin lines) overlays the entire simulation domain and need not align with sub-domain boundaries; only the portion overlapping the augmented sub-domain is shown. In the triclinic case it overlaps the bounding box of the tilted rectangle. The blue- and red-shaded bins represent a stencil of bins searched to find neighbors of a particular atom (black dot).

To build a local neighbor list in linear time, the simulation domain is overlaid (conceptually) with a regular 3d (or 2d) grid of neighbor bins, as shown in the :ref:`neighbor-stencil` figure for 2d models and a single MPI processor's sub-domain. Each processor stores a set of neighbor bins which overlap its sub-domain extended by the neighbor cutoff distance :math:`R_n`. As illustrated, the bins need not align with processor boundaries; an integer number in each dimension is fit to the size of the entire simulation box.

Most often LAMMPS builds what it calls a "half" neighbor list where each *i,j* neighbor pair is stored only once, with either atom *i* or *j* as the central atom. The build can be done efficiently by using a pre-computed "stencil" of bins around a central origin bin which contains the atom whose neighbors are being searched for. A stencil is simply a list of integer offsets in *x,y,z* of nearby bins surrounding the origin bin which are close enough to contain any neighbor atom *j* within a distance :math:`R_n` from any atom *i* in the origin bin. Note that for a half neighbor list, the stencil can be asymmetric since each atom only needs to store half its nearby neighbors.

These stencils are illustrated in the figure for a half list and a bin size of :math:`\frac{1}{2} R_n`. There are 13 red+blue stencil bins in 2d (for the orthogonal case, 15 for triclinic). In 3d there would be 63: 13 in the plane of bins that contain the origin bin and 25 in each of the two planes above it in the *z* direction (75 for triclinic). The reason the triclinic stencil has extra bins is that the bins tile the bounding box of the entire triclinic domain and thus are not periodic with respect to the simulation box itself. The stencil and logic for determining which *i,j* pairs to include in the neighbor list are altered slightly to account for this.

To build a neighbor list, a processor first loops over its "owned" plus "ghost" atoms and assigns each to a neighbor bin. This uses an integer vector to create a linked list of atom indices within each bin. It then performs a triply-nested loop over its owned atoms *i*, the stencil of bins surrounding atom *i*'s bin, and the *j* atoms in each stencil bin (including ghost atoms). If the distance :math:`r_{ij} < R_n`, then atom *j* is added to the vector of atom *i*'s neighbors.
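The triply-nested loop just described can be sketched as follows. This is an illustration, not the LAMMPS code: it uses a full symmetric range of bin offsets and simple ``std::vector`` storage instead of LAMMPS' pre-computed asymmetric stencils and paged lists, and it assumes the bin grid and its origin are given.

.. code-block:: c++

   // neighbor_build_sketch.cpp -- illustrative only, not LAMMPS code
   #include <cmath>
   #include <vector>

   struct Atoms {
     int nlocal, nall;               // owned atoms, owned + ghost atoms
     std::vector<double> x;          // coordinates, 3 per atom
   };

   // Build a "half" list: each i,j pair is stored once, under owned atom i.
   void build_half_list(const Atoms &atoms, double binsize, double cutneigh,
                        const double bboxlo[3], const int nbins[3],
                        std::vector<std::vector<int>> &neigh)
   {
     // 1) bin all owned + ghost atoms via a linked list (binhead/next)
     std::vector<int> binhead(nbins[0]*nbins[1]*nbins[2], -1), next(atoms.nall, -1);
     auto coord2bin = [&](const double *xi, int b[3]) {
       for (int d = 0; d < 3; ++d) {
         b[d] = (int)((xi[d] - bboxlo[d]) / binsize);
         if (b[d] < 0) b[d] = 0;
         if (b[d] >= nbins[d]) b[d] = nbins[d] - 1;
       }
     };
     for (int i = 0; i < atoms.nall; ++i) {
       int b[3]; coord2bin(&atoms.x[3*i], b);
       int ibin = (b[2]*nbins[1] + b[1])*nbins[0] + b[0];
       next[i] = binhead[ibin];
       binhead[ibin] = i;
     }

     // 2) triply-nested loop: owned atoms i, stencil of surrounding bins,
     //    atoms j in each bin; keep pairs within the neighbor cutoff.
     int nstencil = (int)std::ceil(cutneigh / binsize);
     double cutsq = cutneigh * cutneigh;
     neigh.assign(atoms.nlocal, {});
     for (int i = 0; i < atoms.nlocal; ++i) {
       const double *xi = &atoms.x[3*i];
       int b[3]; coord2bin(xi, b);
       for (int dz = -nstencil; dz <= nstencil; ++dz)
         for (int dy = -nstencil; dy <= nstencil; ++dy)
           for (int dx = -nstencil; dx <= nstencil; ++dx) {
             int bx = b[0]+dx, by = b[1]+dy, bz = b[2]+dz;
             if (bx < 0 || by < 0 || bz < 0 ||
                 bx >= nbins[0] || by >= nbins[1] || bz >= nbins[2]) continue;
             for (int j = binhead[(bz*nbins[1] + by)*nbins[0] + bx]; j >= 0; j = next[j]) {
               if (j <= i) continue;         // store each pair only once
               double ddx = xi[0]-atoms.x[3*j], ddy = xi[1]-atoms.x[3*j+1], ddz = xi[2]-atoms.x[3*j+2];
               if (ddx*ddx + ddy*ddy + ddz*ddz < cutsq) neigh[i].push_back(j);
             }
           }
     }
   }

LAMMPS pre-computes the stencil offsets once per reneighboring strategy and, for half lists, uses an asymmetric stencil plus the newton-setting tie-break discussed below rather than the simple ``j > i`` test used here.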

Here are additional details about neighbor list build options LAMMPS supports:

- The choice of bin size is an option; a size half of :math:`R_n` has been found to be optimal for many typical cases. Smaller bins incur additional overhead to loop over; larger bins require more distance calculations. Note that for smaller bin sizes, the 2d stencil in the figure would be more semi-circular in shape (hemispherical in 3d), with bins near the corners of the square eliminated due to their distance from the origin bin.

- Depending on the interatomic potential(s) and other commands used in an input script, multiple neighbor lists and stencils with different attributes may be needed. This includes lists with different cutoff distances, e.g. for force computation versus occasional diagnostic computations such as a radial distribution function, or for the r-RESPA time integrator which can partition pairwise forces by distance into subsets computed at different time intervals. It includes "full" lists (as opposed to half lists) where each *i,j* pair appears twice, stored once with *i* and once with *j*, and which use a larger symmetric stencil. It also includes lists with partial enumeration of ghost atom neighbors. The full and ghost-atom lists are used by various manybody interatomic potentials. Lists may also use different criteria for inclusion of a pair interaction. Typically this depends only on the distance between two atoms and the cutoff distance. But for finite-size coarse-grained particles with individual diameters (e.g. polydisperse granular particles), it can also depend on the diameters of the two particles.

- When using :doc:`pair style hybrid <pair_hybrid>`, multiple sub-lists of the master neighbor list for the full system need to be generated, one for each sub-style, which contains only the *i,j* pairs needed to compute interactions between subsets of atoms for the corresponding potential. This means not all *i* or *j* atoms owned by a processor are included in a particular sub-list.

- Some models use different cutoff lengths for pairwise interactions between different kinds of particles which are stored in a single neighbor list. One example is a solvated colloidal system with large colloidal particles where colloid/colloid, colloid/solvent, and solvent/solvent interaction cutoffs can be dramatically different. Another is a model of polydisperse finite-size granular particles; pairs of particles interact only when they are in contact with each other. Mixtures with particle size ratios as high as 10-100x may be used to model realistic systems. Efficient neighbor list building algorithms for these kinds of systems are available in LAMMPS. They include a method which uses different stencils for different cutoff lengths and trims the stencil to only include bins that straddle the cutoff sphere surface. More recently a method which uses both multiple stencils and multiple bin sizes was developed; it builds neighbor lists efficiently for systems with particles of any size ratio, though other considerations (timestep size, force computations) may limit the ability to model systems with huge polydispersity.

- For small and sparse systems and as a fallback method, LAMMPS also supports neighbor list construction without binning by using a full :math:`O(N^2)` loop over all *i,j* atom pairs in a sub-domain when using the :doc:`neighbor nsq <neighbor>` command.

- Depending on the "pair" setting of the :doc:`newton <newton>` command, the "half" neighbor lists may contain **all** pairs of atoms where atom *j* is a ghost atom (i.e. when the newton pair setting is *off*). For the newton pair *on* setting, atom *j* is only added to the list if its *z* coordinate is larger, or, if those are equal, its *y* coordinate is larger, or, if those are equal too, its *x* coordinate is larger (see the small helper sketched after this list). For homogeneously dense systems this results in picking neighbors from a sector of the same size, always in the same direction relative to the "owned" atom, and thus it should lead to similar-length neighbor lists and reduce the chance of a load imbalance.
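Written out as code, the coordinate tie-break from the last bullet is just a short comparison chain. This is an illustration assuming plain coordinate arrays, not the LAMMPS implementation:

.. code-block:: c++

   // newton_ghost_check_sketch.cpp -- illustrative only, not LAMMPS code
   // Decide whether owned atom i stores ghost atom j in its half list when
   // "newton pair on" is active: keep the pair only if j is "above" i in z,
   // with ties broken first by y and then by x.
   inline bool store_ghost_pair(const double *xi, const double *xj)
   {
     if (xj[2] != xi[2]) return xj[2] > xi[2];
     if (xj[1] != xi[1]) return xj[1] > xi[1];
     return xj[0] > xi[0];
   }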
114
doc/src/Developer_par_openmp.rst
Normal file
@ -0,0 +1,114 @@
|
|||||||
|
OpenMP Parallelism
|
||||||
|
^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
The styles in the INTEL, KOKKOS, and OPENMP package offer to use OpenMP
|
||||||
|
thread parallelism to predominantly distribute loops over local data
|
||||||
|
and thus follow an orthogonal parallelization strategy to the
|
||||||
|
decomposition into spatial domains used by the :doc:`MPI partitioning
|
||||||
|
<Developer_par_part>`. For clarity, this section discusses only the
|
||||||
|
implementation in the OPENMP package as it is the simplest. The INTEL
|
||||||
|
and KOKKOS package offer additional options and are more complex since
|
||||||
|
they support more features and different hardware like co-processors
|
||||||
|
or GPUs.
|
||||||
|
|
||||||
|
One of the key decisions when implementing the OPENMP package was to
|
||||||
|
keep the changes to the source code small, so that it would be easier to
|
||||||
|
maintain the code and keep it in sync with the non-threaded standard
|
||||||
|
implementation. this is achieved by a) making the OPENMP version a
|
||||||
|
derived class from the regular version (e.g. ``PairLJCutOMP`` from
|
||||||
|
``PairLJCut``) and overriding only methods that are multi-threaded or
|
||||||
|
need to be modified to support multi-threading (similar to what was done
|
||||||
|
in the OPT package), b) keeping the structure in the modified code very
|
||||||
|
similar so that side-by-side comparisons are still useful, and c)
|
||||||
|
offloading additional functionality and multi-thread support functions
|
||||||
|
into three separate classes ``ThrOMP``, ``ThrData``, and ``FixOMP``.
|
||||||
|
``ThrOMP`` provides additional, multi-thread aware functionality not
|
||||||
|
available in the corresponding base class (e.g. ``Pair`` for
|
||||||
|
``PairLJCutOMP``) like multi-thread aware variants of the "tally"
|
||||||
|
functions. Those functions are made available through multiple
|
||||||
|
inheritance so those new functions have to have unique names to avoid
|
||||||
|
ambiguities; typically ``_thr`` is appended to the name of the function.
|
||||||
|
``ThrData`` is a classes that manages per-thread data structures.
|
||||||
|
It is used instead of extending the corresponding storage to per-thread
|
||||||
|
arrays to avoid slowdowns due to "false sharing" when multiple threads
|
||||||
|
update adjacent elements in an array and thus force the CPU cache lines
|
||||||
|
to be reset and re-fetched. ``FixOMP`` finally manages the "multi-thread
|
||||||
|
state" like settings and access to per-thread storage, it is activated
|
||||||
|
by the :doc:`package omp <package>` command.

Avoiding data races
"""""""""""""""""""

A key problem when implementing thread parallelism in an MD code is to
avoid data races when updating accumulated properties like forces,
energies, and stresses.  Computed interactions always involve multiple
atoms, so race conditions arise when multiple threads want to update
per-atom data of the same atoms.  Five possible strategies were
considered to avoid this:

1) restructure the code so that no overlapping access is possible when
   computing in parallel, e.g. by breaking lists into multiple parts
   and synchronizing threads in between;
2) have each thread be "responsible" for a specific group of atoms,
   compute interactions multiple times (once on each thread that is
   responsible for one of the atoms involved), and have each thread
   update only the properties of its own atoms;
3) use mutexes around functions and regions of code where the data
   race could happen;
4) use atomic operations when updating per-atom properties;
5) use replicated per-thread data structures to accumulate data without
   conflicts and then use a reduction to combine those results into the
   data structures used by the regular style.

Option 5 was chosen for the OPENMP package because it retains the
performance of the 1-thread case and keeps the code more maintainable.
Option 1 would require extensive code changes, particularly to the
neighbor list code; option 2 would incur a 2x or larger performance
penalty for the serial case; option 3 causes significant overhead and
would enforce serialization of operations in inner loops and thus
defeat the purpose of multi-threading; option 4 also slows down the
serial case, although not quite as badly as option 2.  The downside of
option 5 is that the overhead of the reduction operations grows with
the number of threads used, so there is a crossover point beyond which
options 2 or 4 would result in faster execution.  That is why, for
example, option 2 is used in the GPU package, since a GPU is a
processor with a massive number of threads.  However, since the MPI
parallelization is generally more effective for typical MD systems, the
expectation is that thread parallelism is only used for a small number
of threads (2-8).  At the time of its implementation, that number was
equivalent to the number of CPU cores per CPU socket on high-end
supercomputers.

Thus, when OpenMP support is enabled, arrays like the force array are
dimensioned to the number of atoms times the number of threads, and
inside the compute functions each thread obtains a pointer to a
different chunk of that storage.  Similarly, accumulators like the
potential energy or the virial are kept in per-thread instances of the
``ThrData`` class and are only reduced and stored in their global
counterparts at the end of the force computation.


Loop scheduling
"""""""""""""""

Multi-thread parallelization is applied by statically distributing
(outer) loops across threads.  Typically this is the loop over local
atoms *i* when processing *i,j* pairs of atoms from a neighbor list.
The design of the neighbor list code results in atoms having a similar
number of neighbors for homogeneous systems, so load imbalances across
threads are uncommon; they typically occur for systems where the MPI
parallelization would also be unbalanced, which usually has a more
pronounced impact on performance.  The same loop scheduling scheme can
also be applied to the reduction operations on per-atom data to reduce
the overhead of the reduction step.
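
The following self-contained sketch illustrates the overall pattern:
per-thread force buffers, a statically chunked loop over the "owned"
atoms, and a final reduction that is itself distributed over the
threads.  It uses a flat scalar "force" array and a placeholder pair
interaction for brevity; the actual OPENMP package routes this through
``ThrOMP`` and ``ThrData``, but the chunking and reduction logic is
analogous.

.. code-block:: c++

   #include <cstddef>
   #include <omp.h>
   #include <vector>

   // Sketch only: per-thread force buffers plus a distributed reduction.
   void compute_forces(int nlocal, int nall, int nthreads,
                       const std::vector<std::vector<int>> &neigh,
                       std::vector<double> &f)     // global force, length nall
   {
     // replicated force storage: one chunk of length nall per thread
     std::vector<double> fbuf((std::size_t) nall * nthreads, 0.0);

   #pragma omp parallel num_threads(nthreads)
     {
       const int tid = omp_get_thread_num();
       double *fthr = fbuf.data() + (std::size_t) tid * nall;

       // static chunking of the outer loop over local atoms i
       const int ifrom = (int) (1.0 * nlocal * tid / nthreads);
       const int ito   = (int) (1.0 * nlocal * (tid + 1) / nthreads);

       for (int i = ifrom; i < ito; ++i)
         for (int j : neigh[i]) {
           const double fij = 1.0;   // placeholder pair force
           fthr[i] += fij;           // no race: each thread owns its chunk
           fthr[j] -= fij;           // j may belong to another thread's i-range
         }

   #pragma omp barrier
       // reduction into the global array, split over threads the same way
       const int afrom = (int) (1.0 * nall * tid / nthreads);
       const int ato   = (int) (1.0 * nall * (tid + 1) / nthreads);
       for (int t = 0; t < nthreads; ++t)
         for (int n = afrom; n < ato; ++n)
           f[n] += fbuf[(std::size_t) t * nall + n];
     }
   }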

Neighbor list parallelization
"""""""""""""""""""""""""""""

In addition to the force computations, the generation of the neighbor
lists is also parallelized.  As explained previously, neighbor lists
are built by looping over "owned" atoms and storing the neighbors in
"pages".  In the OPENMP variants of the neighbor list code, each thread
operates on a different chunk of the "owned" atoms and allocates and
fills its own set of pages with neighbor list data.  This is achieved
by each thread keeping its own instance of the :cpp:class:`MyPage
<LAMMPS_NS::MyPage>` page allocator class.
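
The sketch below illustrates the idea of per-thread paged neighbor
storage.  The ``PageStore`` type here is a deliberately simplified
stand-in, not the interface of the real ``MyPage`` class, and the
neighbor check is a placeholder; only the pattern of each thread
filling its own pages for its own chunk of atoms is meant to carry
over.

.. code-block:: c++

   #include <memory>
   #include <omp.h>
   #include <vector>

   // Simplified paged storage: neighbor indices for one atom are written
   // into the current page; a new page is started when the current one is
   // too full.  Pointers into pages stay valid, unlike a growing vector.
   struct PageStore {
     static constexpr int pagesize = 4096;
     std::vector<std::unique_ptr<int[]>> pages;
     int used = pagesize;                  // forces allocation of first page
     int *reserve(int nmax) {              // room for one atom's neighbors
       if (used + nmax > pagesize) {       // (assumes nmax <= pagesize)
         pages.emplace_back(new int[pagesize]);
         used = 0;
       }
       return pages.back().get() + used;
     }
     void commit(int n) { used += n; }     // keep only what was filled
   };

   void build_lists(int nlocal,
                    const std::vector<std::vector<int>> &candidates,
                    std::vector<int *> &firstneigh, std::vector<int> &numneigh,
                    std::vector<PageStore> &perthread)  // one store per thread
   {
   #pragma omp parallel
     {
       PageStore &store = perthread[omp_get_thread_num()];
   #pragma omp for schedule(static)
       for (int i = 0; i < nlocal; ++i) {
         int *list = store.reserve((int) candidates[i].size());
         int n = 0;
         for (int j : candidates[i])
           if (j != i) list[n++] = j;      // placeholder for the cutoff test
         firstneigh[i] = list;
         numneigh[i] = n;
         store.commit(n);
       }
     }
   }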
89
doc/src/Developer_par_part.rst
Normal file
@ -0,0 +1,89 @@
Partitioning
^^^^^^^^^^^^

The underlying spatial decomposition strategy used by LAMMPS for
distributed-memory parallelism is set with the :doc:`comm_style command
<comm_style>` and can be either "brick" (a regular grid) or "tiled".

.. _domain-decomposition:
.. figure:: img/domain-decomp.png
   :align: center

   domain decomposition

   This figure shows the different kinds of domain decomposition used
   for MPI parallelization: "brick" on the left with an orthogonal
   (left) and a triclinic (middle) simulation domain, and a "tiled"
   decomposition (right).  The black lines show the division into
   sub-domains and the contained atoms are "owned" by the corresponding
   MPI process.  The green dashed lines indicate how sub-domains are
   extended with "ghost" atoms up to the communication cutoff distance.

The LAMMPS simulation box is a 3d or 2d volume, which can be orthogonal
or triclinic in shape, as illustrated in the :ref:`domain-decomposition`
figure for the 2d case.  Orthogonal means the box edges are aligned with
the *x*, *y*, *z* Cartesian axes, and the box faces are thus all
rectangular.  Triclinic allows for a more general parallelepiped shape
in which the edges are aligned with three arbitrary vectors and the box
faces are parallelograms.  In each dimension, box faces can be periodic,
or non-periodic with fixed or shrink-wrapped boundaries.  In the fixed
case, atoms which move outside the face are deleted; shrink-wrapped
means the position of the box face adjusts continuously to enclose all
the atoms.
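
As a small, hypothetical input fragment (the specific values are
arbitrary and only meant to illustrate the options described above),
an orthogonal or triclinic box with mixed boundary conditions could be
set up like this:

.. code-block:: LAMMPS

   comm_style brick            # regular grid of sub-domains (default)
   boundary   p p s            # periodic in x and y, shrink-wrapped in z
   region     box prism 0 20 0 20 0 20 5.0 0.0 0.0
   create_box 1 box            # triclinic box because of the non-zero tilt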

For distributed-memory MPI parallelism, the simulation box is spatially
decomposed (partitioned) into non-overlapping sub-domains which fill the
box.  The default partitioning, "brick", is most suitable when the atom
density is roughly uniform, as shown in the left-side images of the
:ref:`domain-decomposition` figure.  The sub-domains comprise a regular
grid and all sub-domains are identical in size and shape.  Both the
orthogonal and triclinic boxes can deform continuously during a
simulation, e.g. to compress a solid or shear a liquid, in which case
the processor sub-domains likewise deform.

For models with non-uniform density, the number of particles per
processor can be load-imbalanced with the default partitioning.  This
reduces parallel efficiency, as the overall simulation rate is limited
by the slowest processor, i.e. the one with the largest computational
load.  For such models, LAMMPS supports several strategies to reduce
the load imbalance:

- The processor grid decomposition is by default based on the
  simulation cell volume and tries to optimize the volume-to-surface
  ratio of the sub-domains.  This can be changed with the
  :doc:`processors command <processors>`.
- The parallel planes defining the extent of the sub-domains can be
  shifted with the :doc:`balance command <balance>`, which can be done
  in addition to choosing a more optimal processor grid.
- The recursive bisectioning algorithm, in combination with the "tiled"
  communication style, can produce a partitioning with equal numbers of
  particles in each sub-domain.
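
A hypothetical input fragment combining these commands could look as
follows; the grid dimensions and imbalance thresholds are arbitrary
example values, not recommendations:

.. code-block:: LAMMPS

   processors 2 6 1                 # request a fixed 2x6x1 processor grid
   balance    1.1 shift xy 10 1.05  # shift sub-domain planes in x and y

   # alternatively: recursive bisectioning with the tiled decomposition
   comm_style tiled
   balance    1.1 rcb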

.. |decomp1| image:: img/decomp-regular.png
   :width: 24%
.. |decomp2| image:: img/decomp-processors.png
   :width: 24%
.. |decomp3| image:: img/decomp-balance.png
   :width: 24%
.. |decomp4| image:: img/decomp-rcb.png
   :width: 24%

|decomp1| |decomp2| |decomp3| |decomp4|

The pictures above demonstrate different decompositions for a 2d system
with 12 MPI ranks.  The atom colors indicate the load imbalance of each
sub-domain, with green being optimal and red the least optimal.

Due to the vacuum in the system, the default decomposition is
unbalanced, with several MPI ranks owning no atoms (left).  By forcing a
1x12x1 processor grid, every MPI rank now does computations, but the
number of atoms per sub-domain is still uneven and the thin slice shape
increases the amount of communication between sub-domains (center
left).  With a 2x6x1 processor grid and shifting of the sub-domain
divisions, the load imbalance is further reduced and less communication
between sub-domains is required (center right).  Using recursive
bisectioning leads to a further improved decomposition (right).
28
doc/src/Developer_parallel.rst
Normal file
@ -0,0 +1,28 @@
Parallel algorithms
-------------------

LAMMPS is designed to run simulations in parallel using the MPI
parallel communication standard, with the data distributed via a domain
decomposition.  The parallelization aims to be efficient and to result
in good strong scaling (= good speedup for a system of fixed size) and
good weak scaling (= the computational cost grows proportionally with
the system size).  Additional parallelization using GPUs or OpenMP can
also be applied within the sub-domain assigned to an MPI process.  For
clarity, most of the following illustrations show the 2d simulation
case.  The underlying algorithms, however, apply equally well to both
the 2d and the 3d case.

.. note::

   The text and most of the figures in this chapter were adapted
   for the manual from the section on parallel algorithms in the
   :ref:`new LAMMPS paper <lammps_paper>`.

.. toctree::
   :maxdepth: 1

   Developer_par_part
   Developer_par_comm
   Developer_par_neigh
   Developer_par_long
   Developer_par_openmp
@ -60,6 +60,9 @@ silently returning the result of a partial conversion or zero in cases
|
|||||||
where the string is not a valid number. This behavior makes it easier
|
where the string is not a valid number. This behavior makes it easier
|
||||||
to detect typos or issues when processing input files.
|
to detect typos or issues when processing input files.
|
||||||
|
|
||||||
|
Similarly the :cpp:func:`logical() <LAMMPS_NS::utils::logical>` function
|
||||||
|
will convert a string into a boolean and will only accept certain words.
|
||||||
|
|
||||||
The *do_abort* flag should be set to ``true`` in case this function
|
The *do_abort* flag should be set to ``true`` in case this function
|
||||||
is called only on a single MPI rank, as that will then trigger
|
is called only on a single MPI rank, as that will then trigger
|
||||||
a call to ``Error::one()`` for errors instead of ``Error::all()``
|
a call to ``Error::one()`` for errors instead of ``Error::all()``
|
||||||
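
As a brief illustration of the conventions described above (the
argument names and indices here are hypothetical, but the call pattern
with ``FLERR`` and the *do_abort* flag follows the functions documented
on this page), parsing command arguments typically looks like this:

.. code-block:: c++

   // parse three hypothetical arguments of a command or style
   double cutoff = utils::numeric(FLERR, arg[0], false, lmp);   // double
   int    nevery = utils::inumeric(FLERR, arg[1], false, lmp);  // integer
   bool   doavg  = utils::logical(FLERR, arg[2], false, lmp) != 0;
   // with do_abort == false an invalid string triggers Error::all();
   // pass true when the call happens on a single MPI rank only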
@ -83,6 +86,9 @@ strings for compliance without conversion.
|
|||||||
.. doxygenfunction:: tnumeric
|
.. doxygenfunction:: tnumeric
|
||||||
:project: progguide
|
:project: progguide
|
||||||
|
|
||||||
|
.. doxygenfunction:: logical
|
||||||
|
:project: progguide
|
||||||
|
|
||||||
|
|
||||||
String processing
|
String processing
|
||||||
^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^
|
||||||
@ -203,6 +209,9 @@ Convenience functions
|
|||||||
.. doxygenfunction:: date2num
|
.. doxygenfunction:: date2num
|
||||||
:project: progguide
|
:project: progguide
|
||||||
|
|
||||||
|
.. doxygenfunction:: current_date
|
||||||
|
:project: progguide
|
||||||
|
|
||||||
Customized standard functions
|
Customized standard functions
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
|
|||||||
@ -40,11 +40,10 @@ We use it to show how to identify the origin of a segmentation fault.
|
|||||||
|
|
||||||
After recompiling LAMMPS and running the input you should get something like this:
|
After recompiling LAMMPS and running the input you should get something like this:
|
||||||
|
|
||||||
.. code-block:
|
.. code-block::
|
||||||
|
|
||||||
$ ./lmp -in in.melt
|
$ ./lmp -in in.melt
|
||||||
LAMMPS (19 Mar 2020)
|
LAMMPS (19 Mar 2020)
|
||||||
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:94)
|
|
||||||
using 1 OpenMP thread(s) per MPI task
|
using 1 OpenMP thread(s) per MPI task
|
||||||
Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
|
Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
|
||||||
Created orthogonal box = (0 0 0) to (16.796 16.796 16.796)
|
Created orthogonal box = (0 0 0) to (16.796 16.796 16.796)
|
||||||
|
|||||||
@ -4,28 +4,41 @@ Citing LAMMPS
|
|||||||
Core Algorithms
|
Core Algorithms
|
||||||
^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
Since LAMMPS is a community project, there is not a single one
|
The paper mentioned below is the best overview of LAMMPS, but there are
|
||||||
publication or reference that describes **all** of LAMMPS.
|
also publications describing particular models or algorithms implemented
|
||||||
The canonical publication that describes the foundation, that is
|
in LAMMPS or complementary software that it has interfaces to. Please
|
||||||
the basic spatial decomposition approach, the neighbor finding,
|
see below for how to cite contributions to LAMMPS.
|
||||||
and basic communications algorithms used in LAMMPS is:
|
|
||||||
|
.. _lammps_paper:
|
||||||
|
|
||||||
|
The latest canonical publication that describes the basic features, the
|
||||||
|
source code design, the program structure, the spatial decomposition
|
||||||
|
approach, the neighbor finding, basic communications algorithms, and how
|
||||||
|
users and developers have contributed to LAMMPS is:
|
||||||
|
|
||||||
|
`LAMMPS - A flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales, Comp. Phys. Comm. (accepted 09/2021), DOI:10.1016/j.cpc.2021.108171 <https://doi.org/10.1016/j.cpc.2021.108171>`_
|
||||||
|
|
||||||
|
So a project using LAMMPS or a derivative application that uses LAMMPS
|
||||||
|
as a simulation engine should cite this paper. The paper is expected to
|
||||||
|
be published in its final form under the same DOI in the first half
|
||||||
|
of 2022. Please also give the URL of the LAMMPS website in your paper,
|
||||||
|
namely https://www.lammps.org.
|
||||||
|
|
||||||
|
The original publication describing the parallel algorithms used in the
|
||||||
|
initial versions of LAMMPS is:
|
||||||
|
|
||||||
`S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). <http://www.sandia.gov/~sjplimp/papers/jcompphys95.pdf>`_
|
`S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). <http://www.sandia.gov/~sjplimp/papers/jcompphys95.pdf>`_
|
||||||
|
|
||||||
So any project using LAMMPS (or a derivative application using LAMMPS as
|
|
||||||
a simulation engine) should cite this paper. A new publication
|
|
||||||
describing the developments and improvements of LAMMPS in the 25 years
|
|
||||||
since then is currently in preparation.
|
|
||||||
|
|
||||||
|
|
||||||
DOI for the LAMMPS code
|
DOI for the LAMMPS code
|
||||||
^^^^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
LAMMPS developers use the `Zenodo service at CERN
|
LAMMPS developers use the `Zenodo service at CERN <https://zenodo.org/>`_
|
||||||
<https://zenodo.org/>`_ to create digital object identifies (DOI) for
|
to create digital object identifiers (DOI) for stable releases of the
|
||||||
stable releases of the LAMMPS code. There are two types of DOIs for the
|
LAMMPS source code. There are two types of DOIs for the LAMMPS source code.
|
||||||
LAMMPS source code: the canonical DOI for **all** versions of LAMMPS,
|
|
||||||
which will always point to the **latest** stable release version is:
|
The canonical DOI for **all** versions of LAMMPS, which will always
|
||||||
|
point to the **latest** stable release version is:
|
||||||
|
|
||||||
- DOI: `10.5281/zenodo.3726416 <https://dx.doi.org/10.5281/zenodo.3726416>`_
|
- DOI: `10.5281/zenodo.3726416 <https://dx.doi.org/10.5281/zenodo.3726416>`_
|
||||||
|
|
||||||
@ -45,11 +58,13 @@ about LAMMPS and its features.
|
|||||||
Citing contributions
|
Citing contributions
|
||||||
^^^^^^^^^^^^^^^^^^^^
|
^^^^^^^^^^^^^^^^^^^^
|
||||||
|
|
||||||
LAMMPS has many features and that use either previously published
|
LAMMPS has many features that use either previously published methods
|
||||||
methods and algorithms or novel features. It also includes potential
|
and algorithms or novel features. It also includes potential parameter
|
||||||
parameter filed for specific models. Where available, a reminder about
|
files for specific models. Where available, a reminder about references
|
||||||
references for optional features used in a specific run is printed to
|
for optional features used in a specific run is printed to the screen
|
||||||
the screen and log file. Style and output location can be selected with
|
and log file. Style and output location can be selected with the
|
||||||
the :ref:`-cite command-line switch <cite>`. Additional references are
|
:ref:`-cite command-line switch <cite>`. Additional references are
|
||||||
given in the documentation of the :doc:`corresponding commands
|
given in the documentation of the :doc:`corresponding commands
|
||||||
<Commands_all>` or in the :doc:`Howto tutorials <Howto>`.
|
<Commands_all>` or in the :doc:`Howto tutorials <Howto>`. So please
|
||||||
|
make certain that you provide the proper acknowledgments and citations
|
||||||
|
in any published works using LAMMPS.
|
||||||
|
|||||||
@ -26,7 +26,7 @@ available online are listed below.
|
|||||||
* `Tutorials <https://www.lammps.org/tutorials.html>`_
|
* `Tutorials <https://www.lammps.org/tutorials.html>`_
|
||||||
|
|
||||||
* `Pre- and post-processing tools for LAMMPS <https://www.lammps.org/prepost.html>`_
|
* `Pre- and post-processing tools for LAMMPS <https://www.lammps.org/prepost.html>`_
|
||||||
* `Other software usable with LAMMPS <https://www.lammps.org/offsite.html>`_
|
* `Other software usable with LAMMPS <https://www.lammps.org/external.html>`_
|
||||||
* `Viz tools usable with LAMMPS <https://www.lammps.org/viz.html>`_
|
* `Viz tools usable with LAMMPS <https://www.lammps.org/viz.html>`_
|
||||||
|
|
||||||
* `Benchmark performance <https://www.lammps.org/bench.html>`_
|
* `Benchmark performance <https://www.lammps.org/bench.html>`_
|
||||||
|
|||||||
@ -34,7 +34,7 @@ simple example demonstrating its use:
|
|||||||
int lmpargc = sizeof(lmpargv)/sizeof(const char *);
|
int lmpargc = sizeof(lmpargv)/sizeof(const char *);
|
||||||
|
|
||||||
/* create LAMMPS instance */
|
/* create LAMMPS instance */
|
||||||
handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL);
|
handle = lammps_open_no_mpi(lmpargc, (char **)lmpargv, NULL);
|
||||||
if (handle == NULL) {
|
if (handle == NULL) {
|
||||||
printf("LAMMPS initialization failed");
|
printf("LAMMPS initialization failed");
|
||||||
lammps_mpi_finalize();
|
lammps_mpi_finalize();
|
||||||
|
|||||||
@ -115,8 +115,8 @@ External contributions
|
|||||||
|
|
||||||
If you prefer to do so, you can also develop and support your add-on
|
If you prefer to do so, you can also develop and support your add-on
|
||||||
feature **without** having it included in the LAMMPS distribution, for
|
feature **without** having it included in the LAMMPS distribution, for
|
||||||
example as a download from a website of your own. See the `Offsite
|
example as a download from a website of your own. See the `External
|
||||||
LAMMPS packages and tools <https://www.lammps.org/offsite.html>`_ page
|
LAMMPS packages and tools <https://www.lammps.org/external.html>`_ page
|
||||||
of the LAMMPS website for examples of groups that do this. We are happy
|
of the LAMMPS website for examples of groups that do this. We are happy
|
||||||
to advertise your package and website from that page. Simply email the
|
to advertise your package and website from that page. Simply email the
|
||||||
`developers <https://www.lammps.org/authors.html>`_ with info about your
|
`developers <https://www.lammps.org/authors.html>`_ with info about your
|
||||||
|
|||||||
@ -305,19 +305,22 @@ you are uncertain, please ask.
|
|||||||
FILE pointers and only be done on MPI rank 0. Use the :cpp:func:`utils::logmesg`
|
FILE pointers and only be done on MPI rank 0. Use the :cpp:func:`utils::logmesg`
|
||||||
convenience function where possible.
|
convenience function where possible.
|
||||||
|
|
||||||
- header files should only include the absolute minimum number of
|
- Header files, especially those defining a "style", should only use
|
||||||
include files and **must not** contain any ``using`` statements;
|
the absolute minimum number of include files and **must not** contain
|
||||||
rather the include statements should be put into the corresponding
|
any ``using`` statements. Typically that would be only the header for
|
||||||
implementation files. For implementation files, the
|
the base class. Instead any include statements should be put into the
|
||||||
"include-what-you-use" principle should be employed. However, when
|
corresponding implementation files, and forward declarations should be used.
|
||||||
including the ``pointers.h`` header (or one of the base classes
|
For implementation files, the "include what you use" principle should
|
||||||
derived from it) certain headers will be included and thus need to be
|
be employed. However, there is the notable exception that when the
|
||||||
specified. These are: `mpi.h`, `cstddef`, `cstdio`, `cstdlib`,
|
``pointers.h`` header is included (or one of the base classes derived
|
||||||
`string`, `utils.h`, `fmt/format.h`, `climits`, `cinttypes`. This also
|
from it) certain headers will always be included and thus do not need
|
||||||
means any header can assume that `FILE`, `NULL`, and `INT_MAX` are
|
to be explicitly specified.
|
||||||
defined.
|
These are: `mpi.h`, `cstddef`, `cstdio`, `cstdlib`, `string`, `utils.h`,
|
||||||
|
`vector`, `fmt/format.h`, `climits`, `cinttypes`.
|
||||||
|
This also means any such file can assume that `FILE`, `NULL`, and
|
||||||
|
`INT_MAX` are defined.
|
||||||
|
|
||||||
- header files that define a new LAMMPS style (i.e. that have a
|
- Header files that define a new LAMMPS style (i.e. that have a
|
||||||
``SomeStyle(some/name,SomeName);`` macro in them) should only use the
|
``SomeStyle(some/name,SomeName);`` macro in them) should only use the
|
||||||
include file for the base class and otherwise use forward declarations
|
include file for the base class and otherwise use forward declarations
|
||||||
and pointers; when interfacing to a library use the PIMPL (pointer
|
and pointers; when interfacing to a library use the PIMPL (pointer
|
||||||
@ -325,7 +328,7 @@ you are uncertain, please ask.
|
|||||||
that contains all library specific data (and thus requires the library
|
that contains all library specific data (and thus requires the library
|
||||||
header) but use a forward declaration and define the struct only in
|
header) but use a forward declaration and define the struct only in
|
||||||
the implementation file. This is a **strict** requirement since this
|
the implementation file. This is a **strict** requirement since this
|
||||||
is where type clashes between packages and hard to fine bugs have
|
is where type clashes between packages and hard-to-find bugs have
|
||||||
regularly manifested in the past.
|
regularly manifested in the past.
|
||||||
|
|
||||||
- Please use clang-format only to reformat files that you have
|
- Please use clang-format only to reformat files that you have
|
||||||
|
|||||||
@ -2,17 +2,25 @@ Basics of running LAMMPS
|
|||||||
========================
|
========================
|
||||||
|
|
||||||
LAMMPS is run from the command line, reading commands from a file via
|
LAMMPS is run from the command line, reading commands from a file via
|
||||||
the -in command line flag, or from standard input.
|
the -in command line flag, or from standard input. Using the "-in
|
||||||
Using the "-in in.file" variant is recommended:
|
in.file" variant is recommended (see note below). The name of the
|
||||||
|
LAMMPS executable is either ``lmp`` or ``lmp_<machine>`` with
|
||||||
|
`<machine>` being the machine string used when compiling LAMMPS. This
|
||||||
|
is required when compiling LAMMPS with the traditional build system
|
||||||
|
(e.g. with ``make mpi``), but optional when using CMake to configure and
|
||||||
|
build LAMMPS:
|
||||||
|
|
||||||
.. code-block:: bash
|
.. code-block:: bash
|
||||||
|
|
||||||
$ lmp_serial -in in.file
|
$ lmp_serial -in in.file
|
||||||
$ lmp_serial < in.file
|
$ lmp_serial < in.file
|
||||||
|
$ lmp -in in.file
|
||||||
|
$ lmp < in.file
|
||||||
$ /path/to/lammps/src/lmp_serial -i in.file
|
$ /path/to/lammps/src/lmp_serial -i in.file
|
||||||
$ mpirun -np 4 lmp_mpi -in in.file
|
$ mpirun -np 4 lmp_mpi -in in.file
|
||||||
|
$ mpiexec -np 4 lmp -in in.file
|
||||||
$ mpirun -np 8 /path/to/lammps/src/lmp_mpi -in in.file
|
$ mpirun -np 8 /path/to/lammps/src/lmp_mpi -in in.file
|
||||||
$ mpirun -np 6 /usr/local/bin/lmp -in in.file
|
$ mpiexec -n 6 /usr/local/bin/lmp -in in.file
|
||||||
|
|
||||||
You normally run the LAMMPS command in the directory where your input
|
You normally run the LAMMPS command in the directory where your input
|
||||||
script is located. That is also where output files are produced by
|
script is located. That is also where output files are produced by
|
||||||
@ -23,7 +31,7 @@ executable itself can be placed elsewhere.
|
|||||||
.. note::
|
.. note::
|
||||||
|
|
||||||
The redirection operator "<" will not always work when running
|
The redirection operator "<" will not always work when running
|
||||||
in parallel with mpirun; for those systems the -in form is required.
|
in parallel with mpirun or mpiexec; for those systems the -in form is required.
|
||||||
|
|
||||||
As LAMMPS runs it prints info to the screen and a logfile named
|
As LAMMPS runs it prints info to the screen and a logfile named
|
||||||
*log.lammps*\ . More info about output is given on the
|
*log.lammps*\ . More info about output is given on the
|
||||||
|
|||||||
@ -7,7 +7,7 @@ steps are often necessary to setup and analyze a simulation. A list
|
|||||||
of such tools can be found on the `LAMMPS webpage <lws_>`_ at these links:
|
of such tools can be found on the `LAMMPS webpage <lws_>`_ at these links:
|
||||||
|
|
||||||
* `Pre/Post processing <https://www.lammps.org/prepost.html>`_
|
* `Pre/Post processing <https://www.lammps.org/prepost.html>`_
|
||||||
* `Offsite LAMMPS packages & tools <https://www.lammps.org/offsite.html>`_
|
* `External LAMMPS packages & tools <https://www.lammps.org/external.html>`_
|
||||||
* `Pizza.py toolkit <pizza_>`_
|
* `Pizza.py toolkit <pizza_>`_
|
||||||
|
|
||||||
The last link for `Pizza.py <pizza_>`_ is a Python-based tool developed at
|
The last link for `Pizza.py <pizza_>`_ is a Python-based tool developed at
|
||||||
|
|||||||
@ -8,9 +8,8 @@ fix brownian command
|
|||||||
fix brownian/sphere command
|
fix brownian/sphere command
|
||||||
===========================
|
===========================
|
||||||
|
|
||||||
fix brownian/sphere command
|
fix brownian/asphere command
|
||||||
===========================
|
============================
|
||||||
|
|
||||||
|
|
||||||
Syntax
|
Syntax
|
||||||
""""""
|
""""""
|
||||||
|
|||||||
@ -38,7 +38,7 @@ Syntax
|
|||||||
*intersect* args = two or more group IDs
|
*intersect* args = two or more group IDs
|
||||||
*dynamic* args = parent-ID keyword value ...
|
*dynamic* args = parent-ID keyword value ...
|
||||||
one or more keyword/value pairs may be appended
|
one or more keyword/value pairs may be appended
|
||||||
keyword = *region* or *var* or *every*
|
keyword = *region* or *var* or *property* or *every*
|
||||||
*region* value = region-ID
|
*region* value = region-ID
|
||||||
*var* value = name of variable
|
*var* value = name of variable
|
||||||
*property* value = name of custom integer or floating point vector
|
*property* value = name of custom integer or floating point vector
|
||||||
|
|||||||
BIN doc/src/img/decomp-balance.png (new file, 129 KiB)
BIN doc/src/img/decomp-processors.png (new file, 121 KiB)
BIN doc/src/img/decomp-rcb.png (new file, 121 KiB)
BIN doc/src/img/decomp-regular.png (new file, 121 KiB)
BIN doc/src/img/domain-decomp.png (new file, 547 KiB)
BIN doc/src/img/fft-decomp-parallel.png (new file, 35 KiB)
BIN doc/src/img/ghost-comm.png (new file, 33 KiB)
BIN doc/src/img/neigh-stencil.png (new file, 53 KiB)
@ -27,7 +27,7 @@ Syntax
|
|||||||
on = set Newton pairwise flag on (currently not allowed)
|
on = set Newton pairwise flag on (currently not allowed)
|
||||||
*pair/only* = *off* or *on*
|
*pair/only* = *off* or *on*
|
||||||
off = apply "gpu" suffix to all available styles in the GPU package (default)
|
off = apply "gpu" suffix to all available styles in the GPU package (default)
|
||||||
on - apply "gpu" suffix only pair styles
|
on = apply "gpu" suffix only pair styles
|
||||||
*binsize* value = size
|
*binsize* value = size
|
||||||
size = bin size for neighbor list construction (distance units)
|
size = bin size for neighbor list construction (distance units)
|
||||||
*split* = fraction
|
*split* = fraction
|
||||||
|
|||||||
@ -198,8 +198,8 @@ same:
|
|||||||
|
|
||||||
Coefficients must be defined for each pair of atoms types via the
|
Coefficients must be defined for each pair of atoms types via the
|
||||||
:doc:`pair_coeff <pair_coeff>` command as described above, or in the
|
:doc:`pair_coeff <pair_coeff>` command as described above, or in the
|
||||||
data file read by the :doc:`read_data <read_data>` commands, or by
|
"Pair Coeffs" or "PairIJ Coeffs" section of the data file read by the
|
||||||
mixing as described below.
|
:doc:`read_data <read_data>` command, or by mixing as described below.
|
||||||
|
|
||||||
For all of the *hybrid*, *hybrid/overlay*, and *hybrid/scaled* styles,
|
For all of the *hybrid*, *hybrid/overlay*, and *hybrid/scaled* styles,
|
||||||
every atom type pair I,J (where I <= J) must be assigned to at least one
|
every atom type pair I,J (where I <= J) must be assigned to at least one
|
||||||
@ -208,14 +208,21 @@ examples above, or in the data file read by the :doc:`read_data
|
|||||||
<read_data>`, or by mixing as described below. Also all sub-styles
|
<read_data>`, or by mixing as described below. Also all sub-styles
|
||||||
must be used at least once in a :doc:`pair_coeff <pair_coeff>` command.
|
must be used at least once in a :doc:`pair_coeff <pair_coeff>` command.
|
||||||
|
|
||||||
|
.. note::
|
||||||
|
|
||||||
|
LAMMPS never performs mixing of parameters from different sub-styles,
|
||||||
|
**even** if they use the same type of coefficients, e.g. contain
|
||||||
|
a Lennard-Jones potential variant. Those parameters must be provided
|
||||||
|
explicitly.
|
||||||
|
|
||||||
If you want there to be no interactions between a particular pair of
|
If you want there to be no interactions between a particular pair of
|
||||||
atom types, you have 3 choices. You can assign the type pair to some
|
atom types, you have 3 choices. You can assign the pair of atom types
|
||||||
sub-style and use the :doc:`neigh_modify exclude type <neigh_modify>`
|
to some sub-style and use the :doc:`neigh_modify exclude type <neigh_modify>`
|
||||||
command. You can assign it to some sub-style and set the coefficients
|
command. You can assign it to some sub-style and set the coefficients
|
||||||
so that there is effectively no interaction (e.g. epsilon = 0.0 in a LJ
|
so that there is effectively no interaction (e.g. epsilon = 0.0 in a LJ
|
||||||
potential). Or, for *hybrid*, *hybrid/overlay*, or *hybrid/scaled*
|
potential). Or, for *hybrid*, *hybrid/overlay*, or *hybrid/scaled*
|
||||||
simulations, you can use this form of the pair_coeff command in your
|
simulations, you can use this form of the pair_coeff command in your
|
||||||
input script:
|
input script or the "PairIJ Coeffs" section of your data file:
|
||||||
|
|
||||||
.. code-block:: LAMMPS
|
.. code-block:: LAMMPS
|
||||||
|
|
||||||
@ -238,19 +245,20 @@ styles with different requirements.
|
|||||||
|
|
||||||
----------
|
----------
|
||||||
|
|
||||||
Different force fields (e.g. CHARMM vs AMBER) may have different rules
|
Different force fields (e.g. CHARMM vs. AMBER) may have different rules
|
||||||
for applying weightings that change the strength of pairwise
|
for applying exclusions or weights that change the strength of pairwise
|
||||||
interactions between pairs of atoms that are also 1-2, 1-3, and 1-4
|
non-bonded interactions between pairs of atoms that are also 1-2, 1-3,
|
||||||
neighbors in the molecular bond topology, as normally set by the
|
and 1-4 neighbors in the molecular bond topology. This is normally a
|
||||||
:doc:`special_bonds <special_bonds>` command. Different weights can be
|
global setting defined the :doc:`special_bonds <special_bonds>` command.
|
||||||
assigned to different pair hybrid sub-styles via the :doc:`pair_modify
|
However, different weights can be assigned to different hybrid
|
||||||
special <pair_modify>` command. This allows multiple force fields to be
|
sub-styles via the :doc:`pair_modify special <pair_modify>` command.
|
||||||
used in a model of a hybrid system, however, there is no consistent
|
This allows multiple force fields to be used in a model of a hybrid
|
||||||
approach to determine parameters automatically for the interactions
|
system, however, there is no consistent approach to determine parameters
|
||||||
between the two force fields, this is only recommended when particles
|
automatically for the interactions **between** atoms of the two force
|
||||||
|
fields, thus this approach is only recommended when particles
|
||||||
described by the different force fields do not mix.
|
described by the different force fields do not mix.
|
||||||
|
|
||||||
Here is an example for mixing CHARMM and AMBER: The global *amber*
|
Here is an example for combining CHARMM and AMBER: The global *amber*
|
||||||
setting sets the 1-4 interactions to non-zero scaling factors and
|
setting sets the 1-4 interactions to non-zero scaling factors and
|
||||||
then overrides them with 0.0 only for CHARMM:
|
then overrides them with 0.0 only for CHARMM:
|
||||||
|
|
||||||
@ -260,7 +268,7 @@ then overrides them with 0.0 only for CHARMM:
|
|||||||
pair_style hybrid lj/charmm/coul/long 8.0 10.0 lj/cut/coul/long 10.0
|
pair_style hybrid lj/charmm/coul/long 8.0 10.0 lj/cut/coul/long 10.0
|
||||||
pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0
|
pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0
|
||||||
|
|
||||||
The this input achieves the same effect:
|
This input achieves the same effect:
|
||||||
|
|
||||||
.. code-block:: LAMMPS
|
.. code-block:: LAMMPS
|
||||||
|
|
||||||
@ -270,9 +278,9 @@ The this input achieves the same effect:
|
|||||||
pair_modify pair lj/cut/coul/long special coul 0.0 0.0 0.83333333
|
pair_modify pair lj/cut/coul/long special coul 0.0 0.0 0.83333333
|
||||||
pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0
|
pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0
|
||||||
|
|
||||||
Here is an example for mixing Tersoff with OPLS/AA based on
|
Here is an example for combining Tersoff with OPLS/AA based on
|
||||||
a data file that defines bonds for all atoms where for the
|
a data file that defines bonds for all atoms where - for the
|
||||||
Tersoff part of the system the force constants for the bonded
|
Tersoff part of the system - the force constants for the bonded
|
||||||
interactions have been set to 0. Note the global settings are
|
interactions have been set to 0. Note the global settings are
|
||||||
effectively *lj/coul 0.0 0.0 0.5* as required for OPLS/AA:
|
effectively *lj/coul 0.0 0.0 0.5* as required for OPLS/AA:
|
||||||
|
|
||||||
|
|||||||
@ -619,7 +619,7 @@ of analysis.
|
|||||||
* - bond
|
* - bond
|
||||||
- atom-ID molecule-ID atom-type x y z
|
- atom-ID molecule-ID atom-type x y z
|
||||||
* - charge
|
* - charge
|
||||||
- atom-type q x y z
|
- atom-ID atom-type q x y z
|
||||||
* - dipole
|
* - dipole
|
||||||
- atom-ID atom-type q x y z mux muy muz
|
- atom-ID atom-type q x y z mux muy muz
|
||||||
* - dpd
|
* - dpd
|
||||||
|
|||||||
@ -1,7 +1,7 @@
|
|||||||
Sphinx==4.0.3
|
Sphinx==4.0.3
|
||||||
sphinxcontrib-spelling
|
sphinxcontrib-spelling==7.2.1
|
||||||
git+git://github.com/akohlmey/sphinx-fortran@parallel-read
|
git+git://github.com/akohlmey/sphinx-fortran@parallel-read
|
||||||
sphinx_tabs
|
sphinx_tabs==3.2.0
|
||||||
breathe
|
breathe==4.31.0
|
||||||
Pygments
|
Pygments==2.10.0
|
||||||
six
|
six==1.16.0
|
||||||
|
|||||||
@ -418,6 +418,7 @@ html_context['current_version'] = os.environ.get('LAMMPS_WEBSITE_BUILD_VERSION',
|
|||||||
html_context['git_commit'] = git_commit
|
html_context['git_commit'] = git_commit
|
||||||
html_context['versions'] = [
|
html_context['versions'] = [
|
||||||
('latest', 'https://docs.lammps.org/latest/'),
|
('latest', 'https://docs.lammps.org/latest/'),
|
||||||
|
('stable', 'https://docs.lammps.org/stable/'),
|
||||||
(version, 'https://docs.lammps.org/')
|
(version, 'https://docs.lammps.org/')
|
||||||
]
|
]
|
||||||
html_context['downloads'] = [('PDF', 'Manual.pdf')]
|
html_context['downloads'] = [('PDF', 'Manual.pdf')]
|
||||||
|
|||||||
@ -2265,6 +2265,7 @@ Nmols
|
|||||||
nn
|
nn
|
||||||
nnodes
|
nnodes
|
||||||
Nocedal
|
Nocedal
|
||||||
|
nO
|
||||||
nocite
|
nocite
|
||||||
nocoeff
|
nocoeff
|
||||||
nodeless
|
nodeless
|
||||||
@ -2443,6 +2444,7 @@ packings
|
|||||||
padua
|
padua
|
||||||
Padua
|
Padua
|
||||||
pafi
|
pafi
|
||||||
|
PairIJ
|
||||||
palegoldenrod
|
palegoldenrod
|
||||||
palegreen
|
palegreen
|
||||||
paleturquoise
|
paleturquoise
|
||||||
@ -3662,6 +3664,7 @@ Yc
|
|||||||
ycm
|
ycm
|
||||||
Yeh
|
Yeh
|
||||||
yellowgreen
|
yellowgreen
|
||||||
|
yEs
|
||||||
Yethiraj
|
Yethiraj
|
||||||
yflag
|
yflag
|
||||||
yhi
|
yhi
|
||||||
|
|||||||
@ -1,3 +1,9 @@
|
|||||||
|
IMPORTANT NOTE: This example has not been updated since 2014,
|
||||||
|
so it is not likely to work anymore out of the box. There have
|
||||||
|
been changes to LAMMPS and its library interface that would need
|
||||||
|
to be applied. Please see the manual for the documentation of
|
||||||
|
the library interface.
|
||||||
|
|
||||||
This directory has an application that runs classical MD via LAMMPS,
|
This directory has an application that runs classical MD via LAMMPS,
|
||||||
but uses quantum forces calculated by the Quest DFT (density
|
but uses quantum forces calculated by the Quest DFT (density
|
||||||
functional) code in place of the usual classical MD forces calculated
|
functional) code in place of the usual classical MD forces calculated
|
||||||
|
|||||||
@ -1,3 +1,9 @@
|
|||||||
|
IMPORTANT NOTE: This example has not been updated since 2013,
|
||||||
|
so it is not likely to work anymore out of the box. There have
|
||||||
|
been changes to LAMMPS and its library interface that would need
|
||||||
|
to be applied. Please see the manual for the documentation of
|
||||||
|
the library interface.
|
||||||
|
|
||||||
This directory has an application that models grain growth in the
|
This directory has an application that models grain growth in the
|
||||||
presence of strain.
|
presence of strain.
|
||||||
|
|
||||||
|
|||||||
@ -28,13 +28,9 @@
|
|||||||
#include <cstdlib>
|
#include <cstdlib>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
|
||||||
#include "lammps.h" // these are LAMMPS include files
|
#define LAMMPS_LIB_MPI // to make lammps_open() visible
|
||||||
#include "input.h"
|
|
||||||
#include "atom.h"
|
|
||||||
#include "library.h"
|
#include "library.h"
|
||||||
|
|
||||||
using namespace LAMMPS_NS;
|
|
||||||
|
|
||||||
int main(int narg, char **arg)
|
int main(int narg, char **arg)
|
||||||
{
|
{
|
||||||
// setup MPI and various communicators
|
// setup MPI and various communicators
|
||||||
@ -74,7 +70,7 @@ int main(int narg, char **arg)
|
|||||||
char str1[32],str2[32],str3[32];
|
char str1[32],str2[32],str3[32];
|
||||||
|
|
||||||
char **lmparg = new char*[8];
|
char **lmparg = new char*[8];
|
||||||
lmparg[0] = NULL; // required placeholder for program name
|
lmparg[0] = (char *) "LAMMPS"; // required placeholder for program name
|
||||||
lmparg[1] = (char *) "-screen";
|
lmparg[1] = (char *) "-screen";
|
||||||
sprintf(str1,"screen.%d",instance);
|
sprintf(str1,"screen.%d",instance);
|
||||||
lmparg[2] = str1;
|
lmparg[2] = str1;
|
||||||
@ -86,13 +82,9 @@ int main(int narg, char **arg)
|
|||||||
sprintf(str3,"%g",temperature + instance*tdelta);
|
sprintf(str3,"%g",temperature + instance*tdelta);
|
||||||
lmparg[7] = str3;
|
lmparg[7] = str3;
|
||||||
|
|
||||||
// open N instances of LAMMPS
|
// create N instances of LAMMPS
|
||||||
// either of these methods will work
|
|
||||||
|
|
||||||
LAMMPS *lmp = new LAMMPS(8,lmparg,comm_lammps);
|
void *lmp = lammps_open(8,lmparg,comm_lammps,NULL);
|
||||||
|
|
||||||
//LAMMPS *lmp;
|
|
||||||
//lammps_open(8,lmparg,comm_lammps,(void **) &lmp);
|
|
||||||
|
|
||||||
delete [] lmparg;
|
delete [] lmparg;
|
||||||
|
|
||||||
@ -103,7 +95,7 @@ int main(int narg, char **arg)
|
|||||||
// query final temperature and print result for each instance
|
// query final temperature and print result for each instance
|
||||||
|
|
||||||
double *ptr = (double *)
|
double *ptr = (double *)
|
||||||
lammps_extract_compute(lmp,(char *) "thermo_temp",0,0);
|
lammps_extract_compute(lmp,"thermo_temp",LMP_STYLE_GLOBAL,LMP_TYPE_SCALAR);
|
||||||
double finaltemp = *ptr;
|
double finaltemp = *ptr;
|
||||||
|
|
||||||
double *temps = new double[ninstance];
|
double *temps = new double[ninstance];
|
||||||
@ -125,7 +117,7 @@ int main(int narg, char **arg)
|
|||||||
|
|
||||||
// delete LAMMPS instances
|
// delete LAMMPS instances
|
||||||
|
|
||||||
delete lmp;
|
lammps_close(lmp);
|
||||||
|
|
||||||
// close down MPI
|
// close down MPI
|
||||||
|
|
||||||
|
|||||||
@ -13,7 +13,7 @@ like below.
|
|||||||
|
|
||||||
mpicc -c -O -Wall -g -I$HOME/lammps/src liblammpsplugin.c
|
mpicc -c -O -Wall -g -I$HOME/lammps/src liblammpsplugin.c
|
||||||
mpicc -c -O -Wall -g simple.c
|
mpicc -c -O -Wall -g simple.c
|
||||||
mpicc simple.o liblammsplugin.o -ldl -o simpleC
|
mpicc simple.o liblammpsplugin.o -ldl -o simpleC
|
||||||
|
|
||||||
You also need to build LAMMPS as a shared library
|
You also need to build LAMMPS as a shared library
|
||||||
(see examples/COUPLE/README), e.g.
|
(see examples/COUPLE/README), e.g.
|
||||||
|
|||||||
@ -38,44 +38,98 @@ liblammpsplugin_t *liblammpsplugin_load(const char *lib)
|
|||||||
#define ADDSYM(symbol) lmp->symbol = dlsym(handle,"lammps_" #symbol)
|
#define ADDSYM(symbol) lmp->symbol = dlsym(handle,"lammps_" #symbol)
|
||||||
ADDSYM(open);
|
ADDSYM(open);
|
||||||
ADDSYM(open_no_mpi);
|
ADDSYM(open_no_mpi);
|
||||||
|
ADDSYM(open_fortran);
|
||||||
ADDSYM(close);
|
ADDSYM(close);
|
||||||
ADDSYM(version);
|
|
||||||
|
ADDSYM(mpi_init);
|
||||||
|
ADDSYM(mpi_finalize);
|
||||||
|
ADDSYM(kokkos_finalize);
|
||||||
|
ADDSYM(python_finalize);
|
||||||
|
|
||||||
ADDSYM(file);
|
ADDSYM(file);
|
||||||
ADDSYM(command);
|
ADDSYM(command);
|
||||||
ADDSYM(commands_list);
|
ADDSYM(commands_list);
|
||||||
ADDSYM(commands_string);
|
ADDSYM(commands_string);
|
||||||
ADDSYM(free);
|
|
||||||
ADDSYM(extract_setting);
|
ADDSYM(get_natoms);
|
||||||
ADDSYM(extract_global);
|
ADDSYM(get_thermo);
|
||||||
|
|
||||||
ADDSYM(extract_box);
|
ADDSYM(extract_box);
|
||||||
|
ADDSYM(reset_box);
|
||||||
|
|
||||||
|
ADDSYM(memory_usage);
|
||||||
|
ADDSYM(get_mpi_comm);
|
||||||
|
|
||||||
|
ADDSYM(extract_setting);
|
||||||
|
ADDSYM(extract_global_datatype);
|
||||||
|
ADDSYM(extract_global);
|
||||||
|
|
||||||
|
ADDSYM(extract_atom_datatype);
|
||||||
ADDSYM(extract_atom);
|
ADDSYM(extract_atom);
|
||||||
|
|
||||||
ADDSYM(extract_compute);
|
ADDSYM(extract_compute);
|
||||||
ADDSYM(extract_fix);
|
ADDSYM(extract_fix);
|
||||||
ADDSYM(extract_variable);
|
ADDSYM(extract_variable);
|
||||||
|
|
||||||
ADDSYM(get_thermo);
|
|
||||||
ADDSYM(get_natoms);
|
|
||||||
|
|
||||||
ADDSYM(set_variable);
|
ADDSYM(set_variable);
|
||||||
ADDSYM(reset_box);
|
|
||||||
|
|
||||||
ADDSYM(gather_atoms);
|
ADDSYM(gather_atoms);
|
||||||
ADDSYM(gather_atoms_concat);
|
ADDSYM(gather_atoms_concat);
|
||||||
ADDSYM(gather_atoms_subset);
|
ADDSYM(gather_atoms_subset);
|
||||||
ADDSYM(scatter_atoms);
|
ADDSYM(scatter_atoms);
|
||||||
ADDSYM(scatter_atoms_subset);
|
ADDSYM(scatter_atoms_subset);
|
||||||
|
ADDSYM(gather_bonds);
|
||||||
|
|
||||||
ADDSYM(set_fix_external_callback);
|
ADDSYM(create_atoms);
|
||||||
|
|
||||||
ADDSYM(config_has_package);
|
ADDSYM(find_pair_neighlist);
|
||||||
ADDSYM(config_package_count);
|
ADDSYM(find_fix_neighlist);
|
||||||
ADDSYM(config_package_name);
|
ADDSYM(find_compute_neighlist);
|
||||||
|
ADDSYM(neighlist_num_elements);
|
||||||
|
ADDSYM(neighlist_element_neighbors);
|
||||||
|
|
||||||
|
ADDSYM(version);
|
||||||
|
ADDSYM(get_os_info);
|
||||||
|
|
||||||
|
ADDSYM(config_has_mpi_support);
|
||||||
ADDSYM(config_has_gzip_support);
|
ADDSYM(config_has_gzip_support);
|
||||||
ADDSYM(config_has_png_support);
|
ADDSYM(config_has_png_support);
|
||||||
ADDSYM(config_has_jpeg_support);
|
ADDSYM(config_has_jpeg_support);
|
||||||
ADDSYM(config_has_ffmpeg_support);
|
ADDSYM(config_has_ffmpeg_support);
|
||||||
ADDSYM(config_has_exceptions);
|
ADDSYM(config_has_exceptions);
|
||||||
ADDSYM(create_atoms);
|
|
||||||
|
ADDSYM(config_has_package);
|
||||||
|
ADDSYM(config_package_count);
|
||||||
|
ADDSYM(config_package_name);
|
||||||
|
|
||||||
|
ADDSYM(config_accelerator);
|
||||||
|
ADDSYM(has_gpu_device);
|
||||||
|
ADDSYM(get_gpu_device_info);
|
||||||
|
|
||||||
|
ADDSYM(has_style);
|
||||||
|
ADDSYM(style_count);
|
||||||
|
ADDSYM(style_name);
|
||||||
|
|
||||||
|
ADDSYM(has_id);
|
||||||
|
ADDSYM(id_count);
|
||||||
|
ADDSYM(id_name);
|
||||||
|
|
||||||
|
ADDSYM(plugin_count);
|
||||||
|
ADDSYM(plugin_name);
|
||||||
|
|
||||||
|
ADDSYM(set_fix_external_callback);
|
||||||
|
ADDSYM(fix_external_get_force);
|
||||||
|
ADDSYM(fix_external_set_energy_global);
|
||||||
|
ADDSYM(fix_external_set_energy_peratom);
|
||||||
|
ADDSYM(fix_external_set_virial_global);
|
||||||
|
ADDSYM(fix_external_set_virial_peratom);
|
||||||
|
ADDSYM(fix_external_set_vector_length);
|
||||||
|
ADDSYM(fix_external_set_vector);
|
||||||
|
|
||||||
|
ADDSYM(free);
|
||||||
|
|
||||||
|
ADDSYM(is_running);
|
||||||
|
ADDSYM(force_timeout);
|
||||||
|
|
||||||
#ifdef LAMMPS_EXCEPTIONS
|
#ifdef LAMMPS_EXCEPTIONS
|
||||||
lmp->has_exceptions = 1;
|
lmp->has_exceptions = 1;
|
||||||
ADDSYM(has_error);
|
ADDSYM(has_error);
|
||||||
|
|||||||
@ -39,75 +39,121 @@ extern "C" {
|
|||||||
|
|
||||||
#if defined(LAMMPS_BIGBIG)
|
#if defined(LAMMPS_BIGBIG)
|
||||||
typedef void (*FixExternalFnPtr)(void *, int64_t, int, int64_t *, double **, double **);
|
typedef void (*FixExternalFnPtr)(void *, int64_t, int, int64_t *, double **, double **);
|
||||||
#elif defined(LAMMPS_SMALLBIG)
|
#elif defined(LAMMPS_SMALLSMALL)
|
||||||
typedef void (*FixExternalFnPtr)(void *, int64_t, int, int *, double **, double **);
|
|
||||||
#else
|
|
||||||
typedef void (*FixExternalFnPtr)(void *, int, int, int *, double **, double **);
|
typedef void (*FixExternalFnPtr)(void *, int, int, int *, double **, double **);
|
||||||
|
#else
|
||||||
|
typedef void (*FixExternalFnPtr)(void *, int64_t, int, int *, double **, double **);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
struct _liblammpsplugin {
|
struct _liblammpsplugin {
|
||||||
int abiversion;
|
int abiversion;
|
||||||
int has_exceptions;
|
int has_exceptions;
|
||||||
void *handle;
|
void *handle;
|
||||||
void (*open)(int, char **, MPI_Comm, void **);
|
void *(*open)(int, char **, MPI_Comm, void **);
|
||||||
void (*open_no_mpi)(int, char **, void **);
|
void *(*open_no_mpi)(int, char **, void **);
|
||||||
|
void *(*open_fortran)(int, char **, void **, int);
|
||||||
void (*close)(void *);
|
void (*close)(void *);
|
||||||
int (*version)(void *);
|
|
||||||
|
void (*mpi_init)();
|
||||||
|
void (*mpi_finalize)();
|
||||||
|
void (*kokkos_finalize)();
|
||||||
|
void (*python_finalize)();
|
||||||
|
|
||||||
void (*file)(void *, char *);
|
void (*file)(void *, char *);
|
||||||
char *(*command)(void *, char *);
|
char *(*command)(void *, const char *);
|
||||||
void (*commands_list)(void *, int, char **);
|
void (*commands_list)(void *, int, const char **);
|
||||||
void (*commands_string)(void *, char *);
|
void (*commands_string)(void *, const char *);
|
||||||
void (*free)(void *);
|
|
||||||
int (*extract_setting)(void *, char *);
|
double (*get_natoms)(void *);
|
||||||
void *(*extract_global)(void *, char *);
|
double (*get_thermo)(void *, char *);
|
||||||
|
|
||||||
void (*extract_box)(void *, double *, double *,
|
void (*extract_box)(void *, double *, double *,
|
||||||
double *, double *, double *, int *, int *);
|
double *, double *, double *, int *, int *);
|
||||||
void *(*extract_atom)(void *, char *);
|
|
||||||
void *(*extract_compute)(void *, char *, int, int);
|
|
||||||
void *(*extract_fix)(void *, char *, int, int, int, int);
|
|
||||||
void *(*extract_variable)(void *, char *, char *);
|
|
||||||
|
|
||||||
double (*get_thermo)(void *, char *);
|
|
||||||
int (*get_natoms)(void *);
|
|
||||||
|
|
||||||
int (*set_variable)(void *, char *, char *);
|
|
||||||
void (*reset_box)(void *, double *, double *, double, double, double);
|
void (*reset_box)(void *, double *, double *, double, double, double);
|
||||||
|
|
||||||
|
void (*memory_usage)(void *, double *);
|
||||||
|
int (*get_mpi_comm)(void *);
|
||||||
|
|
||||||
|
int (*extract_setting)(void *, const char *);
|
||||||
|
int *(*extract_global_datatype)(void *, const char *);
|
||||||
|
void *(*extract_global)(void *, const char *);
|
||||||
|
|
||||||
|
void *(*extract_atom_datatype)(void *, const char *);
|
||||||
|
void *(*extract_atom)(void *, const char *);
|
||||||
|
|
||||||
|
void *(*extract_compute)(void *, const char *, int, int);
|
||||||
|
void *(*extract_fix)(void *, const char *, int, int, int, int);
|
||||||
|
void *(*extract_variable)(void *, const char *, char *);
|
||||||
|
int (*set_variable)(void *, char *, char *);
|
||||||
|
|
||||||
void (*gather_atoms)(void *, char *, int, int, void *);
|
void (*gather_atoms)(void *, char *, int, int, void *);
|
||||||
void (*gather_atoms_concat)(void *, char *, int, int, void *);
|
void (*gather_atoms_concat)(void *, char *, int, int, void *);
|
||||||
void (*gather_atoms_subset)(void *, char *, int, int, int, int *, void *);
|
void (*gather_atoms_subset)(void *, char *, int, int, int, int *, void *);
|
||||||
void (*scatter_atoms)(void *, char *, int, int, void *);
|
void (*scatter_atoms)(void *, char *, int, int, void *);
|
||||||
void (*scatter_atoms_subset)(void *, char *, int, int, int, int *, void *);
|
void (*scatter_atoms_subset)(void *, char *, int, int, int, int *, void *);
|
||||||
|
|
||||||
void (*set_fix_external_callback)(void *, char *, FixExternalFnPtr, void*);
|
void (*gather_bonds)(void *, void *);
|
||||||
|
|
||||||
int (*config_has_package)(char * package_name);
|
// lammps_create_atoms() takes tagint and imageint as args
|
||||||
int (*config_package_count)();
|
// ifdef ensures they are compatible with the rest of LAMMPS
|
||||||
int (*config_package_name)(int index, char * buffer, int max_size);
|
// caller must match to how LAMMPS library is built
|
||||||
|
|
||||||
|
#ifndef LAMMPS_BIGBIG
|
||||||
|
void (*create_atoms)(void *, int, int *, int *, double *,
|
||||||
|
double *, int *, int);
|
||||||
|
#else
|
||||||
|
void (*create_atoms)(void *, int, int64_t *, int *, double *,
|
||||||
|
double *, int64_t *, int);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int (*find_pair_neighlist)(void *, const char *, int, int, int);
|
||||||
|
int (*find_fix_neighlist)(void *, const char *, int);
|
||||||
|
int (*find_compute_neighlist)(void *, char *, int);
|
||||||
|
int (*neighlist_num_elements)(void *, int);
|
||||||
|
void (*neighlist_element_neighbors)(void *, int, int, int *, int *, int **);
|
||||||
|
|
||||||
|
int (*version)(void *);
|
||||||
|
void (*get_os_info)(char *, int);
|
||||||
|
|
||||||
|
int (*config_has_mpi_support)();
|
||||||
int (*config_has_gzip_support)();
|
int (*config_has_gzip_support)();
|
||||||
int (*config_has_png_support)();
|
int (*config_has_png_support)();
|
||||||
int (*config_has_jpeg_support)();
|
int (*config_has_jpeg_support)();
|
||||||
int (*config_has_ffmpeg_support)();
|
int (*config_has_ffmpeg_support)();
|
||||||
int (*config_has_exceptions)();
|
int (*config_has_exceptions)();
|
||||||
|
|
||||||
int (*find_pair_neighlist)(void* ptr, char * style, int exact, int nsub, int request);
|
int (*config_has_package)(const char *);
|
||||||
int (*find_fix_neighlist)(void* ptr, char * id, int request);
|
int (*config_package_count)();
|
||||||
int (*find_compute_neighlist)(void* ptr, char * id, int request);
|
int (*config_package_name)(int, char *, int);
|
||||||
int (*neighlist_num_elements)(void* ptr, int idx);
|
|
||||||
void (*neighlist_element_neighbors)(void * ptr, int idx, int element, int * iatom, int * numneigh, int ** neighbors);
|
|
||||||
|
|
||||||
// lammps_create_atoms() takes tagint and imageint as args
|
int (*config_accelerator)(const char *, const char *, const char *);
|
||||||
// ifdef insures they are compatible with rest of LAMMPS
|
int (*has_gpu_device)();
|
||||||
// caller must match to how LAMMPS library is built
|
void (*get_gpu_device_info)(char *, int);
|
||||||
|
|
||||||
#ifdef LAMMPS_BIGBIG
|
int (*has_style)(void *, const char *, const char *);
|
||||||
void (*create_atoms)(void *, int, int64_t *, int *,
|
int (*style_count)(void *, const char *);
|
||||||
double *, double *, int64_t *, int);
|
int (*style_name)(void *, const char *, int, char *, int);
|
||||||
#else
|
|
||||||
void (*create_atoms)(void *, int, int *, int *,
|
int (*has_id)(void *, const char *, const char *);
|
||||||
double *, double *, int *, int);
|
int (*id_count)(void *, const char *);
|
||||||
#endif
|
int (*id_name)(void *, const char *, int, char *, int);
|
||||||
|
|
||||||
|
int (*plugin_count)();
|
||||||
|
int (*plugin_name)(int, char *, char *, int);
|
||||||
|
|
||||||
|
void (*set_fix_external_callback)(void *, const char *, FixExternalFnPtr, void*);
|
||||||
|
void (*fix_external_get_force)(void *, const char *);
|
||||||
|
void (*fix_external_set_energy_global)(void *, const char *, double);
|
||||||
|
void (*fix_external_set_energy_peratom)(void *, const char *, double *);
|
||||||
|
void (*fix_external_set_virial_global)(void *, const char *, double *);
|
||||||
|
void (*fix_external_set_virial_peratom)(void *, const char *, double **);
|
||||||
|
void (*fix_external_set_vector_length)(void *, const char *, int);
|
||||||
|
void (*fix_external_set_vector)(void *, const char *, int, double);
|
||||||
|
|
||||||
|
void (*free)(void *);
|
||||||
|
|
||||||
|
void (*is_running)(void *);
|
||||||
|
void (*force_timeout)(void *);
|
||||||
|
|
||||||
int (*has_error)(void *);
|
int (*has_error)(void *);
|
||||||
int (*get_last_error_message)(void *, char *, int);
|
int (*get_last_error_message)(void *, char *, int);
|
||||||
|
|||||||
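The declarations above are the function-pointer table a host code receives after loading the LAMMPS shared library at run time. As a rough, hedged sketch of how a few of these members are used together (the loader name liblammpsplugin_load(), the struct typedef liblammpsplugin_t, the header include name, and the shared-library filename are assumptions here; only plugin->open/close/commands_list, plugin->version, and liblammpsplugin_release appear in this merge):

#include <mpi.h>
#include <cstdio>
#include "liblammpsplugin.h"   // assumed include name for the header shown above

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);

  // load the LAMMPS shared library and fill the function-pointer table;
  // the library name is platform dependent and assumed here
  liblammpsplugin_t *plugin = liblammpsplugin_load("liblammps.so");
  if (!plugin) {
    std::fprintf(stderr, "could not load LAMMPS shared library\n");
    MPI_Abort(MPI_COMM_WORLD, 1);
  }

  // in the updated API open() returns the opaque LAMMPS handle
  void *lmp = plugin->open(0, NULL, MPI_COMM_WORLD, NULL);
  std::printf("LAMMPS version %d\n", plugin->version(lmp));

  const char *cmds[] = {"units lj",
                        "region box block 0 2 0 2 0 2",
                        "create_box 1 box"};
  plugin->commands_list(lmp, 3, cmds);

  plugin->close(lmp);
  liblammpsplugin_release(plugin);
  MPI_Finalize();
  return 0;
}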
@@ -1,9 +1,12 @@
-LAMMPS (18 Feb 2020)
-Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
-Created orthogonal box = (0 0 0) to (6.71838 6.71838 6.71838)
+LAMMPS (31 Aug 2021)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+Lattice spacing in x,y,z = 1.6795962 1.6795962 1.6795962
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
   1 by 1 by 1 MPI processor grid
 Created 256 atoms
-  create_atoms CPU = 0.000297844 secs
+  using lattice units in orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
+  create_atoms CPU = 0.001 seconds
 Neighbor list info ...
   update every 20 steps, delay 0 steps, check no
   max neighbors/atom: 2000, page size: 100000
@@ -14,108 +17,108 @@ Neighbor list info ...
 (1) pair lj/cut, perpetual
     attributes: half, newton on
     pair build: half/bin/atomonly/newton
-    stencil: half/bin/3d/newton
+    stencil: half/bin/3d
     bin: standard
 Setting up Verlet run ...
   Unit style : lj
   Current step : 0
   Time step : 0.005
-Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes
+Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
 Step Temp E_pair E_mol TotEng Press
 0 1.44 -6.7733681 0 -4.6218056 -5.0244179
 10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175
-Loop time of 0.00164276 on 1 procs for 10 steps with 256 atoms
+Loop time of 0.00239712 on 1 procs for 10 steps with 256 atoms

-Performance: 2629719.113 tau/day, 6087.313 timesteps/s
-93.7% CPU use with 1 MPI tasks x no OpenMP threads
+Performance: 1802163.347 tau/day, 4171.674 timesteps/s
+97.2% CPU use with 1 MPI tasks x 1 OpenMP threads

[The rest of this hunk and the hunks @ -124,136 +127,136 @@ and @ -261,34 +264,34 @@ continue the same serial reference run: restarts at steps 10, 20, 21, 31, 51, 61 and 81, the "Force on 1 atom via extract_atom: 26.9581" check, "Deleted 256 atoms, new total = 0", and "Total wall time: 0:00:00". The thermo values, histograms and neighbor counts are identical in both versions; only the regenerated wall-clock timings differ (Loop time, Performance, the CPU-use line, and the Pair/Neigh/Comm/Output/Modify/Other rows of the MPI task timing breakdown), and counts are now printed with padded decimals, e.g. "Nlocal: 256.000 ave", "Nghost: 1431.00 ave", "Neighs: 9984.00 ave", "Ave neighs/atom = 39.000000".]
@@ -1,9 +1,12 @@
-LAMMPS (18 Feb 2020)
-Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
-Created orthogonal box = (0 0 0) to (6.71838 6.71838 6.71838)
+LAMMPS (31 Aug 2021)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
+  using 1 OpenMP thread(s) per MPI task
+Lattice spacing in x,y,z = 1.6795962 1.6795962 1.6795962
+Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
   1 by 1 by 2 MPI processor grid
 Created 256 atoms
-  create_atoms CPU = 0.000265157 secs
+  using lattice units in orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
+  create_atoms CPU = 0.003 seconds
 Neighbor list info ...
   update every 20 steps, delay 0 steps, check no
   max neighbors/atom: 2000, page size: 100000
@@ -14,7 +17,7 @@ Neighbor list info ...
 (1) pair lj/cut, perpetual
     attributes: half, newton on
     pair build: half/bin/atomonly/newton
-    stencil: half/bin/3d/newton
+    stencil: half/bin/3d
     bin: standard
 Setting up Verlet run ...
   Unit style : lj
@@ -92,34 +95,34 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
 [timing and neighbor statistics of this run block omitted here; see the note below]
-Ave neighs/atom = 37.9102
-Force on 1 atom via extract_atom: -18.109
-Force on 1 atom via extract_variable: -18.109
+Ave neighs/atom = 37.910156
 Neighbor list builds = 0
 Dangerous builds not checked
+Force on 1 atom via extract_atom: -18.109
+Force on 1 atom via extract_variable: -18.109
 Force on 1 atom via extract_atom: 26.9581
 Force on 1 atom via extract_variable: 26.9581

[The other hunks of this two-task reference log (@ -24,30 +27,30 @@, @ -58,30 +61,30 @@, @ -130,30 +133,30 @@, @ -164,30 +167,30 @@, @ -198,30 +201,30 @@, @ -232,30 +235,30 @@ and @ -267,30 +270,30 @@) follow the same pattern as the serial log above: identical thermo values, histograms and neighbor counts; regenerated wall-clock timings and MPI task timing breakdowns; "1 OpenMP threads" instead of "no OpenMP threads" in the CPU-use line; and padded decimals such as "Nlocal: 128.000 ave", "Nghost: 1109.00 ave", "Neighs: 4852.50 ave", "Ave neighs/atom = 38.875000". Both versions end with "Deleted 256 atoms, new total = 0" and "Total wall time: 0:00:00".]
@@ -87,7 +87,7 @@ int main(int narg, char **arg)
       MPI_Abort(MPI_COMM_WORLD,1);
     }
   }
-  if (lammps == 1) plugin->open(0,NULL,comm_lammps,&lmp);
+  if (lammps == 1) lmp = plugin->open(0,NULL,comm_lammps,NULL);

   while (1) {
     if (me == 0) {
@@ -139,7 +139,7 @@ int main(int narg, char **arg)

   cmds[0] = (char *)"run 10";
   cmds[1] = (char *)"run 20";
-  if (lammps == 1) plugin->commands_list(lmp,2,cmds);
+  if (lammps == 1) plugin->commands_list(lmp,2,(const char **)cmds);

   /* delete all atoms
      create_atoms() to create new ones with old coords, vels
@@ -164,12 +164,13 @@ int main(int narg, char **arg)

   if (lammps == 1) {
     plugin->close(lmp);
+    MPI_Barrier(comm_lammps);
+    MPI_Comm_free(&comm_lammps);
     liblammpsplugin_release(plugin);
   }

   /* close down MPI */

-  if (lammps == 1) MPI_Comm_free(&comm_lammps);
   MPI_Barrier(MPI_COMM_WORLD);
   MPI_Finalize();
 }
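The second hunk adds an explicit cast because the updated commands_list() now takes a const pointer-to-pointer. A minimal, self-contained sketch of why the cast is required (the helper name below is ours, not part of the example being diffed):

#include <cstdio>

// "char **" does not implicitly convert to "const char **" in C or C++,
// so a command list built from writable strings must be cast when it is
// passed to the new const-correct signature.
static void print_list(int n, const char **lines) {
  for (int i = 0; i < n; ++i) std::printf("%s\n", lines[i]);
}

int main() {
  char buf1[] = "run 10";
  char buf2[] = "run 20";
  char *cmds[2] = {buf1, buf2};
  print_list(2, (const char **)cmds);  // same cast as in the updated example
  return 0;
}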
@@ -8,7 +8,7 @@ bond_style harmonic
 bond_coeff 1 100 1.122462 # K R0
 velocity all create 1.0 8008 loop geom

-pair_style lj/cut/coul/long 1.122462 20
+pair_style lj/cut/coul/long/soft 2 0.5 10.0 1.122462 20
 pair_coeff * * 1.0 1.0 1.122462 # charges
 kspace_style pppm 1.0e-3
 pair_modify shift yes
@@ -1476,7 +1476,9 @@ int colvarmodule::write_output_files()
        bi != biases.end();
        bi++) {
     // Only write output files if they have not already been written this time step
-    if ((*bi)->output_freq == 0 || (cvm::step_absolute() % (*bi)->output_freq) != 0) {
+    if ((*bi)->output_freq == 0 ||
+        cvm::step_relative() == 0 ||
+        (cvm::step_absolute() % (*bi)->output_freq) != 0) {
       error_code |= (*bi)->write_output_files();
     }
     error_code |= (*bi)->write_state_to_replicas();
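A compact sketch of the updated predicate, under our reading of it (this is an interpretation, not the literal colvars code): the end-of-run writer only skips a bias when the periodic output mechanism has already written it at this exact step of the current run, and the new step_relative() == 0 term makes sure the very first step of a restarted run is not treated as "already written".

#include <cstdio>

static bool write_now(long output_freq, long step_absolute, long step_relative) {
  return output_freq == 0 ||                  // no periodic output configured
         step_relative == 0 ||                // first step of this run
         (step_absolute % output_freq) != 0;  // not a periodic output step
}

int main() {
  std::printf("%d %d %d\n",
              write_now(0, 100, 50),    // 1: always write
              write_now(10, 100, 0),    // 1: restart lands on an output step, still write
              write_now(10, 100, 40));  // 0: already written by the periodic output
  return 0;
}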
@@ -1,3 +1,3 @@
 #ifndef COLVARS_VERSION
-#define COLVARS_VERSION "2021-08-06"
+#define COLVARS_VERSION "2021-09-21"
 #endif
@@ -462,7 +462,6 @@ int UCL_Device::set_platform(int pid) {
   _num_devices = 0;
   for (int i=0; i<num_unpart; i++) {
     cl_uint num_subdevices = 1;
-    cl_device_id *subdevice_list = device_list + i;

     #ifdef CL_VERSION_1_2
     cl_device_affinity_domain adomain;
@@ -479,19 +478,21 @@ int UCL_Device::set_platform(int pid) {
     CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL,
                                     &num_subdevices));
     if (num_subdevices > 1) {
-      subdevice_list = new cl_device_id[num_subdevices];
+      cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
       CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
                                       subdevice_list, &num_subdevices));
+      for (int j=0; j<num_subdevices; j++) {
+        _cl_devices.push_back(device_list[i]);
+        add_properties(device_list[i]);
+        _num_devices++;
+      }
+      delete[] subdevice_list;
+    } else {
+      _cl_devices.push_back(device_list[i]);
+      add_properties(device_list[i]);
+      _num_devices++;
     }
     #endif

-    for (int j=0; j<num_subdevices; j++) {
-      _num_devices++;
-      _cl_devices.push_back(subdevice_list[j]);
-      add_properties(subdevice_list[j]);
-    }
-
-    if (num_subdevices > 1) delete[] subdevice_list;
   } // for i
   #endif

@@ -555,16 +556,22 @@ void UCL_Device::add_properties(cl_device_id device_list) {
                               sizeof(float_width),&float_width,nullptr));
   op.preferred_vector_width32=float_width;

-  // Determine if double precision is supported
   cl_uint double_width;
   CL_SAFE_CALL(clGetDeviceInfo(device_list,
                                CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
                                sizeof(double_width),&double_width,nullptr));
   op.preferred_vector_width64=double_width;
-  if (double_width==0)
-    op.double_precision=false;
-  else
+
+  // Determine if double precision is supported: All bits in the mask must be set.
+  cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO|
+                                     CL_FP_ROUND_TO_INF|CL_FP_INF_NAN|CL_FP_DENORM);
+  cl_device_fp_config double_avail;
+  CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_DOUBLE_FP_CONFIG,
+                               sizeof(double_avail),&double_avail,nullptr));
+  if ((double_avail & double_mask) == double_mask)
     op.double_precision=true;
+  else
+    op.double_precision=false;

   CL_SAFE_CALL(clGetDeviceInfo(device_list,
                                CL_DEVICE_PROFILING_TIMER_RESOLUTION,
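The second hunk replaces the old heuristic (a non-zero preferred double vector width) with a test of the device's double-precision capability mask. A self-contained sketch of the same check outside of geryon, using only standard OpenCL API calls (the helper and the choice of the first platform/device are ours):

#include <CL/cl.h>
#include <cstdio>

// True only if the device advertises the full set of double-precision
// capabilities; every required bit of the mask must be set.
static bool has_full_fp64(cl_device_id dev) {
  const cl_device_fp_config required =
      CL_FP_FMA | CL_FP_ROUND_TO_NEAREST | CL_FP_ROUND_TO_ZERO |
      CL_FP_ROUND_TO_INF | CL_FP_INF_NAN | CL_FP_DENORM;
  cl_device_fp_config avail = 0;
  if (clGetDeviceInfo(dev, CL_DEVICE_DOUBLE_FP_CONFIG,
                      sizeof(avail), &avail, nullptr) != CL_SUCCESS)
    return false;                          // query failed: assume no FP64
  return (avail & required) == required;
}

int main() {
  cl_platform_id platform;
  cl_device_id device;
  if (clGetPlatformIDs(1, &platform, nullptr) != CL_SUCCESS) return 1;
  if (clGetDeviceIDs(platform, CL_DEVICE_TYPE_ALL, 1, &device, nullptr) != CL_SUCCESS) return 1;
  std::printf("full double precision support: %s\n",
              has_full_fp64(device) ? "yes" : "no");
  return 0;
}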
@@ -38,8 +38,10 @@ namespace ucl_opencl {
 /// Class for timing OpenCL events
 class UCL_Timer {
  public:
-  inline UCL_Timer() : _total_time(0.0f), _initialized(false), has_measured_time(false) { }
-  inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false), has_measured_time(false)
+  inline UCL_Timer() : start_event(nullptr), stop_event(nullptr), _total_time(0.0f),
+                       _initialized(false), has_measured_time(false) { }
+  inline UCL_Timer(UCL_Device &dev) : start_event(nullptr), stop_event(nullptr), _total_time(0.0f),
+                                      _initialized(false), has_measured_time(false)
     { init(dev); }

   inline ~UCL_Timer() { clear(); }
@@ -127,9 +127,8 @@ class Answer {
   /// Add forces and torques from the GPU into a LAMMPS pointer
   void get_answers(double **f, double **tor);

-  inline double get_answers(double **f, double **tor, double *eatom,
-                            double **vatom, double *virial, double &ecoul,
-                            int &error_flag_in) {
+  inline double get_answers(double **f, double **tor, double *eatom, double **vatom,
+                            double *virial, double &ecoul, int &error_flag_in) {
     double ta=MPI_Wtime();
     time_answer.sync_stop();
     _time_cpu_idle+=MPI_Wtime()-ta;
@@ -34,7 +34,7 @@ BornCoulLongT::BornCoulLong() : BaseCharge<numtyp,acctyp>(),
 }

 template <class numtyp, class acctyp>
-BornCoulLongT::~BornCoulLongT() {
+BornCoulLongT::~BornCoulLong() {
   clear();
 }
@@ -34,7 +34,7 @@ BornCoulWolfT::BornCoulWolf() : BaseCharge<numtyp,acctyp>(),
 }

 template <class numtyp, class acctyp>
-BornCoulWolfT::~BornCoulWolfT() {
+BornCoulWolfT::~BornCoulWolf() {
   clear();
 }
@@ -34,7 +34,7 @@ BuckCoulLongT::BuckCoulLong() : BaseCharge<numtyp,acctyp>(),
 }

 template <class numtyp, class acctyp>
-BuckCoulLongT::~BuckCoulLongT() {
+BuckCoulLongT::~BuckCoulLong() {
   clear();
 }
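The three hunks above fix the same kind of compile problem: the "...T" qualifier in these files is, as far as we can tell, a shorthand for the full template specialization, so after "~" the destructor must be spelled with the plain class name rather than the shorthand, which would expand to a template-id. A generic, hypothetical illustration of the pattern (all names below are ours):

#include <cstdio>

template <class numtyp, class acctyp>
class PairStyle {
 public:
  PairStyle() { std::puts("ctor"); }
  ~PairStyle();
};

// Shorthand used by the out-of-line definitions, mirroring the GPU-lib style.
#define PairStyleT PairStyle<numtyp, acctyp>

template <class numtyp, class acctyp>
PairStyleT::~PairStyle() {   // OK: the destructor is named after the class itself
  std::puts("dtor");
}
// Spelling it "PairStyleT::~PairStyleT()" would expand to
// "PairStyle<numtyp, acctyp>::~PairStyle<numtyp, acctyp>()", a form that
// several current compilers warn about or reject outright.

int main() { PairStyle<float, double> p; return 0; }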
@@ -333,6 +333,12 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
     gpu_barrier();
   }

+  // check if double precision support is available
+  #if defined(_SINGLE_DOUBLE) || defined(_DOUBLE_DOUBLE)
+  if (!gpu->double_precision())
+    return -16;
+  #endif
+
   // Setup auto bin size calculation for calls from atom::sort
   // - This is repeated in neighbor init with additional info
   if (_user_cell_size<0.0) {
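The guard added above makes a double- or mixed-precision GPU build refuse a device without usable FP64 instead of silently producing wrong numbers. A compile-time/run-time sketch of the same idea (hypothetical function name; -16 is the error code used in the hunk above):

    int check_precision_support(bool device_has_fp64) {
    #if defined(_SINGLE_DOUBLE) || defined(_DOUBLE_DOUBLE)
      // Only double and mixed precision builds require FP64 on the device.
      if (!device_has_fp64)
        return -16;
    #endif
      (void)device_has_fp64;   // unused in single-precision builds
      return 0;
    }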
@@ -348,7 +354,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
 }

 template <class numtyp, class acctyp>
-int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) {
+int DeviceT::set_ocl_params(std::string s_config, const std::string &extra_args) {
 #ifdef USE_OPENCL

 #include "lal_pre_ocl_config.h"
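Several signature changes in this merge follow the same pattern as the one above: read-only string parameters move from pass-by-value to pass-by-const-reference so each call stops copying the string. A minimal sketch with hypothetical function names:

    #include <string>

    // Pass-by-value: the caller's string is copied into s on every call.
    int parse_copy(std::string s) { return static_cast<int>(s.size()); }

    // Pass-by-const-reference: binds directly to the caller's string, no copy,
    // and the signature still promises not to modify it.
    int parse_ref(const std::string &s) { return static_cast<int>(s.size()); }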
@@ -368,7 +374,7 @@ int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) {
   int token_count=0;
   std::string params[18];
   char ocl_config[2048];
-  strcpy(ocl_config,s_config.c_str());
+  strncpy(ocl_config,s_config.c_str(),2047);
   char *pch = strtok(ocl_config,",");
   _ocl_config_name=pch;
   pch = strtok(nullptr,",");
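One caveat about the strncpy fix above: strncpy does not null-terminate the destination when the source is longer than the count, so copying at most 2047 characters into a 2048-byte buffer is only safe if the final byte is terminated explicitly (or the buffer is known to be zeroed). A defensive sketch with hypothetical names, not the code in this file:

    #include <cstring>
    #include <string>

    void copy_config(const std::string &s_config) {
      char ocl_config[2048];
      std::strncpy(ocl_config, s_config.c_str(), sizeof(ocl_config) - 1);
      ocl_config[sizeof(ocl_config) - 1] = '\0';   // strncpy may not add this itself
      char *pch = std::strtok(ocl_config, ",");    // tokenization then proceeds as before
      (void)pch;
    }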
@@ -546,14 +552,9 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
     return -3;

   if (_user_cell_size<0.0) {
-    #ifndef LAL_USE_OLD_NEIGHBOR
-    _neighbor_shared.setup_auto_cell_size(true,cutoff,nbor->simd_size());
-    #else
     _neighbor_shared.setup_auto_cell_size(false,cutoff,nbor->simd_size());
-    #endif
   } else
-    _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,
-                                          nbor->simd_size());
+    _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,nbor->simd_size());
   nbor->set_cutoff(cutoff);

   return 0;
@@ -992,10 +993,8 @@ int DeviceT::compile_kernels() {
       static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(0) ||
       static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(1) ||
       static_cast<size_t>(_block_cell_id) > gpu->group_size_dim(0) ||
-      static_cast<size_t>(_max_shared_types*_max_shared_types*
-                          sizeof(numtyp)*17 > gpu->slm_size()) ||
-      static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) >
-                          gpu->slm_size()))
+      static_cast<size_t>(_max_shared_types*_max_shared_types*sizeof(numtyp)*17 > gpu->slm_size()) ||
+      static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) > gpu->slm_size()))
     return -13;

   if (_block_pair % _simd_size != 0 || _block_bio_pair % _simd_size != 0 ||
@@ -1071,9 +1070,8 @@ void lmp_clear_device() {
   global_device.clear_device();
 }

-double lmp_gpu_forces(double **f, double **tor, double *eatom,
-                      double **vatom, double *virial, double &ecoul,
-                      int &error_flag) {
+double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom,
+                      double *virial, double &ecoul, int &error_flag) {
   return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul,error_flag);
 }

@@ -163,17 +163,15 @@ class Device {
     { ans_queue.push(ans); }

   /// Add "answers" (force,energies,etc.) into LAMMPS structures
-  inline double fix_gpu(double **f, double **tor, double *eatom,
-                        double **vatom, double *virial, double &ecoul,
-                        int &error_flag) {
+  inline double fix_gpu(double **f, double **tor, double *eatom, double **vatom,
+                        double *virial, double &ecoul, int &error_flag) {
     error_flag=0;
     atom.data_unavail();
     if (ans_queue.empty()==false) {
       stop_host_timer();
       double evdw=0.0;
       while (ans_queue.empty()==false) {
-        evdw+=ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul,
-                                             error_flag);
+        evdw += ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul,error_flag);
         ans_queue.pop();
       }
       return evdw;
@@ -350,7 +348,7 @@ class Device {
   int _data_in_estimate, _data_out_estimate;

   std::string _ocl_config_name, _ocl_config_string, _ocl_compile_string;
-  int set_ocl_params(std::string, std::string);
+  int set_ocl_params(std::string, const std::string &);
 };

 }
@@ -39,7 +39,7 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
                     const int block_cell_2d, const int block_cell_id,
                     const int block_nbor_build, const int threads_per_atom,
                     const int simd_size, const bool time_device,
-                    const std::string compile_flags, const bool ilist_map) {
+                    const std::string &compile_flags, const bool ilist_map) {
   clear();
   _ilist_map = ilist_map;

@@ -743,7 +743,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
   mn = _max_nbors;
   const numtyp i_cell_size=static_cast<numtyp>(1.0/_cell_size);
   const int neigh_block=_block_cell_id;
-  const int GX=(int)ceil((float)nall/neigh_block);
+  const int GX=(int)ceil((double)nall/neigh_block);
   const numtyp sublo0=static_cast<numtyp>(sublo[0]);
   const numtyp sublo1=static_cast<numtyp>(sublo[1]);
   const numtyp sublo2=static_cast<numtyp>(sublo[2]);
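The (float) to (double) change above matters for very large systems: a 32-bit float carries only about 7 significant digits, so (float)nall can round down and the computed grid can come up one block short. Double precision avoids that, and pure integer ceiling division sidesteps rounding entirely; a sketch with hypothetical names:

    // Ceiling division without floating point. For example, with nall = 16777217
    // (not representable as a 32-bit float) and a block of 128 this yields 131073
    // blocks, whereas a float-based ceil() gives 131072.
    inline int grid_size(int nall, int block) {
      return (nall + block - 1) / block;
    }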
@@ -71,7 +71,7 @@ class Neighbor {
             const int block_cell_2d, const int block_cell_id,
             const int block_nbor_build, const int threads_per_atom,
             const int simd_size, const bool time_device,
-            const std::string compile_flags, const bool ilist_map);
+            const std::string &compile_flags, const bool ilist_map);

   /// Set the cutoff+skin
   inline void set_cutoff(const double cutoff) {
@@ -89,7 +89,7 @@ double NeighborShared::best_cell_size(const double subx, const double suby,
 }

 void NeighborShared::compile_kernels(UCL_Device &dev, const int gpu_nbor,
-                                     const std::string flags) {
+                                     const std::string &flags) {
   if (_compiled)
     return;

@@ -87,7 +87,7 @@ class NeighborShared {

   /// Compile kernels for neighbor lists
   void compile_kernels(UCL_Device &dev, const int gpu_nbor,
-                       const std::string flags);
+                       const std::string &flags);

   // ----------------------------- Kernels
   UCL_Program *nbor_program, *build_program;
@@ -69,14 +69,14 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,

   flag=device->init(*ans,nlocal,nall);
   if (flag!=0)
-    return 0;
+    return nullptr;
   if (sizeof(grdtyp)==sizeof(double) && device->double_precision()==false) {
     flag=-15;
-    return 0;
+    return nullptr;
   }
   if (device->ptx_arch()>0.0 && device->ptx_arch()<1.1) {
     flag=-4;
-    return 0;
+    return nullptr;
   }

   ucl_device=device->gpu;
@@ -168,7 +168,7 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,
                                        UCL_READ_WRITE)==UCL_SUCCESS);
   if (!success) {
     flag=-3;
-    return 0;
+    return nullptr;
   }

   error_flag.device.zero();
@@ -342,13 +342,15 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
   vd_brick.update_device(true);
   time_in.stop();

+  int ainum=this->ans->inum();
+  if (ainum==0)
+    return;
+
   time_interp.start();
   // Compute the block size and grid size to keep all cores busy
   int BX=this->block_size();
   int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));

-  int ainum=this->ans->inum();
-
   k_interp.set_size(GX,BX);
   k_interp.run(&atom->x, &atom->q, &ainum, &vd_brick, &d_rho_coeff,
                &_npts_x, &_npts_yx, &_brick_x, &_brick_y, &_brick_z, &_delxinv,
@@ -2,8 +2,8 @@ SHELL = /bin/sh

 # ------ FILES ------

-SRC_FILES = $(wildcard src/ML-PACE/*.cpp)
-SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES))
+SRC_FILES = $(wildcard src/USER-PACE/*.cpp)
+SRC = $(filter-out src/USER-PACE/pair_pace.cpp, $(SRC_FILES))

 # ------ DEFINITIONS ------

@@ -12,7 +12,7 @@ OBJ = $(SRC:.cpp=.o)


 # ------ SETTINGS ------
-CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE
+CXXFLAGS = -O3 -fPIC -Isrc/USER-PACE

 ARCHIVE = ar
 ARCHFLAG = -rc
@@ -1,3 +1,3 @@
-pace_SYSINC =-I../../lib/pace/src/ML-PACE
+pace_SYSINC =-I../../lib/pace/src/USER-PACE
 pace_SYSLIB = -L../../lib/pace/ -lpace
 pace_SYSPATH =
@@ -92,8 +92,12 @@ class numpy_wrapper:
         if dim == LAMMPS_AUTODETECT:
             if dtype in (LAMMPS_INT_2D, LAMMPS_DOUBLE_2D, LAMMPS_INT64_2D):
                 # TODO add other fields
-                if name in ("x", "v", "f", "angmom", "torque", "csforce", "vforce"):
+                if name in ("x", "v", "f", "x0", "omega", "angmom", "torque", "vforce", "vest"):
                     dim = 3
+                elif name == "smd_data_9":
+                    dim = 9
+                elif name == "smd_stress":
+                    dim = 6
                 else:
                     dim = 2
             else:
@@ -386,6 +390,9 @@ class numpy_wrapper:
     # -------------------------------------------------------------------------

     def iarray(self, c_int_type, raw_ptr, nelem, dim=1):
+        if raw_ptr is None:
+            return None
+
         import numpy as np
         np_int_type = self._ctype_to_numpy_int(c_int_type)

@@ -405,7 +412,11 @@ class numpy_wrapper:
     # -------------------------------------------------------------------------

     def darray(self, raw_ptr, nelem, dim=1):
+        if raw_ptr is None:
+            return None
+
         import numpy as np
+
         if dim == 1:
             ptr = cast(raw_ptr, POINTER(c_double * nelem))
         else:
src/.gitignore

@@ -860,8 +860,6 @@
 /fix_ti_rs.h
 /fix_ti_spring.cpp
 /fix_ti_spring.h
-/fix_ttm.cpp
-/fix_ttm.h
 /fix_tune_kspace.cpp
 /fix_tune_kspace.h
 /fix_wall_body_polygon.cpp
@@ -921,6 +919,7 @@
 /improper_ring.h
 /improper_umbrella.cpp
 /improper_umbrella.h
+/interlayer_taper.h
 /kissfft.h
 /lj_sdk_common.h
 /math_complex.h
@@ -935,7 +934,6 @@
 /msm_cg.h
 /neb.cpp
 /neb.h
-
 /pair_adp.cpp
 /pair_adp.h
 /pair_agni.cpp
@@ -996,6 +994,8 @@
 /pair_cosine_squared.h
 /pair_coul_diel.cpp
 /pair_coul_diel.h
+/pair_coul_exclude.cpp
+/pair_coul_exclude.h
 /pair_coul_long.cpp
 /pair_coul_long.h
 /pair_coul_msm.cpp
@@ -1433,6 +1433,10 @@
 /fix_srp.h
 /fix_tfmc.cpp
 /fix_tfmc.h
+/fix_ttm.cpp
+/fix_ttm.h
+/fix_ttm_grid.cpp
+/fix_ttm_grid.h
 /fix_ttm_mod.cpp
 /fix_ttm_mod.h
 /pair_born_coul_long_cs.cpp
@@ -301,8 +301,7 @@ void PairLineLJ::compute(int eflag, int vflag)
         }
       }

-      if (evflag) ev_tally(i,j,nlocal,newton_pair,
-                           evdwl,0.0,fpair,delx,dely,delz);
+      if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
     }
   }

@@ -375,8 +375,7 @@ void PairTriLJ::compute(int eflag, int vflag)
         }
       }

-      if (evflag) ev_tally(i,j,nlocal,newton_pair,
-                           evdwl,0.0,fpair,delx,dely,delz);
+      if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
     }
   }

@@ -39,7 +39,6 @@
 #include <cstring>
 #include <map>
 #include <utility>
-#include <vector>

 using namespace LAMMPS_NS;

@@ -37,7 +37,6 @@

 #include <cmath>
 #include <cstring>
-#include <vector>

 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -234,9 +233,7 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
       iarg += 2;
     } else if (strcmp(arg[iarg],"mtk") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");
-      if (strcmp(arg[iarg+1],"yes") == 0) mtk_flag = 1;
-      else if (strcmp(arg[iarg+1],"no") == 0) mtk_flag = 0;
-      else error->all(FLERR,"Illegal fix bocs command");
+      mtk_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
       iarg += 2;
     } else if (strcmp(arg[iarg],"tloop") == 0) {
       if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");
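The utils::logical() call above centralizes what the removed lines did by hand: map a yes/no style argument to 1/0 and reject anything else with an error. A self-contained sketch of that behaviour (hypothetical helper, not the LAMMPS implementation):

    #include <stdexcept>
    #include <string>

    // Accept a boolean-style token and return 1 or 0; anything else is an error.
    int parse_logical(const std::string &arg) {
      if (arg == "yes" || arg == "on"  || arg == "true"  || arg == "1") return 1;
      if (arg == "no"  || arg == "off" || arg == "false" || arg == "0") return 0;
      throw std::invalid_argument("expected a yes/no argument, got: " + arg);
    }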
@@ -337,8 +337,7 @@ void FixWallBodyPolygon::post_force(int /*vflag*/)

       num_contacts = 0;
       facc[0] = facc[1] = facc[2] = 0;
-      vertex_against_wall(i, wall_pos, x, f, torque, side,
-                          contact_list, num_contacts, facc);
+      vertex_against_wall(i, wall_pos, x, f, torque, side, contact_list, num_contacts, facc);

       if (num_contacts >= 2) {

@@ -324,8 +324,7 @@ void PairBodyNparticle::compute(int eflag, int vflag)
         }
       }

-      if (evflag) ev_tally(i,j,nlocal,newton_pair,
-                           evdwl,0.0,fpair,delx,dely,delz);
+      if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
     }
   }

@@ -207,8 +207,7 @@ void PairBodyRoundedPolygon::compute(int eflag, int vflag)
       if (r > radi + radj + cut_inner) continue;

       if (npi == 1 && npj == 1) {
-        sphere_against_sphere(i, j, delx, dely, delz, rsq,
-                              k_nij, k_naij, x, v, f, evflag);
+        sphere_against_sphere(i, j, delx, dely, delz, rsq, k_nij, k_naij, x, v, f, evflag);
         continue;
       }

@@ -20,17 +20,11 @@
 #include "fix_brownian.h"

 #include "atom.h"
-#include "comm.h"
 #include "domain.h"
 #include "error.h"
-#include "force.h"
-#include "math_extra.h"
-#include "memory.h"
 #include "random_mars.h"
-#include "update.h"

 #include <cmath>
-#include <cstring>

 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -21,17 +21,10 @@

 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
-#include "comm.h"
 #include "domain.h"
 #include "error.h"
-#include "force.h"
 #include "math_extra.h"
-#include "memory.h"
 #include "random_mars.h"
-#include "update.h"
-
-#include <cmath>
-#include <cstring>

 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -17,15 +17,12 @@
    Contributing author: Sam Cameron (University of Bristol)
 ------------------------------------------------------------------------- */

-#include "fix_brownian.h"
+#include "fix_brownian_base.h"

-#include "atom.h"
 #include "comm.h"
 #include "domain.h"
 #include "error.h"
 #include "force.h"
-#include "math_extra.h"
-#include "memory.h"
 #include "random_mars.h"
 #include "update.h"

@@ -20,17 +20,12 @@
 #include "fix_brownian_sphere.h"

 #include "atom.h"
-#include "comm.h"
 #include "domain.h"
 #include "error.h"
-#include "force.h"
 #include "math_extra.h"
-#include "memory.h"
 #include "random_mars.h"
-#include "update.h"

 #include <cmath>
-#include <cstring>

 using namespace LAMMPS_NS;
 using namespace FixConst;
@@ -23,14 +23,11 @@

 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
-#include "comm.h"
 #include "domain.h"
 #include "error.h"
-#include "force.h"
 #include "math_extra.h"
-#include "memory.h"
-#include "update.h"

+#include <cmath>
 #include <cstring>

 using namespace LAMMPS_NS;
@@ -14,7 +14,6 @@
 #include "atom_vec_oxdna.h"

 #include "atom.h"
-#include "comm.h"
 #include "error.h"
 #include "force.h"

@@ -19,7 +19,6 @@

 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
-#include "atom_vec_oxdna.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
@@ -30,7 +29,6 @@

 #include <cmath>
 #include <cstring>
-#include <utility>

 using namespace LAMMPS_NS;
 using namespace MFOxdna;
@@ -17,8 +17,6 @@

 #include "pair_oxrna2_excv.h"

-#include <cstring>
-
 using namespace LAMMPS_NS;

 /* ----------------------------------------------------------------------
@@ -19,7 +19,6 @@

 #include "atom.h"
 #include "atom_vec_ellipsoid.h"
-#include "atom_vec_oxdna.h"
 #include "comm.h"
 #include "error.h"
 #include "force.h"
@@ -31,7 +30,6 @@

 #include <cmath>
 #include <cstring>
-#include <utility>

 using namespace LAMMPS_NS;
 using namespace MathConst;
@@ -18,13 +18,15 @@
 ------------------------------------------------------------------------- */

 #include "pair_lj_sdk_coul_msm.h"
-#include <cmath>
-#include <cstring>
+
 #include "atom.h"
+#include "error.h"
 #include "force.h"
 #include "kspace.h"
 #include "neigh_list.h"
-#include "error.h"
+
+#include <cmath>
+#include <cstring>

 #include "lj_sdk_common.h"

@@ -25,7 +25,6 @@
 #include "math_const.h"
 #include "memory.h"
 #include "neighbor.h"
-#include "update.h"

 #include <cmath>
 #include <cstring>
@@ -25,7 +25,6 @@
 #include "math_const.h"
 #include "memory.h"
 #include "neighbor.h"
-#include "update.h"

 #include <cmath>
 #include <cstring>
@@ -397,8 +397,7 @@ void PairLJClass2::compute_outer(int eflag, int vflag)
           fpair = factor_lj*forcelj*r2inv;
         }

-        if (evflag) ev_tally(i,j,nlocal,newton_pair,
-                             evdwl,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
       }
     }
   }
@@ -18,10 +18,11 @@

 #include "fix_wall_colloid.h"

-#include <cmath>
 #include "atom.h"
 #include "error.h"

+#include <cmath>
+
 using namespace LAMMPS_NS;
 using namespace FixConst;

@@ -10,13 +10,6 @@


 #include "colvarproxy_lammps.h"
-#include <mpi.h>
-#include <sys/stat.h>
-#include <cerrno>
-#include <cstring>
-#include <iostream>
-#include <memory>
-#include <string>

 #include "lammps.h"
 #include "error.h"
@@ -26,6 +19,12 @@
 #include "colvarmodule.h"
 #include "colvarproxy.h"

+#include <sys/stat.h>
+#include <cerrno>
+#include <cstring>
+#include <iostream>
+#include <memory>
+
 #define HASH_FAIL -1

 ////////////////////////////////////////////////////////////////////////
@@ -12,11 +12,6 @@

 #include "colvarproxy_lammps_version.h" // IWYU pragma: export

-#include <cstddef>
-#include <mpi.h>
-#include <string>
-#include <vector>
-
 #include "colvarmodule.h"
 #include "colvarproxy.h"
 #include "colvartypes.h"