Merge branch 'master' into acks2_release

This commit is contained in:
Axel Kohlmeyer
2021-09-30 00:26:25 -04:00
568 changed files with 4363 additions and 4091 deletions

View File

@ -1,6 +1,6 @@
message(STATUS "Downloading and building OpenCL loader library") message(STATUS "Downloading and building OpenCL loader library")
set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2021.06.30.tar.gz" CACHE STRING "URL for OpenCL loader tarball") set(OPENCL_LOADER_URL "${LAMMPS_THIRDPARTY_URL}/opencl-loader-2021.09.18.tar.gz" CACHE STRING "URL for OpenCL loader tarball")
set(OPENCL_LOADER_MD5 "f9e55dd550cfbf77f46507adf7cb8fd2" CACHE STRING "MD5 checksum of OpenCL loader tarball") set(OPENCL_LOADER_MD5 "3b3882627964bd02e5c3b02065daac3c" CACHE STRING "MD5 checksum of OpenCL loader tarball")
mark_as_advanced(OPENCL_LOADER_URL) mark_as_advanced(OPENCL_LOADER_URL)
mark_as_advanced(OPENCL_LOADER_MD5) mark_as_advanced(OPENCL_LOADER_MD5)

View File

@ -71,6 +71,11 @@ if(GPU_API STREQUAL "CUDA")
# build arch/gencode commands for nvcc based on CUDA toolkit version and use choice # build arch/gencode commands for nvcc based on CUDA toolkit version and use choice
# --arch translates directly instead of JIT, so this should be for the preferred or most common architecture # --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}") set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH}")
# apply the following to build "fat" CUDA binaries only for known CUDA toolkits
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
message(WARNING "Untested CUDA Toolkit version. Use at your own risk")
else()
# Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0 # Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0")) if((CUDA_VERSION VERSION_GREATER_EQUAL "3.2") AND (CUDA_VERSION VERSION_LESS "9.0"))
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20] ") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_20,code=[sm_20,compute_20] ")
@ -107,8 +112,6 @@ if(GPU_API STREQUAL "CUDA")
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1") if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]") string(APPEND GPU_CUDA_GENCODE " -gencode arch=compute_86,code=[sm_86,compute_86]")
endif() endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL "12.0")
message(WARNING "Unsupported CUDA version. Use at your own risk.")
endif() endif()
cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC} cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS ${CUDA_REQUEST_PIC}
@ -214,13 +217,20 @@ elseif(GPU_API STREQUAL "OPENCL")
elseif(GPU_API STREQUAL "HIP") elseif(GPU_API STREQUAL "HIP")
if(NOT DEFINED HIP_PATH) if(NOT DEFINED HIP_PATH)
if(NOT DEFINED ENV{HIP_PATH}) if(NOT DEFINED ENV{HIP_PATH})
set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation")
else() else()
set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation")
endif() endif()
endif() endif()
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) if(NOT DEFINED ROCM_PATH)
find_package(HIP REQUIRED) if(NOT DEFINED ENV{ROCM_PATH})
set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to ROCm installation")
else()
set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to ROCm installation")
endif()
endif()
list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} ${ROCM_PATH})
find_package(hip REQUIRED)
option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON) option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON)
if(NOT DEFINED HIP_PLATFORM) if(NOT DEFINED HIP_PLATFORM)
@ -322,10 +332,11 @@ elseif(GPU_API STREQUAL "HIP")
set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp") set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp")
hip_add_library(gpu STATIC ${GPU_LIB_SOURCES}) add_library(gpu STATIC ${GPU_LIB_SOURCES})
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu) target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu)
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT) target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
target_compile_definitions(gpu PRIVATE -DUSE_HIP) target_compile_definitions(gpu PRIVATE -DUSE_HIP)
target_link_libraries(gpu PRIVATE hip::host)
if(HIP_USE_DEVICE_SORT) if(HIP_USE_DEVICE_SORT)
# add hipCUB # add hipCUB
@ -374,8 +385,9 @@ elseif(GPU_API STREQUAL "HIP")
endif() endif()
endif() endif()
hip_add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP) target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP)
target_link_libraries(hip_get_devices hip::host)
if(HIP_PLATFORM STREQUAL "nvcc") if(HIP_PLATFORM STREQUAL "nvcc")
target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__) target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__)

View File

@ -1,6 +1,8 @@
######################################################################## ########################################################################
# As of version 3.3.0 Kokkos requires C++14 # As of version 3.3.0 Kokkos requires C++14
if(CMAKE_CXX_STANDARD LESS 14)
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 14)
endif()
######################################################################## ########################################################################
# consistency checks and Kokkos options/settings required by LAMMPS # consistency checks and Kokkos options/settings required by LAMMPS
if(Kokkos_ENABLE_CUDA) if(Kokkos_ENABLE_CUDA)

View File

@ -19,6 +19,14 @@ if(DOWNLOAD_LATTE)
set(LATTE_MD5 "820e73a457ced178c08c71389a385de7" CACHE STRING "MD5 checksum of LATTE tarball") set(LATTE_MD5 "820e73a457ced178c08c71389a385de7" CACHE STRING "MD5 checksum of LATTE tarball")
mark_as_advanced(LATTE_URL) mark_as_advanced(LATTE_URL)
mark_as_advanced(LATTE_MD5) mark_as_advanced(LATTE_MD5)
# CMake cannot pass BLAS or LAPACK library variable to external project if they are a list
list(LENGTH BLAS_LIBRARIES NUM_BLAS)
list(LENGTH LAPACK_LIBRARIES NUM_LAPACK)
if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1))
message(FATAL_ERROR "Cannot compile downloaded LATTE library due to a technical limitation")
endif()
include(ExternalProject) include(ExternalProject)
ExternalProject_Add(latte_build ExternalProject_Add(latte_build
URL ${LATTE_URL} URL ${LATTE_URL}

View File

@ -45,12 +45,12 @@ if(DOWNLOAD_N2P2)
# get path to MPI include directory when cross-compiling to windows # get path to MPI include directory when cross-compiling to windows
if((CMAKE_SYSTEM_NAME STREQUAL Windows) AND CMAKE_CROSSCOMPILING) if((CMAKE_SYSTEM_NAME STREQUAL Windows) AND CMAKE_CROSSCOMPILING)
get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES) get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1") set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER}) set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif() endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES) get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES)
set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1") set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}")
set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER}) set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER})
endif() endif()
endif() endif()
@ -69,6 +69,12 @@ if(DOWNLOAD_N2P2)
# echo final flag for debugging # echo final flag for debugging
message(STATUS "N2P2 BUILD OPTIONS: ${N2P2_BUILD_OPTIONS}") message(STATUS "N2P2 BUILD OPTIONS: ${N2P2_BUILD_OPTIONS}")
# must have "sed" command to compile n2p2 library (for now)
find_program(HAVE_SED sed)
if(NOT HAVE_SED)
message(FATAL_ERROR "Must have 'sed' program installed to compile 'n2p2' library for ML-HDNNP package")
endif()
# download and compile n2p2 library. must patch MPI calls in LAMMPS interface to accommodate MPI-2 (e.g. for cross-compiling)
include(ExternalProject) include(ExternalProject)
ExternalProject_Add(n2p2_build ExternalProject_Add(n2p2_build

View File

@ -38,7 +38,7 @@ if(DOWNLOAD_QUIP)
set(temp "${temp}HAVE_LOCAL_E_MIX=0\nHAVE_QC=0\nHAVE_GAP=1\nHAVE_DESCRIPTORS_NONCOMMERCIAL=1\n") set(temp "${temp}HAVE_LOCAL_E_MIX=0\nHAVE_QC=0\nHAVE_GAP=1\nHAVE_DESCRIPTORS_NONCOMMERCIAL=1\n")
set(temp "${temp}HAVE_TURBOGAP=0\nHAVE_QR=1\nHAVE_THIRDPARTY=0\nHAVE_FX=0\nHAVE_SCME=0\nHAVE_MTP=0\n") set(temp "${temp}HAVE_TURBOGAP=0\nHAVE_QR=1\nHAVE_THIRDPARTY=0\nHAVE_FX=0\nHAVE_SCME=0\nHAVE_MTP=0\n")
set(temp "${temp}HAVE_MBD=0\nHAVE_TTM_NF=0\nHAVE_CH4=0\nHAVE_NETCDF4=0\nHAVE_MDCORE=0\nHAVE_ASAP=0\n") set(temp "${temp}HAVE_MBD=0\nHAVE_TTM_NF=0\nHAVE_CH4=0\nHAVE_NETCDF4=0\nHAVE_MDCORE=0\nHAVE_ASAP=0\n")
set(temp "${temp}HAVE_CGAL=0\nHAVE_METIS=0\nHAVE_LMTO_TBE=0\n") set(temp "${temp}HAVE_CGAL=0\nHAVE_METIS=0\nHAVE_LMTO_TBE=0\nHAVE_SCALAPACK=0\n")
file(WRITE ${CMAKE_BINARY_DIR}/quip.config "${temp}") file(WRITE ${CMAKE_BINARY_DIR}/quip.config "${temp}")
message(STATUS "QUIP download via git requested - we will build our own") message(STATUS "QUIP download via git requested - we will build our own")
@ -50,7 +50,7 @@ if(DOWNLOAD_QUIP)
GIT_TAG origin/public GIT_TAG origin/public
GIT_SHALLOW YES GIT_SHALLOW YES
GIT_PROGRESS YES GIT_PROGRESS YES
PATCH_COMMAND cp ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps
CONFIGURE_COMMAND env QUIP_ARCH=lammps make config CONFIGURE_COMMAND env QUIP_ARCH=lammps make config
BUILD_COMMAND env QUIP_ARCH=lammps make libquip BUILD_COMMAND env QUIP_ARCH=lammps make libquip
INSTALL_COMMAND "" INSTALL_COMMAND ""

View File

@ -12,6 +12,13 @@ if(DOWNLOAD_MSCG)
mark_as_advanced(MSCG_URL) mark_as_advanced(MSCG_URL)
mark_as_advanced(MSCG_MD5) mark_as_advanced(MSCG_MD5)
# CMake cannot pass BLAS or LAPACK library variable to external project if they are a list
list(LENGTH BLAS_LIBRARIES NUM_BLAS)
list(LENGTH LAPACK_LIBRARIES NUM_LAPACK)
if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1))
message(FATAL_ERROR "Cannot compile downloaded MSCG library due to a technical limitation")
endif()
include(ExternalProject) include(ExternalProject)
ExternalProject_Add(mscg_build ExternalProject_Add(mscg_build
URL ${MSCG_URL} URL ${MSCG_URL}

View File

@ -23,6 +23,11 @@ if(DOWNLOAD_SCAFACOS)
file(DOWNLOAD ${LAMMPS_THIRDPARTY_URL}/scafacos-1.0.1-fix.diff ${CMAKE_CURRENT_BINARY_DIR}/scafacos-1.0.1.fix.diff file(DOWNLOAD ${LAMMPS_THIRDPARTY_URL}/scafacos-1.0.1-fix.diff ${CMAKE_CURRENT_BINARY_DIR}/scafacos-1.0.1.fix.diff
EXPECTED_HASH MD5=4baa1333bb28fcce102d505e1992d032) EXPECTED_HASH MD5=4baa1333bb28fcce102d505e1992d032)
find_program(HAVE_PATCH patch)
if(NOT HAVE_PATCH)
message(FATAL_ERROR "The 'patch' program is required to build the ScaFaCoS library")
endif()
include(ExternalProject) include(ExternalProject)
ExternalProject_Add(scafacos_build ExternalProject_Add(scafacos_build
URL ${SCAFACOS_URL} URL ${SCAFACOS_URL}

View File

@ -26,6 +26,11 @@ if(DOWNLOAD_VORO)
set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS}) set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS})
endif() endif()
find_program(HAVE_PATCH patch)
if(NOT HAVE_PATCH)
message(FATAL_ERROR "The 'patch' program is required to build the voro++ library")
endif()
ExternalProject_Add(voro_build ExternalProject_Add(voro_build
URL ${VORO_URL} URL ${VORO_URL}
URL_MD5 ${VORO_MD5} URL_MD5 ${VORO_MD5}

View File

@ -1,7 +1,28 @@
[ [
{ include: [ "<bits/types/struct_rusage.h>", private, "<sys/resource.h>", public ] },
{ include: [ "<bits/exception.h>", public, "<exception>", public ] },
{ include: [ "@<Eigen/.*>", private, "<Eigen/Eigen>", public ] }, { include: [ "@<Eigen/.*>", private, "<Eigen/Eigen>", public ] },
{ include: [ "@<gtest/.*>", private, "\"gtest/gtest.h\"", public ] }, { include: [ "@<gtest/.*>", private, "\"gtest/gtest.h\"", public ] },
{ include: [ "@<gmock/.*>", private, "\"gmock/gmock.h\"", public ] }, { include: [ "@<gmock/.*>", private, "\"gmock/gmock.h\"", public ] },
{ include: [ "@<gmock/.*>", private, "\"gmock/gmock.h\"", public ] },
{ include: [ "@<(cell|c_loops|container).hh>", private, "<voro++.hh>", public ] },
{ include: [ "@\"atom_vec_.*.h\"", public, "\"style_atom.h\"", public ] },
{ include: [ "@\"body_.*.h\"", public, "\"style_body.h\"", public ] },
{ include: [ "@\"compute_.*.h\"", public, "\"style_compute.h\"", public ] },
{ include: [ "@\"fix_.*.h\"", public, "\"style_fix.h\"", public ] },
{ include: [ "@\"dump_.*.h\"", public, "\"style_dump.h\"", public ] },
{ include: [ "@\"min_.*.h\"", public, "\"style_minimize.h\"", public ] },
{ include: [ "@\"reader_.*.h\"", public, "\"style_reader.h\"", public ] },
{ include: [ "@\"region_.*.h\"", public, "\"style_region.h\"", public ] },
{ include: [ "@\"pair_.*.h\"", public, "\"style_pair.h\"", public ] },
{ include: [ "@\"angle_.*.h\"", public, "\"style_angle.h\"", public ] },
{ include: [ "@\"bond_.*.h\"", public, "\"style_bond.h\"", public ] },
{ include: [ "@\"dihedral_.*.h\"", public, "\"style_dihedral.h\"", public ] },
{ include: [ "@\"improper_.*.h\"", public, "\"style_improper.h\"", public ] },
{ include: [ "@\"kspace_.*.h\"", public, "\"style_kspace.h\"", public ] },
{ include: [ "@\"nbin_.*.h\"", public, "\"style_nbin.h\"", public ] },
{ include: [ "@\"npair_.*.h\"", public, "\"style_npair.h\"", public ] },
{ include: [ "@\"nstenci_.*.h\"", public, "\"style_nstencil.h\"", public ] },
{ include: [ "@\"ntopo_.*.h\"", public, "\"style_ntopo.h\"", public ] },
{ include: [ "<float.h>", public, "<cfloat>", public ] },
{ include: [ "<limits.h>", public, "<climits>", public ] },
{ include: [ "<bits/types/struct_tm.h>", private, "<ctime>", public ] },
] ]

View File

@ -0,0 +1,30 @@
# preset that will enable hip (clang/clang++) with support for MPI and OpenMP (on Linux boxes)
# prefer flang over gfortran, if available
find_program(CLANG_FORTRAN NAMES flang gfortran f95)
set(ENV{OMPI_FC} ${CLANG_FORTRAN})
set(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "" FORCE)
set(CMAKE_C_COMPILER "hipcc" CACHE STRING "" FORCE)
set(CMAKE_Fortran_COMPILER ${CLANG_FORTRAN} CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_DEBUG "-Wall -Wextra -g -std=f2003" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -DNDEBUG -std=f2003" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
set(MPI_CXX "hipcc" CACHE STRING "" FORCE)
set(MPI_CXX_COMPILER "mpicxx" CACHE STRING "" FORCE)
unset(HAVE_OMP_H_INCLUDE CACHE)
set(OpenMP_C "hipcc" CACHE STRING "" FORCE)
set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" FORCE)
set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_CXX "hipcc" CACHE STRING "" FORCE)
set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" FORCE)
set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE)
set(OpenMP_omp_LIBRARY "libomp.so" CACHE PATH "" FORCE)

View File

@ -24,6 +24,7 @@ set(ALL_PACKAGES
DRUDE DRUDE
EFF EFF
EXTRA-COMPUTE EXTRA-COMPUTE
EXTRA-DUMP
EXTRA-FIX EXTRA-FIX
EXTRA-MOLECULE EXTRA-MOLECULE
EXTRA-PAIR EXTRA-PAIR

View File

@ -1,4 +1,4 @@
.TH LAMMPS "31 August 2021" "2021-08-31" .TH LAMMPS "29 September 2021" "2021-09-29"
.SH NAME .SH NAME
.B LAMMPS .B LAMMPS
\- Molecular Dynamics Simulator. \- Molecular Dynamics Simulator.

View File

@ -58,13 +58,16 @@ Report missing and unneeded '#include' statements (CMake only)
The conventions for how and when to use and order include statements in The conventions for how and when to use and order include statements in
LAMMPS are documented in :doc:`Modify_style`. To assist with following LAMMPS are documented in :doc:`Modify_style`. To assist with following
these conventions one can use the `Include What You Use tool <https://include-what-you-use.org/>`_. these conventions one can use the `Include What You Use tool <https://include-what-you-use.org/>`_.
This is still under development and for large and complex projects like LAMMPS This tool is still under development and for large and complex projects like LAMMPS
there are some false positives, so suggested changes need to be verified manually. there are some false positives, so suggested changes need to be verified manually.
It is recommended to use at least version 0.14, which has much fewer incorrect It is recommended to use at least version 0.16, which has much fewer incorrect
reports than earlier versions. reports than earlier versions. To install the IWYU toolkit, you need to have
the clang compiler **and** its development package installed. Download the IWYU
version that matches the version of the clang compiler, configure, build, and
install it.
The necessary steps to generate the report can be enabled via a The necessary steps to generate the report can be enabled via a CMake variable
CMake variable: during CMake configuration.
.. code-block:: bash .. code-block:: bash

View File

@ -71,7 +71,8 @@ LAMMPS can use them if they are available on your system.
-D FFTW3_INCLUDE_DIR=path # path to FFTW3 include files -D FFTW3_INCLUDE_DIR=path # path to FFTW3 include files
-D FFTW3_LIBRARY=path # path to FFTW3 libraries -D FFTW3_LIBRARY=path # path to FFTW3 libraries
-D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries -D FFTW3_OMP_LIBRARY=path # path to FFTW3 OpenMP wrapper libraries
-D FFT_FFTW_THREADS=on # enable using OpenMP threaded FFTW3 libraries
-D MKL_INCLUDE_DIR=path # ditto for Intel MKL library -D MKL_INCLUDE_DIR=path # ditto for Intel MKL library
-D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries -D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries
-D MKL_LIBRARY=path # path to MKL libraries -D MKL_LIBRARY=path # path to MKL libraries

View File

@ -11,6 +11,7 @@ of time and requests from the LAMMPS user community.
:maxdepth: 1 :maxdepth: 1
Developer_org Developer_org
Developer_parallel
Developer_flow Developer_flow
Developer_write Developer_write
Developer_notes Developer_notes

View File

@ -0,0 +1,120 @@
Communication
^^^^^^^^^^^^^
Following the partitioning scheme in use, all per-atom data (atom IDs,
positions, velocities, types, etc.) is distributed across the MPI
processes, which allows LAMMPS to handle very large systems provided a
correspondingly large number of MPI processes is used. To compute the
short-range interactions, MPI processes need access not only to data of
atoms they "own" but also to information about atoms from neighboring
sub-domains, referred to in LAMMPS as "ghost" atoms. These are copies
of nearby atoms that store the required per-atom data for atoms up to
the communication cutoff distance away. The green
dashed-line boxes in the :ref:`domain-decomposition` figure illustrate
the extended ghost-atom sub-domain for one processor.
This approach is also used to implement periodic boundary
conditions: atoms that lie within the cutoff distance across a periodic
boundary are also stored as ghost atoms and taken from the periodic
replication of the sub-domain, which may be the same sub-domain, e.g. if
running in serial. As a consequence of this, force computation in
LAMMPS is not subject to minimum image conventions and thus cutoffs may
be larger than half the simulation domain.
.. _ghost-atom-comm:
.. figure:: img/ghost-comm.png
:align: center
ghost atom communication
This figure shows the ghost atom communication patterns between
sub-domains for "brick" (left) and "tiled" communication styles for
2d simulations. The numbers indicate MPI process ranks. Here the
sub-domains are drawn spatially separated for clarity. The
dashed-line box is the extended sub-domain of processor 0 which
includes its ghost atoms. The red- and blue-shaded boxes are the
regions of communicated ghost atoms.
Efficient communication patterns are needed to update the "ghost" atom
data, since that needs to be done at every MD time step or minimization
step. The diagrams of the :ref:`ghost-atom-comm` figure illustrate how ghost
atom communication is performed in two stages for a 2d simulation (three
in 3d) for both a regular and irregular partitioning of the simulation
box. For the regular case (left) atoms are exchanged first in the
*x*-direction, then in *y*, with four neighbors in the grid of processor
sub-domains.
In the *x* stage, processor ranks 1 and 2 send owned atoms in their
red-shaded regions to rank 0 (and vice versa). Then in the *y* stage,
ranks 3 and 4 send atoms in their blue-shaded regions to rank 0, which
includes ghost atoms they received in the *x* stage. Rank 0 thus
acquires all its ghost atoms; atoms in the solid blue corner regions
are communicated twice before rank 0 receives them.
For the irregular case (right) the two stages are similar, but a
processor can have more than one neighbor in each direction. In the
*x* stage, MPI ranks 1,2,3 send owned atoms in their red-shaded regions to
rank 0 (and vice versa). These include only atoms between the lower
and upper *y*-boundary of rank 0's sub-domain. In the *y* stage, ranks
4,5,6 send atoms in their blue-shaded regions to rank 0. This may
include ghost atoms they received in the *x* stage, but only if they
are needed by rank 0 to fill its extended ghost atom regions in the
+/-*y* directions (blue rectangles). Thus in this case, ranks 5 and
6 do not include ghost atoms they received from each other (in the *x*
stage) in the atoms they send to rank 0. The key point is that while
the pattern of communication is more complex in the irregular
partitioning case, it can still proceed in two stages (three in 3d)
via atom exchanges with only neighboring processors.
When attributes of owned atoms are sent to neighboring processors to
become attributes of their ghost atoms, LAMMPS calls this a "forward"
communication. On timesteps when atoms migrate to new owning processors
and neighbor lists are rebuilt, each processor creates a list of its
owned atoms which are ghost atoms in each of its neighbor processors.
These lists are used to pack per-atom coordinates (for example) into
message buffers in subsequent steps until the next reneighboring.
A "reverse" communication is when computed ghost atom attributes are
sent back to the processor that owns the atom. This is used (for
example) to sum partial forces on ghost atoms to the complete force on
owned atoms. The order of the two stages described in the
:ref:`ghost-atom-comm` figure is inverted and the same lists of atoms
are used to pack and unpack message buffers with per-atom forces. When
a received buffer is unpacked, the ghost forces are summed to owned atom
forces. As in forward communication, forces on atoms in the four blue
corners of the diagrams are sent, received, and summed twice (once at
each stage) before owning processors have the full force.
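As an illustration, here is a minimal, self-contained sketch of how such a
forward communication can be organized with MPI. It is not the actual code
of the LAMMPS ``Comm`` classes; the names (``forward_comm``, ``sendlist``,
``firstghost``, etc.) are hypothetical, and the example assumes a single
exchange partner per direction and plain coordinate data.

.. code-block:: c++

   #include <mpi.h>
   #include <vector>

   // Forward communication of positions (3 doubles per atom) with one
   // neighboring rank: pack owned atoms selected by a pre-computed send
   // list, exchange buffers, and unpack the received atoms as ghost
   // atoms starting at index "firstghost" of the position array.
   void forward_comm(std::vector<double> &x, const std::vector<int> &sendlist,
                     int firstghost, int nrecv, int sendproc, int recvproc,
                     MPI_Comm world)
   {
     std::vector<double> sendbuf, recvbuf(3 * nrecv);
     sendbuf.reserve(3 * sendlist.size());
     for (int i : sendlist) {                  // pack owned atom coordinates
       sendbuf.push_back(x[3 * i]);
       sendbuf.push_back(x[3 * i + 1]);
       sendbuf.push_back(x[3 * i + 2]);
     }
     MPI_Request request;
     MPI_Irecv(recvbuf.data(), 3 * nrecv, MPI_DOUBLE, recvproc, 0, world, &request);
     MPI_Send(sendbuf.data(), (int) sendbuf.size(), MPI_DOUBLE, sendproc, 0, world);
     MPI_Wait(&request, MPI_STATUS_IGNORE);
     for (int m = 0; m < nrecv; ++m) {         // unpack into ghost atom storage
       x[3 * (firstghost + m)]     = recvbuf[3 * m];
       x[3 * (firstghost + m) + 1] = recvbuf[3 * m + 1];
       x[3 * (firstghost + m) + 2] = recvbuf[3 * m + 2];
     }
   }

A reverse communication would run the same exchange in the opposite
direction with force data and sum (rather than copy) the unpacked values
into the owned-atom force array.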
These two operations are used many places within LAMMPS aside from
exchange of coordinates and forces, for example by manybody potentials
to share intermediate per-atom values, or by rigid-body integrators to
enable each atom in a body to access body properties. Here are
additional details about how these communication operations are
performed in LAMMPS:
- When exchanging data with different processors, forward and reverse
communication is done using ``MPI_Send()`` and ``MPI_Irecv()`` calls.
If a processor is "exchanging" atoms with itself, only the pack and
unpack operations are performed, e.g. to create ghost atoms across
periodic boundaries when running on a single processor.
- For forward communication of owned atom coordinates, periodic box
lengths are added and subtracted when the receiving processor is
across a periodic boundary from the sender. There is then no need to
apply a minimum image convention when calculating distances between
atom pairs when building neighbor lists or computing forces.
- The cutoff distance for exchanging ghost atoms is typically equal to
the neighbor cutoff, but it can also be chosen to be longer if needed,
e.g. half the diameter of a rigid body composed of multiple atoms or
over 3x the length of a stretched bond for dihedral interactions. It
can also exceed the periodic box size. For the regular communication
pattern (left), if the cutoff distance extends beyond a neighbor
processor's sub-domain, then multiple exchanges are performed in the
same direction. Each exchange is with the same neighbor processor,
but buffers are packed/unpacked using a different list of atoms. For
forward communication, in the first exchange a processor sends only
owned atoms. In subsequent exchanges, it sends ghost atoms received
in previous exchanges. For the irregular pattern (right) overlaps of
a processor's extended ghost-atom sub-domain with all other processors
in each dimension are detected.

View File

@ -0,0 +1,188 @@
Long-range interactions
^^^^^^^^^^^^^^^^^^^^^^^
For charged systems, LAMMPS can compute long-range Coulombic
interactions via the FFT-based particle-particle/particle-mesh (PPPM)
method implemented in :doc:`kspace style pppm and its variants
<kspace_style>`. For that, Coulombic interactions are partitioned into
short- and long-range components. The short-range portion is computed
in real space as a loop over pairs of charges within a cutoff distance,
using neighbor lists. The long-range portion is computed in reciprocal
space using a kspace style. For the PPPM implementation the simulation
cell is overlaid with a regular FFT grid in 3d. It proceeds in several stages:
a) each atom's point charge is interpolated to nearby FFT grid points,
b) a forward 3d FFT is performed,
c) a convolution operation is performed in reciprocal space,
d) one or more inverse 3d FFTs are performed, and
e) electric field values from grid points near each atom are interpolated to compute
its forces.
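To make stage a) more concrete, the sketch below shows the idea of charge
interpolation using the lowest-order (cloud-in-cell, i.e. trilinear)
weights on an orthogonal box with a global grid. This only illustrates the
principle; the actual PPPM implementation uses higher-order stencils,
distributed grid bricks, and reduced coordinates.

.. code-block:: c++

   #include <cmath>
   #include <vector>

   // Spread one point charge q at position (x,y,z) onto the 8 surrounding
   // points of a regular nx x ny x nz grid with spacing h (orthogonal box
   // with origin at 0 and periodic boundaries), using trilinear weights.
   void assign_charge(std::vector<double> &rho, int nx, int ny, int nz,
                      double h, double q, double x, double y, double z)
   {
     int ix = (int) std::floor(x / h);
     int iy = (int) std::floor(y / h);
     int iz = (int) std::floor(z / h);
     double fx = x / h - ix, fy = y / h - iy, fz = z / h - iz;
     for (int dz = 0; dz < 2; ++dz)
       for (int dy = 0; dy < 2; ++dy)
         for (int dx = 0; dx < 2; ++dx) {
           double w = (dx ? fx : 1.0 - fx) * (dy ? fy : 1.0 - fy) * (dz ? fz : 1.0 - fz);
           int idx = ((ix + dx) % nx) + nx * (((iy + dy) % ny) + ny * ((iz + dz) % nz));
           rho[idx] += w * q;   // wrap around periodically in each dimension
         }
   }

The electric field interpolation in stage e) uses the same weights in
reverse, gathering grid values back to each atom.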
For any of the spatial-decomposition partitioning schemes each processor
owns the brick-shaped portion of FFT grid points contained within its
sub-domain. The two interpolation operations use a stencil of grid
points surrounding each atom. To accommodate the stencil size, each
processor also stores a few layers of ghost grid points surrounding its
brick. Forward and reverse communication of grid point values is
performed similar to the corresponding :doc:`atom data communication
<Developer_par_comm>`. In this case, electric field values on owned
grid points are sent to neighboring processors to become ghost point
values. Likewise charge values on ghost points are sent and summed to
values on owned points.
For triclinic simulation boxes, the FFT grid planes are parallel to
the box faces, but the mapping of charge and electric field values
to/from grid points is done in reduced coordinates where the tilted
box is conceptually a unit cube, so that the stencil and FFT
operations are unchanged. However the FFT grid size required for a
given accuracy is larger for triclinic domains than it is for
orthogonal boxes.
.. _fft-parallel:
.. figure:: img/fft-decomp-parallel.png
:align: center
parallel FFT in PPPM
Stages of a parallel FFT for a simulation domain overlaid
with an 8x8x8 3d FFT grid, partitioned across 64 processors.
Within each of the 4 diagrams, grid cells of the same color are
owned by a single processor; for simplicity only cells owned by 4
or 8 of the 64 processors are colored. The two images on the left
illustrate brick-to-pencil communication. The two images on the
right illustrate pencil-to-pencil communication, which in this
case transposes the *y* and *z* dimensions of the grid.
Parallel 3d FFTs require substantial communication relative to their
computational cost. A 3d FFT is implemented by a series of 1d FFTs
along the *x*-, *y*-, and *z*-directions of the FFT grid. Thus the FFT
grid cannot be decomposed like atoms into 3 dimensions for parallel
processing of the FFTs, but only in 1 (as planes) or 2 (as pencils)
dimensions, and in between the steps the grid needs to be transposed so
that the FFT grid portion "owned" by each MPI process is complete in the
direction of the 1d FFTs it has to perform. LAMMPS uses the
pencil-decomposition algorithm as shown in the :ref:`fft-parallel` figure.
Initially (far left), each processor owns a brick of same-color grid
cells (actually grid points) contained within its sub-domain. A
brick-to-pencil communication operation converts this layout to 1d
pencils in the *x*-dimension (center left). Again, cells of the same
color are owned by the same processor. Each processor can then compute
a 1d FFT on each pencil of data it wholly owns using a call to the
configured FFT library. A pencil-to-pencil communication then converts
this layout to pencils in the *y* dimension (center right) which
effectively transposes the *x* and *y* dimensions of the grid, followed
by 1d FFTs in *y*. A final transpose of pencils from *y* to *z* (far
right) followed by 1d FFTs in *z* completes the forward FFT. The data
is left in a *z*-pencil layout for the convolution operation. One or
more inverse FFTs then perform the sequence of 1d FFTs and communication
steps in reverse order; the final layout of resulting grid values is the
same as the initial brick layout.
Each communication operation within the FFT (brick-to-pencil or
pencil-to-pencil or pencil-to-brick) converts one tiling of the 3d grid
to another, where a tiling in this context means an assignment of a
small brick-shaped subset of grid points to each processor, the union of
which comprise the entire grid. The parallel `fftMPI library
<https://lammps.github.io/fftmpi/>`_ written for LAMMPS allows arbitrary
definitions of the tiling so that an irregular partitioning of the
simulation domain can use it directly. Transforming data from one
tiling to another is implemented in `fftMPI` using point-to-point
communication, where each processor sends data to a few other
processors, since each tile in the initial tiling overlaps with a
handful of tiles in the final tiling.
The transformations could also be done using collective communication
across all *P* processors with a single call to ``MPI_Alltoall()``, but
this is typically much slower. However, for the specialized brick and
pencil tiling illustrated in the :ref:`fft-parallel` figure, collective
communication across the entire MPI communicator is not required. In
the example an :math:`8^3` grid with 512 grid cells is partitioned
across 64 processors; each processor owns a 2x2x2 3d brick of grid
cells. The initial brick-to-pencil communication (upper left to upper
right) only requires collective communication within subgroups of 4
processors, as illustrated by the 4 colors. More generally, a
brick-to-pencil communication can be performed by partitioning *P*
processors into :math:`P^{\frac{2}{3}}` subgroups of
:math:`P^{\frac{1}{3}}` processors each. Each subgroup performs
collective communication only within its subgroup. Similarly,
pencil-to-pencil communication can be performed by partitioning *P*
processors into :math:`P^{\frac{1}{2}}` subgroups of
:math:`P^{\frac{1}{2}}` processors each. This is illustrated in the
figure for the :math:`y \Rightarrow z` communication (center). An
eight-processor subgroup owns the front *yz* plane of data and performs
collective communication within the subgroup to transpose from a
*y*-pencil to *z*-pencil layout.
LAMMPS invokes point-to-point communication by default, but also
provides the option of partitioned collective communication when using a
:doc:`kspace_modify collective yes <kspace_modify>` command to switch to
that mode. In the latter case, the code detects the size of the
disjoint subgroups and partitions the single *P*-size communicator into
multiple smaller communicators, each of which invokes collective
communication. Testing on a large IBM Blue Gene/Q machine at Argonne
National Labs showed a significant improvement in FFT performance for
large processor counts; partitioned collective communication was faster
than point-to-point communication or global collective communication
involving all *P* processors.
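The sketch below illustrates only the partitioned collective idea; it is
not the fftMPI implementation. It assumes that a subgroup "color" (e.g.
the index of the pencil plane a rank belongs to) has already been computed
from the two tilings and that every rank contributes the same number of
values.

.. code-block:: c++

   #include <mpi.h>
   #include <vector>

   // Split the global communicator into disjoint subgroups and perform the
   // grid transpose as an all-to-all restricted to each subgroup, instead
   // of a collective over all P processors.
   void transpose_within_subgroup(std::vector<double> &sendbuf,
                                  std::vector<double> &recvbuf,
                                  int color, int nper, MPI_Comm world)
   {
     int me;
     MPI_Comm_rank(world, &me);
     MPI_Comm subgroup;
     MPI_Comm_split(world, color, me, &subgroup);   // ranks with equal color
     MPI_Alltoall(sendbuf.data(), nper, MPI_DOUBLE,
                  recvbuf.data(), nper, MPI_DOUBLE, subgroup);
     MPI_Comm_free(&subgroup);
   }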
Here are some additional details about FFTs for long-range and related
grid/particle operations that LAMMPS supports:
- The fftMPI library allows each grid dimension to be a multiple of
small prime factors (2,3,5), and allows any number of processors to
perform the FFT. The resulting brick and pencil decompositions are
thus not always as well-aligned but the size of subgroups of
processors for the two modes of communication (brick/pencil and
pencil/pencil) still scale as :math:`O(P^{\frac{1}{3}})` and
:math:`O(P^{\frac{1}{2}})`.
- For efficiency in performing 1d FFTs, the grid transpose
operations illustrated in the :ref:`fft-parallel` figure also involve
reordering the 3d data so that a different dimension is contiguous
in memory. This reordering can be done during the packing or
unpacking of buffers for MPI communication.
- For large systems and particularly a large number of MPI processes,
the dominant cost for parallel FFTs is often the communication, not
the computation of 1d FFTs, even though the latter scales as :math:`N
\log(N)` in the number of grid points *N* per grid direction. This is
due to the fact that only a 2d decomposition into pencils is possible
while atom data (and their corresponding short-range force and energy
computations) can be decomposed efficiently in 3d.
This can be addressed by reducing the number of MPI processes involved
in the MPI communication by using :doc:`hybrid MPI + OpenMP
parallelization <Speed_omp>`. This will use OpenMP parallelization
inside the MPI domains and while that may have a lower parallel
efficiency, it reduces the communication overhead.
As an alternative it is also possible to start a :ref:`multi-partition
<partition>` calculation and then use the :doc:`verlet/split
integrator <run_style>` to perform the PPPM computation on a
dedicated, separate partition of MPI processes. This uses an integer
"1:*p*" mapping of *p* sub-domains of the atom decomposition to one
sub-domain of the FFT grid decomposition, where pairwise non-bonded
and bonded forces and energies are computed on the larger partition
and the PPPM kspace computation runs concurrently on the smaller partition.
- LAMMPS also implements PPPM-based solvers for other long-range
interactions, dipole and dispersion (Lennard-Jones), which can be used
in conjunction with long-range Coulombics for point charges.
- LAMMPS implements a ``GridComm`` class which overlays the simulation
domain with a regular grid, partitions it across processors in a
manner consistent with processor sub-domains, and provides methods for
forward and reverse communication of owned and ghost grid point
values. It is used for PPPM as an FFT grid (as outlined above) and
also for the MSM algorithm which uses a cascade of grid sizes from
fine to coarse to compute long-range Coulombic forces. The GridComm
class is also useful for models where continuum fields interact with
particles. For example, the two-temperature model (TTM) defines heat
transfer between atoms (particles) and electrons (continuum gas) where
spatial variations in the electron temperature are computed by finite
differences of a discretized heat equation on a regular grid. The
:doc:`fix ttm/grid <fix_ttm>` command uses the ``GridComm`` class
internally to perform its grid operations on a distributed grid
instead of the original :doc:`fix ttm <fix_ttm>` which uses a
replicated grid.

View File

@ -0,0 +1,159 @@
Neighbor lists
^^^^^^^^^^^^^^
To compute forces efficiently, each processor creates a Verlet-style
neighbor list which enumerates all pairs of atoms *i,j* (*i* = owned,
*j* = owned or ghost) with separation less than the applicable
neighbor list cutoff distance. In LAMMPS the neighbor lists are stored
in a multiple-page data structure; each page is a contiguous chunk of
memory which stores vectors of neighbor atoms *j* for many *i* atoms.
This allows pages to be incrementally allocated or deallocated in blocks
as needed. Neighbor lists typically consume the most memory of any data
structure in LAMMPS. The neighbor list is rebuilt (from scratch) once
every few timesteps, then used repeatedly each step for force or other
computations. The neighbor cutoff distance is :math:`R_n = R_f +
\Delta_s`, where :math:`R_f` is the (largest) force cutoff defined by
the interatomic potential for computing short-range pairwise or manybody
forces and :math:`\Delta_s` is a "skin" distance that allows the list to
be used for multiple steps assuming that atoms do not move very far
between consecutive time steps. Typically the code triggers
reneighboring when any atom has moved half the skin distance since the
last reneighboring; this and other options of the neighbor list rebuild
can be adjusted with the :doc:`neigh_modify <neigh_modify>` command.
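A minimal sketch of this trigger criterion is shown below. It assumes
positions are stored as three doubles per atom and that ``xhold`` keeps a
copy from the last neighbor list build; the actual code also accounts for
box changes and combines the result across MPI ranks.

.. code-block:: c++

   #include <vector>

   // Return true when any atom has moved more than half the skin distance
   // since the last neighbor list build, which triggers reneighboring.
   bool need_reneighbor(const std::vector<double> &x,
                        const std::vector<double> &xhold, double skin)
   {
     const double triggersq = 0.25 * skin * skin;   // (skin/2)^2
     for (std::size_t i = 0; i + 2 < x.size(); i += 3) {
       const double dx = x[i] - xhold[i];
       const double dy = x[i + 1] - xhold[i + 1];
       const double dz = x[i + 2] - xhold[i + 2];
       if (dx * dx + dy * dy + dz * dz > triggersq) return true;
     }
     return false;
   }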
On steps when reneighboring is performed, atoms which have moved outside
their owning processor's sub-domain are first migrated to new processors
via communication. Periodic boundary conditions are also (only)
enforced on these steps to ensure each atom is re-assigned to the
correct processor. After migration, the atoms owned by each processor
are stored in a contiguous vector. Periodically each processor
spatially sorts owned atoms within its vector to reorder it for improved
cache efficiency in force computations and neighbor list building. For
that atoms are spatially binned and then reordered so that atoms in the
same bin are adjacent in the vector. Atom sorting can be disabled or
its settings modified with the :doc:`atom_modify <atom_modify>` command.
.. _neighbor-stencil:
.. figure:: img/neigh-stencil.png
:align: center
neighbor list stencils
A 2d simulation sub-domain (thick black line) and the corresponding
ghost atom cutoff region (dashed blue line) for both orthogonal
(left) and triclinic (right) domains. A regular grid of neighbor
bins (thin lines) overlays the entire simulation domain and need not
align with sub-domain boundaries; only the portion overlapping the
augmented sub-domain is shown. In the triclinic case it overlaps the
bounding box of the tilted rectangle. The blue- and red-shaded bins
represent a stencil of bins searched to find neighbors of a particular
atom (black dot).
To build a local neighbor list in linear time, the simulation domain is
overlaid (conceptually) with a regular 3d (or 2d) grid of neighbor bins,
as shown in the :ref:`neighbor-stencil` figure for 2d models and a
single MPI processor's sub-domain. Each processor stores a set of
neighbor bins which overlap its sub-domain extended by the neighbor
cutoff distance :math:`R_n`. As illustrated, the bins need not align
with processor boundaries; an integer number of bins in each dimension is
fit to the size of the entire simulation box.
Most often LAMMPS builds what it calls a "half" neighbor list where
each *i,j* neighbor pair is stored only once, with either atom *i* or
*j* as the central atom. The build can be done efficiently by using a
pre-computed "stencil" of bins around a central origin bin which
contains the atom whose neighbors are being searched for. A stencil
is simply a list of integer offsets in *x,y,z* of nearby bins
surrounding the origin bin which are close enough to contain any
neighbor atom *j* within a distance :math:`R_n` from any atom *i* in the
origin bin. Note that for a half neighbor list, the stencil can be
asymmetric, since each atom only needs to store half of its nearby neighbors.
These stencils are illustrated in the figure for a half list and a bin
size of :math:`\frac{1}{2} R_n`. There are 13 red+blue stencil bins in
2d (for the orthogonal case, 15 for triclinic). In 3d there would be
63: 13 in the plane of bins that contains the origin bin and 25 in each
of the two planes above it in the *z* direction (75 for triclinic). The
reason the triclinic stencil has extra bins is that the bins tile the
bounding box of the entire triclinic domain and thus are not periodic
with respect to the simulation box itself. The stencil and logic for
determining which *i,j* pairs to include in the neighbor list are
altered slightly to account for this.
To build a neighbor list, a processor first loops over its "owned" plus
"ghost" atoms and assigns each to a neighbor bin. This uses an integer
vector to create a linked list of atom indices within each bin. It then
performs a triply-nested loop over its owned atoms *i*, the stencil of
bins surrounding atom *i*'s bin, and the *j* atoms in each stencil bin
(including ghost atoms). If the distance :math:`r_{ij} < R_n`, then
atom *j* is added to the vector of atom *i*'s neighbors.
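The sketch below shows this bin-and-stencil search in a compact form. It
is not the LAMMPS ``Neighbor`` class code: the helpers ``bin_of()`` and
``rsq()`` stand in for what the real code derives from positions, bin
geometry, and cutoffs, and periodic wrapping of stencil bins is omitted.
For a half list, the stencil would be asymmetric and pairs within the same
bin would additionally be filtered by an ordering test.

.. code-block:: c++

   #include <algorithm>
   #include <vector>

   // Build per-atom neighbor lists using bins and a pre-computed stencil of
   // bin offsets.  Owned and ghost atoms (nall total) are first linked into
   // their bins; then, for each owned atom (nlocal), the stencil bins are
   // scanned and atoms within the neighbor cutoff are recorded.
   void build_neighbor_list(int nlocal, int nall, double cutneighsq,
                            const std::vector<int> &stencil,
                            int (*bin_of)(int), double (*rsq)(int, int),
                            std::vector<std::vector<int>> &neigh)
   {
     int nbins = 0;
     for (int i = 0; i < nall; ++i) nbins = std::max(nbins, bin_of(i) + 1);
     std::vector<int> head(nbins, -1), next(nall, -1);
     for (int i = 0; i < nall; ++i) {     // linked list of atoms per bin
       const int ib = bin_of(i);
       next[i] = head[ib];
       head[ib] = i;
     }
     neigh.assign(nlocal, {});
     for (int i = 0; i < nlocal; ++i) {
       for (int offset : stencil) {
         const int jb = bin_of(i) + offset;
         if (jb < 0 || jb >= nbins) continue;
         for (int j = head[jb]; j >= 0; j = next[j]) {
           if (j == i) continue;
           if (rsq(i, j) < cutneighsq) neigh[i].push_back(j);
         }
       }
     }
   }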
Here are additional details about neighbor list build options LAMMPS
supports:
- The choice of bin size is an option; a size half of :math:`R_n` has
been found to be optimal for many typical cases. Smaller bins incur
additional overhead to loop over; larger bins require more distance
calculations. Note that for smaller bin sizes, the 2d stencil in the
figure would be more semi-circular in shape (hemispherical in 3d),
with bins near the corners of the square eliminated due to their
distance from the origin bin.
- Depending on the interatomic potential(s) and other commands used in
an input script, multiple neighbor lists and stencils with different
attributes may be needed. This includes lists with different cutoff
distances, e.g. for force computation versus occasional diagnostic
computations such as a radial distribution function, or for the
r-RESPA time integrator which can partition pairwise forces by
distance into subsets computed at different time intervals. It
includes "full" lists (as opposed to half lists) where each *i,j* pair
appears twice, stored once with *i* and *j*, and which use a larger
symmetric stencil. It also includes lists with partial enumeration of
ghost atom neighbors. The full and ghost-atom lists are used by
various manybody interatomic potentials. Lists may also use different
criteria for inclusion of a pair interaction. Typically this simply
depends only on the distance between two atoms and the cutoff
distance. But for finite-size coarse-grained particles with
individual diameters (e.g. polydisperse granular particles), it can
also depend on the diameters of the two particles.
- When using :doc:`pair style hybrid <pair_hybrid>` multiple sub-lists
of the master neighbor list for the full system need to be generated,
one for each sub-style, which contains only the *i,j* pairs needed to
compute interactions between subsets of atoms for the corresponding
potential. This means not all *i* or *j* atoms owned by a processor
are included in a particular sub-list.
- Some models use different cutoff lengths for pairwise interactions
between different kinds of particles which are stored in a single
neighbor list. One example is a solvated colloidal system with large
colloidal particles where colloid/colloid, colloid/solvent, and
solvent/solvent interaction cutoffs can be dramatically different.
Another is a model of polydisperse finite-size granular particles;
pairs of particles interact only when they are in contact with each
other. Mixtures with particle size ratios as high as 10-100x may be
used to model realistic systems. Efficient neighbor list building
algorithms for these kinds of systems are available in LAMMPS. They
include a method which uses different stencils for different cutoff
lengths and trims the stencil to only include bins that straddle the
cutoff sphere surface. More recently a method which uses both
multiple stencils and multiple bin sizes was developed; it builds
neighbor lists efficiently for systems with particles of any size
ratio, though other considerations (timestep size, force computations)
may limit the ability to model systems with huge polydispersity.
- For small and sparse systems and as a fallback method, LAMMPS also
supports neighbor list construction without binning by using a full
:math:`O(N^2)` loop over all *i,j* atom pairs in a sub-domain when
using the :doc:`neighbor nsq <neighbor>` command.
- Dependent on the "pair" setting of the :doc:`newton <newton>` command,
the "half" neighbor lists may contain **all** pairs of atoms where
atom *j* is a ghost atom (i.e. when the newton pair setting is *off*)
For the newton pair *on* setting the atom *j* is only added to the
list if its *z* coordinate is larger, or if equal the *y* coordinate
is larger, and that is equal, too, the *x* coordinate is larger. For
homogeneously dense systems that will result in picking neighbors from
a same size sector in always the same direction relative to the
"owned" atom and thus it should lead to similar length neighbor lists
and thus reduce the chance of a load imbalance.
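Below is a small sketch of that coordinate-based tie-breaking rule; the
function name is made up for illustration and ``xi``/``xj`` point to the
(x, y, z) coordinates of owned atom *i* and ghost atom *j*.

.. code-block:: c++

   // With "newton pair on", keep an owned/ghost pair only if the ghost atom
   // j lies "above" atom i: larger z, with ties broken by y and then x.
   bool keep_ghost_pair(const double *xi, const double *xj)
   {
     if (xj[2] != xi[2]) return xj[2] > xi[2];
     if (xj[1] != xi[1]) return xj[1] > xi[1];
     return xj[0] > xi[0];
   }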

View File

@ -0,0 +1,114 @@
OpenMP Parallelism
^^^^^^^^^^^^^^^^^^
The styles in the INTEL, KOKKOS, and OPENMP packages can use OpenMP
thread parallelism, predominantly to distribute loops over local data,
and thus follow a parallelization strategy that is orthogonal to the
decomposition into spatial domains used by the :doc:`MPI partitioning
<Developer_par_part>`. For clarity, this section discusses only the
implementation in the OPENMP package, as it is the simplest. The INTEL
and KOKKOS packages offer additional options and are more complex since
they support more features and different hardware like co-processors
or GPUs.
One of the key decisions when implementing the OPENMP package was to
keep the changes to the source code small, so that it would be easier to
maintain the code and keep it in sync with the non-threaded standard
implementation. This is achieved by a) making the OPENMP version a
derived class from the regular version (e.g. ``PairLJCutOMP`` from
``PairLJCut``) and overriding only methods that are multi-threaded or
need to be modified to support multi-threading (similar to what was done
in the OPT package), b) keeping the structure in the modified code very
similar so that side-by-side comparisons are still useful, and c)
offloading additional functionality and multi-thread support functions
into three separate classes ``ThrOMP``, ``ThrData``, and ``FixOMP``.
``ThrOMP`` provides additional, multi-thread aware functionality not
available in the corresponding base class (e.g. ``Pair`` for
``PairLJCutOMP``) like multi-thread aware variants of the "tally"
functions. Those functions are made available through multiple
inheritance so those new functions have to have unique names to avoid
ambiguities; typically ``_thr`` is appended to the name of the function.
``ThrData`` is a class that manages per-thread data structures.
It is used instead of extending the corresponding storage to per-thread
arrays to avoid slowdowns due to "false sharing" when multiple threads
update adjacent elements in an array and thus force the CPU cache lines
to be reset and re-fetched. ``FixOMP`` finally manages the "multi-thread
state" like settings and access to per-thread storage; it is activated
by the :doc:`package omp <package>` command.
Avoiding data races
"""""""""""""""""""
A key problem when implementing thread parallelism in an MD code is
to avoid data races when updating accumulated properties like forces,
energies, and stresses. When interactions are computed, they always
involve multiple atoms and thus there are race conditions when multiple
threads want to update per-atom data of the same atoms. Five possible
strategies have been considered to avoid this:
1) restructure the code so that there is no overlapping access possible
when computing in parallel, e.g. by breaking lists into multiple
parts and synchronizing threads in between.
2) have each thread be "responsible" for a specific group of atoms and
compute these interactions multiple times, once on each thread that
is responsible for a given atom and then have each thread only update
the properties of this atom.
3) use mutexes around functions and regions of code where the data race
could happen.
4) use atomic operations when updating per-atom properties.
5) use replicated per-thread data structures to accumulate data without
conflicts and then use a reduction to combine those results into the
data structures used by the regular style.
Option 5 was chosen for the OPENMP package because it would retain the
performance for the case of 1 thread and the code would be more
maintainable. Option 1 would require extensive code changes,
particularly to the neighbor list code; options 2 would have incurred a
2x or more performance penalty for the serial case; option 3 causes
significant overhead and would enforce serialization of operations in
inner loops and thus defeat the purpose of multi-threading; option 4
slows down the serial case although not quite as bad as option 2. The
downside of option 5 is that the overhead of the reduction operations
grows with the number of threads used, so there would be a crossover
point where options 2 or 4 would result in faster executing. That is
why option 2 for example is used in the GPU package because a GPU is a
processor with a massive number of threads. However, since the MPI
parallelization is generally more effective for typical MD systems, the
expectation is that thread parallelism is only used for a smaller number
of threads (2-8). At the time of its implementation, that number was
equivalent to the number of CPU cores per CPU socket on high-end
supercomputers.
Thus arrays like the force array are dimensioned to the number of atoms
times the number of threads when OpenMP support is enabled, and inside
the compute functions each thread obtains a pointer to a different chunk.
Similarly, accumulators like the potential energy or virial are kept in
per-thread instances of the ``ThrData`` class and only reduced and
stored in their global counterparts at the end of the force computation.
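A stripped-down sketch of this reduction step is shown below. It assumes
the force array ``f`` holds ``nthreads`` consecutive chunks of ``3 * nall``
values, with the first chunk acting as the global force array; the real
implementation lives in the ``ThrOMP``/``ThrData`` classes and also handles
energies and virials.

.. code-block:: c++

   #include <vector>

   // Sum the per-thread force chunks into the global (first) chunk and
   // clear them for the next step.  The loop over force components is
   // itself distributed across threads with static scheduling.
   void reduce_thread_forces(std::vector<double> &f, int nall, int nthreads)
   {
     #pragma omp parallel for schedule(static)
     for (int i = 0; i < 3 * nall; ++i) {
       double sum = 0.0;
       for (int t = 1; t < nthreads; ++t) {
         sum += f[t * 3 * nall + i];
         f[t * 3 * nall + i] = 0.0;
       }
       f[i] += sum;
     }
   }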
Loop scheduling
"""""""""""""""
Multi-thread parallelization is applied by distributing (outer) loops
statically across threads. Typically this would be the loop over local
atoms *i* when processing *i,j* pairs of atoms from a neighbor list.
The design of the neighbor list code results in atoms having a similar
number of neighbors for homogeneous systems, so load imbalances
across threads are not common; they typically occur for systems where
the MPI parallelization would also be unbalanced, which usually has a
more pronounced impact on performance. The same loop
scheduling scheme can also be applied to the reduction operations on
per-atom data to reduce the overhead of the reduction operation.
Neighbor list parallelization
"""""""""""""""""""""""""""""
In addition to the parallelization of force computations, the
generation of the neighbor lists is also parallelized. As explained
previously, neighbor lists are built by looping over "owned" atoms and
storing the neighbors in "pages". In the OPENMP variants of the
neighbor list code, each thread operates on a different chunk of "owned"
atoms and allocates and fills its own set of pages with neighbor list
data. This is achieved by each thread keeping its own instance of the
:cpp:class:`MyPage <LAMMPS_NS::MyPage>` page allocator class.

View File

@ -0,0 +1,89 @@
Partitioning
^^^^^^^^^^^^
The underlying spatial decomposition strategy used by LAMMPS for
distributed-memory parallelism is set with the :doc:`comm_style command
<comm_style>` and can be either "brick" (a regular grid) or "tiled".
.. _domain-decomposition:
.. figure:: img/domain-decomp.png
:align: center
domain decomposition
This figure shows the different kinds of domain decomposition used
for MPI parallelization: "brick" on the left with an orthogonal
(left) and a triclinic (middle) simulation domain, and a "tiled"
decomposition (right). The black lines show the division into
sub-domains and the contained atoms are "owned" by the corresponding
MPI process. The green dashed lines indicate how sub-domains are
extended with "ghost" atoms up to the communication cutoff distance.
The LAMMPS simulation box is a 3d or 2d volume, which can be orthogonal
or triclinic in shape, as illustrated in the :ref:`domain-decomposition`
figure for the 2d case. Orthogonal means the box edges are aligned with
the *x*, *y*, *z* Cartesian axes, and the box faces are thus all
rectangular. Triclinic allows for a more general parallelepiped shape
in which edges are aligned with three arbitrary vectors and the box
faces are parallelograms. In each dimension box faces can be periodic,
or non-periodic with fixed or shrink-wrapped boundaries. In the fixed
case, atoms which move outside the face are deleted; shrink-wrapped
means the position of the box face adjusts continuously to enclose all
the atoms.
For distributed-memory MPI parallelism, the simulation box is spatially
decomposed (partitioned) into non-overlapping sub-domains which fill the
box. The default partitioning, "brick", is most suitable when atom
density is roughly uniform, as shown in the left-side images of the
:ref:`domain-decomposition` figure. The sub-domains comprise a regular
grid and all sub-domains are identical in size and shape. Both the
orthogonal and triclinic boxes can deform continuously during a
simulation, e.g. to compress a solid or shear a liquid, in which case
the processor sub-domains likewise deform.
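As a simple illustration of the "brick" case, the sketch below maps an
atom position to the rank of the owning sub-domain for an orthogonal box
split into a regular Px x Py x Pz processor grid. The function name and
the rank ordering are illustrative, not the actual LAMMPS mapping.

.. code-block:: c++

   // Return the rank of the processor whose sub-domain contains position x
   // for an orthogonal box with bounds [lo, hi) split into a regular
   // px x py x pz grid.  Assumes x has been remapped into the box.
   int owning_rank(const double *x, const double *lo, const double *hi,
                   int px, int py, int pz)
   {
     int ix = (int) (px * (x[0] - lo[0]) / (hi[0] - lo[0]));
     int iy = (int) (py * (x[1] - lo[1]) / (hi[1] - lo[1]));
     int iz = (int) (pz * (x[2] - lo[2]) / (hi[2] - lo[2]));
     if (ix == px) ix--;   // guard against positions exactly on the upper face
     if (iy == py) iy--;
     if (iz == pz) iz--;
     return ix + px * (iy + py * iz);
   }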
For models with non-uniform density, the number of particles per
processor can be load-imbalanced with the default partitioning. This
reduces parallel efficiency, as the overall simulation rate is limited
by the slowest processor, i.e. the one with the largest computational
load. For such models, LAMMPS supports multiple strategies to reduce
the load imbalance:
- The processor grid decomposition is by default based on the simulation
cell volume and tries to optimize the volume to surface ratio for the sub-domains.
This can be changed with the :doc:`processors command <processors>`.
- The parallel planes defining the size of the sub-domains can be shifted
with the :doc:`balance command <balance>`. This can be done in addition
to choosing a more optimal processor grid.
- The recursive bisectioning algorithm in combination with the "tiled"
communication style can produce a partitioning with equal numbers of
particles in each sub-domain.
.. |decomp1| image:: img/decomp-regular.png
:width: 24%
.. |decomp2| image:: img/decomp-processors.png
:width: 24%
.. |decomp3| image:: img/decomp-balance.png
:width: 24%
.. |decomp4| image:: img/decomp-rcb.png
:width: 24%
|decomp1| |decomp2| |decomp3| |decomp4|
The pictures above demonstrate different decompositions for a 2d system
with 12 MPI ranks. The atom colors indicate the load imbalance of each
sub-domain with green being optimal and red the least optimal.
Due to the vacuum in the system, the default decomposition is unbalanced
with several MPI ranks without atoms (left). By forcing a 1x12x1
processor grid, every MPI rank now does computations, but the number of
atoms per sub-domain is still uneven and the thin slice shape increases
the amount of communication between sub-domains (center left). With a
2x6x1 processor grid and shifted sub-domain divisions, the load
imbalance is further reduced and less communication is required
between sub-domains (center right). Using recursive
bisectioning leads to a further improved decomposition (right).

View File

@ -0,0 +1,28 @@
Parallel algorithms
-------------------
LAMMPS is designed to enable running simulations in parallel using the
MPI parallel communication standard with distributed data via domain
decomposition. The parallelization aims to be efficient and result in good
strong scaling (= good speedup for the same system) and good weak
scaling (= the computational cost of enlarging the system is
proportional to the system size). Additional parallelization using GPUs
or OpenMP can also be applied within the sub-domain assigned to an MPI
process. For clarity, most of the following illustrations show the 2d
simulation case. The underlying algorithms in those cases, however,
apply to both 2d and 3d cases equally well.
.. note::
The text and most of the figures in this chapter were adapted
for the manual from the section on parallel algorithms in the
:ref:`new LAMMPS paper <lammps_paper>`.
.. toctree::
:maxdepth: 1
Developer_par_part
Developer_par_comm
Developer_par_neigh
Developer_par_long
Developer_par_openmp

View File

@ -60,6 +60,9 @@ silently returning the result of a partial conversion or zero in cases
where the string is not a valid number. This behavior allows to more where the string is not a valid number. This behavior allows to more
easily detect typos or issues when processing input files. easily detect typos or issues when processing input files.
Similarly the :cpp:func:`logical() <LAMMPS_NS::utils::logical>` function
will convert a string into a boolean and will only accept certain words.
The *do_abort* flag should be set to ``true`` in case this function The *do_abort* flag should be set to ``true`` in case this function
is called only on a single MPI rank, as that will then trigger the is called only on a single MPI rank, as that will then trigger the
a call to ``Error::one()`` for errors instead of ``Error::all()`` a call to ``Error::one()`` for errors instead of ``Error::all()``
@ -83,6 +86,9 @@ strings for compliance without conversion.
.. doxygenfunction:: tnumeric .. doxygenfunction:: tnumeric
:project: progguide :project: progguide
.. doxygenfunction:: logical
:project: progguide
String processing String processing
^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^
@ -203,6 +209,9 @@ Convenience functions
.. doxygenfunction:: date2num .. doxygenfunction:: date2num
:project: progguide :project: progguide
.. doxygenfunction:: current_date
:project: progguide
Customized standard functions Customized standard functions
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

View File

@ -40,11 +40,10 @@ We use it to show how to identify the origin of a segmentation fault.
After recompiling LAMMPS and running the input you should get something like this: After recompiling LAMMPS and running the input you should get something like this:
.. code-block: .. code-block::
$ ./lmp -in in.melt $ ./lmp -in in.melt
LAMMPS (19 Mar 2020) LAMMPS (19 Mar 2020)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:94)
using 1 OpenMP thread(s) per MPI task using 1 OpenMP thread(s) per MPI task
Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 Lattice spacing in x,y,z = 1.6796 1.6796 1.6796
Created orthogonal box = (0 0 0) to (16.796 16.796 16.796) Created orthogonal box = (0 0 0) to (16.796 16.796 16.796)

View File

@ -4,28 +4,41 @@ Citing LAMMPS
Core Algorithms Core Algorithms
^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^
Since LAMMPS is a community project, there is not a single one The paper mentioned below is the best overview of LAMMPS, but there are
publication or reference that describes **all** of LAMMPS. also publications describing particular models or algorithms implemented
The canonical publication that describes the foundation, that is in LAMMPS or complementary software that it has interfaces to. Please
the basic spatial decomposition approach, the neighbor finding, see below for how to cite contributions to LAMMPS.
and basic communications algorithms used in LAMMPS is:
.. _lammps_paper:
The latest canonical publication that describes the basic features, the
source code design, the program structure, the spatial decomposition
approach, the neighbor finding, basic communications algorithms, and how
users and developers have contributed to LAMMPS is:
`LAMMPS - A flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales, Comp. Phys. Comm. (accepted 09/2021), DOI:10.1016/j.cpc.2021.108171 <https://doi.org/10.1016/j.cpc.2021.108171>`_
Any project using LAMMPS, or a derivative application that uses LAMMPS
as a simulation engine, should cite this paper. The paper is expected to
be published in its final form under the same DOI in the first half
of 2022. Please also give the URL of the LAMMPS website in your paper,
namely https://www.lammps.org.
The original publication describing the parallel algorithms used in the
initial versions of LAMMPS is:
`S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). <http://www.sandia.gov/~sjplimp/papers/jcompphys95.pdf>`_ `S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). <http://www.sandia.gov/~sjplimp/papers/jcompphys95.pdf>`_
So any project using LAMMPS (or a derivative application using LAMMPS as
a simulation engine) should cite this paper. A new publication
describing the developments and improvements of LAMMPS in the 25 years
since then is currently in preparation.
DOI for the LAMMPS code DOI for the LAMMPS code
^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^
LAMMPS developers use the `Zenodo service at CERN LAMMPS developers use the `Zenodo service at CERN <https://zenodo.org/>`_
<https://zenodo.org/>`_ to create digital object identifiers (DOI) for to create digital object identifiers (DOI) for stable releases of the
stable releases of the LAMMPS code. There are two types of DOIs for the LAMMPS source code. There are two types of DOIs for the LAMMPS source code.
LAMMPS source code: the canonical DOI for **all** versions of LAMMPS,
which will always point to the **latest** stable release version is: The canonical DOI for **all** versions of LAMMPS, which will always
point to the **latest** stable release version is:
- DOI: `10.5281/zenodo.3726416 <https://dx.doi.org/10.5281/zenodo.3726416>`_ - DOI: `10.5281/zenodo.3726416 <https://dx.doi.org/10.5281/zenodo.3726416>`_
@ -45,11 +58,13 @@ about LAMMPS and its features.
Citing contributions Citing contributions
^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^
LAMMPS has many features and that use either previously published LAMMPS has many features that use either previously published methods
methods and algorithms or novel features. It also includes potential and algorithms or novel features. It also includes potential parameter
parameter filed for specific models. Where available, a reminder about files for specific models. Where available, a reminder about references
references for optional features used in a specific run is printed to for optional features used in a specific run is printed to the screen
the screen and log file. Style and output location can be selected with and log file. Style and output location can be selected with the
the :ref:`-cite command-line switch <cite>`. Additional references are :ref:`-cite command-line switch <cite>`. Additional references are
given in the documentation of the :doc:`corresponding commands given in the documentation of the :doc:`corresponding commands
<Commands_all>` or in the :doc:`Howto tutorials <Howto>`. <Commands_all>` or in the :doc:`Howto tutorials <Howto>`. So please
make certain that you provide the proper acknowledgments and citations
in any published works using LAMMPS.

View File

@ -26,7 +26,7 @@ available online are listed below.
* `Tutorials <https://www.lammps.org/tutorials.html>`_ * `Tutorials <https://www.lammps.org/tutorials.html>`_
* `Pre- and post-processing tools for LAMMPS <https://www.lammps.org/prepost.html>`_ * `Pre- and post-processing tools for LAMMPS <https://www.lammps.org/prepost.html>`_
* `Other software usable with LAMMPS <https://www.lammps.org/offsite.html>`_ * `Other software usable with LAMMPS <https://www.lammps.org/external.html>`_
* `Viz tools usable with LAMMPS <https://www.lammps.org/viz.html>`_ * `Viz tools usable with LAMMPS <https://www.lammps.org/viz.html>`_
* `Benchmark performance <https://www.lammps.org/bench.html>`_ * `Benchmark performance <https://www.lammps.org/bench.html>`_

View File

@ -34,7 +34,7 @@ simple example demonstrating its use:
int lmpargc = sizeof(lmpargv)/sizeof(const char *); int lmpargc = sizeof(lmpargv)/sizeof(const char *);
/* create LAMMPS instance */ /* create LAMMPS instance */
handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL); handle = lammps_open_no_mpi(lmpargc, (char **)lmpargv, NULL);
if (handle == NULL) { if (handle == NULL) {
printf("LAMMPS initialization failed"); printf("LAMMPS initialization failed");
lammps_mpi_finalize(); lammps_mpi_finalize();

View File

@ -115,8 +115,8 @@ External contributions
If you prefer to do so, you can also develop and support your add-on If you prefer to do so, you can also develop and support your add-on
feature **without** having it included in the LAMMPS distribution, for feature **without** having it included in the LAMMPS distribution, for
example as a download from a website of your own. See the `Offsite example as a download from a website of your own. See the `External
LAMMPS packages and tools <https://www.lammps.org/offsite.html>`_ page LAMMPS packages and tools <https://www.lammps.org/external.html>`_ page
of the LAMMPS website for examples of groups that do this. We are happy of the LAMMPS website for examples of groups that do this. We are happy
to advertise your package and website from that page. Simply email the to advertise your package and website from that page. Simply email the
`developers <https://www.lammps.org/authors.html>`_ with info about your `developers <https://www.lammps.org/authors.html>`_ with info about your

View File

@ -305,19 +305,22 @@ you are uncertain, please ask.
FILE pointers and only be done on MPI rank 0. Use the :cpp:func:`utils::logmesg` FILE pointers and only be done on MPI rank 0. Use the :cpp:func:`utils::logmesg`
convenience function where possible. convenience function where possible.
- header files should only include the absolute minimum number of - Header files, especially those defining a "style", should only use
include files and **must not** contain any ``using`` statements; the absolute minimum number of include files and **must not** contain
rather the include statements should be put into the corresponding any ``using`` statements. Typically that would be only the header for
implementation files. For implementation files, the the base class. Instead any include statements should be put into the
"include-what-you-use" principle should be employed. However, when corresponding implementation files and forward declarations be used.
including the ``pointers.h`` header (or one of the base classes For implementation files, the "include what you use" principle should
derived from it) certain headers will be included and thus need to be be employed. However, there is the notable exception that when the
specified. These are: `mpi.h`, `cstddef`, `cstdio`, `cstdlib`, ``pointers.h`` header is included (or one of the base classes derived
`string`, `utils.h`, `fmt/format.h`, `climits`, `cinttypes`. This also from it) certain headers will always be included and thus do not need
means any header can assume that `FILE`, `NULL`, and `INT_MAX` are to be explicitly specified.
defined. These are: `mpi.h`, `cstddef`, `cstdio`, `cstdlib`, `string`, `utils.h`,
`vector`, `fmt/format.h`, `climits`, `cinttypes`.
This also means any such file can assume that `FILE`, `NULL`, and
`INT_MAX` are defined.
- header files that define a new LAMMPS style (i.e. that have a - Header files that define a new LAMMPS style (i.e. that have a
``SomeStyle(some/name,SomeName);`` macro in them) should only use the ``SomeStyle(some/name,SomeName);`` macro in them) should only use the
include file for the base class and otherwise use forward declarations include file for the base class and otherwise use forward declarations
and pointers; when interfacing to a library use the PIMPL (pointer and pointers; when interfacing to a library use the PIMPL (pointer
@ -325,7 +328,7 @@ you are uncertain, please ask.
that contains all library specific data (and thus requires the library that contains all library specific data (and thus requires the library
header) but use a forward declaration and define the struct only in header) but use a forward declaration and define the struct only in
the implementation file. This is a **strict** requirement since this the implementation file. This is a **strict** requirement since this
is where type clashes between packages and hard to fine bugs have is where type clashes between packages and hard to find bugs have
regularly manifested in the past. regularly manifested in the past.
- Please use clang-format only to reformat files that you have - Please use clang-format only to reformat files that you have

View File

@ -2,17 +2,25 @@ Basics of running LAMMPS
======================== ========================
LAMMPS is run from the command line, reading commands from a file via LAMMPS is run from the command line, reading commands from a file via
the -in command line flag, or from standard input. the -in command line flag, or from standard input. Using the "-in
Using the "-in in.file" variant is recommended: in.file" variant is recommended (see note below). The name of the
LAMMPS executable is either ``lmp`` or ``lmp_<machine>`` with
`<machine>` being the machine string used when compiling LAMMPS. The
suffix is required when compiling LAMMPS with the traditional build system
(e.g. with ``make mpi``), but optional when using CMake to configure and
build LAMMPS:
.. code-block:: bash .. code-block:: bash
$ lmp_serial -in in.file $ lmp_serial -in in.file
$ lmp_serial < in.file $ lmp_serial < in.file
$ lmp -in in.file
$ lmp < in.file
$ /path/to/lammps/src/lmp_serial -i in.file $ /path/to/lammps/src/lmp_serial -i in.file
$ mpirun -np 4 lmp_mpi -in in.file $ mpirun -np 4 lmp_mpi -in in.file
$ mpiexec -np 4 lmp -in in.file
$ mpirun -np 8 /path/to/lammps/src/lmp_mpi -in in.file $ mpirun -np 8 /path/to/lammps/src/lmp_mpi -in in.file
$ mpirun -np 6 /usr/local/bin/lmp -in in.file $ mpiexec -n 6 /usr/local/bin/lmp -in in.file
You normally run the LAMMPS command in the directory where your input You normally run the LAMMPS command in the directory where your input
script is located. That is also where output files are produced by script is located. That is also where output files are produced by
@ -23,7 +31,7 @@ executable itself can be placed elsewhere.
.. note:: .. note::
The redirection operator "<" will not always work when running The redirection operator "<" will not always work when running
in parallel with mpirun; for those systems the -in form is required. in parallel with mpirun or mpiexec; for those systems the -in form is required.
As LAMMPS runs it prints info to the screen and a logfile named As LAMMPS runs it prints info to the screen and a logfile named
*log.lammps*\ . More info about output is given on the *log.lammps*\ . More info about output is given on the

View File

@ -7,7 +7,7 @@ steps are often necessary to setup and analyze a simulation. A list
of such tools can be found on the `LAMMPS webpage <lws_>`_ at these links: of such tools can be found on the `LAMMPS webpage <lws_>`_ at these links:
* `Pre/Post processing <https://www.lammps.org/prepost.html>`_ * `Pre/Post processing <https://www.lammps.org/prepost.html>`_
* `Offsite LAMMPS packages & tools <https://www.lammps.org/offsite.html>`_ * `External LAMMPS packages & tools <https://www.lammps.org/external.html>`_
* `Pizza.py toolkit <pizza_>`_ * `Pizza.py toolkit <pizza_>`_
The last link for `Pizza.py <pizza_>`_ is a Python-based tool developed at The last link for `Pizza.py <pizza_>`_ is a Python-based tool developed at

View File

@ -8,9 +8,8 @@ fix brownian command
fix brownian/sphere command fix brownian/sphere command
=========================== ===========================
fix brownian/sphere command fix brownian/asphere command
=========================== ============================
Syntax Syntax
"""""" """"""

View File

@ -38,7 +38,7 @@ Syntax
*intersect* args = two or more group IDs *intersect* args = two or more group IDs
*dynamic* args = parent-ID keyword value ... *dynamic* args = parent-ID keyword value ...
one or more keyword/value pairs may be appended one or more keyword/value pairs may be appended
keyword = *region* or *var* or *every* keyword = *region* or *var* or *property* or *every*
*region* value = region-ID *region* value = region-ID
*var* value = name of variable *var* value = name of variable
*property* value = name of custom integer or floating point vector *property* value = name of custom integer or floating point vector
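
For example, a dynamic group whose membership is recomputed from a
region every few steps can be defined with a fragment like the
following sketch (the group ID, region, and values are arbitrary):

.. code-block:: LAMMPS

   # atoms currently above z = 5.0 are re-assigned to the group every 10 steps
   region   upper block INF INF INF INF 5.0 INF
   group    topatoms dynamic all region upper every 10
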

(Binary image files not shown: several new figures added under doc/src/img/, including decomp-rcb.png and ghost-comm.png.)
View File

@ -27,7 +27,7 @@ Syntax
on = set Newton pairwise flag on (currently not allowed) on = set Newton pairwise flag on (currently not allowed)
*pair/only* = *off* or *on* *pair/only* = *off* or *on*
off = apply "gpu" suffix to all available styles in the GPU package (default) off = apply "gpu" suffix to all available styles in the GPU package (default)
on - apply "gpu" suffix only pair styles on = apply "gpu" suffix only to pair styles
*binsize* value = size *binsize* value = size
size = bin size for neighbor list construction (distance units) size = bin size for neighbor list construction (distance units)
*split* = fraction *split* = fraction

View File

@ -198,8 +198,8 @@ same:
Coefficients must be defined for each pair of atoms types via the Coefficients must be defined for each pair of atoms types via the
:doc:`pair_coeff <pair_coeff>` command as described above, or in the :doc:`pair_coeff <pair_coeff>` command as described above, or in the
data file read by the :doc:`read_data <read_data>` commands, or by "Pair Coeffs" or "PairIJ Coeffs" section of the data file read by the
mixing as described below. :doc:`read_data <read_data>` command, or by mixing as described below.
For all of the *hybrid*, *hybrid/overlay*, and *hybrid/scaled* styles, For all of the *hybrid*, *hybrid/overlay*, and *hybrid/scaled* styles,
every atom type pair I,J (where I <= J) must be assigned to at least one every atom type pair I,J (where I <= J) must be assigned to at least one
@ -208,14 +208,21 @@ examples above, or in the data file read by the :doc:`read_data
<read_data>`, or by mixing as described below. Also all sub-styles <read_data>`, or by mixing as described below. Also all sub-styles
must be used at least once in a :doc:`pair_coeff <pair_coeff>` command. must be used at least once in a :doc:`pair_coeff <pair_coeff>` command.
.. note::
LAMMPS never performs mixing of parameters from different sub-styles,
**even** if they use the same type of coefficients, e.g. both contain
a Lennard-Jones potential variant. Those parameters must be provided
explicitly, as in the sketch below.
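
For example, when two LJ-based sub-styles are combined, the I,J cross
interaction is not generated by mixing their coefficients; it has to be
assigned explicitly to one of the sub-styles (the styles and
coefficients below are arbitrary example values):

.. code-block:: LAMMPS

   pair_style   hybrid lj/cut 10.0 lj/expand 10.0
   pair_coeff   1 1 lj/cut 0.20 3.0
   pair_coeff   2 2 lj/expand 0.15 3.2 0.5
   pair_coeff   1 2 lj/cut 0.17 3.1    # never mixed across sub-styles
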
If you want there to be no interactions between a particular pair of If you want there to be no interactions between a particular pair of
atom types, you have 3 choices. You can assign the type pair to some atom types, you have 3 choices. You can assign the pair of atom types
sub-style and use the :doc:`neigh_modify exclude type <neigh_modify>` to some sub-style and use the :doc:`neigh_modify exclude type <neigh_modify>`
command. You can assign it to some sub-style and set the coefficients command. You can assign it to some sub-style and set the coefficients
so that there is effectively no interaction (e.g. epsilon = 0.0 in a LJ so that there is effectively no interaction (e.g. epsilon = 0.0 in a LJ
potential). Or, for *hybrid*, *hybrid/overlay*, or *hybrid/scaled* potential). Or, for *hybrid*, *hybrid/overlay*, or *hybrid/scaled*
simulations, you can use this form of the pair_coeff command in your simulations, you can use this form of the pair_coeff command in your
input script: input script or the "PairIJ Coeffs" section of your data file:
.. code-block:: LAMMPS .. code-block:: LAMMPS
@ -238,19 +245,20 @@ styles with different requirements.
---------- ----------
Different force fields (e.g. CHARMM vs AMBER) may have different rules Different force fields (e.g. CHARMM vs. AMBER) may have different rules
for applying weightings that change the strength of pairwise for applying exclusions or weights that change the strength of pairwise
interactions between pairs of atoms that are also 1-2, 1-3, and 1-4 non-bonded interactions between pairs of atoms that are also 1-2, 1-3,
neighbors in the molecular bond topology, as normally set by the and 1-4 neighbors in the molecular bond topology. This is normally a
:doc:`special_bonds <special_bonds>` command. Different weights can be global setting defined by the :doc:`special_bonds <special_bonds>` command.
assigned to different pair hybrid sub-styles via the :doc:`pair_modify However, different weights can be assigned to different hybrid
special <pair_modify>` command. This allows multiple force fields to be sub-styles via the :doc:`pair_modify special <pair_modify>` command.
used in a model of a hybrid system, however, there is no consistent This allows multiple force fields to be used in a model of a hybrid
approach to determine parameters automatically for the interactions system, however, there is no consistent approach to determine parameters
between the two force fields, this is only recommended when particles automatically for the interactions **between** atoms of the two force
fields, thus this approach this is only recommended when particles
described by the different force fields do not mix. described by the different force fields do not mix.
Here is an example for mixing CHARMM and AMBER: The global *amber* Here is an example for combining CHARMM and AMBER: The global *amber*
setting sets the 1-4 interactions to non-zero scaling factors and setting sets the 1-4 interactions to non-zero scaling factors and
then overrides them with 0.0 only for CHARMM: then overrides them with 0.0 only for CHARMM:
@ -260,7 +268,7 @@ then overrides them with 0.0 only for CHARMM:
pair_style hybrid lj/charmm/coul/long 8.0 10.0 lj/cut/coul/long 10.0 pair_style hybrid lj/charmm/coul/long 8.0 10.0 lj/cut/coul/long 10.0
pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0 pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0
The this input achieves the same effect: This input achieves the same effect:
.. code-block:: LAMMPS .. code-block:: LAMMPS
@ -270,9 +278,9 @@ The this input achieves the same effect:
pair_modify pair lj/cut/coul/long special coul 0.0 0.0 0.83333333 pair_modify pair lj/cut/coul/long special coul 0.0 0.0 0.83333333
pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0 pair_modify pair lj/charmm/coul/long special lj/coul 0.0 0.0 0.0
Here is an example for mixing Tersoff with OPLS/AA based on Here is an example for combining Tersoff with OPLS/AA based on
a data file that defines bonds for all atoms where for the a data file that defines bonds for all atoms where - for the
Tersoff part of the system the force constants for the bonded Tersoff part of the system - the force constants for the bonded
interactions have been set to 0. Note the global settings are interactions have been set to 0. Note the global settings are
effectively *lj/coul 0.0 0.0 0.5* as required for OPLS/AA: effectively *lj/coul 0.0 0.0 0.5* as required for OPLS/AA:

View File

@ -619,7 +619,7 @@ of analysis.
* - bond * - bond
- atom-ID molecule-ID atom-type x y z - atom-ID molecule-ID atom-type x y z
* - charge * - charge
- atom-type q x y z - atom-ID atom-type q x y z
* - dipole * - dipole
- atom-ID atom-type q x y z mux muy muz - atom-ID atom-type q x y z mux muy muz
* - dpd * - dpd

View File

@ -1,7 +1,7 @@
Sphinx==4.0.3 Sphinx==4.0.3
sphinxcontrib-spelling sphinxcontrib-spelling==7.2.1
git+git://github.com/akohlmey/sphinx-fortran@parallel-read git+git://github.com/akohlmey/sphinx-fortran@parallel-read
sphinx_tabs sphinx_tabs==3.2.0
breathe breathe==4.31.0
Pygments Pygments==2.10.0
six six==1.16.0

View File

@ -418,6 +418,7 @@ html_context['current_version'] = os.environ.get('LAMMPS_WEBSITE_BUILD_VERSION',
html_context['git_commit'] = git_commit html_context['git_commit'] = git_commit
html_context['versions'] = [ html_context['versions'] = [
('latest', 'https://docs.lammps.org/latest/'), ('latest', 'https://docs.lammps.org/latest/'),
('stable', 'https://docs.lammps.org/stable/'),
(version, 'https://docs.lammps.org/') (version, 'https://docs.lammps.org/')
] ]
html_context['downloads'] = [('PDF', 'Manual.pdf')] html_context['downloads'] = [('PDF', 'Manual.pdf')]

View File

@ -2265,6 +2265,7 @@ Nmols
nn nn
nnodes nnodes
Nocedal Nocedal
nO
nocite nocite
nocoeff nocoeff
nodeless nodeless
@ -2443,6 +2444,7 @@ packings
padua padua
Padua Padua
pafi pafi
PairIJ
palegoldenrod palegoldenrod
palegreen palegreen
paleturquoise paleturquoise
@ -3662,6 +3664,7 @@ Yc
ycm ycm
Yeh Yeh
yellowgreen yellowgreen
yEs
Yethiraj Yethiraj
yflag yflag
yhi yhi

View File

@ -1,3 +1,9 @@
IMPORTANT NOTE: This example has not been updated since 2014,
so it is not likely to work anymore out of the box. There have
been changes to LAMMPS and its library interface that would need
to be applied. Please see the manual for the documentation of
the library interface.
This directory has an application that runs classical MD via LAMMPS, This directory has an application that runs classical MD via LAMMPS,
but uses quantum forces calculated by the Quest DFT (density but uses quantum forces calculated by the Quest DFT (density
functional) code in place of the usual classical MD forces calculated functional) code in place of the usual classical MD forces calculated

View File

@ -1,3 +1,9 @@
IMPORTANT NOTE: This example has not been updated since 2013,
so it is not likely to work anymore out of the box. There have
been changes to LAMMPS and its library interface that would need
to be applied. Please see the manual for the documentation of
the library interface.
This directory has an application that models grain growth in the This directory has an application that models grain growth in the
presence of strain. presence of strain.

View File

@ -28,13 +28,9 @@
#include <cstdlib> #include <cstdlib>
#include <cstring> #include <cstring>
#include "lammps.h" // these are LAMMPS include files #define LAMMPS_LIB_MPI // to make lammps_open() visible
#include "input.h"
#include "atom.h"
#include "library.h" #include "library.h"
using namespace LAMMPS_NS;
int main(int narg, char **arg) int main(int narg, char **arg)
{ {
// setup MPI and various communicators // setup MPI and various communicators
@ -74,7 +70,7 @@ int main(int narg, char **arg)
char str1[32],str2[32],str3[32]; char str1[32],str2[32],str3[32];
char **lmparg = new char*[8]; char **lmparg = new char*[8];
lmparg[0] = NULL; // required placeholder for program name lmparg[0] = (char *) "LAMMPS"; // required placeholder for program name
lmparg[1] = (char *) "-screen"; lmparg[1] = (char *) "-screen";
sprintf(str1,"screen.%d",instance); sprintf(str1,"screen.%d",instance);
lmparg[2] = str1; lmparg[2] = str1;
@ -86,13 +82,9 @@ int main(int narg, char **arg)
sprintf(str3,"%g",temperature + instance*tdelta); sprintf(str3,"%g",temperature + instance*tdelta);
lmparg[7] = str3; lmparg[7] = str3;
// open N instances of LAMMPS // create N instances of LAMMPS
// either of these methods will work
LAMMPS *lmp = new LAMMPS(8,lmparg,comm_lammps); void *lmp = lammps_open(8,lmparg,comm_lammps,NULL);
//LAMMPS *lmp;
//lammps_open(8,lmparg,comm_lammps,(void **) &lmp);
delete [] lmparg; delete [] lmparg;
@ -103,7 +95,7 @@ int main(int narg, char **arg)
// query final temperature and print result for each instance // query final temperature and print result for each instance
double *ptr = (double *) double *ptr = (double *)
lammps_extract_compute(lmp,(char *) "thermo_temp",0,0); lammps_extract_compute(lmp,"thermo_temp",LMP_STYLE_GLOBAL,LMP_TYPE_SCALAR);
double finaltemp = *ptr; double finaltemp = *ptr;
double *temps = new double[ninstance]; double *temps = new double[ninstance];
@ -125,7 +117,7 @@ int main(int narg, char **arg)
// delete LAMMPS instances // delete LAMMPS instances
delete lmp; lammps_close(lmp);
// close down MPI // close down MPI

View File

@ -13,7 +13,7 @@ like below.
mpicc -c -O -Wall -g -I$HOME/lammps/src liblammpsplugin.c mpicc -c -O -Wall -g -I$HOME/lammps/src liblammpsplugin.c
mpicc -c -O -Wall -g simple.c mpicc -c -O -Wall -g simple.c
mpicc simple.o liblammsplugin.o -ldl -o simpleC mpicc simple.o liblammpsplugin.o -ldl -o simpleC
You also need to build LAMMPS as a shared library You also need to build LAMMPS as a shared library
(see examples/COUPLE/README), e.g. (see examples/COUPLE/README), e.g.

View File

@ -38,44 +38,98 @@ liblammpsplugin_t *liblammpsplugin_load(const char *lib)
#define ADDSYM(symbol) lmp->symbol = dlsym(handle,"lammps_" #symbol) #define ADDSYM(symbol) lmp->symbol = dlsym(handle,"lammps_" #symbol)
ADDSYM(open); ADDSYM(open);
ADDSYM(open_no_mpi); ADDSYM(open_no_mpi);
ADDSYM(open_fortran);
ADDSYM(close); ADDSYM(close);
ADDSYM(version);
ADDSYM(mpi_init);
ADDSYM(mpi_finalize);
ADDSYM(kokkos_finalize);
ADDSYM(python_finalize);
ADDSYM(file); ADDSYM(file);
ADDSYM(command); ADDSYM(command);
ADDSYM(commands_list); ADDSYM(commands_list);
ADDSYM(commands_string); ADDSYM(commands_string);
ADDSYM(free);
ADDSYM(extract_setting); ADDSYM(get_natoms);
ADDSYM(extract_global); ADDSYM(get_thermo);
ADDSYM(extract_box); ADDSYM(extract_box);
ADDSYM(reset_box);
ADDSYM(memory_usage);
ADDSYM(get_mpi_comm);
ADDSYM(extract_setting);
ADDSYM(extract_global_datatype);
ADDSYM(extract_global);
ADDSYM(extract_atom_datatype);
ADDSYM(extract_atom); ADDSYM(extract_atom);
ADDSYM(extract_compute); ADDSYM(extract_compute);
ADDSYM(extract_fix); ADDSYM(extract_fix);
ADDSYM(extract_variable); ADDSYM(extract_variable);
ADDSYM(get_thermo);
ADDSYM(get_natoms);
ADDSYM(set_variable); ADDSYM(set_variable);
ADDSYM(reset_box);
ADDSYM(gather_atoms); ADDSYM(gather_atoms);
ADDSYM(gather_atoms_concat); ADDSYM(gather_atoms_concat);
ADDSYM(gather_atoms_subset); ADDSYM(gather_atoms_subset);
ADDSYM(scatter_atoms); ADDSYM(scatter_atoms);
ADDSYM(scatter_atoms_subset); ADDSYM(scatter_atoms_subset);
ADDSYM(gather_bonds);
ADDSYM(set_fix_external_callback); ADDSYM(create_atoms);
ADDSYM(config_has_package); ADDSYM(find_pair_neighlist);
ADDSYM(config_package_count); ADDSYM(find_fix_neighlist);
ADDSYM(config_package_name); ADDSYM(find_compute_neighlist);
ADDSYM(neighlist_num_elements);
ADDSYM(neighlist_element_neighbors);
ADDSYM(version);
ADDSYM(get_os_info);
ADDSYM(config_has_mpi_support);
ADDSYM(config_has_gzip_support); ADDSYM(config_has_gzip_support);
ADDSYM(config_has_png_support); ADDSYM(config_has_png_support);
ADDSYM(config_has_jpeg_support); ADDSYM(config_has_jpeg_support);
ADDSYM(config_has_ffmpeg_support); ADDSYM(config_has_ffmpeg_support);
ADDSYM(config_has_exceptions); ADDSYM(config_has_exceptions);
ADDSYM(create_atoms);
ADDSYM(config_has_package);
ADDSYM(config_package_count);
ADDSYM(config_package_name);
ADDSYM(config_accelerator);
ADDSYM(has_gpu_device);
ADDSYM(get_gpu_device_info);
ADDSYM(has_style);
ADDSYM(style_count);
ADDSYM(style_name);
ADDSYM(has_id);
ADDSYM(id_count);
ADDSYM(id_name);
ADDSYM(plugin_count);
ADDSYM(plugin_name);
ADDSYM(set_fix_external_callback);
ADDSYM(fix_external_get_force);
ADDSYM(fix_external_set_energy_global);
ADDSYM(fix_external_set_energy_peratom);
ADDSYM(fix_external_set_virial_global);
ADDSYM(fix_external_set_virial_peratom);
ADDSYM(fix_external_set_vector_length);
ADDSYM(fix_external_set_vector);
ADDSYM(free);
ADDSYM(is_running);
ADDSYM(force_timeout);
#ifdef LAMMPS_EXCEPTIONS #ifdef LAMMPS_EXCEPTIONS
lmp->has_exceptions = 1; lmp->has_exceptions = 1;
ADDSYM(has_error); ADDSYM(has_error);

View File

@ -39,75 +39,121 @@ extern "C" {
#if defined(LAMMPS_BIGBIG) #if defined(LAMMPS_BIGBIG)
typedef void (*FixExternalFnPtr)(void *, int64_t, int, int64_t *, double **, double **); typedef void (*FixExternalFnPtr)(void *, int64_t, int, int64_t *, double **, double **);
#elif defined(LAMMPS_SMALLBIG) #elif defined(LAMMPS_SMALLSMALL)
typedef void (*FixExternalFnPtr)(void *, int64_t, int, int *, double **, double **);
#else
typedef void (*FixExternalFnPtr)(void *, int, int, int *, double **, double **); typedef void (*FixExternalFnPtr)(void *, int, int, int *, double **, double **);
#else
typedef void (*FixExternalFnPtr)(void *, int64_t, int, int *, double **, double **);
#endif #endif
struct _liblammpsplugin { struct _liblammpsplugin {
int abiversion; int abiversion;
int has_exceptions; int has_exceptions;
void *handle; void *handle;
void (*open)(int, char **, MPI_Comm, void **); void *(*open)(int, char **, MPI_Comm, void **);
void (*open_no_mpi)(int, char **, void **); void *(*open_no_mpi)(int, char **, void **);
void *(*open_fortran)(int, char **, void **, int);
void (*close)(void *); void (*close)(void *);
int (*version)(void *);
void (*mpi_init)();
void (*mpi_finalize)();
void (*kokkos_finalize)();
void (*python_finalize)();
void (*file)(void *, char *); void (*file)(void *, char *);
char *(*command)(void *, char *); char *(*command)(void *, const char *);
void (*commands_list)(void *, int, char **); void (*commands_list)(void *, int, const char **);
void (*commands_string)(void *, char *); void (*commands_string)(void *, const char *);
void (*free)(void *);
int (*extract_setting)(void *, char *); double (*get_natoms)(void *);
void *(*extract_global)(void *, char *); double (*get_thermo)(void *, char *);
void (*extract_box)(void *, double *, double *, void (*extract_box)(void *, double *, double *,
double *, double *, double *, int *, int *); double *, double *, double *, int *, int *);
void *(*extract_atom)(void *, char *);
void *(*extract_compute)(void *, char *, int, int);
void *(*extract_fix)(void *, char *, int, int, int, int);
void *(*extract_variable)(void *, char *, char *);
double (*get_thermo)(void *, char *);
int (*get_natoms)(void *);
int (*set_variable)(void *, char *, char *);
void (*reset_box)(void *, double *, double *, double, double, double); void (*reset_box)(void *, double *, double *, double, double, double);
void (*memory_usage)(void *, double *);
int (*get_mpi_comm)(void *);
int (*extract_setting)(void *, const char *);
int *(*extract_global_datatype)(void *, const char *);
void *(*extract_global)(void *, const char *);
void *(*extract_atom_datatype)(void *, const char *);
void *(*extract_atom)(void *, const char *);
void *(*extract_compute)(void *, const char *, int, int);
void *(*extract_fix)(void *, const char *, int, int, int, int);
void *(*extract_variable)(void *, const char *, char *);
int (*set_variable)(void *, char *, char *);
void (*gather_atoms)(void *, char *, int, int, void *); void (*gather_atoms)(void *, char *, int, int, void *);
void (*gather_atoms_concat)(void *, char *, int, int, void *); void (*gather_atoms_concat)(void *, char *, int, int, void *);
void (*gather_atoms_subset)(void *, char *, int, int, int, int *, void *); void (*gather_atoms_subset)(void *, char *, int, int, int, int *, void *);
void (*scatter_atoms)(void *, char *, int, int, void *); void (*scatter_atoms)(void *, char *, int, int, void *);
void (*scatter_atoms_subset)(void *, char *, int, int, int, int *, void *); void (*scatter_atoms_subset)(void *, char *, int, int, int, int *, void *);
void (*set_fix_external_callback)(void *, char *, FixExternalFnPtr, void*); void (*gather_bonds)(void *, void *);
int (*config_has_package)(char * package_name); // lammps_create_atoms() takes tagint and imageint as args
int (*config_package_count)(); // ifdef insures they are compatible with rest of LAMMPS
int (*config_package_name)(int index, char * buffer, int max_size); // caller must match to how LAMMPS library is built
#ifndef LAMMPS_BIGBIG
void (*create_atoms)(void *, int, int *, int *, double *,
double *, int *, int);
#else
void (*create_atoms)(void *, int, int64_t *, int *, double *,
double *, int64_t *, int);
#endif
int (*find_pair_neighlist)(void *, const char *, int, int, int);
int (*find_fix_neighlist)(void *, const char *, int);
int (*find_compute_neighlist)(void *, char *, int);
int (*neighlist_num_elements)(void *, int);
void (*neighlist_element_neighbors)(void *, int, int, int *, int *, int **);
int (*version)(void *);
void (*get_os_info)(char *, int);
int (*config_has_mpi_support)();
int (*config_has_gzip_support)(); int (*config_has_gzip_support)();
int (*config_has_png_support)(); int (*config_has_png_support)();
int (*config_has_jpeg_support)(); int (*config_has_jpeg_support)();
int (*config_has_ffmpeg_support)(); int (*config_has_ffmpeg_support)();
int (*config_has_exceptions)(); int (*config_has_exceptions)();
int (*find_pair_neighlist)(void* ptr, char * style, int exact, int nsub, int request); int (*config_has_package)(const char *);
int (*find_fix_neighlist)(void* ptr, char * id, int request); int (*config_package_count)();
int (*find_compute_neighlist)(void* ptr, char * id, int request); int (*config_package_name)(int, char *, int);
int (*neighlist_num_elements)(void* ptr, int idx);
void (*neighlist_element_neighbors)(void * ptr, int idx, int element, int * iatom, int * numneigh, int ** neighbors);
// lammps_create_atoms() takes tagint and imageint as args int (*config_accelerator)(const char *, const char *, const char *);
// ifdef insures they are compatible with rest of LAMMPS int (*has_gpu_device)();
// caller must match to how LAMMPS library is built void (*get_gpu_device_info)(char *, int);
#ifdef LAMMPS_BIGBIG int (*has_style)(void *, const char *, const char *);
void (*create_atoms)(void *, int, int64_t *, int *, int (*style_count)(void *, const char *);
double *, double *, int64_t *, int); int (*style_name)(void *, const char *, int, char *, int);
#else
void (*create_atoms)(void *, int, int *, int *, int (*has_id)(void *, const char *, const char *);
double *, double *, int *, int); int (*id_count)(void *, const char *);
#endif int (*id_name)(void *, const char *, int, char *, int);
int (*plugin_count)();
int (*plugin_name)(int, char *, char *, int);
void (*set_fix_external_callback)(void *, const char *, FixExternalFnPtr, void*);
void (*fix_external_get_force)(void *, const char *);
void (*fix_external_set_energy_global)(void *, const char *, double);
void (*fix_external_set_energy_peratom)(void *, const char *, double *);
void (*fix_external_set_virial_global)(void *, const char *, double *);
void (*fix_external_set_virial_peratom)(void *, const char *, double **);
void (*fix_external_set_vector_length)(void *, const char *, int);
void (*fix_external_set_vector)(void *, const char *, int, double);
void (*free)(void *);
void (*is_running)(void *);
void (*force_timeout)(void *);
int (*has_error)(void *); int (*has_error)(void *);
int (*get_last_error_message)(void *, char *, int); int (*get_last_error_message)(void *, char *, int);

View File

@ -1,9 +1,12 @@
LAMMPS (18 Feb 2020) LAMMPS (31 Aug 2021)
Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
Created orthogonal box = (0 0 0) to (6.71838 6.71838 6.71838) using 1 OpenMP thread(s) per MPI task
Lattice spacing in x,y,z = 1.6795962 1.6795962 1.6795962
Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
1 by 1 by 1 MPI processor grid 1 by 1 by 1 MPI processor grid
Created 256 atoms Created 256 atoms
create_atoms CPU = 0.000297844 secs using lattice units in orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
create_atoms CPU = 0.001 seconds
Neighbor list info ... Neighbor list info ...
update every 20 steps, delay 0 steps, check no update every 20 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000 max neighbors/atom: 2000, page size: 100000
@ -14,108 +17,108 @@ Neighbor list info ...
(1) pair lj/cut, perpetual (1) pair lj/cut, perpetual
attributes: half, newton on attributes: half, newton on
pair build: half/bin/atomonly/newton pair build: half/bin/atomonly/newton
stencil: half/bin/3d/newton stencil: half/bin/3d
bin: standard bin: standard
Setting up Verlet run ... Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 0 Current step : 0
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
0 1.44 -6.7733681 0 -4.6218056 -5.0244179 0 1.44 -6.7733681 0 -4.6218056 -5.0244179
10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175 10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175
Loop time of 0.00164276 on 1 procs for 10 steps with 256 atoms Loop time of 0.00239712 on 1 procs for 10 steps with 256 atoms
Performance: 2629719.113 tau/day, 6087.313 timesteps/s Performance: 1802163.347 tau/day, 4171.674 timesteps/s
93.7% CPU use with 1 MPI tasks x no OpenMP threads 97.2% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0014956 | 0.0014956 | 0.0014956 | 0.0 | 91.04 Pair | 0.0020572 | 0.0020572 | 0.0020572 | 0.0 | 85.82
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 8.045e-05 | 8.045e-05 | 8.045e-05 | 0.0 | 4.90 Comm | 0.00018731 | 0.00018731 | 0.00018731 | 0.0 | 7.81
Output | 1.1399e-05 | 1.1399e-05 | 1.1399e-05 | 0.0 | 0.69 Output | 4.478e-05 | 4.478e-05 | 4.478e-05 | 0.0 | 1.87
Modify | 3.7431e-05 | 3.7431e-05 | 3.7431e-05 | 0.0 | 2.28 Modify | 6.3637e-05 | 6.3637e-05 | 6.3637e-05 | 0.0 | 2.65
Other | | 1.789e-05 | | | 1.09 Other | | 4.419e-05 | | | 1.84
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1431 ave 1431 max 1431 min Nghost: 1431.00 ave 1431 max 1431 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9984 ave 9984 max 9984 min Neighs: 9984.00 ave 9984 max 9984 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9984 Total # of neighbors = 9984
Ave neighs/atom = 39 Ave neighs/atom = 39.000000
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 10 Current step : 10
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175 10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175
20 0.6239063 -5.557644 0 -4.6254403 0.97451173 20 0.6239063 -5.557644 0 -4.6254403 0.97451173
Loop time of 0.00199768 on 1 procs for 10 steps with 256 atoms Loop time of 0.00329271 on 1 procs for 10 steps with 256 atoms
Performance: 2162504.180 tau/day, 5005.797 timesteps/s Performance: 1311987.619 tau/day, 3037.008 timesteps/s
99.8% CPU use with 1 MPI tasks x no OpenMP threads 96.4% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0018518 | 0.0018518 | 0.0018518 | 0.0 | 92.70 Pair | 0.0029015 | 0.0029015 | 0.0029015 | 0.0 | 88.12
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 7.9768e-05 | 7.9768e-05 | 7.9768e-05 | 0.0 | 3.99 Comm | 0.00021807 | 0.00021807 | 0.00021807 | 0.0 | 6.62
Output | 1.1433e-05 | 1.1433e-05 | 1.1433e-05 | 0.0 | 0.57 Output | 4.9163e-05 | 4.9163e-05 | 4.9163e-05 | 0.0 | 1.49
Modify | 3.6904e-05 | 3.6904e-05 | 3.6904e-05 | 0.0 | 1.85 Modify | 7.0573e-05 | 7.0573e-05 | 7.0573e-05 | 0.0 | 2.14
Other | | 1.773e-05 | | | 0.89 Other | | 5.339e-05 | | | 1.62
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1431 ave 1431 max 1431 min Nghost: 1431.00 ave 1431 max 1431 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9952 ave 9952 max 9952 min Neighs: 9952.00 ave 9952 max 9952 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9952 Total # of neighbors = 9952
Ave neighs/atom = 38.875 Ave neighs/atom = 38.875000
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 20 Current step : 20
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
20 0.6239063 -5.5404291 0 -4.6082254 1.0394285 20 0.6239063 -5.5404291 0 -4.6082254 1.0394285
21 0.63845863 -5.5628733 0 -4.6089263 0.99398278 21 0.63845863 -5.5628733 0 -4.6089263 0.99398278
Loop time of 0.000304321 on 1 procs for 1 steps with 256 atoms Loop time of 0.000638039 on 1 procs for 1 steps with 256 atoms
Performance: 1419553.695 tau/day, 3286.004 timesteps/s Performance: 677074.599 tau/day, 1567.302 timesteps/s
98.9% CPU use with 1 MPI tasks x no OpenMP threads 98.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.00027815 | 0.00027815 | 0.00027815 | 0.0 | 91.40 Pair | 0.00042876 | 0.00042876 | 0.00042876 | 0.0 | 67.20
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 8.321e-06 | 8.321e-06 | 8.321e-06 | 0.0 | 2.73 Comm | 5.2872e-05 | 5.2872e-05 | 5.2872e-05 | 0.0 | 8.29
Output | 1.0513e-05 | 1.0513e-05 | 1.0513e-05 | 0.0 | 3.45 Output | 0.00012218 | 0.00012218 | 0.00012218 | 0.0 | 19.15
Modify | 3.968e-06 | 3.968e-06 | 3.968e-06 | 0.0 | 1.30 Modify | 1.3762e-05 | 1.3762e-05 | 1.3762e-05 | 0.0 | 2.16
Other | | 3.365e-06 | | | 1.11 Other | | 2.047e-05 | | | 3.21
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1431 ave 1431 max 1431 min Nghost: 1431.00 ave 1431 max 1431 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9705 ave 9705 max 9705 min Neighs: 9705.00 ave 9705 max 9705 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9705 Total # of neighbors = 9705
Ave neighs/atom = 37.9102 Ave neighs/atom = 37.910156
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Force on 1 atom via extract_atom: 26.9581 Force on 1 atom via extract_atom: 26.9581
@ -124,136 +127,136 @@ Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 21 Current step : 21
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
21 0.63845863 -5.5628733 0 -4.6089263 0.99398278 21 0.63845863 -5.5628733 0 -4.6089263 0.99398278
31 0.7494946 -5.7306417 0 -4.6107913 0.41043597 31 0.7494946 -5.7306417 0 -4.6107913 0.41043597
Loop time of 0.00196027 on 1 procs for 10 steps with 256 atoms Loop time of 0.00281277 on 1 procs for 10 steps with 256 atoms
Performance: 2203779.175 tau/day, 5101.341 timesteps/s Performance: 1535852.558 tau/day, 3555.214 timesteps/s
99.7% CPU use with 1 MPI tasks x no OpenMP threads 92.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0018146 | 0.0018146 | 0.0018146 | 0.0 | 92.57 Pair | 0.0024599 | 0.0024599 | 0.0024599 | 0.0 | 87.45
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 8.0268e-05 | 8.0268e-05 | 8.0268e-05 | 0.0 | 4.09 Comm | 0.00020234 | 0.00020234 | 0.00020234 | 0.0 | 7.19
Output | 1.0973e-05 | 1.0973e-05 | 1.0973e-05 | 0.0 | 0.56 Output | 3.6436e-05 | 3.6436e-05 | 3.6436e-05 | 0.0 | 1.30
Modify | 3.6913e-05 | 3.6913e-05 | 3.6913e-05 | 0.0 | 1.88 Modify | 6.7542e-05 | 6.7542e-05 | 6.7542e-05 | 0.0 | 2.40
Other | | 1.756e-05 | | | 0.90 Other | | 4.655e-05 | | | 1.65
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1431 ave 1431 max 1431 min Nghost: 1431.00 ave 1431 max 1431 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9688 ave 9688 max 9688 min Neighs: 9688.00 ave 9688 max 9688 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9688 Total # of neighbors = 9688
Ave neighs/atom = 37.8438 Ave neighs/atom = 37.843750
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 31 Current step : 31
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
31 0.7494946 -5.7306417 0 -4.6107913 0.41043597 31 0.7494946 -5.7306417 0 -4.6107913 0.41043597
51 0.71349216 -5.6772387 0 -4.6111811 0.52117681 51 0.71349216 -5.6772387 0 -4.6111811 0.52117681
Loop time of 0.00433063 on 1 procs for 20 steps with 256 atoms Loop time of 0.00560916 on 1 procs for 20 steps with 256 atoms
Performance: 1995088.941 tau/day, 4618.261 timesteps/s Performance: 1540338.414 tau/day, 3565.598 timesteps/s
99.3% CPU use with 1 MPI tasks x no OpenMP threads 99.2% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0035121 | 0.0035121 | 0.0035121 | 0.0 | 81.10 Pair | 0.0044403 | 0.0044403 | 0.0044403 | 0.0 | 79.16
Neigh | 0.00050258 | 0.00050258 | 0.00050258 | 0.0 | 11.61 Neigh | 0.00056186 | 0.00056186 | 0.00056186 | 0.0 | 10.02
Comm | 0.00019444 | 0.00019444 | 0.00019444 | 0.0 | 4.49 Comm | 0.00036797 | 0.00036797 | 0.00036797 | 0.0 | 6.56
Output | 1.2092e-05 | 1.2092e-05 | 1.2092e-05 | 0.0 | 0.28 Output | 3.676e-05 | 3.676e-05 | 3.676e-05 | 0.0 | 0.66
Modify | 7.2917e-05 | 7.2917e-05 | 7.2917e-05 | 0.0 | 1.68 Modify | 0.00011282 | 0.00011282 | 0.00011282 | 0.0 | 2.01
Other | | 3.647e-05 | | | 0.84 Other | | 8.943e-05 | | | 1.59
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1421 ave 1421 max 1421 min Nghost: 1421.00 ave 1421 max 1421 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9700 ave 9700 max 9700 min Neighs: 9700.00 ave 9700 max 9700 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9700 Total # of neighbors = 9700
Ave neighs/atom = 37.8906 Ave neighs/atom = 37.890625
Neighbor list builds = 1 Neighbor list builds = 1
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 51 Current step : 51
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
51 0.71349216 -5.6772387 0 -4.6111811 0.52117681 51 0.71349216 -5.6772387 0 -4.6111811 0.52117681
61 0.78045421 -5.7781094 0 -4.6120011 0.093808941 61 0.78045421 -5.7781094 0 -4.6120011 0.093808941
Loop time of 0.00196567 on 1 procs for 10 steps with 256 atoms Loop time of 0.00373815 on 1 procs for 10 steps with 256 atoms
Performance: 2197727.285 tau/day, 5087.332 timesteps/s Performance: 1155650.623 tau/day, 2675.117 timesteps/s
99.7% CPU use with 1 MPI tasks x no OpenMP threads 98.0% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0018222 | 0.0018222 | 0.0018222 | 0.0 | 92.70 Pair | 0.0030908 | 0.0030908 | 0.0030908 | 0.0 | 82.68
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 7.8285e-05 | 7.8285e-05 | 7.8285e-05 | 0.0 | 3.98 Comm | 0.00038189 | 0.00038189 | 0.00038189 | 0.0 | 10.22
Output | 1.0862e-05 | 1.0862e-05 | 1.0862e-05 | 0.0 | 0.55 Output | 4.1615e-05 | 4.1615e-05 | 4.1615e-05 | 0.0 | 1.11
Modify | 3.6719e-05 | 3.6719e-05 | 3.6719e-05 | 0.0 | 1.87 Modify | 0.00013851 | 0.00013851 | 0.00013851 | 0.0 | 3.71
Other | | 1.764e-05 | | | 0.90 Other | | 8.533e-05 | | | 2.28
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1421 ave 1421 max 1421 min Nghost: 1421.00 ave 1421 max 1421 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9700 ave 9700 max 9700 min Neighs: 9700.00 ave 9700 max 9700 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9700 Total # of neighbors = 9700
Ave neighs/atom = 37.8906 Ave neighs/atom = 37.890625
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 61 Current step : 61
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
61 0.78045421 -5.7781094 0 -4.6120011 0.093808941 61 0.78045421 -5.7781094 0 -4.6120011 0.093808941
81 0.77743907 -5.7735004 0 -4.6118971 0.090822641 81 0.77743907 -5.7735004 0 -4.6118971 0.090822641
Loop time of 0.00430528 on 1 procs for 20 steps with 256 atoms Loop time of 0.00612177 on 1 procs for 20 steps with 256 atoms
Performance: 2006838.581 tau/day, 4645.460 timesteps/s Performance: 1411356.519 tau/day, 3267.029 timesteps/s
99.8% CPU use with 1 MPI tasks x no OpenMP threads 98.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0034931 | 0.0034931 | 0.0034931 | 0.0 | 81.13 Pair | 0.0047211 | 0.0047211 | 0.0047211 | 0.0 | 77.12
Neigh | 0.00050437 | 0.00050437 | 0.00050437 | 0.0 | 11.72 Neigh | 0.00083088 | 0.00083088 | 0.00083088 | 0.0 | 13.57
Comm | 0.0001868 | 0.0001868 | 0.0001868 | 0.0 | 4.34 Comm | 0.00032716 | 0.00032716 | 0.00032716 | 0.0 | 5.34
Output | 1.1699e-05 | 1.1699e-05 | 1.1699e-05 | 0.0 | 0.27 Output | 3.9891e-05 | 3.9891e-05 | 3.9891e-05 | 0.0 | 0.65
Modify | 7.3308e-05 | 7.3308e-05 | 7.3308e-05 | 0.0 | 1.70 Modify | 0.00010926 | 0.00010926 | 0.00010926 | 0.0 | 1.78
Other | | 3.604e-05 | | | 0.84 Other | | 9.346e-05 | | | 1.53
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1405 ave 1405 max 1405 min Nghost: 1405.00 ave 1405 max 1405 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9701 ave 9701 max 9701 min Neighs: 9701.00 ave 9701 max 9701 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9701 Total # of neighbors = 9701
Ave neighs/atom = 37.8945 Ave neighs/atom = 37.894531
Neighbor list builds = 1 Neighbor list builds = 1
Dangerous builds not checked Dangerous builds not checked
Deleted 256 atoms, new total = 0 Deleted 256 atoms, new total = 0
@ -261,34 +264,34 @@ Setting up Verlet run ...
Unit style : lj Unit style : lj
Current step : 81 Current step : 81
Time step : 0.005 Time step : 0.005
Per MPI rank memory allocation (min/avg/max) = 2.63 | 2.63 | 2.63 Mbytes Per MPI rank memory allocation (min/avg/max) = 2.630 | 2.630 | 2.630 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
81 0.6239063 -5.5404291 0 -4.6082254 1.0394285 81 0.6239063 -5.5404291 0 -4.6082254 1.0394285
91 0.75393007 -5.7375259 0 -4.6110484 0.39357367 91 0.75393007 -5.7375259 0 -4.6110484 0.39357367
Loop time of 0.00195843 on 1 procs for 10 steps with 256 atoms Loop time of 0.00319065 on 1 procs for 10 steps with 256 atoms
Performance: 2205851.941 tau/day, 5106.139 timesteps/s Performance: 1353954.393 tau/day, 3134.154 timesteps/s
99.7% CPU use with 1 MPI tasks x no OpenMP threads 99.2% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0018143 | 0.0018143 | 0.0018143 | 0.0 | 92.64 Pair | 0.0027828 | 0.0027828 | 0.0027828 | 0.0 | 87.22
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 7.8608e-05 | 7.8608e-05 | 7.8608e-05 | 0.0 | 4.01 Comm | 0.00023286 | 0.00023286 | 0.00023286 | 0.0 | 7.30
Output | 1.0786e-05 | 1.0786e-05 | 1.0786e-05 | 0.0 | 0.55 Output | 4.0459e-05 | 4.0459e-05 | 4.0459e-05 | 0.0 | 1.27
Modify | 3.7106e-05 | 3.7106e-05 | 3.7106e-05 | 0.0 | 1.89 Modify | 7.3576e-05 | 7.3576e-05 | 7.3576e-05 | 0.0 | 2.31
Other | | 1.762e-05 | | | 0.90 Other | | 6.094e-05 | | | 1.91
Nlocal: 256 ave 256 max 256 min Nlocal: 256.000 ave 256 max 256 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1431 ave 1431 max 1431 min Nghost: 1431.00 ave 1431 max 1431 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 9705 ave 9705 max 9705 min Neighs: 9705.00 ave 9705 max 9705 min
Histogram: 1 0 0 0 0 0 0 0 0 0 Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9705 Total # of neighbors = 9705
Ave neighs/atom = 37.9102 Ave neighs/atom = 37.910156
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Total wall time: 0:00:00 Total wall time: 0:00:00


@ -1,9 +1,12 @@
LAMMPS (18 Feb 2020) LAMMPS (31 Aug 2021)
Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:98)
Created orthogonal box = (0 0 0) to (6.71838 6.71838 6.71838) using 1 OpenMP thread(s) per MPI task
Lattice spacing in x,y,z = 1.6795962 1.6795962 1.6795962
Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
1 by 1 by 2 MPI processor grid 1 by 1 by 2 MPI processor grid
Created 256 atoms Created 256 atoms
create_atoms CPU = 0.000265157 secs using lattice units in orthogonal box = (0.0000000 0.0000000 0.0000000) to (6.7183848 6.7183848 6.7183848)
create_atoms CPU = 0.003 seconds
Neighbor list info ... Neighbor list info ...
update every 20 steps, delay 0 steps, check no update every 20 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000 max neighbors/atom: 2000, page size: 100000
@ -14,7 +17,7 @@ Neighbor list info ...
(1) pair lj/cut, perpetual (1) pair lj/cut, perpetual
attributes: half, newton on attributes: half, newton on
pair build: half/bin/atomonly/newton pair build: half/bin/atomonly/newton
stencil: half/bin/3d/newton stencil: half/bin/3d
bin: standard bin: standard
Setting up Verlet run ... Setting up Verlet run ...
Unit style : lj Unit style : lj
@ -24,30 +27,30 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
0 1.44 -6.7733681 0 -4.6218056 -5.0244179 0 1.44 -6.7733681 0 -4.6218056 -5.0244179
10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175 10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175
Loop time of 0.00115264 on 2 procs for 10 steps with 256 atoms Loop time of 0.00330899 on 2 procs for 10 steps with 256 atoms
Performance: 3747912.946 tau/day, 8675.724 timesteps/s Performance: 1305535.501 tau/day, 3022.073 timesteps/s
94.5% CPU use with 2 MPI tasks x no OpenMP threads 75.7% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.00074885 | 0.00075021 | 0.00075156 | 0.0 | 65.09 Pair | 0.0013522 | 0.0013813 | 0.0014104 | 0.1 | 41.74
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.00031829 | 0.00031943 | 0.00032056 | 0.0 | 27.71 Comm | 0.00049139 | 0.00054241 | 0.00059342 | 0.0 | 16.39
Output | 9.306e-06 | 2.6673e-05 | 4.4041e-05 | 0.0 | 2.31 Output | 3.6986e-05 | 0.00056588 | 0.0010948 | 0.0 | 17.10
Modify | 2.0684e-05 | 2.0891e-05 | 2.1098e-05 | 0.0 | 1.81 Modify | 4.3909e-05 | 4.3924e-05 | 4.3939e-05 | 0.0 | 1.33
Other | | 3.544e-05 | | | 3.07 Other | | 0.0007755 | | | 23.44
Nlocal: 128 ave 128 max 128 min Nlocal: 128.000 ave 128 max 128 min
Histogram: 2 0 0 0 0 0 0 0 0 0 Histogram: 2 0 0 0 0 0 0 0 0 0
Nghost: 1109 ave 1109 max 1109 min Nghost: 1109.00 ave 1109 max 1109 min
Histogram: 2 0 0 0 0 0 0 0 0 0 Histogram: 2 0 0 0 0 0 0 0 0 0
Neighs: 4992 ave 4992 max 4992 min Neighs: 4992.00 ave 4992 max 4992 min
Histogram: 2 0 0 0 0 0 0 0 0 0 Histogram: 2 0 0 0 0 0 0 0 0 0
Total # of neighbors = 9984 Total # of neighbors = 9984
Ave neighs/atom = 39 Ave neighs/atom = 39.000000
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
@ -58,30 +61,30 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175 10 1.1298532 -6.3095502 0 -4.6213906 -2.6058175
20 0.6239063 -5.557644 0 -4.6254403 0.97451173 20 0.6239063 -5.557644 0 -4.6254403 0.97451173
Loop time of 0.00120443 on 2 procs for 10 steps with 256 atoms Loop time of 0.00648485 on 2 procs for 10 steps with 256 atoms
Performance: 3586761.860 tau/day, 8302.689 timesteps/s Performance: 666168.017 tau/day, 1542.056 timesteps/s
95.5% CPU use with 2 MPI tasks x no OpenMP threads 44.3% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.00087798 | 0.00091359 | 0.0009492 | 0.0 | 75.85 Pair | 0.0022373 | 0.0024405 | 0.0026437 | 0.4 | 37.63
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.00016739 | 0.00020368 | 0.00023997 | 0.0 | 16.91 Comm | 0.0024446 | 0.0026464 | 0.0028481 | 0.4 | 40.81
Output | 1.0124e-05 | 3.0513e-05 | 5.0901e-05 | 0.0 | 2.53 Output | 3.9069e-05 | 0.00059734 | 0.0011556 | 0.0 | 9.21
Modify | 1.89e-05 | 1.9812e-05 | 2.0725e-05 | 0.0 | 1.64 Modify | 4.869e-05 | 4.912e-05 | 4.9551e-05 | 0.0 | 0.76
Other | | 3.683e-05 | | | 3.06 Other | | 0.0007515 | | | 11.59
Nlocal: 128 ave 134 max 122 min Nlocal: 128.000 ave 134 max 122 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Nghost: 1109 ave 1115 max 1103 min Nghost: 1109.00 ave 1115 max 1103 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Neighs: 4976 ave 5205 max 4747 min Neighs: 4976.00 ave 5205 max 4747 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Total # of neighbors = 9952 Total # of neighbors = 9952
Ave neighs/atom = 38.875 Ave neighs/atom = 38.875000
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
@ -92,34 +95,34 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
20 0.6239063 -5.5404291 0 -4.6082254 1.0394285 20 0.6239063 -5.5404291 0 -4.6082254 1.0394285
21 0.63845863 -5.5628733 0 -4.6089263 0.99398278 21 0.63845863 -5.5628733 0 -4.6089263 0.99398278
Loop time of 0.000206062 on 2 procs for 1 steps with 256 atoms Loop time of 0.00128072 on 2 procs for 1 steps with 256 atoms
Performance: 2096456.406 tau/day, 4852.908 timesteps/s Performance: 337310.921 tau/day, 780.812 timesteps/s
94.1% CPU use with 2 MPI tasks x no OpenMP threads 60.2% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.00012947 | 0.00013524 | 0.00014101 | 0.0 | 65.63 Pair | 0.00047351 | 0.00049064 | 0.00050777 | 0.0 | 38.31
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 1.858e-05 | 2.4113e-05 | 2.9647e-05 | 0.0 | 11.70 Comm | 7.6767e-05 | 9.3655e-05 | 0.00011054 | 0.0 | 7.31
Output | 8.699e-06 | 2.4204e-05 | 3.9708e-05 | 0.0 | 11.75 Output | 5.4217e-05 | 0.00026297 | 0.00047172 | 0.0 | 20.53
Modify | 2.34e-06 | 2.3705e-06 | 2.401e-06 | 0.0 | 1.15 Modify | 1.1554e-05 | 1.2026e-05 | 1.2498e-05 | 0.0 | 0.94
Other | | 2.013e-05 | | | 9.77 Other | | 0.0004214 | | | 32.91
Nlocal: 128 ave 135 max 121 min Nlocal: 128.000 ave 135 max 121 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Nghost: 1109 ave 1116 max 1102 min Nghost: 1109.00 ave 1116 max 1102 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Neighs: 4852.5 ave 5106 max 4599 min Neighs: 4852.50 ave 5106 max 4599 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Total # of neighbors = 9705 Total # of neighbors = 9705
Ave neighs/atom = 37.9102 Ave neighs/atom = 37.910156
Force on 1 atom via extract_atom: -18.109
Force on 1 atom via extract_variable: -18.109
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Force on 1 atom via extract_atom: -18.109
Force on 1 atom via extract_variable: -18.109
Force on 1 atom via extract_atom: 26.9581 Force on 1 atom via extract_atom: 26.9581
Force on 1 atom via extract_variable: 26.9581 Force on 1 atom via extract_variable: 26.9581
Setting up Verlet run ... Setting up Verlet run ...
@ -130,30 +133,30 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
21 0.63845863 -5.5628733 0 -4.6089263 0.99398278 21 0.63845863 -5.5628733 0 -4.6089263 0.99398278
31 0.7494946 -5.7306417 0 -4.6107913 0.41043597 31 0.7494946 -5.7306417 0 -4.6107913 0.41043597
Loop time of 0.00119048 on 2 procs for 10 steps with 256 atoms Loop time of 0.00784933 on 2 procs for 10 steps with 256 atoms
Performance: 3628802.105 tau/day, 8400.005 timesteps/s Performance: 550365.761 tau/day, 1273.995 timesteps/s
98.0% CPU use with 2 MPI tasks x no OpenMP threads 59.6% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.00085276 | 0.00089699 | 0.00094123 | 0.0 | 75.35 Pair | 0.0019235 | 0.0033403 | 0.0047572 | 2.5 | 42.56
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.00016896 | 0.00021444 | 0.00025992 | 0.0 | 18.01 Comm | 0.0016948 | 0.003118 | 0.0045411 | 2.5 | 39.72
Output | 9.413e-06 | 2.5939e-05 | 4.2465e-05 | 0.0 | 2.18 Output | 3.6445e-05 | 0.00064636 | 0.0012563 | 0.0 | 8.23
Modify | 1.8977e-05 | 2.0009e-05 | 2.1042e-05 | 0.0 | 1.68 Modify | 6.2842e-05 | 6.3209e-05 | 6.3577e-05 | 0.0 | 0.81
Other | | 3.31e-05 | | | 2.78 Other | | 0.0006814 | | | 8.68
Nlocal: 128 ave 135 max 121 min Nlocal: 128.000 ave 135 max 121 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Nghost: 1109 ave 1116 max 1102 min Nghost: 1109.00 ave 1116 max 1102 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Neighs: 4844 ave 5096 max 4592 min Neighs: 4844.00 ave 5096 max 4592 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Total # of neighbors = 9688 Total # of neighbors = 9688
Ave neighs/atom = 37.8438 Ave neighs/atom = 37.843750
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
@ -164,30 +167,30 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
31 0.7494946 -5.7306417 0 -4.6107913 0.41043597 31 0.7494946 -5.7306417 0 -4.6107913 0.41043597
51 0.71349216 -5.6772387 0 -4.6111811 0.52117681 51 0.71349216 -5.6772387 0 -4.6111811 0.52117681
Loop time of 0.00252603 on 2 procs for 20 steps with 256 atoms Loop time of 0.00696051 on 2 procs for 20 steps with 256 atoms
Performance: 3420382.192 tau/day, 7917.551 timesteps/s Performance: 1241287.730 tau/day, 2873.351 timesteps/s
99.2% CPU use with 2 MPI tasks x no OpenMP threads 79.2% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0016245 | 0.0017014 | 0.0017784 | 0.2 | 67.36 Pair | 0.0028267 | 0.0036088 | 0.004391 | 1.3 | 51.85
Neigh | 0.00025359 | 0.0002563 | 0.00025901 | 0.0 | 10.15 Neigh | 0.00040272 | 0.00040989 | 0.00041707 | 0.0 | 5.89
Comm | 0.00036863 | 0.00045124 | 0.00053385 | 0.0 | 17.86 Comm | 0.00081061 | 0.0015825 | 0.0023544 | 1.9 | 22.74
Output | 9.839e-06 | 2.8031e-05 | 4.6223e-05 | 0.0 | 1.11 Output | 3.6006e-05 | 0.00062106 | 0.0012061 | 0.0 | 8.92
Modify | 3.7027e-05 | 3.9545e-05 | 4.2063e-05 | 0.0 | 1.57 Modify | 6.8937e-05 | 7.1149e-05 | 7.336e-05 | 0.0 | 1.02
Other | | 4.948e-05 | | | 1.96 Other | | 0.0006671 | | | 9.58
Nlocal: 128 ave 132 max 124 min Nlocal: 128.000 ave 132 max 124 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Nghost: 1100 ave 1101 max 1099 min Nghost: 1100.00 ave 1101 max 1099 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Neighs: 4850 ave 4953 max 4747 min Neighs: 4850.00 ave 4953 max 4747 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Total # of neighbors = 9700 Total # of neighbors = 9700
Ave neighs/atom = 37.8906 Ave neighs/atom = 37.890625
Neighbor list builds = 1 Neighbor list builds = 1
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
@ -198,30 +201,30 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
51 0.71349216 -5.6772387 0 -4.6111811 0.52117681 51 0.71349216 -5.6772387 0 -4.6111811 0.52117681
61 0.78045421 -5.7781094 0 -4.6120011 0.093808941 61 0.78045421 -5.7781094 0 -4.6120011 0.093808941
Loop time of 0.00115444 on 2 procs for 10 steps with 256 atoms Loop time of 0.00155862 on 2 procs for 10 steps with 256 atoms
Performance: 3742065.976 tau/day, 8662.190 timesteps/s Performance: 2771678.197 tau/day, 6415.922 timesteps/s
96.5% CPU use with 2 MPI tasks x no OpenMP threads 95.0% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.00087346 | 0.00089311 | 0.00091275 | 0.0 | 77.36 Pair | 0.0012369 | 0.001266 | 0.001295 | 0.1 | 81.22
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.00016192 | 0.0001823 | 0.00020269 | 0.0 | 15.79 Comm | 0.00019462 | 0.00022315 | 0.00025169 | 0.0 | 14.32
Output | 9.49e-06 | 2.6234e-05 | 4.2978e-05 | 0.0 | 2.27 Output | 2.0217e-05 | 2.1945e-05 | 2.3673e-05 | 0.0 | 1.41
Modify | 1.9095e-05 | 1.9843e-05 | 2.0591e-05 | 0.0 | 1.72 Modify | 2.562e-05 | 2.5759e-05 | 2.5898e-05 | 0.0 | 1.65
Other | | 3.296e-05 | | | 2.85 Other | | 2.181e-05 | | | 1.40
Nlocal: 128 ave 132 max 124 min Nlocal: 128.000 ave 132 max 124 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Nghost: 1100 ave 1101 max 1099 min Nghost: 1100.00 ave 1101 max 1099 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Neighs: 4850 ave 4953 max 4747 min Neighs: 4850.00 ave 4953 max 4747 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Total # of neighbors = 9700 Total # of neighbors = 9700
Ave neighs/atom = 37.8906 Ave neighs/atom = 37.890625
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Setting up Verlet run ... Setting up Verlet run ...
@ -232,30 +235,30 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
61 0.78045421 -5.7781094 0 -4.6120011 0.093808941 61 0.78045421 -5.7781094 0 -4.6120011 0.093808941
81 0.77743907 -5.7735004 0 -4.6118971 0.090822641 81 0.77743907 -5.7735004 0 -4.6118971 0.090822641
Loop time of 0.00244325 on 2 procs for 20 steps with 256 atoms Loop time of 0.00351607 on 2 procs for 20 steps with 256 atoms
Performance: 3536279.919 tau/day, 8185.833 timesteps/s Performance: 2457288.612 tau/day, 5688.168 timesteps/s
99.0% CPU use with 2 MPI tasks x no OpenMP threads 97.9% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0016916 | 0.0017038 | 0.001716 | 0.0 | 69.73 Pair | 0.0023896 | 0.0024147 | 0.0024397 | 0.1 | 68.67
Neigh | 0.00025229 | 0.00025512 | 0.00025795 | 0.0 | 10.44 Neigh | 0.00037331 | 0.00040456 | 0.0004358 | 0.0 | 11.51
Comm | 0.00035772 | 0.00036918 | 0.00038064 | 0.0 | 15.11 Comm | 0.00050571 | 0.00051343 | 0.00052116 | 0.0 | 14.60
Output | 1.0858e-05 | 2.7875e-05 | 4.4891e-05 | 0.0 | 1.14 Output | 2.6424e-05 | 5.6547e-05 | 8.667e-05 | 0.0 | 1.61
Modify | 3.817e-05 | 3.9325e-05 | 4.048e-05 | 0.0 | 1.61 Modify | 5.0287e-05 | 5.1071e-05 | 5.1856e-05 | 0.0 | 1.45
Other | | 4.796e-05 | | | 1.96 Other | | 7.58e-05 | | | 2.16
Nlocal: 128 ave 128 max 128 min Nlocal: 128.000 ave 128 max 128 min
Histogram: 2 0 0 0 0 0 0 0 0 0 Histogram: 2 0 0 0 0 0 0 0 0 0
Nghost: 1088.5 ave 1092 max 1085 min Nghost: 1088.50 ave 1092 max 1085 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Neighs: 4850.5 ave 4851 max 4850 min Neighs: 4850.50 ave 4851 max 4850 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Total # of neighbors = 9701 Total # of neighbors = 9701
Ave neighs/atom = 37.8945 Ave neighs/atom = 37.894531
Neighbor list builds = 1 Neighbor list builds = 1
Dangerous builds not checked Dangerous builds not checked
Deleted 256 atoms, new total = 0 Deleted 256 atoms, new total = 0
@ -267,30 +270,30 @@ Per MPI rank memory allocation (min/avg/max) = 2.624 | 2.624 | 2.624 Mbytes
Step Temp E_pair E_mol TotEng Press Step Temp E_pair E_mol TotEng Press
81 0.6239063 -5.5404291 0 -4.6082254 1.0394285 81 0.6239063 -5.5404291 0 -4.6082254 1.0394285
91 0.75393007 -5.7375259 0 -4.6110484 0.39357367 91 0.75393007 -5.7375259 0 -4.6110484 0.39357367
Loop time of 0.00118092 on 2 procs for 10 steps with 256 atoms Loop time of 0.0109747 on 2 procs for 10 steps with 256 atoms
Performance: 3658158.625 tau/day, 8467.960 timesteps/s Performance: 393631.731 tau/day, 911.185 timesteps/s
98.6% CPU use with 2 MPI tasks x no OpenMP threads 53.5% CPU use with 2 MPI tasks x 1 OpenMP threads
MPI task timing breakdown: MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total Section | min time | avg time | max time |%varavg| %total
--------------------------------------------------------------- ---------------------------------------------------------------
Pair | 0.0008476 | 0.00089265 | 0.00093771 | 0.0 | 75.59 Pair | 0.0012057 | 0.0012732 | 0.0013407 | 0.2 | 11.60
Neigh | 0 | 0 | 0 | 0.0 | 0.00 Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.00016335 | 0.00020946 | 0.00025557 | 0.0 | 17.74 Comm | 0.00018882 | 0.00025686 | 0.00032489 | 0.0 | 2.34
Output | 8.87e-06 | 2.5733e-05 | 4.2595e-05 | 0.0 | 2.18 Output | 2.1943e-05 | 0.0047067 | 0.0093915 | 6.8 | 42.89
Modify | 1.8755e-05 | 1.9814e-05 | 2.0872e-05 | 0.0 | 1.68 Modify | 2.4614e-05 | 2.5439e-05 | 2.6264e-05 | 0.0 | 0.23
Other | | 3.326e-05 | | | 2.82 Other | | 0.004712 | | | 42.94
Nlocal: 128 ave 135 max 121 min Nlocal: 128.000 ave 135 max 121 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Nghost: 1109 ave 1116 max 1102 min Nghost: 1109.00 ave 1116 max 1102 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Neighs: 4852.5 ave 5106 max 4599 min Neighs: 4852.50 ave 5106 max 4599 min
Histogram: 1 0 0 0 0 0 0 0 0 1 Histogram: 1 0 0 0 0 0 0 0 0 1
Total # of neighbors = 9705 Total # of neighbors = 9705
Ave neighs/atom = 37.9102 Ave neighs/atom = 37.910156
Neighbor list builds = 0 Neighbor list builds = 0
Dangerous builds not checked Dangerous builds not checked
Total wall time: 0:00:00 Total wall time: 0:00:00


@ -87,7 +87,7 @@ int main(int narg, char **arg)
MPI_Abort(MPI_COMM_WORLD,1); MPI_Abort(MPI_COMM_WORLD,1);
} }
} }
if (lammps == 1) plugin->open(0,NULL,comm_lammps,&lmp); if (lammps == 1) lmp = plugin->open(0,NULL,comm_lammps,NULL);
while (1) { while (1) {
if (me == 0) { if (me == 0) {
@ -139,7 +139,7 @@ int main(int narg, char **arg)
cmds[0] = (char *)"run 10"; cmds[0] = (char *)"run 10";
cmds[1] = (char *)"run 20"; cmds[1] = (char *)"run 20";
if (lammps == 1) plugin->commands_list(lmp,2,cmds); if (lammps == 1) plugin->commands_list(lmp,2,(const char **)cmds);
/* delete all atoms /* delete all atoms
create_atoms() to create new ones with old coords, vels create_atoms() to create new ones with old coords, vels
@ -164,12 +164,13 @@ int main(int narg, char **arg)
if (lammps == 1) { if (lammps == 1) {
plugin->close(lmp); plugin->close(lmp);
MPI_Barrier(comm_lammps);
MPI_Comm_free(&comm_lammps);
liblammpsplugin_release(plugin); liblammpsplugin_release(plugin);
} }
/* close down MPI */ /* close down MPI */
if (lammps == 1) MPI_Comm_free(&comm_lammps);
MPI_Barrier(MPI_COMM_WORLD); MPI_Barrier(MPI_COMM_WORLD);
MPI_Finalize(); MPI_Finalize();
} }


@ -8,7 +8,7 @@ bond_style harmonic
bond_coeff 1 100 1.122462 # K R0 bond_coeff 1 100 1.122462 # K R0
velocity all create 1.0 8008 loop geom velocity all create 1.0 8008 loop geom
pair_style lj/cut/coul/long 1.122462 20 pair_style lj/cut/coul/long/soft 2 0.5 10.0 1.122462 20
pair_coeff * * 1.0 1.0 1.122462 # charges pair_coeff * * 1.0 1.0 1.122462 # charges
kspace_style pppm 1.0e-3 kspace_style pppm 1.0e-3
pair_modify shift yes pair_modify shift yes


@ -1476,7 +1476,9 @@ int colvarmodule::write_output_files()
bi != biases.end(); bi != biases.end();
bi++) { bi++) {
// Only write output files if they have not already been written this time step // Only write output files if they have not already been written this time step
if ((*bi)->output_freq == 0 || (cvm::step_absolute() % (*bi)->output_freq) != 0) { if ((*bi)->output_freq == 0 ||
cvm::step_relative() == 0 ||
(cvm::step_absolute() % (*bi)->output_freq) != 0) {
error_code |= (*bi)->write_output_files(); error_code |= (*bi)->write_output_files();
} }
error_code |= (*bi)->write_state_to_replicas(); error_code |= (*bi)->write_state_to_replicas();


@ -1,3 +1,3 @@
#ifndef COLVARS_VERSION #ifndef COLVARS_VERSION
#define COLVARS_VERSION "2021-08-06" #define COLVARS_VERSION "2021-09-21"
#endif #endif


@ -462,7 +462,6 @@ int UCL_Device::set_platform(int pid) {
_num_devices = 0; _num_devices = 0;
for (int i=0; i<num_unpart; i++) { for (int i=0; i<num_unpart; i++) {
cl_uint num_subdevices = 1; cl_uint num_subdevices = 1;
cl_device_id *subdevice_list = device_list + i;
#ifdef CL_VERSION_1_2 #ifdef CL_VERSION_1_2
cl_device_affinity_domain adomain; cl_device_affinity_domain adomain;
@ -479,19 +478,21 @@ int UCL_Device::set_platform(int pid) {
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL, CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, 0, NULL,
&num_subdevices)); &num_subdevices));
if (num_subdevices > 1) { if (num_subdevices > 1) {
subdevice_list = new cl_device_id[num_subdevices]; cl_device_id *subdevice_list = new cl_device_id[num_subdevices];
CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices, CL_SAFE_CALL(clCreateSubDevices(device_list[i], props, num_subdevices,
subdevice_list, &num_subdevices)); subdevice_list, &num_subdevices));
for (int j=0; j<num_subdevices; j++) {
_cl_devices.push_back(device_list[i]);
add_properties(device_list[i]);
_num_devices++;
}
delete[] subdevice_list;
} else {
_cl_devices.push_back(device_list[i]);
add_properties(device_list[i]);
_num_devices++;
} }
#endif #endif
for (int j=0; j<num_subdevices; j++) {
_num_devices++;
_cl_devices.push_back(subdevice_list[j]);
add_properties(subdevice_list[j]);
}
if (num_subdevices > 1) delete[] subdevice_list;
} // for i } // for i
#endif #endif
@ -555,16 +556,22 @@ void UCL_Device::add_properties(cl_device_id device_list) {
sizeof(float_width),&float_width,nullptr)); sizeof(float_width),&float_width,nullptr));
op.preferred_vector_width32=float_width; op.preferred_vector_width32=float_width;
// Determine if double precision is supported
cl_uint double_width; cl_uint double_width;
CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_SAFE_CALL(clGetDeviceInfo(device_list,
CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE,
sizeof(double_width),&double_width,nullptr)); sizeof(double_width),&double_width,nullptr));
op.preferred_vector_width64=double_width; op.preferred_vector_width64=double_width;
if (double_width==0)
op.double_precision=false; // Determine if double precision is supported: All bits in the mask must be set.
else cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO|
CL_FP_ROUND_TO_INF|CL_FP_INF_NAN|CL_FP_DENORM);
cl_device_fp_config double_avail;
CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_DOUBLE_FP_CONFIG,
sizeof(double_avail),&double_avail,nullptr));
if ((double_avail & double_mask) == double_mask)
op.double_precision=true; op.double_precision=true;
else
op.double_precision=false;
CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_SAFE_CALL(clGetDeviceInfo(device_list,
CL_DEVICE_PROFILING_TIMER_RESOLUTION, CL_DEVICE_PROFILING_TIMER_RESOLUTION,


@ -38,8 +38,10 @@ namespace ucl_opencl {
/// Class for timing OpenCL events /// Class for timing OpenCL events
class UCL_Timer { class UCL_Timer {
public: public:
inline UCL_Timer() : _total_time(0.0f), _initialized(false), has_measured_time(false) { } inline UCL_Timer() : start_event(nullptr), stop_event(nullptr), _total_time(0.0f),
inline UCL_Timer(UCL_Device &dev) : _total_time(0.0f), _initialized(false), has_measured_time(false) _initialized(false), has_measured_time(false) { }
inline UCL_Timer(UCL_Device &dev) : start_event(nullptr), stop_event(nullptr), _total_time(0.0f),
_initialized(false), has_measured_time(false)
{ init(dev); } { init(dev); }
inline ~UCL_Timer() { clear(); } inline ~UCL_Timer() { clear(); }


@ -127,9 +127,8 @@ class Answer {
/// Add forces and torques from the GPU into a LAMMPS pointer /// Add forces and torques from the GPU into a LAMMPS pointer
void get_answers(double **f, double **tor); void get_answers(double **f, double **tor);
inline double get_answers(double **f, double **tor, double *eatom, inline double get_answers(double **f, double **tor, double *eatom, double **vatom,
double **vatom, double *virial, double &ecoul, double *virial, double &ecoul, int &error_flag_in) {
int &error_flag_in) {
double ta=MPI_Wtime(); double ta=MPI_Wtime();
time_answer.sync_stop(); time_answer.sync_stop();
_time_cpu_idle+=MPI_Wtime()-ta; _time_cpu_idle+=MPI_Wtime()-ta;


@ -34,7 +34,7 @@ BornCoulLongT::BornCoulLong() : BaseCharge<numtyp,acctyp>(),
} }
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
BornCoulLongT::~BornCoulLongT() { BornCoulLongT::~BornCoulLong() {
clear(); clear();
} }


@ -34,7 +34,7 @@ BornCoulWolfT::BornCoulWolf() : BaseCharge<numtyp,acctyp>(),
} }
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
BornCoulWolfT::~BornCoulWolfT() { BornCoulWolfT::~BornCoulWolf() {
clear(); clear();
} }


@ -34,7 +34,7 @@ BuckCoulLongT::BuckCoulLong() : BaseCharge<numtyp,acctyp>(),
} }
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
BuckCoulLongT::~BuckCoulLongT() { BuckCoulLongT::~BuckCoulLong() {
clear(); clear();
} }


@ -333,6 +333,12 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
gpu_barrier(); gpu_barrier();
} }
// check if double precision support is available
#if defined(_SINGLE_DOUBLE) || defined(_DOUBLE_DOUBLE)
if (!gpu->double_precision())
return -16;
#endif
// Setup auto bin size calculation for calls from atom::sort // Setup auto bin size calculation for calls from atom::sort
// - This is repeated in neighbor init with additional info // - This is repeated in neighbor init with additional info
if (_user_cell_size<0.0) { if (_user_cell_size<0.0) {
@ -348,7 +354,7 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu,
} }
template <class numtyp, class acctyp> template <class numtyp, class acctyp>
int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) { int DeviceT::set_ocl_params(std::string s_config, const std::string &extra_args) {
#ifdef USE_OPENCL #ifdef USE_OPENCL
#include "lal_pre_ocl_config.h" #include "lal_pre_ocl_config.h"
@ -368,7 +374,7 @@ int DeviceT::set_ocl_params(std::string s_config, std::string extra_args) {
int token_count=0; int token_count=0;
std::string params[18]; std::string params[18];
char ocl_config[2048]; char ocl_config[2048];
strcpy(ocl_config,s_config.c_str()); strncpy(ocl_config,s_config.c_str(),2047);
char *pch = strtok(ocl_config,","); char *pch = strtok(ocl_config,",");
_ocl_config_name=pch; _ocl_config_name=pch;
pch = strtok(nullptr,","); pch = strtok(nullptr,",");
@ -546,14 +552,9 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
return -3; return -3;
if (_user_cell_size<0.0) { if (_user_cell_size<0.0) {
#ifndef LAL_USE_OLD_NEIGHBOR
_neighbor_shared.setup_auto_cell_size(true,cutoff,nbor->simd_size());
#else
_neighbor_shared.setup_auto_cell_size(false,cutoff,nbor->simd_size()); _neighbor_shared.setup_auto_cell_size(false,cutoff,nbor->simd_size());
#endif
} else } else
_neighbor_shared.setup_auto_cell_size(false,_user_cell_size, _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,nbor->simd_size());
nbor->simd_size());
nbor->set_cutoff(cutoff); nbor->set_cutoff(cutoff);
return 0; return 0;
@ -992,10 +993,8 @@ int DeviceT::compile_kernels() {
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(0) || static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(0) ||
static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(1) || static_cast<size_t>(_block_cell_2d) > gpu->group_size_dim(1) ||
static_cast<size_t>(_block_cell_id) > gpu->group_size_dim(0) || static_cast<size_t>(_block_cell_id) > gpu->group_size_dim(0) ||
static_cast<size_t>(_max_shared_types*_max_shared_types* static_cast<size_t>(_max_shared_types*_max_shared_types*sizeof(numtyp)*17 > gpu->slm_size()) ||
sizeof(numtyp)*17 > gpu->slm_size()) || static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) > gpu->slm_size()))
static_cast<size_t>(_max_bio_shared_types*2*sizeof(numtyp) >
gpu->slm_size()))
return -13; return -13;
if (_block_pair % _simd_size != 0 || _block_bio_pair % _simd_size != 0 || if (_block_pair % _simd_size != 0 || _block_bio_pair % _simd_size != 0 ||
@ -1071,9 +1070,8 @@ void lmp_clear_device() {
global_device.clear_device(); global_device.clear_device();
} }
double lmp_gpu_forces(double **f, double **tor, double *eatom, double lmp_gpu_forces(double **f, double **tor, double *eatom, double **vatom,
double **vatom, double *virial, double &ecoul, double *virial, double &ecoul, int &error_flag) {
int &error_flag) {
return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul,error_flag); return global_device.fix_gpu(f,tor,eatom,vatom,virial,ecoul,error_flag);
} }


@ -163,17 +163,15 @@ class Device {
{ ans_queue.push(ans); } { ans_queue.push(ans); }
/// Add "answers" (force,energies,etc.) into LAMMPS structures /// Add "answers" (force,energies,etc.) into LAMMPS structures
inline double fix_gpu(double **f, double **tor, double *eatom, inline double fix_gpu(double **f, double **tor, double *eatom, double **vatom,
double **vatom, double *virial, double &ecoul, double *virial, double &ecoul, int &error_flag) {
int &error_flag) {
error_flag=0; error_flag=0;
atom.data_unavail(); atom.data_unavail();
if (ans_queue.empty()==false) { if (ans_queue.empty()==false) {
stop_host_timer(); stop_host_timer();
double evdw=0.0; double evdw=0.0;
while (ans_queue.empty()==false) { while (ans_queue.empty()==false) {
evdw+=ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul, evdw += ans_queue.front()->get_answers(f,tor,eatom,vatom,virial,ecoul,error_flag);
error_flag);
ans_queue.pop(); ans_queue.pop();
} }
return evdw; return evdw;
@ -350,7 +348,7 @@ class Device {
int _data_in_estimate, _data_out_estimate; int _data_in_estimate, _data_out_estimate;
std::string _ocl_config_name, _ocl_config_string, _ocl_compile_string; std::string _ocl_config_name, _ocl_config_string, _ocl_compile_string;
int set_ocl_params(std::string, std::string); int set_ocl_params(std::string, const std::string &);
}; };
} }


@ -39,7 +39,7 @@ bool Neighbor::init(NeighborShared *shared, const int inum,
const int block_cell_2d, const int block_cell_id, const int block_cell_2d, const int block_cell_id,
const int block_nbor_build, const int threads_per_atom, const int block_nbor_build, const int threads_per_atom,
const int simd_size, const bool time_device, const int simd_size, const bool time_device,
const std::string compile_flags, const bool ilist_map) { const std::string &compile_flags, const bool ilist_map) {
clear(); clear();
_ilist_map = ilist_map; _ilist_map = ilist_map;
@ -743,7 +743,7 @@ void Neighbor::build_nbor_list(double **x, const int inum, const int host_inum,
mn = _max_nbors; mn = _max_nbors;
const numtyp i_cell_size=static_cast<numtyp>(1.0/_cell_size); const numtyp i_cell_size=static_cast<numtyp>(1.0/_cell_size);
const int neigh_block=_block_cell_id; const int neigh_block=_block_cell_id;
const int GX=(int)ceil((float)nall/neigh_block); const int GX=(int)ceil((double)nall/neigh_block);
const numtyp sublo0=static_cast<numtyp>(sublo[0]); const numtyp sublo0=static_cast<numtyp>(sublo[0]);
const numtyp sublo1=static_cast<numtyp>(sublo[1]); const numtyp sublo1=static_cast<numtyp>(sublo[1]);
const numtyp sublo2=static_cast<numtyp>(sublo[2]); const numtyp sublo2=static_cast<numtyp>(sublo[2]);


@ -71,7 +71,7 @@ class Neighbor {
const int block_cell_2d, const int block_cell_id, const int block_cell_2d, const int block_cell_id,
const int block_nbor_build, const int threads_per_atom, const int block_nbor_build, const int threads_per_atom,
const int simd_size, const bool time_device, const int simd_size, const bool time_device,
const std::string compile_flags, const bool ilist_map); const std::string &compile_flags, const bool ilist_map);
/// Set the cutoff+skin /// Set the cutoff+skin
inline void set_cutoff(const double cutoff) { inline void set_cutoff(const double cutoff) {


@ -89,7 +89,7 @@ double NeighborShared::best_cell_size(const double subx, const double suby,
} }
void NeighborShared::compile_kernels(UCL_Device &dev, const int gpu_nbor, void NeighborShared::compile_kernels(UCL_Device &dev, const int gpu_nbor,
const std::string flags) { const std::string &flags) {
if (_compiled) if (_compiled)
return; return;


@ -87,7 +87,7 @@ class NeighborShared {
/// Compile kernels for neighbor lists /// Compile kernels for neighbor lists
void compile_kernels(UCL_Device &dev, const int gpu_nbor, void compile_kernels(UCL_Device &dev, const int gpu_nbor,
const std::string flags); const std::string &flags);
// ----------------------------- Kernels // ----------------------------- Kernels
UCL_Program *nbor_program, *build_program; UCL_Program *nbor_program, *build_program;


@ -69,14 +69,14 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,
flag=device->init(*ans,nlocal,nall); flag=device->init(*ans,nlocal,nall);
if (flag!=0) if (flag!=0)
return 0; return nullptr;
if (sizeof(grdtyp)==sizeof(double) && device->double_precision()==false) { if (sizeof(grdtyp)==sizeof(double) && device->double_precision()==false) {
flag=-15; flag=-15;
return 0; return nullptr;
} }
if (device->ptx_arch()>0.0 && device->ptx_arch()<1.1) { if (device->ptx_arch()>0.0 && device->ptx_arch()<1.1) {
flag=-4; flag=-4;
return 0; return nullptr;
} }
ucl_device=device->gpu; ucl_device=device->gpu;
@ -168,7 +168,7 @@ grdtyp * PPPMT::init(const int nlocal, const int nall, FILE *_screen,
UCL_READ_WRITE)==UCL_SUCCESS); UCL_READ_WRITE)==UCL_SUCCESS);
if (!success) { if (!success) {
flag=-3; flag=-3;
return 0; return nullptr;
} }
error_flag.device.zero(); error_flag.device.zero();
@ -342,13 +342,15 @@ void PPPMT::interp(const grdtyp qqrd2e_scale) {
vd_brick.update_device(true); vd_brick.update_device(true);
time_in.stop(); time_in.stop();
int ainum=this->ans->inum();
if (ainum==0)
return;
time_interp.start(); time_interp.start();
// Compute the block size and grid size to keep all cores busy // Compute the block size and grid size to keep all cores busy
int BX=this->block_size(); int BX=this->block_size();
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX)); int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/BX));
int ainum=this->ans->inum();
k_interp.set_size(GX,BX); k_interp.set_size(GX,BX);
k_interp.run(&atom->x, &atom->q, &ainum, &vd_brick, &d_rho_coeff, k_interp.run(&atom->x, &atom->q, &ainum, &vd_brick, &d_rho_coeff,
&_npts_x, &_npts_yx, &_brick_x, &_brick_y, &_brick_z, &_delxinv, &_npts_x, &_npts_yx, &_brick_x, &_brick_y, &_brick_z, &_delxinv,


@ -2,8 +2,8 @@ SHELL = /bin/sh
# ------ FILES ------ # ------ FILES ------
SRC_FILES = $(wildcard src/ML-PACE/*.cpp) SRC_FILES = $(wildcard src/USER-PACE/*.cpp)
SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES)) SRC = $(filter-out src/USER-PACE/pair_pace.cpp, $(SRC_FILES))
# ------ DEFINITIONS ------ # ------ DEFINITIONS ------
@ -12,7 +12,7 @@ OBJ = $(SRC:.cpp=.o)
# ------ SETTINGS ------ # ------ SETTINGS ------
CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE CXXFLAGS = -O3 -fPIC -Isrc/USER-PACE
ARCHIVE = ar ARCHIVE = ar
ARCHFLAG = -rc ARCHFLAG = -rc


@ -1,3 +1,3 @@
pace_SYSINC =-I../../lib/pace/src/ML-PACE pace_SYSINC =-I../../lib/pace/src/USER-PACE
pace_SYSLIB = -L../../lib/pace/ -lpace pace_SYSLIB = -L../../lib/pace/ -lpace
pace_SYSPATH = pace_SYSPATH =


@ -92,8 +92,12 @@ class numpy_wrapper:
if dim == LAMMPS_AUTODETECT: if dim == LAMMPS_AUTODETECT:
if dtype in (LAMMPS_INT_2D, LAMMPS_DOUBLE_2D, LAMMPS_INT64_2D): if dtype in (LAMMPS_INT_2D, LAMMPS_DOUBLE_2D, LAMMPS_INT64_2D):
# TODO add other fields # TODO add other fields
if name in ("x", "v", "f", "angmom", "torque", "csforce", "vforce"): if name in ("x", "v", "f", "x0", "omega", "angmom", "torque", "vforce", "vest"):
dim = 3 dim = 3
elif name == "smd_data_9":
dim = 9
elif name == "smd_stress":
dim = 6
else: else:
dim = 2 dim = 2
else: else:
@ -386,6 +390,9 @@ class numpy_wrapper:
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
def iarray(self, c_int_type, raw_ptr, nelem, dim=1): def iarray(self, c_int_type, raw_ptr, nelem, dim=1):
if raw_ptr is None:
return None
import numpy as np import numpy as np
np_int_type = self._ctype_to_numpy_int(c_int_type) np_int_type = self._ctype_to_numpy_int(c_int_type)
@ -405,7 +412,11 @@ class numpy_wrapper:
# ------------------------------------------------------------------------- # -------------------------------------------------------------------------
def darray(self, raw_ptr, nelem, dim=1): def darray(self, raw_ptr, nelem, dim=1):
if raw_ptr is None:
return None
import numpy as np import numpy as np
if dim == 1: if dim == 1:
ptr = cast(raw_ptr, POINTER(c_double * nelem)) ptr = cast(raw_ptr, POINTER(c_double * nelem))
else: else:

src/.gitignore

@ -860,8 +860,6 @@
/fix_ti_rs.h /fix_ti_rs.h
/fix_ti_spring.cpp /fix_ti_spring.cpp
/fix_ti_spring.h /fix_ti_spring.h
/fix_ttm.cpp
/fix_ttm.h
/fix_tune_kspace.cpp /fix_tune_kspace.cpp
/fix_tune_kspace.h /fix_tune_kspace.h
/fix_wall_body_polygon.cpp /fix_wall_body_polygon.cpp
@ -921,6 +919,7 @@
/improper_ring.h /improper_ring.h
/improper_umbrella.cpp /improper_umbrella.cpp
/improper_umbrella.h /improper_umbrella.h
/interlayer_taper.h
/kissfft.h /kissfft.h
/lj_sdk_common.h /lj_sdk_common.h
/math_complex.h /math_complex.h
@ -935,7 +934,6 @@
/msm_cg.h /msm_cg.h
/neb.cpp /neb.cpp
/neb.h /neb.h
/pair_adp.cpp /pair_adp.cpp
/pair_adp.h /pair_adp.h
/pair_agni.cpp /pair_agni.cpp
@ -996,6 +994,8 @@
/pair_cosine_squared.h /pair_cosine_squared.h
/pair_coul_diel.cpp /pair_coul_diel.cpp
/pair_coul_diel.h /pair_coul_diel.h
/pair_coul_exclude.cpp
/pair_coul_exclude.h
/pair_coul_long.cpp /pair_coul_long.cpp
/pair_coul_long.h /pair_coul_long.h
/pair_coul_msm.cpp /pair_coul_msm.cpp
@ -1433,6 +1433,10 @@
/fix_srp.h /fix_srp.h
/fix_tfmc.cpp /fix_tfmc.cpp
/fix_tfmc.h /fix_tfmc.h
/fix_ttm.cpp
/fix_ttm.h
/fix_ttm_grid.cpp
/fix_ttm_grid.h
/fix_ttm_mod.cpp /fix_ttm_mod.cpp
/fix_ttm_mod.h /fix_ttm_mod.h
/pair_born_coul_long_cs.cpp /pair_born_coul_long_cs.cpp


@ -301,8 +301,7 @@ void PairLineLJ::compute(int eflag, int vflag)
} }
} }
if (evflag) ev_tally(i,j,nlocal,newton_pair, if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
evdwl,0.0,fpair,delx,dely,delz);
} }
} }


@ -375,8 +375,7 @@ void PairTriLJ::compute(int eflag, int vflag)
} }
} }
if (evflag) ev_tally(i,j,nlocal,newton_pair, if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
evdwl,0.0,fpair,delx,dely,delz);
} }
} }


@ -39,7 +39,6 @@
#include <cstring> #include <cstring>
#include <map> #include <map>
#include <utility> #include <utility>
#include <vector>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;


@ -37,7 +37,6 @@
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <vector>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace FixConst; using namespace FixConst;
@ -234,9 +233,7 @@ FixBocs::FixBocs(LAMMPS *lmp, int narg, char **arg) :
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg],"mtk") == 0) { } else if (strcmp(arg[iarg],"mtk") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command"); if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");
if (strcmp(arg[iarg+1],"yes") == 0) mtk_flag = 1; mtk_flag = utils::logical(FLERR,arg[iarg+1],false,lmp);
else if (strcmp(arg[iarg+1],"no") == 0) mtk_flag = 0;
else error->all(FLERR,"Illegal fix bocs command");
iarg += 2; iarg += 2;
} else if (strcmp(arg[iarg],"tloop") == 0) { } else if (strcmp(arg[iarg],"tloop") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command"); if (iarg+2 > narg) error->all(FLERR,"Illegal fix bocs command");


@ -337,8 +337,7 @@ void FixWallBodyPolygon::post_force(int /*vflag*/)
num_contacts = 0; num_contacts = 0;
facc[0] = facc[1] = facc[2] = 0; facc[0] = facc[1] = facc[2] = 0;
vertex_against_wall(i, wall_pos, x, f, torque, side, vertex_against_wall(i, wall_pos, x, f, torque, side, contact_list, num_contacts, facc);
contact_list, num_contacts, facc);
if (num_contacts >= 2) { if (num_contacts >= 2) {


@ -324,8 +324,7 @@ void PairBodyNparticle::compute(int eflag, int vflag)
} }
} }
if (evflag) ev_tally(i,j,nlocal,newton_pair, if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
evdwl,0.0,fpair,delx,dely,delz);
} }
} }


@ -207,8 +207,7 @@ void PairBodyRoundedPolygon::compute(int eflag, int vflag)
if (r > radi + radj + cut_inner) continue; if (r > radi + radj + cut_inner) continue;
if (npi == 1 && npj == 1) { if (npi == 1 && npj == 1) {
sphere_against_sphere(i, j, delx, dely, delz, rsq, sphere_against_sphere(i, j, delx, dely, delz, rsq, k_nij, k_naij, x, v, f, evflag);
k_nij, k_naij, x, v, f, evflag);
continue; continue;
} }


@ -20,17 +20,11 @@
#include "fix_brownian.h" #include "fix_brownian.h"
#include "atom.h" #include "atom.h"
#include "comm.h"
#include "domain.h" #include "domain.h"
#include "error.h" #include "error.h"
#include "force.h"
#include "math_extra.h"
#include "memory.h"
#include "random_mars.h" #include "random_mars.h"
#include "update.h"
#include <cmath> #include <cmath>
#include <cstring>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace FixConst; using namespace FixConst;


@ -21,17 +21,10 @@
#include "atom.h" #include "atom.h"
#include "atom_vec_ellipsoid.h" #include "atom_vec_ellipsoid.h"
#include "comm.h"
#include "domain.h" #include "domain.h"
#include "error.h" #include "error.h"
#include "force.h"
#include "math_extra.h" #include "math_extra.h"
#include "memory.h"
#include "random_mars.h" #include "random_mars.h"
#include "update.h"
#include <cmath>
#include <cstring>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace FixConst; using namespace FixConst;


@ -17,15 +17,12 @@
Contributing author: Sam Cameron (University of Bristol) Contributing author: Sam Cameron (University of Bristol)
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "fix_brownian.h" #include "fix_brownian_base.h"
#include "atom.h"
#include "comm.h" #include "comm.h"
#include "domain.h" #include "domain.h"
#include "error.h" #include "error.h"
#include "force.h" #include "force.h"
#include "math_extra.h"
#include "memory.h"
#include "random_mars.h" #include "random_mars.h"
#include "update.h" #include "update.h"


@ -20,17 +20,12 @@
#include "fix_brownian_sphere.h" #include "fix_brownian_sphere.h"
#include "atom.h" #include "atom.h"
#include "comm.h"
#include "domain.h" #include "domain.h"
#include "error.h" #include "error.h"
#include "force.h"
#include "math_extra.h" #include "math_extra.h"
#include "memory.h"
#include "random_mars.h" #include "random_mars.h"
#include "update.h"
#include <cmath> #include <cmath>
#include <cstring>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace FixConst; using namespace FixConst;


@ -23,14 +23,11 @@
#include "atom.h" #include "atom.h"
#include "atom_vec_ellipsoid.h" #include "atom_vec_ellipsoid.h"
#include "comm.h"
#include "domain.h" #include "domain.h"
#include "error.h" #include "error.h"
#include "force.h"
#include "math_extra.h" #include "math_extra.h"
#include "memory.h"
#include "update.h"
#include <cmath>
#include <cstring> #include <cstring>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;


@ -14,7 +14,6 @@
#include "atom_vec_oxdna.h" #include "atom_vec_oxdna.h"
#include "atom.h" #include "atom.h"
#include "comm.h"
#include "error.h" #include "error.h"
#include "force.h" #include "force.h"


@ -19,7 +19,6 @@
#include "atom.h" #include "atom.h"
#include "atom_vec_ellipsoid.h" #include "atom_vec_ellipsoid.h"
#include "atom_vec_oxdna.h"
#include "comm.h" #include "comm.h"
#include "error.h" #include "error.h"
#include "force.h" #include "force.h"
@ -30,7 +29,6 @@
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <utility>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace MFOxdna; using namespace MFOxdna;


@ -17,8 +17,6 @@
#include "pair_oxrna2_excv.h" #include "pair_oxrna2_excv.h"
#include <cstring>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- /* ----------------------------------------------------------------------


@ -19,7 +19,6 @@
#include "atom.h" #include "atom.h"
#include "atom_vec_ellipsoid.h" #include "atom_vec_ellipsoid.h"
#include "atom_vec_oxdna.h"
#include "comm.h" #include "comm.h"
#include "error.h" #include "error.h"
#include "force.h" #include "force.h"
@ -31,7 +30,6 @@
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>
#include <utility>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace MathConst; using namespace MathConst;


@ -18,13 +18,15 @@
------------------------------------------------------------------------- */ ------------------------------------------------------------------------- */
#include "pair_lj_sdk_coul_msm.h" #include "pair_lj_sdk_coul_msm.h"
#include <cmath>
#include <cstring>
#include "atom.h" #include "atom.h"
#include "error.h"
#include "force.h" #include "force.h"
#include "kspace.h" #include "kspace.h"
#include "neigh_list.h" #include "neigh_list.h"
#include "error.h"
#include <cmath>
#include <cstring>
#include "lj_sdk_common.h" #include "lj_sdk_common.h"


@ -25,7 +25,6 @@
#include "math_const.h" #include "math_const.h"
#include "memory.h" #include "memory.h"
#include "neighbor.h" #include "neighbor.h"
#include "update.h"
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>


@ -25,7 +25,6 @@
#include "math_const.h" #include "math_const.h"
#include "memory.h" #include "memory.h"
#include "neighbor.h" #include "neighbor.h"
#include "update.h"
#include <cmath> #include <cmath>
#include <cstring> #include <cstring>


@ -397,8 +397,7 @@ void PairLJClass2::compute_outer(int eflag, int vflag)
fpair = factor_lj*forcelj*r2inv; fpair = factor_lj*forcelj*r2inv;
} }
if (evflag) ev_tally(i,j,nlocal,newton_pair, if (evflag) ev_tally(i,j,nlocal,newton_pair,evdwl,0.0,fpair,delx,dely,delz);
evdwl,0.0,fpair,delx,dely,delz);
} }
} }
} }


@ -18,10 +18,11 @@
#include "fix_wall_colloid.h" #include "fix_wall_colloid.h"
#include <cmath>
#include "atom.h" #include "atom.h"
#include "error.h" #include "error.h"
#include <cmath>
using namespace LAMMPS_NS; using namespace LAMMPS_NS;
using namespace FixConst; using namespace FixConst;


@ -10,13 +10,6 @@
#include "colvarproxy_lammps.h" #include "colvarproxy_lammps.h"
#include <mpi.h>
#include <sys/stat.h>
#include <cerrno>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include "lammps.h" #include "lammps.h"
#include "error.h" #include "error.h"
@ -26,6 +19,12 @@
#include "colvarmodule.h" #include "colvarmodule.h"
#include "colvarproxy.h" #include "colvarproxy.h"
#include <sys/stat.h>
#include <cerrno>
#include <cstring>
#include <iostream>
#include <memory>
#define HASH_FAIL -1 #define HASH_FAIL -1
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////


@ -12,11 +12,6 @@
#include "colvarproxy_lammps_version.h" // IWYU pragma: export #include "colvarproxy_lammps_version.h" // IWYU pragma: export
#include <cstddef>
#include <mpi.h>
#include <string>
#include <vector>
#include "colvarmodule.h" #include "colvarmodule.h"
#include "colvarproxy.h" #include "colvarproxy.h"
#include "colvartypes.h" #include "colvartypes.h"

Some files were not shown because too many files have changed in this diff Show More