diff --git a/cmake/Modules/Packages/GPU.cmake b/cmake/Modules/Packages/GPU.cmake index aaa784ca8b..a57715d294 100644 --- a/cmake/Modules/Packages/GPU.cmake +++ b/cmake/Modules/Packages/GPU.cmake @@ -217,13 +217,20 @@ elseif(GPU_API STREQUAL "OPENCL") elseif(GPU_API STREQUAL "HIP") if(NOT DEFINED HIP_PATH) if(NOT DEFINED ENV{HIP_PATH}) - set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to which HIP has been installed") + set(HIP_PATH "/opt/rocm/hip" CACHE PATH "Path to HIP installation") else() - set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to which HIP has been installed") + set(HIP_PATH $ENV{HIP_PATH} CACHE PATH "Path to HIP installation") endif() endif() - set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH}) - find_package(HIP REQUIRED) + if(NOT DEFINED ROCM_PATH) + if(NOT DEFINED ENV{ROCM_PATH}) + set(ROCM_PATH "/opt/rocm" CACHE PATH "Path to ROCm installation") + else() + set(ROCM_PATH $ENV{ROCM_PATH} CACHE PATH "Path to ROCm installation") + endif() + endif() + list(APPEND CMAKE_PREFIX_PATH ${HIP_PATH} ${ROCM_PATH}) + find_package(hip REQUIRED) option(HIP_USE_DEVICE_SORT "Use GPU sorting" ON) if(NOT DEFINED HIP_PLATFORM) @@ -325,10 +332,11 @@ elseif(GPU_API STREQUAL "HIP") set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h ${LAMMPS_LIB_BINARY_DIR}/gpu/*.cu.cpp") - hip_add_library(gpu STATIC ${GPU_LIB_SOURCES}) + add_library(gpu STATIC ${GPU_LIB_SOURCES}) target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu) target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT) target_compile_definitions(gpu PRIVATE -DUSE_HIP) + target_link_libraries(gpu PRIVATE hip::host) if(HIP_USE_DEVICE_SORT) # add hipCUB @@ -377,8 +385,9 @@ elseif(GPU_API STREQUAL "HIP") endif() endif() - hip_add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp) + add_executable(hip_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
target_compile_definitions(hip_get_devices PRIVATE -DUCL_HIP) + target_link_libraries(hip_get_devices PRIVATE hip::host) if(HIP_PLATFORM STREQUAL "nvcc") target_compile_definitions(gpu PRIVATE -D__HIP_PLATFORM_NVCC__) diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index a1cf680266..d5fccad4ba 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -1,6 +1,8 @@ ######################################################################## # As of version 3.3.0 Kokkos requires C++14 -set(CMAKE_CXX_STANDARD 14) +if((NOT CMAKE_CXX_STANDARD) OR (CMAKE_CXX_STANDARD LESS 14)) + set(CMAKE_CXX_STANDARD 14) +endif() ######################################################################## # consistency checks and Kokkos options/settings required by LAMMPS if(Kokkos_ENABLE_CUDA) diff --git a/cmake/Modules/Packages/LATTE.cmake b/cmake/Modules/Packages/LATTE.cmake index ddf31a68ed..a96e850f7e 100644 --- a/cmake/Modules/Packages/LATTE.cmake +++ b/cmake/Modules/Packages/LATTE.cmake @@ -19,6 +19,14 @@ if(DOWNLOAD_LATTE) set(LATTE_MD5 "820e73a457ced178c08c71389a385de7" CACHE STRING "MD5 checksum of LATTE tarball") mark_as_advanced(LATTE_URL) mark_as_advanced(LATTE_MD5) + + # CMake cannot pass BLAS or LAPACK library variable to external project if they are a list + list(LENGTH BLAS_LIBRARIES NUM_BLAS) + list(LENGTH LAPACK_LIBRARIES NUM_LAPACK) + if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1)) + message(FATAL_ERROR "Cannot compile downloaded LATTE library due to a technical limitation") + endif() + include(ExternalProject) ExternalProject_Add(latte_build URL ${LATTE_URL} diff --git a/cmake/Modules/Packages/ML-HDNNP.cmake b/cmake/Modules/Packages/ML-HDNNP.cmake index 44873b9929..e27b3a1410 100644 --- a/cmake/Modules/Packages/ML-HDNNP.cmake +++ b/cmake/Modules/Packages/ML-HDNNP.cmake @@ -45,12 +45,12 @@ if(DOWNLOAD_N2P2) # get path to MPI include directory when cross-compiling to windows if((CMAKE_SYSTEM_NAME STREQUAL Windows) AND
CMAKE_CROSSCOMPILING) get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES) - set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1") + set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}") set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER}) endif() if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel") get_target_property(N2P2_MPI_INCLUDE MPI::MPI_CXX INTERFACE_INCLUDE_DIRECTORIES) - set(N2P2_PROJECT_OPTIONS "-I ${N2P2_MPI_INCLUDE} -DMPICH_SKIP_MPICXX=1") + set(N2P2_PROJECT_OPTIONS "-I${N2P2_MPI_INCLUDE}") set(MPI_CXX_COMPILER ${CMAKE_CXX_COMPILER}) endif() endif() @@ -69,6 +69,12 @@ if(DOWNLOAD_N2P2) # echo final flag for debugging message(STATUS "N2P2 BUILD OPTIONS: ${N2P2_BUILD_OPTIONS}") + # must have "sed" command to compile n2p2 library (for now) + find_program(HAVE_SED sed) + if(NOT HAVE_SED) + message(FATAL_ERROR "Must have 'sed' program installed to compile 'n2p2' library for ML-HDNNP package") + endif() + # download compile n2p2 library. much patch MPI calls in LAMMPS interface to accommodate MPI-2 (e.g.
for cross-compiling) include(ExternalProject) ExternalProject_Add(n2p2_build diff --git a/cmake/Modules/Packages/ML-QUIP.cmake b/cmake/Modules/Packages/ML-QUIP.cmake index 5a80e63d55..92418e8939 100644 --- a/cmake/Modules/Packages/ML-QUIP.cmake +++ b/cmake/Modules/Packages/ML-QUIP.cmake @@ -50,7 +50,7 @@ if(DOWNLOAD_QUIP) GIT_TAG origin/public GIT_SHALLOW YES GIT_PROGRESS YES - PATCH_COMMAND cp ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps + PATCH_COMMAND ${CMAKE_COMMAND} -E copy_if_different ${CMAKE_BINARY_DIR}/quip.config <SOURCE_DIR>/arch/Makefile.lammps CONFIGURE_COMMAND env QUIP_ARCH=lammps make config BUILD_COMMAND env QUIP_ARCH=lammps make libquip INSTALL_COMMAND "" diff --git a/cmake/Modules/Packages/MSCG.cmake b/cmake/Modules/Packages/MSCG.cmake index 6ac62cb012..cf3d506c82 100644 --- a/cmake/Modules/Packages/MSCG.cmake +++ b/cmake/Modules/Packages/MSCG.cmake @@ -12,6 +12,13 @@ if(DOWNLOAD_MSCG) mark_as_advanced(MSCG_URL) mark_as_advanced(MSCG_MD5) + # CMake cannot pass BLAS or LAPACK library variable to external project if they are a list + list(LENGTH BLAS_LIBRARIES NUM_BLAS) + list(LENGTH LAPACK_LIBRARIES NUM_LAPACK) + if((NUM_BLAS GREATER 1) OR (NUM_LAPACK GREATER 1)) + message(FATAL_ERROR "Cannot compile downloaded MSCG library due to a technical limitation") + endif() + include(ExternalProject) ExternalProject_Add(mscg_build URL ${MSCG_URL} diff --git a/cmake/Modules/Packages/SCAFACOS.cmake b/cmake/Modules/Packages/SCAFACOS.cmake index fd355420c3..de611a1edb 100644 --- a/cmake/Modules/Packages/SCAFACOS.cmake +++ b/cmake/Modules/Packages/SCAFACOS.cmake @@ -23,6 +23,11 @@ if(DOWNLOAD_SCAFACOS) file(DOWNLOAD ${LAMMPS_THIRDPARTY_URL}/scafacos-1.0.1-fix.diff ${CMAKE_CURRENT_BINARY_DIR}/scafacos-1.0.1.fix.diff EXPECTED_HASH MD5=4baa1333bb28fcce102d505e1992d032) + find_program(HAVE_PATCH patch) + if(NOT HAVE_PATCH) + message(FATAL_ERROR "The 'patch' program is required to build the ScaFaCoS library") + endif() + include(ExternalProject)
ExternalProject_Add(scafacos_build URL ${SCAFACOS_URL} diff --git a/cmake/Modules/Packages/VORONOI.cmake b/cmake/Modules/Packages/VORONOI.cmake index 7feea4c52e..c010469677 100644 --- a/cmake/Modules/Packages/VORONOI.cmake +++ b/cmake/Modules/Packages/VORONOI.cmake @@ -26,6 +26,11 @@ if(DOWNLOAD_VORO) set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS}) endif() + find_program(HAVE_PATCH patch) + if(NOT HAVE_PATCH) + message(FATAL_ERROR "The 'patch' program is required to build the voro++ library") + endif() + ExternalProject_Add(voro_build URL ${VORO_URL} URL_MD5 ${VORO_MD5} diff --git a/cmake/presets/hip_amd.cmake b/cmake/presets/hip_amd.cmake new file mode 100644 index 0000000000..4b8945e0c7 --- /dev/null +++ b/cmake/presets/hip_amd.cmake @@ -0,0 +1,30 @@ +# preset that will enable HIP (clang/clang++, with hipcc set as the C and C++ compiler) with support for MPI and OpenMP (on Linux boxes) + +# Fortran compiler: prefer flang over gfortran (f95 is tried last), if available; the result is also stored in the OMPI_FC environment variable +find_program(CLANG_FORTRAN NAMES flang gfortran f95) +set(ENV{OMPI_FC} ${CLANG_FORTRAN}) + +set(CMAKE_CXX_COMPILER "hipcc" CACHE STRING "" FORCE) +set(CMAKE_C_COMPILER "hipcc" CACHE STRING "" FORCE) +set(CMAKE_Fortran_COMPILER ${CLANG_FORTRAN} CACHE STRING "" FORCE) +set(CMAKE_CXX_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE) +set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS_DEBUG "-Wall -Wextra -g -std=f2003" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG -std=f2003" CACHE STRING "" FORCE) +set(CMAKE_Fortran_FLAGS_RELEASE "-O3 -DNDEBUG -std=f2003" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS_DEBUG "-Wall -Wextra -g" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS_RELWITHDEBINFO "-Wall -Wextra -g -O2 -DNDEBUG" CACHE STRING "" FORCE) +set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE) + +set(MPI_CXX "hipcc" CACHE STRING "" FORCE) +set(MPI_CXX_COMPILER "mpicxx"
CACHE STRING "" FORCE) + +unset(HAVE_OMP_H_INCLUDE CACHE) +set(OpenMP_C "hipcc" CACHE STRING "" FORCE) +set(OpenMP_C_FLAGS "-fopenmp" CACHE STRING "" FORCE) +set(OpenMP_C_LIB_NAMES "omp" CACHE STRING "" FORCE) +set(OpenMP_CXX "hipcc" CACHE STRING "" FORCE) +set(OpenMP_CXX_FLAGS "-fopenmp" CACHE STRING "" FORCE) +set(OpenMP_CXX_LIB_NAMES "omp" CACHE STRING "" FORCE) +set(OpenMP_omp_LIBRARY "libomp.so" CACHE PATH "" FORCE) diff --git a/doc/lammps.1 b/doc/lammps.1 index fb79b8d774..c868a2a86f 100644 --- a/doc/lammps.1 +++ b/doc/lammps.1 @@ -1,4 +1,4 @@ -.TH LAMMPS "20 September 2021" "2021-09-20" +.TH LAMMPS "29 September 2021" "2021-09-29" .SH NAME .B LAMMPS \- Molecular Dynamics Simulator. diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index 074a6349e6..c7397935d8 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -71,7 +71,8 @@ LAMMPS can use them if they are available on your system. -D FFTW3_INCLUDE_DIR=path # path to FFTW3 include files -D FFTW3_LIBRARY=path # path to FFTW3 libraries - -D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries + -D FFTW3_OMP_LIBRARY=path # path to FFTW3 OpenMP wrapper libraries + -D FFT_FFTW_THREADS=on # enable using OpenMP threaded FFTW3 libraries -D MKL_INCLUDE_DIR=path # ditto for Intel MKL library -D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries -D MKL_LIBRARY=path # path to MKL libraries diff --git a/doc/src/Developer.rst b/doc/src/Developer.rst index f54bc4152f..f68007486d 100644 --- a/doc/src/Developer.rst +++ b/doc/src/Developer.rst @@ -11,6 +11,7 @@ of time and requests from the LAMMPS user community.
:maxdepth: 1 Developer_org + Developer_parallel Developer_flow Developer_write Developer_notes diff --git a/doc/src/Developer_par_comm.rst b/doc/src/Developer_par_comm.rst new file mode 100644 index 0000000000..2e108dda13 --- /dev/null +++ b/doc/src/Developer_par_comm.rst @@ -0,0 +1,120 @@ +Communication +^^^^^^^^^^^^^ + +Following the partitioning scheme in use, all per-atom data is +distributed across the MPI processes, which allows LAMMPS to handle very +large systems provided it uses a correspondingly large number of MPI +processes. Since the per-atom data (atom IDs, positions, velocities, +types, etc.) is distributed in this way, computing the short-range interactions requires that MPI +processes have access not only to data of atoms they "own" but also to +information about atoms from neighboring sub-domains, in LAMMPS referred +to as "ghost" atoms. These are copies of atoms storing required +per-atom data for up to the communication cutoff distance. The green +dashed-line boxes in the :ref:`domain-decomposition` figure illustrate +the extended ghost-atom sub-domain for one processor. + +This approach is also used to implement periodic boundary +conditions: atoms that lie within the cutoff distance across a periodic +boundary are also stored as ghost atoms and taken from the periodic +replication of the sub-domain, which may be the same sub-domain, e.g. if +running in serial. As a consequence of this, force computation in +LAMMPS is not subject to minimum image conventions and thus cutoffs may +be larger than half the simulation domain. + +.. _ghost-atom-comm: +.. figure:: img/ghost-comm.png + :align: center + + ghost atom communication + + This figure shows the ghost atom communication patterns between + sub-domains for "brick" (left) and "tiled" (right) communication styles for + 2d simulations. The numbers indicate MPI process ranks. Here the + sub-domains are drawn spatially separated for clarity. The + dashed-line box is the extended sub-domain of processor 0 which + includes its ghost atoms.
The red- and blue-shaded boxes are the + regions of communicated ghost atoms. + +Efficient communication patterns are needed to update the "ghost" atom +data, since that needs to be done at every MD time step or minimization +step. The diagrams of the :ref:`ghost-atom-comm` figure illustrate how ghost +atom communication is performed in two stages for a 2d simulation (three +in 3d) for both a regular and irregular partitioning of the simulation +box. For the regular case (left) atoms are exchanged first in the +*x*-direction, then in *y*, with four neighbors in the grid of processor +sub-domains. + +In the *x* stage, processor ranks 1 and 2 send owned atoms in their +red-shaded regions to rank 0 (and vice versa). Then in the *y* stage, +ranks 3 and 4 send atoms in their blue-shaded regions to rank 0, +including ghost atoms they received in the *x* stage. Rank 0 thus +acquires all its ghost atoms; atoms in the solid blue corner regions +are communicated twice before rank 0 receives them. + +For the irregular case (right) the two stages are similar, but a +processor can have more than one neighbor in each direction. In the +*x* stage, MPI ranks 1,2,3 send owned atoms in their red-shaded regions to +rank 0 (and vice versa). These include only atoms between the lower +and upper *y*-boundary of rank 0's sub-domain. In the *y* stage, ranks +4,5,6 send atoms in their blue-shaded regions to rank 0. This may +include ghost atoms they received in the *x* stage, but only if they +are needed by rank 0 to fill its extended ghost atom regions in the ++/-*y* directions (blue rectangles). Thus in this case, ranks 5 and +6 do not include ghost atoms they received from each other (in the *x* +stage) in the atoms they send to rank 0. The key point is that while +the pattern of communication is more complex in the irregular +partitioning case, it can still proceed in two stages (three in 3d) +via atom exchanges with only neighboring processors.
+ +When attributes of owned atoms are sent to neighboring processors to +become attributes of their ghost atoms, LAMMPS calls this a "forward" +communication. On timesteps when atoms migrate to new owning processors +and neighbor lists are rebuilt, each processor creates a list of its +owned atoms which are ghost atoms in each of its neighbor processors. +These lists are used to pack per-atom coordinates (for example) into +message buffers in subsequent steps until the next reneighboring. + +A "reverse" communication is when computed ghost atom attributes are +sent back to the processor that owns the atom. This is used (for +example) to sum partial forces on ghost atoms to the complete force on +owned atoms. The order of the two stages described in the +:ref:`ghost-atom-comm` figure is inverted and the same lists of atoms +are used to pack and unpack message buffers with per-atom forces. When +a received buffer is unpacked, the ghost forces are summed to owned atom +forces. As in forward communication, forces on atoms in the four blue +corners of the diagrams are sent, received, and summed twice (once at +each stage) before owning processors have the full force. + +These two operations are used in many places within LAMMPS aside from +exchange of coordinates and forces, for example by manybody potentials +to share intermediate per-atom values, or by rigid-body integrators to +enable each atom in a body to access body properties. Here are +additional details about how these communication operations are +performed in LAMMPS: + +- When exchanging data with different processors, forward and reverse + communication is done using ``MPI_Send()`` and ``MPI_Irecv()`` calls. + If a processor is "exchanging" atoms with itself, only the pack and + unpack operations are performed, e.g. to create ghost atoms across + periodic boundaries when running on a single processor.
+ +- For forward communication of owned atom coordinates, periodic box + lengths are added or subtracted when the receiving processor is + across a periodic boundary from the sender. There is then no need to + apply a minimum image convention when calculating distances between + atom pairs when building neighbor lists or computing forces. + +- The cutoff distance for exchanging ghost atoms is typically equal to + the neighbor cutoff. But it can also be chosen to be longer if needed, + e.g. half the diameter of a rigid body composed of multiple atoms or + over 3x the length of a stretched bond for dihedral interactions. It + can also exceed the periodic box size. For the regular communication + pattern (left), if the cutoff distance extends beyond a neighbor + processor's sub-domain, then multiple exchanges are performed in the + same direction. Each exchange is with the same neighbor processor, + but buffers are packed/unpacked using a different list of atoms. For + forward communication, in the first exchange a processor sends only + owned atoms. In subsequent exchanges, it sends ghost atoms received + in previous exchanges. For the irregular pattern (right) overlaps of + a processor's extended ghost-atom sub-domain with all other processors + in each dimension are detected. diff --git a/doc/src/Developer_par_long.rst b/doc/src/Developer_par_long.rst new file mode 100644 index 0000000000..f297cf3fa6 --- /dev/null +++ b/doc/src/Developer_par_long.rst @@ -0,0 +1,188 @@ +Long-range interactions +^^^^^^^^^^^^^^^^^^^^^^^ + +For charged systems, LAMMPS can compute long-range Coulombic +interactions via the FFT-based particle-particle/particle-mesh (PPPM) +method implemented in :doc:`kspace style pppm and its variants +<kspace_style>`. For that, Coulombic interactions are partitioned into +short- and long-range components. The short-ranged portion is computed +in real space as a loop over pairs of charges within a cutoff distance, +using neighbor lists.
The long-range portion is computed in reciprocal +space using a kspace style. For the PPPM implementation the simulation +cell is overlaid with a regular FFT grid in 3d. It proceeds in several stages: + +a) each atom's point charge is interpolated to nearby FFT grid points, +b) a forward 3d FFT is performed, +c) a convolution operation is performed in reciprocal space, +d) one or more inverse 3d FFTs are performed, and +e) electric field values from grid points near each atom are interpolated to compute + its forces. + +For any of the spatial-decomposition partitioning schemes each processor +owns the brick-shaped portion of FFT grid points contained within its +sub-domain. The two interpolation operations use a stencil of grid +points surrounding each atom. To accommodate the stencil size, each +processor also stores a few layers of ghost grid points surrounding its +brick. Forward and reverse communication of grid point values is +performed similarly to the corresponding :doc:`atom data communication +<Developer_par_comm>`. In this case, electric field values on owned +grid points are sent to neighboring processors to become ghost point +values. Likewise charge values on ghost points are sent and summed to +values on owned points. + +For triclinic simulation boxes, the FFT grid planes are parallel to +the box faces, but the mapping of charge and electric field values +to/from grid points is done in reduced coordinates where the tilted +box is conceptually a unit cube, so that the stencil and FFT +operations are unchanged. However, the FFT grid size required for a +given accuracy is larger for triclinic domains than it is for +orthogonal boxes. + +.. _fft-parallel: +.. figure:: img/fft-decomp-parallel.png + :align: center + + parallel FFT in PPPM + + Stages of a parallel FFT for a simulation domain overlaid + with an 8x8x8 3d FFT grid, partitioned across 64 processors.
+ Within each of the 4 diagrams, grid cells of the same color are + owned by a single processor; for simplicity only cells owned by 4 + or 8 of the 64 processors are colored. The two images on the left + illustrate brick-to-pencil communication. The two images on the + right illustrate pencil-to-pencil communication, which in this + case transposes the *y* and *z* dimensions of the grid. + +Parallel 3d FFTs require substantial communication relative to their +computational cost. A 3d FFT is implemented by a series of 1d FFTs +along the *x-*, *y-*, and *z-*\ direction of the FFT grid. Thus the FFT +grid cannot be decomposed like atoms into 3 dimensions for parallel +processing of the FFTs but only in 1 (as planes) or 2 (as pencils) +dimensions and in between the steps the grid needs to be transposed to +have the FFT grid portion "owned" by each MPI process complete in the +direction of the 1d FFTs it has to perform. LAMMPS uses the +pencil-decomposition algorithm as shown in the :ref:`fft-parallel` figure. + +Initially (far left), each processor owns a brick of same-color grid +cells (actually grid points) contained within its sub-domain. A +brick-to-pencil communication operation converts this layout to 1d +pencils in the *x*-dimension (center left). Again, cells of the same +color are owned by the same processor. Each processor can then compute +a 1d FFT on each pencil of data it wholly owns using a call to the +configured FFT library. A pencil-to-pencil communication then converts +this layout to pencils in the *y* dimension (center right) which +effectively transposes the *x* and *y* dimensions of the grid, followed +by 1d FFTs in *y*. A final transpose of pencils from *y* to *z* (far +right) followed by 1d FFTs in *z* completes the forward FFT. The data +is left in a *z*-pencil layout for the convolution operation.
One or +more inverse FFTs then perform the sequence of 1d FFTs and communication +steps in reverse order; the final layout of resulting grid values is the +same as the initial brick layout. + +Each communication operation within the FFT (brick-to-pencil or +pencil-to-pencil or pencil-to-brick) converts one tiling of the 3d grid +to another, where a tiling in this context means an assignment of a +small brick-shaped subset of grid points to each processor, the union of +which comprises the entire grid. The parallel `fftMPI library +<https://lammps.github.io/fftmpi/>`_ written for LAMMPS allows arbitrary +definitions of the tiling so that an irregular partitioning of the +simulation domain can use it directly. Transforming data from one +tiling to another is implemented in `fftMPI` using point-to-point +communication, where each processor sends data to a few other +processors, since each tile in the initial tiling overlaps with a +handful of tiles in the final tiling. + +The transformations could also be done using collective communication +across all *P* processors with a single call to ``MPI_Alltoall()``, but +this is typically much slower. However, for the specialized brick and +pencil tiling illustrated in the :ref:`fft-parallel` figure, collective +communication across the entire MPI communicator is not required. In +the example an :math:`8^3` grid with 512 grid cells is partitioned +across 64 processors; each processor owns a 2x2x2 3d brick of grid +cells. The initial brick-to-pencil communication (far left to center +left) only requires collective communication within subgroups of 4 +processors, as illustrated by the 4 colors. More generally, a +brick-to-pencil communication can be performed by partitioning *P* +processors into :math:`P^{\frac{2}{3}}` subgroups of +:math:`P^{\frac{1}{3}}` processors each. Each subgroup performs +collective communication only within its subgroup.
Similarly, +pencil-to-pencil communication can be performed by partitioning *P* +processors into :math:`P^{\frac{1}{2}}` subgroups of +:math:`P^{\frac{1}{2}}` processors each. This is illustrated in the +figure for the :math:`y \Rightarrow z` communication (center). An +eight-processor subgroup owns the front *yz* plane of data and performs +collective communication within the subgroup to transpose from a +*y*-pencil to *z*-pencil layout. + +LAMMPS invokes point-to-point communication by default, but also +provides the option of partitioned collective communication when using a +:doc:`kspace_modify collective yes <kspace_modify>` command to switch to +that mode. In the latter case, the code detects the size of the +disjoint subgroups and partitions the single *P*-size communicator into +multiple smaller communicators, each of which invokes collective +communication. Testing on a large IBM Blue Gene/Q machine at Argonne +National Labs showed a significant improvement in FFT performance for +large processor counts; partitioned collective communication was faster +than point-to-point communication or global collective communication +involving all *P* processors. + +Here are some additional details about FFTs for long-range and related +grid/particle operations that LAMMPS supports: + +- The fftMPI library allows each grid dimension to be a multiple of + small prime factors (2,3,5), and allows any number of processors to + perform the FFT. The resulting brick and pencil decompositions are + thus not always as well-aligned but the size of subgroups of + processors for the two modes of communication (brick/pencil and + pencil/pencil) still scales as :math:`O(P^{\frac{1}{3}})` and + :math:`O(P^{\frac{1}{2}})`. + +- For efficiency in performing 1d FFTs, the grid transpose + operations illustrated in the :ref:`fft-parallel` figure also involve + reordering the 3d data so that a different dimension is contiguous + in memory.
This reordering can be done during the packing or + unpacking of buffers for MPI communication. + +- For large systems and particularly a large number of MPI processes, + the dominant cost for parallel FFTs is often the communication, not + the computation of 1d FFTs, even though the latter scales as :math:`N + \log(N)` in the number of grid points *N* per grid direction. This is + due to the fact that only a 2d decomposition into pencils is possible + while atom data (and their corresponding short-range force and energy + computations) can be decomposed efficiently in 3d. + + This can be addressed by reducing the number of MPI processes involved + in the MPI communication by using :doc:`hybrid MPI + OpenMP + parallelization <Speed_omp>`. This will use OpenMP parallelization + inside the MPI domains and while that may have a lower parallel + efficiency, it reduces the communication overhead. + + As an alternative it is also possible to start a :ref:`multi-partition + <partition>` calculation and then use the :doc:`verlet/split + integrator <run_style>` to perform the PPPM computation on a + dedicated, separate partition of MPI processes. This uses an integer + "1:*p*" mapping of *p* sub-domains of the atom decomposition to one + sub-domain of the FFT grid decomposition, where pairwise non-bonded + and bonded forces and energies are computed on the larger partition + and the PPPM kspace computation concurrently on the smaller partition. + +- LAMMPS also implements PPPM-based solvers for other long-range + interactions, dipole and dispersion (Lennard-Jones), which can be used + in conjunction with long-range Coulombics for point charges. + +- LAMMPS implements a ``GridComm`` class which overlays the simulation + domain with a regular grid, partitions it across processors in a + manner consistent with processor sub-domains, and provides methods for + forward and reverse communication of owned and ghost grid point + values.
It is used for PPPM as an FFT grid (as outlined above) and + also for the MSM algorithm which uses a cascade of grid sizes from + fine to coarse to compute long-range Coulombic forces. The ``GridComm`` + class is also useful for models where continuum fields interact with + particles. For example, the two-temperature model (TTM) defines heat + transfer between atoms (particles) and electrons (continuum gas) where + spatial variations in the electron temperature are computed by finite + differences of a discretized heat equation on a regular grid. The + :doc:`fix ttm/grid <fix_ttm>` command uses the ``GridComm`` class + internally to perform its grid operations on a distributed grid + instead of the original :doc:`fix ttm <fix_ttm>` which uses a + replicated grid. diff --git a/doc/src/Developer_par_neigh.rst b/doc/src/Developer_par_neigh.rst new file mode 100644 index 0000000000..4b286d77d8 --- /dev/null +++ b/doc/src/Developer_par_neigh.rst @@ -0,0 +1,159 @@ +Neighbor lists +^^^^^^^^^^^^^^ + +To compute forces efficiently, each processor creates a Verlet-style +neighbor list which enumerates all pairs of atoms *i,j* (*i* = owned, +*j* = owned or ghost) with separation less than the applicable +neighbor list cutoff distance. In LAMMPS the neighbor lists are stored +in a multiple-page data structure; each page is a contiguous chunk of +memory which stores vectors of neighbor atoms *j* for many *i* atoms. +This allows pages to be incrementally allocated or deallocated in blocks +as needed. Neighbor lists typically consume the most memory of any data +structure in LAMMPS. The neighbor list is rebuilt (from scratch) once +every few timesteps, then used repeatedly each step for force or other +computations.
The neighbor cutoff distance is :math:`R_n = R_f + +\Delta_s`, where :math:`R_f` is the (largest) force cutoff defined by +the interatomic potential for computing short-range pairwise or manybody +forces and :math:`\Delta_s` is a "skin" distance that allows the list to +be used for multiple steps assuming that atoms do not move very far +between consecutive time steps. Typically the code triggers +reneighboring when any atom has moved half the skin distance since the +last reneighboring; this and other options of the neighbor list rebuild +can be adjusted with the :doc:`neigh_modify <neigh_modify>` command. + +On steps when reneighboring is performed, atoms which have moved outside +their owning processor's sub-domain are first migrated to new processors +via communication. Periodic boundary conditions are also (only) +enforced on these steps to ensure each atom is re-assigned to the +correct processor. After migration, the atoms owned by each processor +are stored in a contiguous vector. Periodically each processor +spatially sorts owned atoms within its vector to reorder it for improved +cache efficiency in force computations and neighbor list building. For +that, atoms are spatially binned and then reordered so that atoms in the +same bin are adjacent in the vector. Atom sorting can be disabled or +its settings modified with the :doc:`atom_modify <atom_modify>` command. + +.. _neighbor-stencil: +.. figure:: img/neigh-stencil.png + :align: center + + neighbor list stencils + + A 2d simulation sub-domain (thick black line) and the corresponding + ghost atom cutoff region (dashed blue line) for both orthogonal + (left) and triclinic (right) domains. A regular grid of neighbor + bins (thin lines) overlays the entire simulation domain and need not + align with sub-domain boundaries; only the portion overlapping the + augmented sub-domain is shown. In the triclinic case it overlaps the + bounding box of the tilted rectangle.
The blue- and red-shaded bins + represent a stencil of bins searched to find neighbors of a particular + atom (black dot). + +To build a local neighbor list in linear time, the simulation domain is +overlaid (conceptually) with a regular 3d (or 2d) grid of neighbor bins, +as shown in the :ref:`neighbor-stencil` figure for 2d models and a +single MPI processor's sub-domain. Each processor stores a set of +neighbor bins which overlap its sub-domain extended by the neighbor +cutoff distance :math:`R_n`. As illustrated, the bins need not align +with processor boundaries; an integer number in each dimension is fit to +the size of the entire simulation box. + +Most often LAMMPS builds what it calls a "half" neighbor list where +each *i,j* neighbor pair is stored only once, with either atom *i* or +*j* as the central atom. The build can be done efficiently by using a +pre-computed "stencil" of bins around a central origin bin which +contains the atom whose neighbors are being searched for. A stencil +is simply a list of integer offsets in *x,y,z* of nearby bins +surrounding the origin bin which are close enough to contain any +neighbor atom *j* within a distance :math:`R_n` from any atom *i* in the +origin bin. Note that for a half neighbor list, the stencil can be +asymmetric since each atom only need store half its nearby neighbors. + +These stencils are illustrated in the figure for a half list and a bin +size of :math:`\frac{1}{2} R_n`. There are 13 red+blue stencil bins in +2d (for the orthogonal case, 15 for triclinic). In 3d there would be +63, 13 in the plane of bins that contain the origin bin and 25 in each +of the two planes above it in the *z* direction (75 for triclinic). The +reason the triclinic stencil has extra bins is because the bins tile the +bounding box of the entire triclinic domain and thus are not periodic +with respect to the simulation box itself. 
The stencil and logic for +determining which *i,j* pairs to include in the neighbor list are +altered slightly to account for this. + +To build a neighbor list, a processor first loops over its "owned" plus +"ghost" atoms and assigns each to a neighbor bin. This uses an integer +vector to create a linked list of atom indices within each bin. It then +performs a triply-nested loop over its owned atoms *i*, the stencil of +bins surrounding atom *i*'s bin, and the *j* atoms in each stencil bin +(including ghost atoms). If the distance :math:`r_{ij} < R_n`, then +atom *j* is added to the vector of atom *i*'s neighbors. + +Here are additional details about neighbor list build options LAMMPS +supports: + +- The choice of bin size is an option; a size half of :math:`R_n` has + been found to be optimal for many typical cases. Smaller bins incur + additional overhead to loop over; larger bins require more distance + calculations. Note that for smaller bin sizes, the 2d stencil in the + figure would be more semi-circular in shape (hemispherical in 3d), + with bins near the corners of the square eliminated due to their + distance from the origin bin. + +- Depending on the interatomic potential(s) and other commands used in + an input script, multiple neighbor lists and stencils with different + attributes may be needed. This includes lists with different cutoff + distances, e.g. for force computation versus occasional diagnostic + computations such as a radial distribution function, or for the + r-RESPA time integrator which can partition pairwise forces by + distance into subsets computed at different time intervals. It + includes "full" lists (as opposed to half lists) where each *i,j* pair + appears twice, stored once with *i* and *j*, and which use a larger + symmetric stencil. It also includes lists with partial enumeration of + ghost atom neighbors. The full and ghost-atom lists are used by + various manybody interatomic potentials. 
Lists may also use different + criteria for inclusion of a pair interaction. Typically this simply + depends only on the distance between two atoms and the cutoff + distance. But for finite-size coarse-grained particles with + individual diameters (e.g. polydisperse granular particles), it can + also depend on the diameters of the two particles. + +- When using :doc:`pair style hybrid ` multiple sub-lists + of the master neighbor list for the full system need to be generated, + one for each sub-style, which contains only the *i,j* pairs needed to + compute interactions between subsets of atoms for the corresponding + potential. This means not all *i* or *j* atoms owned by a processor + are included in a particular sub-list. + +- Some models use different cutoff lengths for pairwise interactions + between different kinds of particles which are stored in a single + neighbor list. One example is a solvated colloidal system with large + colloidal particles where colloid/colloid, colloid/solvent, and + solvent/solvent interaction cutoffs can be dramatically different. + Another is a model of polydisperse finite-size granular particles; + pairs of particles interact only when they are in contact with each + other. Mixtures with particle size ratios as high as 10-100x may be + used to model realistic systems. Efficient neighbor list building + algorithms for these kinds of systems are available in LAMMPS. They + include a method which uses different stencils for different cutoff + lengths and trims the stencil to only include bins that straddle the + cutoff sphere surface. More recently a method which uses both + multiple stencils and multiple bin sizes was developed; it builds + neighbor lists efficiently for systems with particles of any size + ratio, though other considerations (timestep size, force computations) + may limit the ability to model systems with huge polydispersity. 
+ +- For small and sparse systems and as a fallback method, LAMMPS also + supports neighbor list construction without binning by using a full + :math:`O(N^2)` loop over all *i,j* atom pairs in a sub-domain when + using the :doc:`neighbor nsq ` command. + +- Dependent on the "pair" setting of the :doc:`newton ` command, + the "half" neighbor lists may contain **all** pairs of atoms where + atom *j* is a ghost atom (i.e. when the newton pair setting is *off*). + For the newton pair *on* setting the atom *j* is only added to the + list if its *z* coordinate is larger, or if equal the *y* coordinate + is larger, and if that is equal, too, the *x* coordinate is larger. For + homogeneously dense systems that will result in picking neighbors from + a same size sector in always the same direction relative to the + "owned" atom and thus it should lead to similar length neighbor lists + and thus reduce the chance of a load imbalance. diff --git a/doc/src/Developer_par_openmp.rst b/doc/src/Developer_par_openmp.rst new file mode 100644 index 0000000000..91c649a7b8 --- /dev/null +++ b/doc/src/Developer_par_openmp.rst @@ -0,0 +1,114 @@ +OpenMP Parallelism +^^^^^^^^^^^^^^^^^^ + +The styles in the INTEL, KOKKOS, and OPENMP packages offer to use OpenMP +thread parallelism to predominantly distribute loops over local data +and thus follow an orthogonal parallelization strategy to the +decomposition into spatial domains used by the :doc:`MPI partitioning +`. For clarity, this section discusses only the +implementation in the OPENMP package as it is the simplest. The INTEL +and KOKKOS packages offer additional options and are more complex since +they support more features and different hardware like co-processors +or GPUs. + +One of the key decisions when implementing the OPENMP package was to +keep the changes to the source code small, so that it would be easier to +maintain the code and keep it in sync with the non-threaded standard +implementation. 
This is achieved by a) making the OPENMP version a +derived class from the regular version (e.g. ``PairLJCutOMP`` from +``PairLJCut``) and overriding only methods that are multi-threaded or +need to be modified to support multi-threading (similar to what was done +in the OPT package), b) keeping the structure in the modified code very +similar so that side-by-side comparisons are still useful, and c) +offloading additional functionality and multi-thread support functions +into three separate classes ``ThrOMP``, ``ThrData``, and ``FixOMP``. +``ThrOMP`` provides additional, multi-thread aware functionality not +available in the corresponding base class (e.g. ``Pair`` for +``PairLJCutOMP``) like multi-thread aware variants of the "tally" +functions. Those functions are made available through multiple +inheritance so those new functions have to have unique names to avoid +ambiguities; typically ``_thr`` is appended to the name of the function. +``ThrData`` is a class that manages per-thread data structures. +It is used instead of extending the corresponding storage to per-thread +arrays to avoid slowdowns due to "false sharing" when multiple threads +update adjacent elements in an array and thus force the CPU cache lines +to be reset and re-fetched. ``FixOMP`` finally manages the "multi-thread +state" like settings and access to per-thread storage; it is activated +by the :doc:`package omp ` command. + +Avoiding data races +""""""""""""""""""" + +A key problem when implementing thread parallelism in an MD code is +to avoid data races when updating accumulated properties like forces, +energies, and stresses. When interactions are computed, they always +involve multiple atoms and thus there are race conditions when multiple +threads want to update per-atom data of the same atoms. Five possible +strategies have been considered to avoid this: + +1) restructure the code so that there is no overlapping access possible + when computing in parallel, e.g. 
by breaking lists into multiple + parts and synchronizing threads in between. +2) have each thread be "responsible" for a specific group of atoms and + compute these interactions multiple times, once on each thread that + is responsible for a given atom and then have each thread only update + the properties of this atom. +3) use mutexes around functions and regions of code where the data race + could happen +4) use atomic operations when updating per-atom properties +5) use replicated per-thread data structures to accumulate data without + conflicts and then use a reduction to combine those results into the + data structures used by the regular style. + +Option 5 was chosen for the OPENMP package because it would retain the +performance for the case of 1 thread and the code would be more +maintainable. Option 1 would require extensive code changes, +particularly to the neighbor list code; option 2 would have incurred a +2x or more performance penalty for the serial case; option 3 causes +significant overhead and would enforce serialization of operations in +inner loops and thus defeat the purpose of multi-threading; option 4 +slows down the serial case although not quite as badly as option 2. The +downside of option 5 is that the overhead of the reduction operations +grows with the number of threads used, so there would be a crossover +point where options 2 or 4 would result in faster execution. That is +why option 2 for example is used in the GPU package because a GPU is a +processor with a massive number of threads. However, since the MPI +parallelization is generally more effective for typical MD systems, the +expectation is that thread parallelism is only used for a smaller number +of threads (2-8). At the time of its implementation, that number was +equivalent to the number of CPU cores per CPU socket on high-end +supercomputers. 
+ +Thus arrays like the force array are dimensioned to the number of atoms +times the number of threads when enabling OpenMP support and inside the +compute functions a pointer to a different chunk is obtained by each thread. +Similarly, accumulators like potential energy or virial are kept in +per-thread instances of the ``ThrData`` class and then only reduced and +stored in their global counterparts at the end of the force computation. + + +Loop scheduling +""""""""""""""" + +Multi-thread parallelization is applied by distributing (outer) loops +statically across threads. Typically this would be the loop over local +atoms *i* when processing *i,j* pairs of atoms from a neighbor list. +The design of the neighbor list code results in atoms having a similar +number of neighbors for homogeneous systems and thus load imbalances +across threads are not common and typically happen for systems where +also the MPI parallelization would be unbalanced, which would typically +have a more pronounced impact on the performance. This same loop +scheduling scheme can also be applied to the reduction operations on +per-atom data to try and reduce the overhead of the reduction operation. + +Neighbor list parallelization +""""""""""""""""""""""""""""" + +In addition to the parallelization of force computations, also the +generation of the neighbor lists is parallelized. As explained +previously, neighbor lists are built by looping over "owned" atoms and +storing the neighbors in "pages". In the OPENMP variants of the +neighbor list code, each thread operates on a different chunk of "owned" +atoms and allocates and fills its own set of pages with neighbor list +data. This is achieved by each thread keeping its own instance of the +:cpp:class:`MyPage ` page allocator class. 
diff --git a/doc/src/Developer_par_part.rst b/doc/src/Developer_par_part.rst new file mode 100644 index 0000000000..f797f559e2 --- /dev/null +++ b/doc/src/Developer_par_part.rst @@ -0,0 +1,89 @@ +Partitioning +^^^^^^^^^^^^ + +The underlying spatial decomposition strategy used by LAMMPS for +distributed-memory parallelism is set with the :doc:`comm_style command +` and can be either "brick" (a regular grid) or "tiled". + +.. _domain-decomposition: +.. figure:: img/domain-decomp.png + :align: center + + domain decomposition + + This figure shows the different kinds of domain decomposition used + for MPI parallelization: "brick" on the left with an orthogonal + (left) and a triclinic (middle) simulation domain, and a "tiled" + decomposition (right). The black lines show the division into + sub-domains and the contained atoms are "owned" by the corresponding + MPI process. The green dashed lines indicate how sub-domains are + extended with "ghost" atoms up to the communication cutoff distance. + +The LAMMPS simulation box is a 3d or 2d volume, which can be orthogonal +or triclinic in shape, as illustrated in the :ref:`domain-decomposition` +figure for the 2d case. Orthogonal means the box edges are aligned with +the *x*, *y*, *z* Cartesian axes, and the box faces are thus all +rectangular. Triclinic allows for a more general parallelepiped shape +in which edges are aligned with three arbitrary vectors and the box +faces are parallelograms. In each dimension box faces can be periodic, +or non-periodic with fixed or shrink-wrapped boundaries. In the fixed +case, atoms which move outside the face are deleted; shrink-wrapped +means the position of the box face adjusts continuously to enclose all +the atoms. + +For distributed-memory MPI parallelism, the simulation box is spatially +decomposed (partitioned) into non-overlapping sub-domains which fill the +box. 
The default partitioning, "brick", is most suitable when atom +density is roughly uniform, as shown in the left-side images of the +:ref:`domain-decomposition` figure. The sub-domains comprise a regular +grid and all sub-domains are identical in size and shape. Both the +orthogonal and triclinic boxes can deform continuously during a +simulation, e.g. to compress a solid or shear a liquid, in which case +the processor sub-domains likewise deform. + + +For models with non-uniform density, the number of particles per +processor can be load-imbalanced with the default partitioning. This +reduces parallel efficiency, as the overall simulation rate is limited +by the slowest processor, i.e. the one with the largest computational +load. For such models, LAMMPS supports multiple strategies to reduce +the load imbalance: + +- The processor grid decomposition is by default based on the simulation + cell volume and tries to optimize the volume to surface ratio for the sub-domains. + This can be changed with the :doc:`processors command `. +- The parallel planes defining the size of the sub-domains can be shifted + with the :doc:`balance command `. This can be done in addition + to choosing a more optimal processor grid. +- The recursive bisectioning algorithm in combination with the "tiled" + communication style can produce a partitioning with equal numbers of + particles in each sub-domain. + + +.. |decomp1| image:: img/decomp-regular.png + :width: 24% + +.. |decomp2| image:: img/decomp-processors.png + :width: 24% + +.. |decomp3| image:: img/decomp-balance.png + :width: 24% + +.. |decomp4| image:: img/decomp-rcb.png + :width: 24% + +|decomp1| |decomp2| |decomp3| |decomp4| + +The pictures above demonstrate different decompositions for a 2d system +with 12 MPI ranks. The atom colors indicate the load imbalance of each +sub-domain with green being optimal and red the least optimal. 
+ +Due to the vacuum in the system, the default decomposition is unbalanced +with several MPI ranks without atoms (left). By forcing a 1x12x1 +processor grid, every MPI rank does computations now, but the number of +atoms per sub-domain is still uneven and the thin slice shape increases +the amount of communication between sub-domains (center left). With a +2x6x1 processor grid and shifting the sub-domain divisions, the load +imbalance is further reduced and the amount of communication required +between sub-domains is less (center right). And using the recursive +bisectioning leads to a further improved decomposition (right). diff --git a/doc/src/Developer_parallel.rst b/doc/src/Developer_parallel.rst new file mode 100644 index 0000000000..c7bfcfca9e --- /dev/null +++ b/doc/src/Developer_parallel.rst @@ -0,0 +1,28 @@ +Parallel algorithms +------------------- + +LAMMPS is designed to enable running simulations in parallel using the +MPI parallel communication standard with distributed data via domain +decomposition. The parallelization aims to be efficient and result in good +strong scaling (= good speedup for the same system) and good weak +scaling (= the computational cost of enlarging the system is +proportional to the system size). Additional parallelization using GPUs +or OpenMP can also be applied within the sub-domain assigned to an MPI +process. For clarity, most of the following illustrations show the 2d +simulation case. The underlying algorithms in those cases, however, +apply to both 2d and 3d cases equally well. + +.. note:: + + The text and most of the figures in this chapter were adapted + for the manual from the section on parallel algorithms in the + :ref:`new LAMMPS paper `. + +.. 
toctree:: + :maxdepth: 1 + + Developer_par_part + Developer_par_comm + Developer_par_neigh + Developer_par_long + Developer_par_openmp diff --git a/doc/src/Intro_citing.rst b/doc/src/Intro_citing.rst index 978def9f15..0e10b7559a 100644 --- a/doc/src/Intro_citing.rst +++ b/doc/src/Intro_citing.rst @@ -4,28 +4,41 @@ Citing LAMMPS Core Algorithms ^^^^^^^^^^^^^^^ -Since LAMMPS is a community project, there is not a single one -publication or reference that describes **all** of LAMMPS. -The canonical publication that describes the foundation, that is -the basic spatial decomposition approach, the neighbor finding, -and basic communications algorithms used in LAMMPS is: +The paper mentioned below is the best overview of LAMMPS, but there are +also publications describing particular models or algorithms implemented +in LAMMPS or complementary software that it has interfaces to. Please +see below for how to cite contributions to LAMMPS. - `S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). `_ +.. _lammps_paper: -So any project using LAMMPS (or a derivative application using LAMMPS as -a simulation engine) should cite this paper. A new publication -describing the developments and improvements of LAMMPS in the 25 years -since then is currently in preparation. +The latest canonical publication that describes the basic features, the +source code design, the program structure, the spatial decomposition +approach, the neighbor finding, basic communications algorithms, and how +users and developers have contributed to LAMMPS is: + + `LAMMPS - A flexible simulation tool for particle-based materials modeling at the atomic, meso, and continuum scales, Comp. Phys. Comm. (accepted 09/2021), DOI:10.1016/j.cpc.2021.108171 `_ + +So a project using LAMMPS or a derivative application that uses LAMMPS +as a simulation engine should cite this paper. 
The paper is expected to +be published in its final form under the same DOI in the first half +of 2022. Please also give the URL of the LAMMPS website in your paper, +namely https://www.lammps.org. + +The original publication describing the parallel algorithms used in the +initial versions of LAMMPS is: + + `S. Plimpton, Fast Parallel Algorithms for Short-Range Molecular Dynamics, J Comp Phys, 117, 1-19 (1995). `_ DOI for the LAMMPS code ^^^^^^^^^^^^^^^^^^^^^^^ -LAMMPS developers use the `Zenodo service at CERN -`_ to create digital object identifies (DOI) for -stable releases of the LAMMPS code. There are two types of DOIs for the -LAMMPS source code: the canonical DOI for **all** versions of LAMMPS, -which will always point to the **latest** stable release version is: +LAMMPS developers use the `Zenodo service at CERN `_ +to create digital object identifiers (DOIs) for stable releases of the +LAMMPS source code. There are two types of DOIs for the LAMMPS source code. + +The canonical DOI for **all** versions of LAMMPS, which will always +point to the **latest** stable release version is: - DOI: `10.5281/zenodo.3726416 `_ @@ -45,11 +58,13 @@ about LAMMPS and its features. Citing contributions ^^^^^^^^^^^^^^^^^^^^ -LAMMPS has many features and that use either previously published -methods and algorithms or novel features. It also includes potential -parameter filed for specific models. Where available, a reminder about -references for optional features used in a specific run is printed to -the screen and log file. Style and output location can be selected with -the :ref:`-cite command-line switch `. Additional references are +LAMMPS has many features that use either previously published methods +and algorithms or novel features. It also includes potential parameter +files for specific models. Where available, a reminder about references +for optional features used in a specific run is printed to the screen +and log file. 
Style and output location can be selected with the +:ref:`-cite command-line switch `. Additional references are given in the documentation of the :doc:`corresponding commands -` or in the :doc:`Howto tutorials `. +` or in the :doc:`Howto tutorials `. So please +make certain that you provide the proper acknowledgments and citations +in any published works using LAMMPS. diff --git a/doc/src/Library_create.rst b/doc/src/Library_create.rst index 3566cb3cc9..8043819891 100644 --- a/doc/src/Library_create.rst +++ b/doc/src/Library_create.rst @@ -34,7 +34,7 @@ simple example demonstrating its use: int lmpargc = sizeof(lmpargv)/sizeof(const char *); /* create LAMMPS instance */ - handle = lammps_open_no_mpi(lmpargc, lmpargv, NULL); + handle = lammps_open_no_mpi(lmpargc, (char **)lmpargv, NULL); if (handle == NULL) { printf("LAMMPS initialization failed"); lammps_mpi_finalize(); diff --git a/doc/src/PDF/colvars-refman-lammps.pdf b/doc/src/PDF/colvars-refman-lammps.pdf index 8b6e5bffdc..011b3d0f04 100644 Binary files a/doc/src/PDF/colvars-refman-lammps.pdf and b/doc/src/PDF/colvars-refman-lammps.pdf differ diff --git a/doc/src/Run_basics.rst b/doc/src/Run_basics.rst index 3d57633df2..5f1211d093 100644 --- a/doc/src/Run_basics.rst +++ b/doc/src/Run_basics.rst @@ -2,17 +2,25 @@ Basics of running LAMMPS ======================== LAMMPS is run from the command line, reading commands from a file via -the -in command line flag, or from standard input. -Using the "-in in.file" variant is recommended: +the -in command line flag, or from standard input. Using the "-in +in.file" variant is recommended (see note below). The name of the +LAMMPS executable is either ``lmp`` or ``lmp_<machine>`` with +`<machine>` being the machine string used when compiling LAMMPS. This +is required when compiling LAMMPS with the traditional build system +(e.g. with ``make mpi``), but optional when using CMake to configure and +build LAMMPS: .. 
code-block:: bash $ lmp_serial -in in.file $ lmp_serial < in.file + $ lmp -in in.file + $ lmp < in.file $ /path/to/lammps/src/lmp_serial -i in.file $ mpirun -np 4 lmp_mpi -in in.file + $ mpiexec -np 4 lmp -in in.file $ mpirun -np 8 /path/to/lammps/src/lmp_mpi -in in.file - $ mpirun -np 6 /usr/local/bin/lmp -in in.file + $ mpiexec -n 6 /usr/local/bin/lmp -in in.file You normally run the LAMMPS command in the directory where your input script is located. That is also where output files are produced by @@ -23,7 +31,7 @@ executable itself can be placed elsewhere. .. note:: The redirection operator "<" will not always work when running - in parallel with mpirun; for those systems the -in form is required. + in parallel with mpirun or mpiexec; for those systems the -in form is required. As LAMMPS runs it prints info to the screen and a logfile named *log.lammps*\ . More info about output is given on the diff --git a/doc/src/group.rst b/doc/src/group.rst index e72eeb7c19..36559ba68a 100644 --- a/doc/src/group.rst +++ b/doc/src/group.rst @@ -38,7 +38,7 @@ Syntax *intersect* args = two or more group IDs *dynamic* args = parent-ID keyword value ... 
one or more keyword/value pairs may be appended - keyword = *region* or *var* or *every* + keyword = *region* or *var* or *property* or *every* *region* value = region-ID *var* value = name of variable *property* value = name of custom integer or floating point vector diff --git a/doc/src/img/decomp-balance.png b/doc/src/img/decomp-balance.png new file mode 100644 index 0000000000..eb00e8e89a Binary files /dev/null and b/doc/src/img/decomp-balance.png differ diff --git a/doc/src/img/decomp-processors.png b/doc/src/img/decomp-processors.png new file mode 100644 index 0000000000..0d68f3679f Binary files /dev/null and b/doc/src/img/decomp-processors.png differ diff --git a/doc/src/img/decomp-rcb.png b/doc/src/img/decomp-rcb.png new file mode 100644 index 0000000000..0e38efa7ea Binary files /dev/null and b/doc/src/img/decomp-rcb.png differ diff --git a/doc/src/img/decomp-regular.png b/doc/src/img/decomp-regular.png new file mode 100644 index 0000000000..a8c645033d Binary files /dev/null and b/doc/src/img/decomp-regular.png differ diff --git a/doc/src/img/domain-decomp.png b/doc/src/img/domain-decomp.png new file mode 100644 index 0000000000..a0a5cc06f2 Binary files /dev/null and b/doc/src/img/domain-decomp.png differ diff --git a/doc/src/img/fft-decomp-parallel.png b/doc/src/img/fft-decomp-parallel.png new file mode 100644 index 0000000000..80f69bd033 Binary files /dev/null and b/doc/src/img/fft-decomp-parallel.png differ diff --git a/doc/src/img/ghost-comm.png b/doc/src/img/ghost-comm.png new file mode 100644 index 0000000000..a402daa054 Binary files /dev/null and b/doc/src/img/ghost-comm.png differ diff --git a/doc/src/img/neigh-stencil.png b/doc/src/img/neigh-stencil.png new file mode 100644 index 0000000000..7d06f6ae14 Binary files /dev/null and b/doc/src/img/neigh-stencil.png differ diff --git a/doc/utils/requirements.txt b/doc/utils/requirements.txt index 7e4563a1ec..f367727d20 100644 --- a/doc/utils/requirements.txt +++ b/doc/utils/requirements.txt @@ -1,7 +1,7 
@@ Sphinx==4.0.3 -sphinxcontrib-spelling +sphinxcontrib-spelling==7.2.1 git+git://github.com/akohlmey/sphinx-fortran@parallel-read -sphinx_tabs -breathe -Pygments -six +sphinx_tabs==3.2.0 +breathe==4.31.0 +Pygments==2.10.0 +six==1.16.0 diff --git a/examples/PACKAGES/charge_regulation/in.chreg-polymer b/examples/PACKAGES/charge_regulation/in.chreg-polymer index 0adab9b5e7..055032c018 100644 --- a/examples/PACKAGES/charge_regulation/in.chreg-polymer +++ b/examples/PACKAGES/charge_regulation/in.chreg-polymer @@ -8,7 +8,7 @@ bond_style harmonic bond_coeff 1 100 1.122462 # K R0 velocity all create 1.0 8008 loop geom -pair_style lj/cut/coul/long 1.122462 20 +pair_style lj/cut/coul/long/soft 2 0.5 10.0 1.122462 20 pair_coeff * * 1.0 1.0 1.122462 # charges kspace_style pppm 1.0e-3 pair_modify shift yes diff --git a/lib/colvars/colvarmodule.cpp b/lib/colvars/colvarmodule.cpp index 405c68244b..ee14703726 100644 --- a/lib/colvars/colvarmodule.cpp +++ b/lib/colvars/colvarmodule.cpp @@ -1476,7 +1476,9 @@ int colvarmodule::write_output_files() bi != biases.end(); bi++) { // Only write output files if they have not already been written this time step - if ((*bi)->output_freq == 0 || (cvm::step_absolute() % (*bi)->output_freq) != 0) { + if ((*bi)->output_freq == 0 || + cvm::step_relative() == 0 || + (cvm::step_absolute() % (*bi)->output_freq) != 0) { error_code |= (*bi)->write_output_files(); } error_code |= (*bi)->write_state_to_replicas(); diff --git a/lib/colvars/colvars_version.h b/lib/colvars/colvars_version.h index dd56c39f3a..3f050c7e7c 100644 --- a/lib/colvars/colvars_version.h +++ b/lib/colvars/colvars_version.h @@ -1,3 +1,3 @@ #ifndef COLVARS_VERSION -#define COLVARS_VERSION "2021-08-06" +#define COLVARS_VERSION "2021-09-21" #endif diff --git a/lib/gpu/geryon/ocl_device.h b/lib/gpu/geryon/ocl_device.h index e5efc239da..003b4b3ba7 100644 --- a/lib/gpu/geryon/ocl_device.h +++ b/lib/gpu/geryon/ocl_device.h @@ -556,16 +556,22 @@ void UCL_Device::add_properties(cl_device_id 
device_list) { sizeof(float_width),&float_width,nullptr)); op.preferred_vector_width32=float_width; - // Determine if double precision is supported cl_uint double_width; CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, sizeof(double_width),&double_width,nullptr)); op.preferred_vector_width64=double_width; - if (double_width==0) - op.double_precision=false; - else + + // Determine if double precision is supported: All bits in the mask must be set. + cl_device_fp_config double_mask = (CL_FP_FMA|CL_FP_ROUND_TO_NEAREST|CL_FP_ROUND_TO_ZERO| + CL_FP_ROUND_TO_INF|CL_FP_INF_NAN|CL_FP_DENORM); + cl_device_fp_config double_avail; + CL_SAFE_CALL(clGetDeviceInfo(device_list,CL_DEVICE_DOUBLE_FP_CONFIG, + sizeof(double_avail),&double_avail,nullptr)); + if ((double_avail & double_mask) == double_mask) op.double_precision=true; + else + op.double_precision=false; CL_SAFE_CALL(clGetDeviceInfo(device_list, CL_DEVICE_PROFILING_TIMER_RESOLUTION, diff --git a/lib/gpu/lal_born_coul_long.cpp b/lib/gpu/lal_born_coul_long.cpp index 8c7084f4a4..36f46f2684 100644 --- a/lib/gpu/lal_born_coul_long.cpp +++ b/lib/gpu/lal_born_coul_long.cpp @@ -34,7 +34,7 @@ BornCoulLongT::BornCoulLong() : BaseCharge(), } template -BornCoulLongT::~BornCoulLongT() { +BornCoulLongT::~BornCoulLong() { clear(); } diff --git a/lib/gpu/lal_born_coul_wolf.cpp b/lib/gpu/lal_born_coul_wolf.cpp index 9aac866353..9a0653dd23 100644 --- a/lib/gpu/lal_born_coul_wolf.cpp +++ b/lib/gpu/lal_born_coul_wolf.cpp @@ -34,7 +34,7 @@ BornCoulWolfT::BornCoulWolf() : BaseCharge(), } template -BornCoulWolfT::~BornCoulWolfT() { +BornCoulWolfT::~BornCoulWolf() { clear(); } diff --git a/lib/gpu/lal_buck_coul_long.cpp b/lib/gpu/lal_buck_coul_long.cpp index 60205a2ad6..98c97ea908 100644 --- a/lib/gpu/lal_buck_coul_long.cpp +++ b/lib/gpu/lal_buck_coul_long.cpp @@ -34,7 +34,7 @@ BuckCoulLongT::BuckCoulLong() : BaseCharge(), } template -BuckCoulLongT::~BuckCoulLongT() { +BuckCoulLongT::~BuckCoulLong() { clear(); } 
diff --git a/lib/gpu/lal_device.cpp b/lib/gpu/lal_device.cpp index 9dbd02dd3e..59eac78483 100644 --- a/lib/gpu/lal_device.cpp +++ b/lib/gpu/lal_device.cpp @@ -333,6 +333,12 @@ int DeviceT::init_device(MPI_Comm world, MPI_Comm replica, const int ngpu, gpu_barrier(); } + // check if double precision support is available + #if defined(_SINGLE_DOUBLE) || defined(_DOUBLE_DOUBLE) + if (!gpu->double_precision()) + return -16; + #endif + // Setup auto bin size calculation for calls from atom::sort // - This is repeated in neighbor init with additional info if (_user_cell_size<0.0) { @@ -546,14 +552,9 @@ int DeviceT::init_nbor(Neighbor *nbor, const int nlocal, return -3; if (_user_cell_size<0.0) { - #ifndef LAL_USE_OLD_NEIGHBOR - _neighbor_shared.setup_auto_cell_size(true,cutoff,nbor->simd_size()); - #else _neighbor_shared.setup_auto_cell_size(false,cutoff,nbor->simd_size()); - #endif } else - _neighbor_shared.setup_auto_cell_size(false,_user_cell_size, - nbor->simd_size()); + _neighbor_shared.setup_auto_cell_size(false,_user_cell_size,nbor->simd_size()); nbor->set_cutoff(cutoff); return 0; diff --git a/lib/pace/Makefile b/lib/pace/Makefile index ac9f3a3151..c2e1892ddd 100644 --- a/lib/pace/Makefile +++ b/lib/pace/Makefile @@ -2,8 +2,8 @@ SHELL = /bin/sh # ------ FILES ------ -SRC_FILES = $(wildcard src/ML-PACE/*.cpp) -SRC = $(filter-out src/ML-PACE/pair_pace.cpp, $(SRC_FILES)) +SRC_FILES = $(wildcard src/USER-PACE/*.cpp) +SRC = $(filter-out src/USER-PACE/pair_pace.cpp, $(SRC_FILES)) # ------ DEFINITIONS ------ @@ -12,7 +12,7 @@ OBJ = $(SRC:.cpp=.o) # ------ SETTINGS ------ -CXXFLAGS = -O3 -fPIC -Isrc/ML-PACE +CXXFLAGS = -O3 -fPIC -Isrc/USER-PACE ARCHIVE = ar ARCHFLAG = -rc diff --git a/lib/pace/Makefile.lammps b/lib/pace/Makefile.lammps index 89761c1b4b..17820716df 100644 --- a/lib/pace/Makefile.lammps +++ b/lib/pace/Makefile.lammps @@ -1,3 +1,3 @@ -pace_SYSINC =-I../../lib/pace/src/ML-PACE +pace_SYSINC =-I../../lib/pace/src/USER-PACE pace_SYSLIB = -L../../lib/pace/ -lpace 
pace_SYSPATH = diff --git a/src/.gitignore b/src/.gitignore index 6c0a838c1b..174ee35be5 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -858,8 +858,6 @@ /fix_ti_rs.h /fix_ti_spring.cpp /fix_ti_spring.h -/fix_ttm.cpp -/fix_ttm.h /fix_tune_kspace.cpp /fix_tune_kspace.h /fix_wall_body_polygon.cpp @@ -919,6 +917,7 @@ /improper_ring.h /improper_umbrella.cpp /improper_umbrella.h +/interlayer_taper.h /kissfft.h /lj_sdk_common.h /math_complex.h @@ -933,7 +932,6 @@ /msm_cg.h /neb.cpp /neb.h - /pair_adp.cpp /pair_adp.h /pair_agni.cpp @@ -994,6 +992,8 @@ /pair_cosine_squared.h /pair_coul_diel.cpp /pair_coul_diel.h +/pair_coul_exclude.cpp +/pair_coul_exclude.h /pair_coul_long.cpp /pair_coul_long.h /pair_coul_msm.cpp @@ -1431,6 +1431,10 @@ /fix_srp.h /fix_tfmc.cpp /fix_tfmc.h +/fix_ttm.cpp +/fix_ttm.h +/fix_ttm_grid.cpp +/fix_ttm_grid.h /fix_ttm_mod.cpp /fix_ttm_mod.h /pair_born_coul_long_cs.cpp diff --git a/src/DRUDE/fix_drude_transform.cpp b/src/DRUDE/fix_drude_transform.cpp index ed42d2b548..3f8b0cfe26 100644 --- a/src/DRUDE/fix_drude_transform.cpp +++ b/src/DRUDE/fix_drude_transform.cpp @@ -13,16 +13,18 @@ ------------------------------------------------------------------------- */ /** Fix Drude Transform ******************************************************/ + #include "fix_drude_transform.h" +#include "atom.h" +#include "comm.h" +#include "domain.h" +#include "error.h" +#include "fix_drude.h" +#include "modify.h" + #include #include -#include "fix_drude.h" -#include "atom.h" -#include "domain.h" -#include "comm.h" -#include "error.h" -#include "modify.h" using namespace LAMMPS_NS; using namespace FixConst; diff --git a/src/DRUDE/fix_drude_transform.h b/src/DRUDE/fix_drude_transform.h index 7ee85d2b42..495ec8b175 100644 --- a/src/DRUDE/fix_drude_transform.h +++ b/src/DRUDE/fix_drude_transform.h @@ -25,10 +25,10 @@ FixStyle(drude/transform/inverse,FixDrudeTransform); namespace LAMMPS_NS { -template class FixDrudeTransform : public Fix { +template class 
FixDrudeTransform: public Fix { public: - FixDrudeTransform(class LAMMPS *, int, char **); - ~FixDrudeTransform(); + FixDrudeTransform(class LAMMPS *, int, char **); + ~FixDrudeTransform(); int setmask(); void init(); void setup(int vflag); diff --git a/src/EXTRA-FIX/fix_npt_cauchy.cpp b/src/EXTRA-FIX/fix_npt_cauchy.cpp index 07077cdabb..d9a0f850db 100644 --- a/src/EXTRA-FIX/fix_npt_cauchy.cpp +++ b/src/EXTRA-FIX/fix_npt_cauchy.cpp @@ -2442,7 +2442,7 @@ double FixNPTCauchy::memory_usage() void FixNPTCauchy::CauchyStat_init() { if (comm->me == 0) { - std::string mesg = fmt::format("Using fix npt/cauchy with alpha={:f.8}\n",alpha); + std::string mesg = fmt::format("Using fix npt/cauchy with alpha={:.8f}\n",alpha); if (restartPK==1) { mesg += " (this is a continuation run)\n"; } else { @@ -2463,7 +2463,7 @@ void FixNPTCauchy::CauchyStat_init() error->all(FLERR,"Illegal fix npt/cauchy command: Alpha cannot be zero or negative."); if (restart_stored < 0) { - modify->add_fix(std::string(id_store) + "all STORE global 1 6"); + modify->add_fix(std::string(id_store) + " all STORE global 1 6"); restart_stored = modify->find_fix(id_store); } init_store = (FixStore *)modify->fix[restart_stored]; diff --git a/src/EXTRA-PAIR/pair_coul_exclude.cpp b/src/EXTRA-PAIR/pair_coul_exclude.cpp index 404fc9c784..74890bcf08 100644 --- a/src/EXTRA-PAIR/pair_coul_exclude.cpp +++ b/src/EXTRA-PAIR/pair_coul_exclude.cpp @@ -189,7 +189,7 @@ void PairCoulExclude::init_style() init for one type pair i,j and corresponding j,i ------------------------------------------------------------------------- */ -double PairCoulExclude::init_one(int i, int j) +double PairCoulExclude::init_one(int /*i*/, int /*j*/) { return cut_global; } diff --git a/src/GPU/gpu_extra.h b/src/GPU/gpu_extra.h index bd55d11021..c2d9931e0a 100644 --- a/src/GPU/gpu_extra.h +++ b/src/GPU/gpu_extra.h @@ -78,7 +78,11 @@ inline void check_flag(int error_flag, LAMMPS_NS::Error *error, MPI_Comm &world) else if (all_success == -13) 
error->all(FLERR, "Invalid device configuration."); else if (all_success == -15) - error->all(FLERR, "P3M built for FP64 and GPU device is FP32 only."); + error->all(FLERR, "PPPM was compiled for double precision floating point " + "but GPU device supports single precision only."); + else if (all_success == -16) + error->all(FLERR, "GPU library was compiled for double or mixed precision " + "floating point but GPU device supports single precision only."); else error->all(FLERR, "Unknown error in GPU library"); } diff --git a/src/H5MD/dump_h5md.cpp b/src/H5MD/dump_h5md.cpp index 43219fb035..bc9c98caa0 100644 --- a/src/H5MD/dump_h5md.cpp +++ b/src/H5MD/dump_h5md.cpp @@ -28,12 +28,12 @@ #include "update.h" #include "version.h" +#include "ch5md.h" + #include #include #include -#include "ch5md.h" - using namespace LAMMPS_NS; #define MYMIN(a,b) ((a) < (b) ? (a) : (b)) diff --git a/src/KIM/kim_init.cpp b/src/KIM/kim_init.cpp index ea3dc46da4..a4003b7510 100644 --- a/src/KIM/kim_init.cpp +++ b/src/KIM/kim_init.cpp @@ -1,4 +1,3 @@ -// clang-format off /* ---------------------------------------------------------------------- LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator https://www.lammps.org/, Sandia National Laboratories @@ -85,21 +84,23 @@ void KimInit::command(int narg, char **arg) if ((narg < 2) || (narg > 3)) error->all(FLERR, "Illegal 'kim init' command"); if (domain->box_exist) - error->all(FLERR, "Must use 'kim init' command before " - "simulation box is defined"); + error->all(FLERR, "Must use 'kim init' command before simulation box is defined"); char *model_name = utils::strdup(arg[0]); char *user_units = utils::strdup(arg[1]); if (narg == 3) { auto arg_str = std::string(arg[2]); - if (arg_str == "unit_conversion_mode") unit_conversion_mode = true; + if (arg_str == "unit_conversion_mode") + unit_conversion_mode = true; else { - error->all(FLERR, "Illegal 'kim init' command.\nThe argument " - "followed by unit_style {} is an optional " - 
"argument and when is used must " - "be unit_conversion_mode", user_units); + error->all(FLERR, + "Illegal 'kim init' command.\n" + "The argument followed by unit_style {} is an optional argument and when " + "is used must be unit_conversion_mode", + user_units); } - } else unit_conversion_mode = false; + } else + unit_conversion_mode = false; char *model_units; KIM_Model *pkim = nullptr; @@ -117,14 +118,9 @@ void KimInit::command(int narg, char **arg) /* ---------------------------------------------------------------------- */ namespace { -void get_kim_unit_names( - char const * const system, - KIM_LengthUnit & lengthUnit, - KIM_EnergyUnit & energyUnit, - KIM_ChargeUnit & chargeUnit, - KIM_TemperatureUnit & temperatureUnit, - KIM_TimeUnit & timeUnit, - Error * error) +void get_kim_unit_names(char const *const system, KIM_LengthUnit &lengthUnit, + KIM_EnergyUnit &energyUnit, KIM_ChargeUnit &chargeUnit, + KIM_TemperatureUnit &temperatureUnit, KIM_TimeUnit &timeUnit, Error *error) { const std::string system_str(system); if (system_str == "real") { @@ -157,20 +153,64 @@ void get_kim_unit_names( chargeUnit = KIM_CHARGE_UNIT_e; temperatureUnit = KIM_TEMPERATURE_UNIT_K; timeUnit = KIM_TIME_UNIT_fs; - } else if ((system_str == "lj") || - (system_str == "micro") || - (system_str == "nano")) { - error->all(FLERR, "LAMMPS unit_style {} not supported " - "by KIM models", system_str); + } else if ((system_str == "lj") || (system_str == "micro") || (system_str == "nano")) { + error->all(FLERR, "LAMMPS unit_style {} not supported by KIM models", system_str); } else { error->all(FLERR, "Unknown unit_style"); } } -} // namespace +} // namespace -void KimInit::determine_model_type_and_units(char * model_name, - char * user_units, - char ** model_units, +void KimInit::print_dirs(struct KIM_Collections *const collections) const +{ + int kim_error = 0; + int dirListExtent = 0; + int dirCounter = 0; + + std::string mesg = "#=== KIM is looking for 'Portable Models' in these directories 
===\n"; + std::vector collection_list; + collection_list.push_back(KIM_COLLECTION_currentWorkingDirectory); + collection_list.push_back(KIM_COLLECTION_environmentVariable); + collection_list.push_back(KIM_COLLECTION_user); + collection_list.push_back(KIM_COLLECTION_system); + + for (auto col : collection_list) { + kim_error = KIM_Collections_CacheListOfDirectoryNames( + collections, col, KIM_COLLECTION_ITEM_TYPE_portableModel, &dirListExtent); + if (!kim_error) { + for (int i = 0; i < dirListExtent; ++i) { + char const *name; + kim_error = KIM_Collections_GetDirectoryName(collections, i, &name); + // Don't check for error due to bug in kim-api-2.2.1 and below. +#if ((KIM_VERSION_MAJOR * 1000 + KIM_VERSION_MINOR) * 1000 + KIM_VERSION_PATCH) <= 2002001 + kim_error = 0; +#endif + if (!kim_error) mesg += fmt::format("# {:2}: {}\n", ++dirCounter, name); + } + } + } + + dirCounter = 0; + mesg += "#=== KIM is looking for 'Simulator Models' in these directories ===\n"; + for (auto col : collection_list) { + kim_error = KIM_Collections_CacheListOfDirectoryNames( + collections, col, KIM_COLLECTION_ITEM_TYPE_simulatorModel, &dirListExtent); + if (!kim_error) { + for (int i = 0; i < dirListExtent; ++i) { + char const *name; + kim_error = KIM_Collections_GetDirectoryName(collections, i, &name); + // Don't check for error due to bug in kim-api-2.2.1 and below. 
+#if ((KIM_VERSION_MAJOR * 1000 + KIM_VERSION_MINOR) * 1000 + KIM_VERSION_PATCH) <= 2002001 + kim_error = 0; +#endif + if (!kim_error) mesg += fmt::format("# {:2}: {}\n", ++dirCounter, name); + } + } + } + input->write_echo(mesg); +} + +void KimInit::determine_model_type_and_units(char *model_name, char *user_units, char **model_units, KIM_Model *&pkim) { KIM_LengthUnit lengthUnit; @@ -179,33 +219,26 @@ void KimInit::determine_model_type_and_units(char * model_name, KIM_TemperatureUnit temperatureUnit; KIM_TimeUnit timeUnit; int units_accepted; - KIM_Collections * collections; + KIM_Collections *collections; KIM_CollectionItemType itemType; int kim_error = KIM_Collections_Create(&collections); - if (kim_error) - error->all(FLERR, "Unable to access KIM Collections to find Model"); + if (kim_error) error->all(FLERR, "Unable to access KIM Collections to find Model"); auto logID = fmt::format("{}_Collections", comm->me); KIM_Collections_SetLogID(collections, logID.c_str()); + print_dirs(collections); + kim_error = KIM_Collections_GetItemType(collections, model_name, &itemType); if (kim_error) error->all(FLERR, "KIM Model name not found"); KIM_Collections_Destroy(&collections); - if (KIM_CollectionItemType_Equal(itemType, - KIM_COLLECTION_ITEM_TYPE_portableModel)) { - get_kim_unit_names(user_units, lengthUnit, energyUnit, - chargeUnit, temperatureUnit, timeUnit, error); - int kim_error = KIM_Model_Create(KIM_NUMBERING_zeroBased, - lengthUnit, - energyUnit, - chargeUnit, - temperatureUnit, - timeUnit, - model_name, - &units_accepted, - &pkim); + if (KIM_CollectionItemType_Equal(itemType, KIM_COLLECTION_ITEM_TYPE_portableModel)) { + get_kim_unit_names(user_units, lengthUnit, energyUnit, chargeUnit, temperatureUnit, timeUnit, + error); + int kim_error = KIM_Model_Create(KIM_NUMBERING_zeroBased, lengthUnit, energyUnit, chargeUnit, + temperatureUnit, timeUnit, model_name, &units_accepted, &pkim); if (kim_error) error->all(FLERR, "Unable to load KIM Simulator Model"); @@ 
-219,20 +252,12 @@ void KimInit::determine_model_type_and_units(char * model_name, } else if (unit_conversion_mode) { KIM_Model_Destroy(&pkim); int const num_systems = 5; - char const * const systems[num_systems] - = {"metal", "real", "si", "cgs", "electron"}; - for (int i=0; i < num_systems; ++i) { - get_kim_unit_names(systems[i], lengthUnit, energyUnit, - chargeUnit, temperatureUnit, timeUnit, error); - kim_error = KIM_Model_Create(KIM_NUMBERING_zeroBased, - lengthUnit, - energyUnit, - chargeUnit, - temperatureUnit, - timeUnit, - model_name, - &units_accepted, - &pkim); + char const *const systems[num_systems] = {"metal", "real", "si", "cgs", "electron"}; + for (int i = 0; i < num_systems; ++i) { + get_kim_unit_names(systems[i], lengthUnit, energyUnit, chargeUnit, temperatureUnit, + timeUnit, error); + kim_error = KIM_Model_Create(KIM_NUMBERING_zeroBased, lengthUnit, energyUnit, chargeUnit, + temperatureUnit, timeUnit, model_name, &units_accepted, &pkim); if (units_accepted) { logID = fmt::format("{}_Model", comm->me); KIM_Model_SetLogID(pkim, logID.c_str()); @@ -246,12 +271,10 @@ void KimInit::determine_model_type_and_units(char * model_name, KIM_Model_Destroy(&pkim); error->all(FLERR, "KIM Model does not support the requested unit system"); } - } else if (KIM_CollectionItemType_Equal( - itemType, KIM_COLLECTION_ITEM_TYPE_simulatorModel)) { - KIM_SimulatorModel * simulatorModel; + } else if (KIM_CollectionItemType_Equal(itemType, KIM_COLLECTION_ITEM_TYPE_simulatorModel)) { + KIM_SimulatorModel *simulatorModel; kim_error = KIM_SimulatorModel_Create(model_name, &simulatorModel); - if (kim_error) - error->all(FLERR, "Unable to load KIM Simulator Model"); + if (kim_error) error->all(FLERR, "Unable to load KIM Simulator Model"); model_type = SM; logID = fmt::format("{}_SimulatorModel", comm->me); @@ -264,13 +287,11 @@ void KimInit::determine_model_type_and_units(char * model_name, KIM_SimulatorModel_GetNumberOfSimulatorFields(simulatorModel, &sim_fields); 
KIM_SimulatorModel_CloseTemplateMap(simulatorModel); for (int i = 0; i < sim_fields; ++i) { - KIM_SimulatorModel_GetSimulatorFieldMetadata( - simulatorModel, i, &sim_lines, &sim_field); + KIM_SimulatorModel_GetSimulatorFieldMetadata(simulatorModel, i, &sim_lines, &sim_field); const std::string sim_field_str(sim_field); if (sim_field_str == "units") { - KIM_SimulatorModel_GetSimulatorFieldLine( - simulatorModel, i, 0, &sim_value); + KIM_SimulatorModel_GetSimulatorFieldLine(simulatorModel, i, 0, &sim_value); *model_units = utils::strdup(sim_value); break; } @@ -280,16 +301,15 @@ void KimInit::determine_model_type_and_units(char * model_name, const std::string model_units_str(*model_units); const std::string user_units_str(user_units); if ((!unit_conversion_mode) && (model_units_str != user_units_str)) { - error->all(FLERR, "Incompatible units for KIM Simulator Model" - ", required units = {}", model_units_str); + error->all(FLERR, "Incompatible units for KIM Simulator Model, required units = {}", + model_units_str); } } } /* ---------------------------------------------------------------------- */ -void KimInit::do_init(char *model_name, char *user_units, char *model_units, - KIM_Model *&pkim) +void KimInit::do_init(char *model_name, char *user_units, char *model_units, KIM_Model *&pkim) { // create storage proxy fix. delete existing fix, if needed. 
@@ -304,8 +324,7 @@ void KimInit::do_init(char *model_name, char *user_units, char *model_units, fix_store->setptr("model_units", (void *) model_units); // Begin output to log file - input->write_echo("#=== BEGIN kim init ===================================" - "=======\n"); + input->write_echo("#=== BEGIN kim init ==========================================\n"); KIM_SimulatorModel *simulatorModel; if (model_type == SM) { @@ -316,18 +335,16 @@ void KimInit::do_init(char *model_name, char *user_units, char *model_units, KIM_SimulatorModel_SetLogID(simulatorModel, logID.c_str()); char const *sim_name, *sim_version; - KIM_SimulatorModel_GetSimulatorNameAndVersion( - simulatorModel, &sim_name, &sim_version); + KIM_SimulatorModel_GetSimulatorNameAndVersion(simulatorModel, &sim_name, &sim_version); const std::string sim_name_str(sim_name); - if (sim_name_str != "LAMMPS") - error->all(FLERR, "Incompatible KIM Simulator Model"); + if (sim_name_str != "LAMMPS") error->all(FLERR, "Incompatible KIM Simulator Model"); if (comm->me == 0) { auto mesg = fmt::format("# Using KIM Simulator Model : {}\n" - "# For Simulator : {} {}\n" - "# Running on : LAMMPS {}\n#\n", model_name, - sim_name_str, sim_version, lmp->version); + "# For Simulator : {} {}\n" + "# Running on : LAMMPS {}\n#\n", + model_name, sim_name_str, sim_version, lmp->version); utils::logmesg(lmp, mesg); } @@ -350,18 +367,16 @@ void KimInit::do_init(char *model_name, char *user_units, char *model_units, // Set the skin and timestep default values as // 2.0 Angstroms and 1.0 femtosecond - const std::string skin_cmd = - (model_units_str == "real") ? "neighbor 2.0 bin # Angstroms": - (model_units_str == "metal") ? "neighbor 2.0 bin # Angstroms": - (model_units_str == "si") ? "neighbor 2e-10 bin # meters": - (model_units_str == "cgs") ? "neighbor 2e-8 bin # centimeters": - "neighbor 3.77945224 bin # Bohr"; - const std::string step_cmd = - (model_units_str == "real") ? 
"timestep 1.0 # femtoseconds": - (model_units_str == "metal") ? "timestep 1.0e-3 # picoseconds": - (model_units_str == "si") ? "timestep 1e-15 # seconds": - (model_units_str == "cgs") ? "timestep 1e-15 # seconds": - "timestep 1.0 # femtoseconds"; + const std::string skin_cmd = (model_units_str == "real") ? "neighbor 2.0 bin # Angstroms" + : (model_units_str == "metal") ? "neighbor 2.0 bin # Angstroms" + : (model_units_str == "si") ? "neighbor 2e-10 bin # meters" + : (model_units_str == "cgs") ? "neighbor 2e-8 bin # centimeters" + : "neighbor 3.77945224 bin # Bohr"; + const std::string step_cmd = (model_units_str == "real") ? "timestep 1.0 # femtoseconds" + : (model_units_str == "metal") ? "timestep 1.0e-3 # picoseconds" + : (model_units_str == "si") ? "timestep 1e-15 # seconds" + : (model_units_str == "cgs") ? "timestep 1e-15 # seconds" + : "timestep 1.0 # femtoseconds"; input->one(skin_cmd); input->one(step_cmd); @@ -373,14 +388,12 @@ void KimInit::do_init(char *model_name, char *user_units, char *model_units, // init model for (int i = 0; i < sim_fields; ++i) { - KIM_SimulatorModel_GetSimulatorFieldMetadata( - simulatorModel, i, &sim_lines, &sim_field); + KIM_SimulatorModel_GetSimulatorFieldMetadata(simulatorModel, i, &sim_lines, &sim_field); const std::string sim_field_str(sim_field); if (sim_field_str == "model-init") { for (int j = 0; j < sim_lines; ++j) { - KIM_SimulatorModel_GetSimulatorFieldLine( - simulatorModel, i, j, &sim_value); + KIM_SimulatorModel_GetSimulatorFieldLine(simulatorModel, i, j, &sim_value); input->one(sim_value); } break; @@ -404,31 +417,28 @@ void KimInit::do_init(char *model_name, char *user_units, char *model_units, int max_len(0); for (int i = 0; i < numberOfParameters; ++i) { - KIM_Model_GetParameterMetadata(pkim, i, &kim_DataType, - &extent, &str_name, &str_desc); - max_len = MAX(max_len, (int)strlen(str_name)); + KIM_Model_GetParameterMetadata(pkim, i, &kim_DataType, &extent, &str_name, &str_desc); + max_len = MAX(max_len, (int) 
strlen(str_name)); } max_len = MAX(18, max_len + 1); - mesg += fmt::format(" No. | {:<{}} | data type | extent\n", - "Parameter name", max_len); + mesg += fmt::format(" No. | {:<{}} | data type | extent\n", "Parameter name", max_len); mesg += fmt::format("{:-<{}}\n", "-", max_len + 35); for (int i = 0; i < numberOfParameters; ++i) { - KIM_Model_GetParameterMetadata(pkim, i, &kim_DataType, - &extent, &str_name, &str_desc); + KIM_Model_GetParameterMetadata(pkim, i, &kim_DataType, &extent, &str_name, &str_desc); auto data_type = std::string("\""); data_type += KIM_DataType_ToString(kim_DataType) + std::string("\""); - mesg += fmt::format(" {:<8} | {:<{}} | {:<10} | {}\n", i + 1, str_name, - max_len, data_type, extent); + mesg += fmt::format(" {:<8} | {:<{}} | {:<10} | {}\n", i + 1, str_name, max_len, data_type, + extent); } - } else mesg += "No mutable parameters.\n"; + } else + mesg += "No mutable parameters.\n"; KIM_Model_Destroy(&pkim); input->write_echo(mesg); } // End output to log file - input->write_echo("#=== END kim init =====================================" - "=======\n\n"); + input->write_echo("#=== END kim init ============================================\n\n"); } /* ---------------------------------------------------------------------- */ @@ -446,24 +456,11 @@ void KimInit::do_variables(const std::string &from, const std::string &to) int ier; std::string var_str; int v_unit; - const char *units[] = {"mass", - "distance", - "time", - "energy", - "velocity", - "force", - "torque", - "temperature", - "pressure", - "viscosity", - "charge", - "dipole", - "efield", - "density", - nullptr}; + const char *units[] = {"mass", "distance", "time", "energy", "velocity", + "force", "torque", "temperature", "pressure", "viscosity", + "charge", "dipole", "efield", "density", nullptr}; - input->write_echo(fmt::format("# Conversion factors from {} to {}:\n", - from, to)); + input->write_echo(fmt::format("# Conversion factors from {} to {}:\n", from, to)); auto variable = 
input->variable; for (int i = 0; units[i] != nullptr; ++i) { @@ -473,24 +470,23 @@ void KimInit::do_variables(const std::string &from, const std::string &to) variable->set(var_str + " internal 1.0"); v_unit = variable->find(var_str.c_str()); } - ier = lammps_unit_conversion(units[i], from, to, - conversion_factor); + ier = lammps_unit_conversion(units[i], from, to, conversion_factor); if (ier != 0) - error->all(FLERR, "Unable to obtain conversion factor: " - "unit = {}; from = {}; to = {}", - units[i], from, to); + error->all(FLERR, + "Unable to obtain conversion factor: " + "unit = {}; from = {}; to = {}", + units[i], from, to); variable->internal_set(v_unit, conversion_factor); - input->write_echo(fmt::format("variable {:<15s} internal {:<15.12e}\n", - var_str, conversion_factor)); + input->write_echo( + fmt::format("variable {:<15s} internal {:<15.12e}\n", var_str, conversion_factor)); } input->write_echo("#\n"); } /* ---------------------------------------------------------------------- */ -void KimInit::write_log_cite(class LAMMPS *lmp, - KimInit::model_type_enum model_type, +void KimInit::write_log_cite(class LAMMPS *lmp, KimInit::model_type_enum model_type, char *model_name) { if (!lmp->citeme) return; @@ -501,7 +497,7 @@ void KimInit::write_log_cite(class LAMMPS *lmp, std::string cite_id; if (kim_id.empty()) { - cite_id = fmt::format("KIM potential: unpublished, \"{}\"\n",model_name_str); + cite_id = fmt::format("KIM potential: unpublished, \"{}\"\n", model_name_str); } else { KIM_Collections *collections; int err = KIM_Collections_Create(&collections); @@ -513,12 +509,10 @@ void KimInit::write_log_cite(class LAMMPS *lmp, int extent; if (model_type == MO) { err = KIM_Collections_CacheListOfItemMetadataFiles( - collections, KIM_COLLECTION_ITEM_TYPE_portableModel, - model_name, &extent); + collections, KIM_COLLECTION_ITEM_TYPE_portableModel, model_name, &extent); } else if (model_type == SM) { err = KIM_Collections_CacheListOfItemMetadataFiles( - collections, 
KIM_COLLECTION_ITEM_TYPE_simulatorModel, - model_name, &extent); + collections, KIM_COLLECTION_ITEM_TYPE_simulatorModel, model_name, &extent); } else { lmp->error->all(FLERR, "Unknown model type"); } @@ -529,19 +523,18 @@ void KimInit::write_log_cite(class LAMMPS *lmp, } cite_id = fmt::format("OpenKIM potential: https://openkim.org/cite/" - "{}#item-citation\n\n",kim_id); + "{}#item-citation\n\n", + kim_id); for (int i = 0; i < extent; ++i) { char const *fileName; int availableAsString; char const *fileString; - err = KIM_Collections_GetItemMetadataFile( - collections, i, &fileName, nullptr, nullptr, - &availableAsString, &fileString); + err = KIM_Collections_GetItemMetadataFile(collections, i, &fileName, nullptr, nullptr, + &availableAsString, &fileString); if (err) continue; - if (utils::strmatch(fileName, "^kimcite") && availableAsString) - cite_id += fileString; + if (utils::strmatch(fileName, "^kimcite") && availableAsString) cite_id += fileString; } KIM_Collections_Destroy(&collections); } diff --git a/src/KIM/kim_init.h b/src/KIM/kim_init.h index fa042f2723..56922533ab 100644 --- a/src/KIM/kim_init.h +++ b/src/KIM/kim_init.h @@ -62,7 +62,8 @@ #include "pointers.h" // Forward declaration. 
-typedef struct KIM_Model KIM_Model; +struct KIM_Model; +struct KIM_Collections; namespace LAMMPS_NS { @@ -80,6 +81,8 @@ class KimInit : protected Pointers { void determine_model_type_and_units(char *, char *, char **, KIM_Model *&); void do_init(char *, char *, char *, KIM_Model *&); void do_variables(const std::string &, const std::string &); + + void print_dirs(struct KIM_Collections * const collections) const; }; } // namespace LAMMPS_NS diff --git a/src/KOKKOS/compute_coord_atom_kokkos.cpp b/src/KOKKOS/compute_coord_atom_kokkos.cpp index b71cd1ec4a..9f0e7fc435 100644 --- a/src/KOKKOS/compute_coord_atom_kokkos.cpp +++ b/src/KOKKOS/compute_coord_atom_kokkos.cpp @@ -59,7 +59,7 @@ ComputeCoordAtomKokkos::ComputeCoordAtomKokkos(LAMMPS *lmp, int narg /* ---------------------------------------------------------------------- */ template -ComputeCoordAtomKokkos::~ComputeCoordAtomKokkos() +ComputeCoordAtomKokkos::~ComputeCoordAtomKokkos() { if (copymode) return; diff --git a/src/KOKKOS/fix_rx_kokkos.h b/src/KOKKOS/fix_rx_kokkos.h index a782958045..61e4a05946 100644 --- a/src/KOKKOS/fix_rx_kokkos.h +++ b/src/KOKKOS/fix_rx_kokkos.h @@ -196,7 +196,7 @@ class FixRxKokkos : public FixRX { double& h0, VectorType& y, VectorType& rwk, UserDataType& userData) const; //!< ODE Solver diagnostics. - void odeDiagnostics(void); + void odeDiagnostics(); //!< Special counters per-ode. int *diagnosticCounterPerODEnSteps; @@ -231,7 +231,7 @@ class FixRxKokkos : public FixRX { bool update_kinetics_data; - void create_kinetics_data(void); + void create_kinetics_data(); // Need a dual-view and device-view for dpdThetaLocal and sumWeights since they're used in several callbacks. 
DAT::tdual_efloat_1d k_dpdThetaLocal, k_sumWeights; diff --git a/src/KOKKOS/pair_eam_alloy_kokkos.cpp b/src/KOKKOS/pair_eam_alloy_kokkos.cpp index a8e65c4a92..9421946c3e 100644 --- a/src/KOKKOS/pair_eam_alloy_kokkos.cpp +++ b/src/KOKKOS/pair_eam_alloy_kokkos.cpp @@ -17,23 +17,23 @@ ------------------------------------------------------------------------- */ #include "pair_eam_alloy_kokkos.h" -#include -#include -#include "kokkos.h" -#include "pair_kokkos.h" + #include "atom_kokkos.h" -#include "force.h" +#include "atom_masks.h" #include "comm.h" -#include "neighbor.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory_kokkos.h" -#include "error.h" -#include "atom_masks.h" - -#include "tokenizer.h" +#include "neighbor.h" +#include "pair_kokkos.h" #include "potential_file_reader.h" +#include +#include + using namespace LAMMPS_NS; // Cannot use virtual inheritance on the GPU, so must duplicate code @@ -44,8 +44,8 @@ template PairEAMAlloyKokkos::PairEAMAlloyKokkos(LAMMPS *lmp) : PairEAM(lmp) { respa_enable = 0; + single_enable = 0; one_coeff = 1; - manybody_flag = 1; kokkosable = 1; atomKK = (AtomKokkos *) atom; @@ -261,6 +261,8 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) virial[5] += ev.v[5]; } + if (vflag_fdotr) pair_virial_fdotr_compute(this); + if (eflag_atom) { if (need_dup) Kokkos::Experimental::contribute(d_eatom, dup_eatom); @@ -275,8 +277,6 @@ void PairEAMAlloyKokkos::compute(int eflag_in, int vflag_in) k_vatom.template sync(); } - if (vflag_fdotr) pair_virial_fdotr_compute(this); - copymode = 0; // free duplicated memory @@ -322,6 +322,11 @@ void PairEAMAlloyKokkos::init_style() } +/* ---------------------------------------------------------------------- + convert read-in funcfl potential(s) to standard array format + interpolate all file values to a single grid and cutoff 
+------------------------------------------------------------------------- */ + template void PairEAMAlloyKokkos::file2array() { @@ -524,7 +529,7 @@ void PairEAMAlloyKokkos::unpack_reverse_comm(int n, int *list, doubl h_rho[j] += buf[m++]; } - k_fp.modify_host(); + k_rho.modify_host(); } /* ---------------------------------------------------------------------- */ @@ -581,8 +586,8 @@ void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelA template KOKKOS_INLINE_FUNCTION void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelB, const int &ii, EV_FLOAT& ev) const { - // fp = derivative of embedding energy at each atom // phi = embedding energy at each atom // if rho > rhomax (e.g. due to close approach of two atoms), @@ -620,7 +624,6 @@ void PairEAMAlloyKokkos::operator()(TagPairEAMAlloyKernelB, c if (eflag_global) ev.evdwl += phi; if (eflag_atom) d_eatom[i] += phi; } - } template diff --git a/src/KOKKOS/pair_eam_fs_kokkos.cpp b/src/KOKKOS/pair_eam_fs_kokkos.cpp index b12de79b37..5fbd14d8b3 100644 --- a/src/KOKKOS/pair_eam_fs_kokkos.cpp +++ b/src/KOKKOS/pair_eam_fs_kokkos.cpp @@ -17,23 +17,23 @@ ------------------------------------------------------------------------- */ #include "pair_eam_fs_kokkos.h" -#include -#include -#include "kokkos.h" -#include "pair_kokkos.h" + #include "atom_kokkos.h" -#include "force.h" +#include "atom_masks.h" #include "comm.h" -#include "neighbor.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory_kokkos.h" -#include "error.h" -#include "atom_masks.h" - -#include "tokenizer.h" +#include "neighbor.h" +#include "pair_kokkos.h" #include "potential_file_reader.h" +#include +#include + using namespace LAMMPS_NS; // Cannot use virtual inheritance on the GPU, so must duplicate code @@ -43,9 +43,9 @@ using namespace LAMMPS_NS; template PairEAMFSKokkos::PairEAMFSKokkos(LAMMPS *lmp) : PairEAM(lmp) { - one_coeff = 1; - 
manybody_flag = 1; respa_enable = 0; + single_enable = 0; + one_coeff = 1; kokkosable = 1; atomKK = (AtomKokkos *) atom; @@ -200,9 +200,9 @@ void PairEAMFSKokkos::compute(int eflag_in, int vflag_in) // communicate derivative of embedding function (on the device) - k_fp.template sync(); - comm->forward_comm_pair(this); k_fp.template modify(); + comm->forward_comm_pair(this); + k_fp.template sync(); // compute kernel C @@ -322,6 +322,11 @@ void PairEAMFSKokkos::init_style() } +/* ---------------------------------------------------------------------- + convert read-in funcfl potential(s) to standard array format + interpolate all file values to a single grid and cutoff +------------------------------------------------------------------------- */ + template void PairEAMFSKokkos::file2array() { @@ -581,8 +586,8 @@ void PairEAMFSKokkos::operator()(TagPairEAMFSKernelA::operator()(TagPairEAMFSKernelB, const i if (eflag_global) ev.evdwl += phi; if (eflag_atom) d_eatom[i] += phi; } - } template diff --git a/src/KOKKOS/pair_eam_kokkos.cpp b/src/KOKKOS/pair_eam_kokkos.cpp index c9d2808075..417efc3f7d 100644 --- a/src/KOKKOS/pair_eam_kokkos.cpp +++ b/src/KOKKOS/pair_eam_kokkos.cpp @@ -17,18 +17,20 @@ ------------------------------------------------------------------------- */ #include "pair_eam_kokkos.h" -#include -#include "kokkos.h" -#include "pair_kokkos.h" + #include "atom_kokkos.h" -#include "force.h" +#include "atom_masks.h" #include "comm.h" -#include "neighbor.h" +#include "error.h" +#include "force.h" +#include "kokkos.h" +#include "memory_kokkos.h" #include "neigh_list_kokkos.h" #include "neigh_request.h" -#include "memory_kokkos.h" -#include "error.h" -#include "atom_masks.h" +#include "neighbor.h" +#include "pair_kokkos.h" + +#include using namespace LAMMPS_NS; diff --git a/src/KOKKOS/pair_exp6_rx_kokkos.h b/src/KOKKOS/pair_exp6_rx_kokkos.h index 4d20c59482..40917d832e 100644 --- a/src/KOKKOS/pair_exp6_rx_kokkos.h +++ b/src/KOKKOS/pair_exp6_rx_kokkos.h @@ -45,7 
+45,7 @@ struct PairExp6ParamDataTypeKokkos epsilonOld2, alphaOld2, rmOld2, mixWtSite2old; // Default constructor -- nullify everything. - PairExp6ParamDataTypeKokkos(void) + PairExp6ParamDataTypeKokkos() : n(0) {} }; @@ -63,7 +63,7 @@ struct PairExp6ParamDataTypeKokkosVect nTotalold; // Default constructor -- nullify everything. - PairExp6ParamDataTypeKokkosVect(void) + PairExp6ParamDataTypeKokkosVect() {} }; diff --git a/src/MAKE/MACHINES/Makefile.aarch64_g++_openmpi_armpl b/src/MAKE/MACHINES/Makefile.aarch64_g++_openmpi_armpl index 4174c9c5e7..2ebd2ac744 100644 --- a/src/MAKE/MACHINES/Makefile.aarch64_g++_openmpi_armpl +++ b/src/MAKE/MACHINES/Makefile.aarch64_g++_openmpi_armpl @@ -8,12 +8,12 @@ SHELL = /bin/sh export OMPI_CXX = g++ CC = mpicxx -CCFLAGS = -O3 -march=native -mcpu=native +CCFLAGS = -O3 -march=native -mcpu=native -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -O +LINKFLAGS = -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/MACHINES/Makefile.aarch64_g++_serial_armpl b/src/MAKE/MACHINES/Makefile.aarch64_g++_serial_armpl index 5cb6fa0cde..054b530bc8 100644 --- a/src/MAKE/MACHINES/Makefile.aarch64_g++_serial_armpl +++ b/src/MAKE/MACHINES/Makefile.aarch64_g++_serial_armpl @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = g++ -CCFLAGS = -O3 -march=native -mcpu=native +CCFLAGS = -O3 -march=native -mcpu=native -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = g++ -LINKFLAGS = -O +LINKFLAGS = -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/MACHINES/Makefile.cygwin b/src/MAKE/MACHINES/Makefile.cygwin index 4c47860a56..3d4a50a8ea 100644 --- a/src/MAKE/MACHINES/Makefile.cygwin +++ b/src/MAKE/MACHINES/Makefile.cygwin @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -O +CCFLAGS = -O2 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -O +LINKFLAGS = -O -std=c++11 LIB = SIZE = size diff --git 
a/src/MAKE/MACHINES/Makefile.mac b/src/MAKE/MACHINES/Makefile.mac index 67381fe622..fb749b5759 100644 --- a/src/MAKE/MACHINES/Makefile.mac +++ b/src/MAKE/MACHINES/Makefile.mac @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = c++ -CCFLAGS = -O +CCFLAGS = -O -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = c++ -LINKFLAGS = -O +LINKFLAGS = -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/MACHINES/Makefile.mac_mpi b/src/MAKE/MACHINES/Makefile.mac_mpi index 0b6b4b5ba2..4718c94e51 100644 --- a/src/MAKE/MACHINES/Makefile.mac_mpi +++ b/src/MAKE/MACHINES/Makefile.mac_mpi @@ -8,12 +8,12 @@ SHELL = /bin/sh # unless additional compiler/linker flags or libraries needed for your machine CC = /opt/local/bin/mpicxx-openmpi-mp -CCFLAGS = -O3 +CCFLAGS = -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = /opt/local/bin/mpicxx-openmpi-mp -LINKFLAGS = -O3 +LINKFLAGS = -O3 -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/MACHINES/Makefile.ubuntu b/src/MAKE/MACHINES/Makefile.ubuntu index 6c419ffdfa..f030ce64df 100644 --- a/src/MAKE/MACHINES/Makefile.ubuntu +++ b/src/MAKE/MACHINES/Makefile.ubuntu @@ -11,12 +11,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpic++ -CCFLAGS = -g -O3 # -Wunused +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpic++ -LINKFLAGS = -g -O3 +LINKFLAGS = -g -O3 -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/MACHINES/Makefile.ubuntu_simple b/src/MAKE/MACHINES/Makefile.ubuntu_simple index 98897f964f..e8b58fc804 100644 --- a/src/MAKE/MACHINES/Makefile.ubuntu_simple +++ b/src/MAKE/MACHINES/Makefile.ubuntu_simple @@ -10,12 +10,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpic++ -CCFLAGS = -g -O3 # -Wunused +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpic++ -LINKFLAGS = -g -O3 +LINKFLAGS = -g -O3 -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/Makefile.mpi b/src/MAKE/Makefile.mpi index 
9776b0153e..42f48b4e2c 100644 --- a/src/MAKE/Makefile.mpi +++ b/src/MAKE/Makefile.mpi @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -O3 +LINKFLAGS = -g -O3 -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/Makefile.serial b/src/MAKE/Makefile.serial index 0f5952f317..b527919147 100644 --- a/src/MAKE/Makefile.serial +++ b/src/MAKE/Makefile.serial @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = g++ -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = g++ -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.g++_mpich b/src/MAKE/OPTIONS/Makefile.g++_mpich index 4ea855cfeb..e0c77437f5 100644 --- a/src/MAKE/OPTIONS/Makefile.g++_mpich +++ b/src/MAKE/OPTIONS/Makefile.g++_mpich @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -cxx=g++ -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -cxx=g++ -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.g++_mpich_link b/src/MAKE/OPTIONS/Makefile.g++_mpich_link index 7b92a3e77a..4f2855a9cc 100644 --- a/src/MAKE/OPTIONS/Makefile.g++_mpich_link +++ b/src/MAKE/OPTIONS/Makefile.g++_mpich_link @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = g++ -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = g++ -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.g++_openmpi_link b/src/MAKE/OPTIONS/Makefile.g++_openmpi_link index 6fc71fe2a5..0c9997dbb0 100644 --- a/src/MAKE/OPTIONS/Makefile.g++_openmpi_link +++ b/src/MAKE/OPTIONS/Makefile.g++_openmpi_link @@ -7,12 +7,12 @@ SHELL = /bin/sh # 
specify flags and libraries needed for your compiler CC = g++ -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = g++ -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.g++_serial b/src/MAKE/OPTIONS/Makefile.g++_serial index 4f6f0afe22..d6b9bf3221 100644 --- a/src/MAKE/OPTIONS/Makefile.g++_serial +++ b/src/MAKE/OPTIONS/Makefile.g++_serial @@ -6,13 +6,13 @@ SHELL = /bin/sh # compiler/linker settings # specify flags and libraries needed for your compiler -CC = g++ -std=c++11 -CCFLAGS = -g -O3 +CC = g++ +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M -LINK = g++ -std=c++11 -LINKFLAGS = -g -O +LINK = g++ +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.gpu b/src/MAKE/OPTIONS/Makefile.gpu index 26c98c120d..9ad5cf477c 100644 --- a/src/MAKE/OPTIONS/Makefile.gpu +++ b/src/MAKE/OPTIONS/Makefile.gpu @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.icc_mpich b/src/MAKE/OPTIONS/Makefile.icc_mpich index cf76506da5..c630c42c26 100644 --- a/src/MAKE/OPTIONS/Makefile.icc_mpich +++ b/src/MAKE/OPTIONS/Makefile.icc_mpich @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -cxx=icc -CCFLAGS = -g -O3 -restrict +CCFLAGS = -g -O3 -restrict -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -cxx=icc -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.icc_mpich_link b/src/MAKE/OPTIONS/Makefile.icc_mpich_link index 3994968430..8b89d2509a 100644 --- a/src/MAKE/OPTIONS/Makefile.icc_mpich_link +++ b/src/MAKE/OPTIONS/Makefile.icc_mpich_link @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed 
for your compiler CC = icc -CCFLAGS = -g -O3 -restrict +CCFLAGS = -g -O3 -restrict -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = icc -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.icc_openmpi b/src/MAKE/OPTIONS/Makefile.icc_openmpi index 72e3d44093..0a2c9598a6 100644 --- a/src/MAKE/OPTIONS/Makefile.icc_openmpi +++ b/src/MAKE/OPTIONS/Makefile.icc_openmpi @@ -8,12 +8,12 @@ SHELL = /bin/sh export OMPI_CXX = icc CC = mpicxx -CCFLAGS = -g -O3 -restrict +CCFLAGS = -g -O3 -restrict -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.icc_openmpi_link b/src/MAKE/OPTIONS/Makefile.icc_openmpi_link index e44486aeb5..825d4cdff0 100644 --- a/src/MAKE/OPTIONS/Makefile.icc_openmpi_link +++ b/src/MAKE/OPTIONS/Makefile.icc_openmpi_link @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = icc -CCFLAGS = -g -O3 -restrict +CCFLAGS = -g -O3 -restrict -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = icc -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.icc_serial b/src/MAKE/OPTIONS/Makefile.icc_serial index a81c73c718..2d2da54c68 100644 --- a/src/MAKE/OPTIONS/Makefile.icc_serial +++ b/src/MAKE/OPTIONS/Makefile.icc_serial @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = icc -CCFLAGS = -g -O3 -restrict +CCFLAGS = -g -O3 -restrict -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = icc -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.jpeg b/src/MAKE/OPTIONS/Makefile.jpeg index e8f1f3e96a..268e7b94e8 100644 --- a/src/MAKE/OPTIONS/Makefile.jpeg +++ b/src/MAKE/OPTIONS/Makefile.jpeg @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = 
-fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.omp b/src/MAKE/OPTIONS/Makefile.omp index 0f49cdb15c..573c2d826b 100644 --- a/src/MAKE/OPTIONS/Makefile.omp +++ b/src/MAKE/OPTIONS/Makefile.omp @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 -restrict -fopenmp +CCFLAGS = -g -O3 -restrict -fopenmp -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -O -fopenmp +LINKFLAGS = -g -O -fopenmp -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.opt b/src/MAKE/OPTIONS/Makefile.opt index 8919e6e1d9..2cb5540fd4 100644 --- a/src/MAKE/OPTIONS/Makefile.opt +++ b/src/MAKE/OPTIONS/Makefile.opt @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 -restrict +CCFLAGS = -g -O3 -restrict -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MAKE/OPTIONS/Makefile.png b/src/MAKE/OPTIONS/Makefile.png index 9fd7b9b79c..40ebe43d32 100644 --- a/src/MAKE/OPTIONS/Makefile.png +++ b/src/MAKE/OPTIONS/Makefile.png @@ -7,12 +7,12 @@ SHELL = /bin/sh # specify flags and libraries needed for your compiler CC = mpicxx -CCFLAGS = -g -O3 +CCFLAGS = -g -O3 -std=c++11 SHFLAGS = -fPIC DEPFLAGS = -M LINK = mpicxx -LINKFLAGS = -g -O +LINKFLAGS = -g -O -std=c++11 LIB = SIZE = size diff --git a/src/MPIIO/dump_xyz_mpiio.cpp b/src/MPIIO/dump_xyz_mpiio.cpp index 24fd130e3d..f322a0da58 100644 --- a/src/MPIIO/dump_xyz_mpiio.cpp +++ b/src/MPIIO/dump_xyz_mpiio.cpp @@ -16,17 +16,18 @@ Contributing author: Paul Coffman (IBM) ------------------------------------------------------------------------- */ -#include "omp_compat.h" #include "dump_xyz_mpiio.h" -#include -#include -#include "domain.h" -#include "update.h" #include "compute.h" -#include "memory.h" +#include "domain.h" 
#include "error.h" +#include "memory.h" +#include "update.h" +#include +#include + +#include "omp_compat.h" #if defined(_OPENMP) #include #endif diff --git a/src/MSCG/fix_mscg.cpp b/src/MSCG/fix_mscg.cpp index 8387ff4ebf..1aa644b976 100644 --- a/src/MSCG/fix_mscg.cpp +++ b/src/MSCG/fix_mscg.cpp @@ -140,14 +140,14 @@ void FixMSCG::post_constructor() tagint *tag = atom->tag; int *type = atom->type; int *num_bond = atom->num_bond; - int **bond_atom = atom->bond_atom; + tagint **bond_atom = atom->bond_atom; int *num_angle = atom->num_angle; - int **angle_atom1 = atom->angle_atom1; - int **angle_atom3 = atom->angle_atom3; + tagint **angle_atom1 = atom->angle_atom1; + tagint **angle_atom3 = atom->angle_atom3; int *num_dihedral = atom->num_dihedral; - int **dihedral_atom1 = atom->dihedral_atom1; - int **dihedral_atom3 = atom->dihedral_atom3; - int **dihedral_atom4 = atom->dihedral_atom4; + tagint **dihedral_atom1 = atom->dihedral_atom1; + tagint **dihedral_atom3 = atom->dihedral_atom3; + tagint **dihedral_atom4 = atom->dihedral_atom4; double *prd_half = domain->prd_half; int i,ii,j,jj,jnum,k,l; diff --git a/src/Makefile b/src/Makefile index 4e6fb5cea9..7f02c1e84b 100644 --- a/src/Makefile +++ b/src/Makefile @@ -260,12 +260,11 @@ uppercase_internal=$(if $1,$$(subst $(firstword $1),$(call uppercase_internal,$( uppercase=$(eval uppercase_RESULT:=$(call uppercase_internal,$(uppercase_TABLE),$1))$(uppercase_RESULT) PACKAGEUC = $(call uppercase,$(PACKAGE)) -PACKUSERUC = $(call uppercase,$(PACKUSER)) +PACKAGESORTED = $(sort $(PACKAGEUC)) YESDIR = $(call uppercase,$(@:yes-%=%)) NODIR = $(call uppercase,$(@:no-%=%)) LIBDIR = $(@:lib-%=%) -LIBUSERDIR = $(@:lib-%=%) # List of all targets @@ -332,7 +331,7 @@ lmpinstalledpkgs.h: $(SRC) $(INC) @echo '#ifndef LMP_INSTALLED_PKGS_H' > ${TMPNAME}.lmpinstalled @echo '#define LMP_INSTALLED_PKGS_H' >> ${TMPNAME}.lmpinstalled @echo 'const char * LAMMPS_NS::LAMMPS::installed_packages[] = {' >> ${TMPNAME}.lmpinstalled - @for p in $(PACKAGEUC) 
$(PACKUSERUC); do info=$$($(SHELL) Package.sh $$p installed); \ + @for p in $(PACKAGEUC); do info=$$($(SHELL) Package.sh $$p installed); \ [ -n "$$info" ] && echo "\"$$info\"" | sed -e 's/".*package \(.*\)"/"\1",/' >> ${TMPNAME}.lmpinstalled || :; done @echo ' NULL };' >> ${TMPNAME}.lmpinstalled @echo '#endif' >> ${TMPNAME}.lmpinstalled @@ -469,7 +468,7 @@ tar: @cd ..; tar cvzf src/$(ROOT)_src.tar.gz \ src/Make* src/Package.sh src/Depend.sh src/Install.sh src/Fetch.sh \ src/MAKE src/DEPEND src/*.cpp src/*.h src/STUBS \ - $(patsubst %,src/%,$(PACKAGEUC)) $(patsubst %,src/%,$(PACKUSERUC)) \ + $(patsubst %,src/%,$(PACKAGEUC)) \ --exclude=*/.svn @cd STUBS; $(MAKE) @echo "Created $(ROOT)_src.tar.gz" @@ -502,9 +501,7 @@ format-tests: # Package management package: - @echo 'Standard packages:' $(PACKAGE) - @echo '' - @echo 'User-contributed packages:' $(PACKUSER) + @echo 'Available packages:' $(PACKAGE) @echo '' @echo 'Packages that need system libraries:' $(PACKSYS) @echo '' @@ -615,9 +612,6 @@ lib-%: @if [ -e ../lib/$(LIBDIR)/Install.py ]; then \ echo "Installing lib $(@:lib-%=%)"; \ ( cd ../lib/$(LIBDIR); $(PYTHON) Install.py $(args) ); \ - elif [ -e ../lib/$(LIBUSERDIR)/Install.py ]; then \ - echo "Installing lib $(@:lib-%=%)"; \ - ( cd ../lib/$(LIBUSERDIR); $(PYTHON) Install.py $(args) ); \ else \ echo "Install script for lib $(@:lib-%=%) does not exist"; \ fi; touch main.cpp @@ -630,28 +624,21 @@ lib-%: # purge = delete obsolete and auto-generated package files package-status ps: - @for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p status; done - @echo '' - @for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p status; done + @for p in $(PACKAGESORTED); do $(SHELL) Package.sh $$p status; done package-installed pi: - @for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p installed; done - @for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p installed; done + @for p in $(PACKAGESORTED); do $(SHELL) Package.sh $$p installed; done package-update pu: purge + @echo 'Updating 
installed packages:' @for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p update; done - @echo '' - @for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p update; done package-overwrite: purge - @for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p overwrite; done - @echo '' - @for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p overwrite; done + @echo 'Overwriting installed packages:' + @for p in $(PACKAGESORTED); do $(SHELL) Package.sh $$p overwrite; done package-diff pd: - @for p in $(PACKAGEUC); do $(SHELL) Package.sh $$p diff; done - @echo '' - @for p in $(PACKUSERUC); do $(SHELL) Package.sh $$p diff; done + @for p in $(PACKAGESORTED); do $(SHELL) Package.sh $$p diff; done purge: Purge.list @echo 'Purging obsolete and auto-generated source files' diff --git a/src/Package.sh b/src/Package.sh index f776a02e48..aa217fb555 100755 --- a/src/Package.sh +++ b/src/Package.sh @@ -45,9 +45,8 @@ elif (test $2 = "installed") then # perform a re-install, but only if the package is already installed elif (test $2 = "update") then - echo "Updating src files from $1 package files" if (test $installed = 1) then - echo " updating package $1" + echo "Updating src files from $1 package files" if (test -e Install.sh) then /bin/sh Install.sh 2 else @@ -55,16 +54,14 @@ elif (test $2 = "update") then fi cd .. /bin/sh Depend.sh $1 - else - echo " $1 package is not installed" fi # overwrite, only if installed # overwrite package file with src file, if the two are different elif (test $2 = "overwrite") then - echo "Overwriting $1 package files with src files" if (test $installed = 1) then + echo "Overwriting $1 package files with src files" for file in *.cpp *.h; do if (test ! 
-e ../$file) then continue diff --git a/src/REACTION/fix_bond_react.cpp b/src/REACTION/fix_bond_react.cpp index ac009821cf..69c9c87ddf 100644 --- a/src/REACTION/fix_bond_react.cpp +++ b/src/REACTION/fix_bond_react.cpp @@ -910,7 +910,8 @@ void FixBondReact::post_integrate() int j; for (rxnID = 0; rxnID < nreacts; rxnID++) { - if (max_rxn[rxnID] <= reaction_count_total[rxnID]) continue; + if ((update->ntimestep % nevery[rxnID]) || + (max_rxn[rxnID] <= reaction_count_total[rxnID])) continue; for (int ii = 0; ii < nall; ii++) { partner[ii] = 0; finalpartner[ii] = 0; diff --git a/src/compute_angle_local.cpp b/src/compute_angle_local.cpp index 7401d8b214..2bceb91dd5 100644 --- a/src/compute_angle_local.cpp +++ b/src/compute_angle_local.cpp @@ -194,7 +194,7 @@ void ComputeAngleLocal::compute_local() int ComputeAngleLocal::compute_angles(int flag) { - int i,m,n,na,atom1,atom2,atom3,imol,iatom,atype,ivar; + int i,m,na,atom1,atom2,atom3,imol,iatom,atype,ivar; tagint tagprev; double delx1,dely1,delz1,delx2,dely2,delz2; double rsq1,rsq2,r1,r2,c,theta; diff --git a/src/dump_atom.cpp b/src/dump_atom.cpp index f3a03735a3..0dbd3b3278 100644 --- a/src/dump_atom.cpp +++ b/src/dump_atom.cpp @@ -560,7 +560,8 @@ void DumpAtom::write_binary(int n, double *mybuf) void DumpAtom::write_string(int n, double *mybuf) { - fwrite(mybuf,sizeof(char),n,fp); + if (mybuf) + fwrite(mybuf,sizeof(char),n,fp); } /* ---------------------------------------------------------------------- */ diff --git a/src/dump_cfg.cpp b/src/dump_cfg.cpp index 28f5a35a4b..8e87f4104e 100644 --- a/src/dump_cfg.cpp +++ b/src/dump_cfg.cpp @@ -233,7 +233,8 @@ void DumpCFG::write_data(int n, double *mybuf) void DumpCFG::write_string(int n, double *mybuf) { - fwrite(mybuf,sizeof(char),n,fp); + if (mybuf) + fwrite(mybuf,sizeof(char),n,fp); } /* ---------------------------------------------------------------------- */ diff --git a/src/dump_custom.cpp b/src/dump_custom.cpp index 16c67927bf..b2acdbfc51 100644 --- 
a/src/dump_custom.cpp +++ b/src/dump_custom.cpp @@ -1234,7 +1234,8 @@ void DumpCustom::write_binary(int n, double *mybuf) void DumpCustom::write_string(int n, double *mybuf) { - fwrite(mybuf,sizeof(char),n,fp); + if (mybuf) + fwrite(mybuf,sizeof(char),n,fp); } /* ---------------------------------------------------------------------- */ diff --git a/src/dump_local.cpp b/src/dump_local.cpp index f9a970d2b2..96d8944e2f 100644 --- a/src/dump_local.cpp +++ b/src/dump_local.cpp @@ -399,7 +399,8 @@ void DumpLocal::write_data(int n, double *mybuf) void DumpLocal::write_string(int n, double *mybuf) { - fwrite(mybuf,sizeof(char),n,fp); + if (mybuf) + fwrite(mybuf,sizeof(char),n,fp); } /* ---------------------------------------------------------------------- */ diff --git a/src/dump_xyz.cpp b/src/dump_xyz.cpp index ebbd432f5d..e009937959 100644 --- a/src/dump_xyz.cpp +++ b/src/dump_xyz.cpp @@ -13,12 +13,14 @@ ------------------------------------------------------------------------- */ #include "dump_xyz.h" -#include + #include "atom.h" #include "error.h" #include "memory.h" #include "update.h" +#include + using namespace LAMMPS_NS; #define ONELINE 128 @@ -194,7 +196,8 @@ void DumpXYZ::write_data(int n, double *mybuf) void DumpXYZ::write_string(int n, double *mybuf) { - fwrite(mybuf,sizeof(char),n,fp); + if (mybuf) + fwrite(mybuf,sizeof(char),n,fp); } /* ---------------------------------------------------------------------- */ diff --git a/src/fix_dt_reset.cpp b/src/fix_dt_reset.cpp index 409729c242..c80c976504 100644 --- a/src/fix_dt_reset.cpp +++ b/src/fix_dt_reset.cpp @@ -171,7 +171,7 @@ void FixDtReset::end_of_step() if (vsq > 0.0) dtv = xmax / sqrt(vsq); if (fsq > 0.0) dtf = sqrt(2.0 * xmax / (ftm2v * sqrt(fsq) * massinv)); dt = MIN(dtv, dtf); - if (emax > 0.0 && vsq > 0.0 && fsq > 0.0) { + if ((emax > 0.0) && (fsq * vsq > 0.0)) { dte = emax / sqrt(fsq * vsq) / sqrt(ftm2v * mvv2e); dt = MIN(dt, dte); } diff --git a/src/neighbor.cpp b/src/neighbor.cpp index 
c88885fc54..74bb3bf762 100644 --- a/src/neighbor.cpp +++ b/src/neighbor.cpp @@ -915,11 +915,14 @@ int Neighbor::init_pair() requests[i]->index_bin = -1; flag = lists[i]->bin_method; if (flag == 0) continue; - for (j = 0; j < nbin; j++) - if (neigh_bin[j]->istyle == flag) break; - if (j < nbin && !requests[i]->unique) { - requests[i]->index_bin = j; - continue; + if (!requests[i]->unique) { + for (j = 0; j < nbin; j++) + if (neigh_bin[j]->istyle == flag && + neigh_bin[j]->cutoff_custom == 0.0) break; + if (j < nbin) { + requests[i]->index_bin = j; + continue; + } } BinCreator &bin_creator = binclass[flag-1]; @@ -936,11 +939,14 @@ int Neighbor::init_pair() requests[i]->index_stencil = -1; flag = lists[i]->stencil_method; if (flag == 0) continue; - for (j = 0; j < nstencil; j++) - if (neigh_stencil[j]->istyle == flag) break; - if (j < nstencil && !requests[i]->unique) { - requests[i]->index_stencil = j; - continue; + if (!requests[i]->unique) { + for (j = 0; j < nstencil; j++) + if (neigh_stencil[j]->istyle == flag && + neigh_stencil[j]->cutoff_custom == 0.0) break; + if (j < nstencil) { + requests[i]->index_stencil = j; + continue; + } } StencilCreator &stencil_creator = stencilclass[flag-1]; @@ -2515,6 +2521,7 @@ void Neighbor::modify_params(int narg, char **arg) int i; // Invalidate old user cutoffs + comm->ncollections_cutoff = 0; interval_collection_flag = 1; custom_collection_flag = 1; @@ -2546,9 +2553,10 @@ void Neighbor::modify_params(int narg, char **arg) error->all(FLERR,"Invalid collection/type command"); int ntypes = atom->ntypes; - int n, nlo, nhi, i, j, k; + int nlo, nhi, i, k; // Invalidate old user cutoffs + comm->ncollections_cutoff = 0; interval_collection_flag = 0; custom_collection_flag = 1; @@ -2556,10 +2564,12 @@ void Neighbor::modify_params(int narg, char **arg) memory->create(type2collection,ntypes+1,"neigh:type2collection"); // Erase previous mapping + for (i = 1; i <= ntypes; i++) type2collection[i] = -1; // For each custom range, define 
mapping for types in interval + for (i = 0; i < ncollections; i++){ std::vector words = Tokenizer(arg[iarg+2+i], ",").as_vector(); for (const auto &word : words) { @@ -2573,6 +2583,7 @@ void Neighbor::modify_params(int narg, char **arg) } // Check for undefined atom type + for (i = 1; i <= ntypes; i++){ if (type2collection[i] == -1) { error->all(FLERR,"Type missing in collection/type commnd"); diff --git a/src/pair_hybrid.cpp b/src/pair_hybrid.cpp index d1b5e5bd2e..b5daa111da 100644 --- a/src/pair_hybrid.cpp +++ b/src/pair_hybrid.cpp @@ -512,13 +512,16 @@ void PairHybrid::coeff(int narg, char **arg) // then unset setflag/map assigned to that style before setting it below // in case pair coeff for this sub-style is being called for 2nd time - if (!none && styles[m]->one_coeff) + if (!none && styles[m]->one_coeff) { + if ((strcmp(arg[0],"*") != 0) || (strcmp(arg[1],"*") != 0)) + error->all(FLERR,"Incorrect args for pair coefficients"); for (int i = 1; i <= atom->ntypes; i++) for (int j = i; j <= atom->ntypes; j++) if (nmap[i][j] && map[i][j][0] == m) { setflag[i][j] = 0; nmap[i][j] = 0; } + } // set setflag and which type pairs map to which sub-style // if sub-style is none: set hybrid setflag, wipe out map diff --git a/src/pair_hybrid_overlay.cpp b/src/pair_hybrid_overlay.cpp index db12750f40..e93473e3c9 100644 --- a/src/pair_hybrid_overlay.cpp +++ b/src/pair_hybrid_overlay.cpp @@ -70,6 +70,12 @@ void PairHybridOverlay::coeff(int narg, char **arg) arg[2+multflag] = arg[1]; arg[1+multflag] = arg[0]; + // ensure that one_coeff flag is honored + + if (!none && styles[m]->one_coeff) + if ((strcmp(arg[0],"*") != 0) || (strcmp(arg[1],"*") != 0)) + error->all(FLERR,"Incorrect args for pair coefficients"); + // invoke sub-style coeff() starting with 1st remaining arg if (!none) styles[m]->coeff(narg-1-multflag,arg+1+multflag); diff --git a/src/pair_hybrid_scaled.cpp b/src/pair_hybrid_scaled.cpp index 90e30dd9b2..5bf593d147 100644 --- a/src/pair_hybrid_scaled.cpp +++ 
b/src/pair_hybrid_scaled.cpp @@ -474,6 +474,12 @@ void PairHybridScaled::coeff(int narg, char **arg) arg[2 + multflag] = arg[1]; arg[1 + multflag] = arg[0]; + // ensure that one_coeff flag is honored + + if (!none && styles[m]->one_coeff) + if ((strcmp(arg[0],"*") != 0) || (strcmp(arg[1],"*") != 0)) + error->all(FLERR,"Incorrect args for pair coefficients"); + // invoke sub-style coeff() starting with 1st remaining arg if (!none) styles[m]->coeff(narg - 1 - multflag, &arg[1 + multflag]); diff --git a/src/read_data.cpp b/src/read_data.cpp index c33c65f676..574117e93e 100644 --- a/src/read_data.cpp +++ b/src/read_data.cpp @@ -1321,7 +1321,7 @@ void ReadData::bonds(int firstpass) int *count = nullptr; if (firstpass) { memory->create(count,nlocal,"read_data:count"); - memset(count,0,nlocal*sizeof(int)); + if (count) memset(count,0,nlocal*sizeof(int)); } // read and process bonds @@ -1395,7 +1395,7 @@ void ReadData::angles(int firstpass) int *count = nullptr; if (firstpass) { memory->create(count,nlocal,"read_data:count"); - memset(count,0,nlocal*sizeof(int)); + if (count) memset(count,0,nlocal*sizeof(int)); } // read and process angles @@ -1469,7 +1469,7 @@ void ReadData::dihedrals(int firstpass) int *count = nullptr; if (firstpass) { memory->create(count,nlocal,"read_data:count"); - memset(count,0,nlocal*sizeof(int)); + if (count) memset(count,0,nlocal*sizeof(int)); } // read and process dihedrals @@ -1543,7 +1543,7 @@ void ReadData::impropers(int firstpass) int *count = nullptr; if (firstpass) { memory->create(count,nlocal,"read_data:count"); - memset(count,0,nlocal*sizeof(int)); + if (count) memset(count,0,nlocal*sizeof(int)); } // read and process impropers diff --git a/src/version.h b/src/version.h index 6b4ecca26b..c1b2b627a8 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define LAMMPS_VERSION "20 Sep 2021" +#define LAMMPS_VERSION "29 Sep 2021" diff --git a/tools/singularity/centos7.def b/tools/singularity/centos7.def index f64db0649b..8a3235b58f 
100644 --- a/tools/singularity/centos7.def +++ b/tools/singularity/centos7.def @@ -36,7 +36,7 @@ From: centos:7 # manually install Plumed mkdir plumed cd plumed - version=2.6.1 + version=2.7.2 curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz tar -xzf plumed.tar.gz cd plumed-${version} diff --git a/tools/singularity/centos8.def b/tools/singularity/centos8.def index c48d2718eb..e35f97f453 100644 --- a/tools/singularity/centos8.def +++ b/tools/singularity/centos8.def @@ -3,7 +3,7 @@ From: centos:8 %post dnf -y install epel-release dnf-utils - dnf config-manager --set-enabled PowerTools + dnf config-manager --set-enabled powertools dnf -y update dnf -y install vim-enhanced git file make cmake patch which file ninja-build \ ccache gcc-c++ gcc-gfortran clang libomp-devel gdb valgrind libubsan libasan libtsan \ @@ -42,7 +42,7 @@ From: centos:8 # manually install Plumed mkdir plumed cd plumed - version=2.6.1 + version=2.7.2 curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz tar -xzf plumed.tar.gz cd plumed-${version} diff --git a/tools/singularity/fedora34_mingw.def b/tools/singularity/fedora34_mingw.def index 2f8118778f..40e6f72861 100644 --- a/tools/singularity/fedora34_mingw.def +++ b/tools/singularity/fedora34_mingw.def @@ -30,6 +30,7 @@ From: fedora:34 mingw32-readline mingw64-readline \ mingw32-termcap mingw64-termcap \ mingw32-zlib mingw64-zlib \ + mingw32-zstd mingw64-zstd \ enchant python3-virtualenv doxygen latexmk \ texlive-latex-fonts texlive-pslatex texlive-collection-latexrecommended \ texlive-latex texlive-latexconfig doxygen-latex texlive-collection-latex \ diff --git a/tools/singularity/rocky8.def b/tools/singularity/rocky8.def new file mode 100644 index 0000000000..0827b1d548 --- /dev/null +++ b/tools/singularity/rocky8.def @@ -0,0 +1,110 @@ +BootStrap: docker +From: rockylinux/rockylinux:8 + +%post + dnf -y install 
epel-release dnf-utils + dnf config-manager --set-enabled powertools + dnf -y update + dnf -y install vim-enhanced git file make cmake patch which file ninja-build \ + ccache gcc-c++ gcc-gfortran clang libomp-devel gdb valgrind libubsan libasan libtsan \ + eigen3-devel openblas-devel libpng-devel libjpeg-devel platform-python-devel \ + openmpi-devel mpich-devel fftw-devel voro++-devel gsl-devel hdf5-devel \ + netcdf-devel netcdf-cxx-devel netcdf-mpich-devel netcdf-openmpi-devel \ + enchant python3-virtualenv doxygen diffutils latexmk readline-devel \ + texlive-latex-fonts texlive-pslatex texlive-collection-latexrecommended \ + texlive-latex texlive-latexconfig doxygen-latex texlive-collection-latex \ + texlive-latex-bin texlive-lualatex-math texlive-fncychap texlive-tabulary \ + texlive-framed texlive-wrapfig texlive-upquote texlive-capt-of \ + texlive-needspace texlive-titlesec texlive-anysize texlive-dvipng \ + blas-devel lapack-devel libyaml-devel openkim-models kim-api-devel \ + zstd libzstd-devel + dnf clean all + + # we need to reset any module variables + # inherited from the host. + unset __LMOD_REF_COUNT__LMFILES_ + unset __LMOD_REF_COUNT_PATH + unset __LMOD_REF_COUNT_LD_LIBRARY_PATH + unset __LMOD_REF_COUNT_MANPATH + unset __LMOD_REF_COUNT_MODULEPATH + unset __LMOD_REF_COUNT_LOADEDMODULES + unset _LMFILES_ + unset MODULEPATH + unset MODULESHOME + unset MODULEPATH_ROOT + unset LOADEDMODULES + unset LMOD_SYSTEM_DEFAULT_MODULES + + # load MPI by default + . 
/etc/profile + module load mpi + + # manually install Plumed + mkdir plumed + cd plumed + version=2.7.2 + curl -L -o plumed.tar.gz https://github.com/plumed/plumed2/releases/download/v${version}/plumed-src-${version}.tgz + tar -xzf plumed.tar.gz + cd plumed-${version} + ./configure --disable-doc --prefix=/usr + make + make install + # fix up installation for CentOS and Fedora + mv -v /usr/lib64/pkgconfig/plumed* /usr/share/pkgconfig/ + cd ../../ + rm -rvf plumed + + # create missing readline pkgconfig file + cat > /usr/lib64/pkgconfig/readline.pc <$CUSTOM_PROMPT_ENV <> /etc/ld.so.conf.d/nvidia.conf - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf + libcublas-${CUDA_PKG_VERSION} \ + libcublas-dev-${CUDA_PKG_VERSION} # add missing symlink - ln -s /usr/local/cuda-11.0 /usr/local/cuda - ln -s /usr/local/cuda-11.0/lib64/stubs/libcuda.so /usr/local/cuda-11.0/lib64/stubs/libcuda.so.1 + ln -s /usr/local/cuda-${CUDA_PKG_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_PKG_VERSION}/lib64/stubs/libcuda.so.1 ########################################################################### # NVIDIA OpenCL @@ -134,7 +130,7 @@ From: ubuntu:18.04 ########################################################################### export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64 - git clone -b rocm-3.7.x https://github.com/ROCmSoftwarePlatform/hipCUB.git + git clone -b release/rocm-rel-4.3 https://github.com/ROCmSoftwarePlatform/hipCUB.git mkdir hipCUB/build cd hipCUB/build CXX=hipcc cmake -D BUILD_TEST=off .. 
@@ -169,7 +165,7 @@ From: ubuntu:18.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/singularity/ubuntu18.04_intel_opencl.def b/tools/singularity/ubuntu18.04_intel_opencl.def index 01f0d78d0a..95c744c67d 100644 --- a/tools/singularity/ubuntu18.04_intel_opencl.def +++ b/tools/singularity/ubuntu18.04_intel_opencl.def @@ -106,7 +106,7 @@ From: ubuntu:18.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/singularity/ubuntu18.04_nvidia.def b/tools/singularity/ubuntu18.04_nvidia.def index 2b6fcf8c45..359e1d1c4d 100644 --- a/tools/singularity/ubuntu18.04_nvidia.def +++ b/tools/singularity/ubuntu18.04_nvidia.def @@ -1,5 +1,5 @@ BootStrap: docker -From: nvidia/cuda:11.0-devel-ubuntu18.04 +From: nvidia/cuda:11.4.1-devel-ubuntu18.04 %post export DEBIAN_FRONTEND=noninteractive @@ -105,7 +105,7 @@ From: nvidia/cuda:11.0-devel-ubuntu18.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/singularity/ubuntu20.04.def b/tools/singularity/ubuntu20.04.def index 7f081ab2e3..f85d3ca614 100644 --- a/tools/singularity/ubuntu20.04.def +++ b/tools/singularity/ubuntu20.04.def @@ -100,7 +100,7 @@ From: ubuntu:20.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/singularity/ubuntu20.04_amd_rocm.def b/tools/singularity/ubuntu20.04_amd_rocm.def index 9db8265629..2b4176f183 100644 --- a/tools/singularity/ubuntu20.04_amd_rocm.def +++ b/tools/singularity/ubuntu20.04_amd_rocm.def @@ -3,7 +3,7 @@ From: ubuntu:20.04 
%environment export PATH=/usr/lib/ccache:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64 - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm-4.2.0/llvm/lib + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/opt/rocm/lib:/opt/rocm-4.3.0/llvm/lib %post export DEBIAN_FRONTEND=noninteractive apt-get update @@ -91,7 +91,7 @@ From: ubuntu:20.04 ########################################################################### export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64 - git clone -b rocm-4.1.x https://github.com/ROCmSoftwarePlatform/hipCUB.git + git clone -b release/rocm-rel-4.3 https://github.com/ROCmSoftwarePlatform/hipCUB.git mkdir hipCUB/build cd hipCUB/build CXX=hipcc cmake -D BUILD_TEST=off .. @@ -126,7 +126,7 @@ From: ubuntu:20.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/singularity/ubuntu20.04_gpu.def b/tools/singularity/ubuntu20.04_gpu.def index 44f975d2c8..3ea759078b 100644 --- a/tools/singularity/ubuntu20.04_gpu.def +++ b/tools/singularity/ubuntu20.04_gpu.def @@ -2,11 +2,11 @@ BootStrap: docker From: ubuntu:20.04 %environment - export PATH=/usr/lib/ccache:/usr/local/cuda-11.0/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64 - export CUDADIR=/usr/local/cuda-11.0 - export CUDA_PATH=/usr/local/cuda-11.0 - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.0/lib64:/opt/rocm/lib:/opt/rocm-4.2.0/llvm/lib - export LIBRARY_PATH=/usr/local/cuda-11.0/lib64/stubs + export PATH=/usr/lib/ccache:/usr/local/cuda-11.4/bin:${PATH}:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64 + export CUDADIR=/usr/local/cuda-11.4 + export CUDA_PATH=/usr/local/cuda-11.4 + export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda-11.4/lib64:/opt/rocm/lib:/opt/rocm-4.3.0/llvm/lib + export 
LIBRARY_PATH=/usr/local/cuda-11.4/lib64/stubs %post export DEBIAN_FRONTEND=noninteractive apt-get update @@ -101,23 +101,19 @@ From: ubuntu:20.04 add-apt-repository "deb http://developer.download.nvidia.com/compute/cuda/repos/ubuntu2004/x86_64/ /" apt-get update - export CUDA_PKG_VERSION=11.0 + export CUDA_PKG_VERSION=11.4 apt-get install -y --no-install-recommends \ - cuda-libraries-$CUDA_PKG_VERSION \ - cuda-command-line-tools-$CUDA_PKG_VERSION \ - cuda-libraries-dev-$CUDA_PKG_VERSION \ - cuda-minimal-build-$CUDA_PKG_VERSION \ + cuda-libraries-${CUDA_PKG_VERSION} \ + cuda-command-line-tools-${CUDA_PKG_VERSION} \ + cuda-libraries-dev-${CUDA_PKG_VERSION} \ + cuda-minimal-build-${CUDA_PKG_VERSION} \ cuda-compat-$CUDA_PKG_VERSION \ - libcublas-11-0 \ - libcublas-dev-11-0 - - echo "/usr/local/nvidia/lib" >> /etc/ld.so.conf.d/nvidia.conf - echo "/usr/local/nvidia/lib64" >> /etc/ld.so.conf.d/nvidia.conf + libcublas-${CUDA_PKG_VERSION} \ + libcublas-dev-${CUDA_PKG_VERSION} # add missing symlink - ln -s /usr/local/cuda-11.0 /usr/local/cuda - ln -s /usr/local/cuda-11.0/lib64/stubs/libcuda.so /usr/local/cuda-11.0/lib64/stubs/libcuda.so.1 + ln -s /usr/local/cuda-${CUDA_PKG_VERSION}/lib64/stubs/libcuda.so /usr/local/cuda-${CUDA_PKG_VERSION}/lib64/stubs/libcuda.so.1 ########################################################################### # NVIDIA OpenCL @@ -131,7 +127,7 @@ From: ubuntu:20.04 ########################################################################### export PATH=$PATH:/opt/rocm/bin:/opt/rocm/profiler/bin:/opt/rocm/opencl/bin/x86_64 - git clone -b rocm-4.2.x https://github.com/ROCmSoftwarePlatform/hipCUB.git + git clone -b release/rocm-rel-4.3 https://github.com/ROCmSoftwarePlatform/hipCUB.git mkdir hipCUB/build cd hipCUB/build CXX=hipcc cmake -D BUILD_TEST=off .. 
@@ -166,7 +162,7 @@ From: ubuntu:20.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/singularity/ubuntu20.04_intel_opencl.def b/tools/singularity/ubuntu20.04_intel_opencl.def index 82ca53a851..7c83ecb5b1 100644 --- a/tools/singularity/ubuntu20.04_intel_opencl.def +++ b/tools/singularity/ubuntu20.04_intel_opencl.def @@ -99,7 +99,7 @@ From: ubuntu:20.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/singularity/ubuntu20.04_nvidia.def b/tools/singularity/ubuntu20.04_nvidia.def index 7bbc3ab0b5..ddcbd34db9 100644 --- a/tools/singularity/ubuntu20.04_nvidia.def +++ b/tools/singularity/ubuntu20.04_nvidia.def @@ -1,5 +1,5 @@ BootStrap: docker -From: nvidia/cuda:11.0-devel-ubuntu20.04 +From: nvidia/cuda:11.4.1-devel-ubuntu20.04 %post export DEBIAN_FRONTEND=noninteractive @@ -102,7 +102,7 @@ From: nvidia/cuda:11.0-devel-ubuntu20.04 # Plumed ########################################################################### - export PLUMED_PKG_VERSION=2.6.1 + export PLUMED_PKG_VERSION=2.7.2 mkdir plumed cd plumed diff --git a/tools/swig/CMakeLists.txt b/tools/swig/CMakeLists.txt index 204b351ed6..966837dc2f 100644 --- a/tools/swig/CMakeLists.txt +++ b/tools/swig/CMakeLists.txt @@ -90,7 +90,15 @@ if(BUILD_SWIG_TCL) # build loadable Tcl module set_property(SOURCE lammps.i PROPERTY SWIG_MODULE_NAME tcllammps) swig_add_library(tcllammps TYPE MODULE LANGUAGE tcl SOURCES lammps.i) - find_package(TCL REQUIRED) + find_package(TCL) + if(NOT TCL_FOUND) + message(FATAL_ERROR "Tcl development headers and libraries are required") + endif() + find_package(TclStub) + if(TCL_STUB_LIBRARY) + target_compile_definitions(tcllammps PRIVATE USE_TCL_STUBS) + target_link_libraries(tcllammps PRIVATE 
${TCL_STUB_LIBRARY}) + endif() target_include_directories(tcllammps PRIVATE ${TCL_INCLUDE_PATH}) swig_link_libraries(tcllammps PRIVATE lammps ${TCL_LIBRARY}) # build extended Tcl shell binary diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt index bb746c13ec..2491c26796 100644 --- a/unittest/CMakeLists.txt +++ b/unittest/CMakeLists.txt @@ -7,7 +7,7 @@ add_test(NAME RunLammps COMMAND $ -log none -echo none -in in.empty WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(RunLammps PROPERTIES - ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1" + ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=1" PASS_REGULAR_EXPRESSION "^LAMMPS \\([0-9]+ [A-Za-z]+ 2[0-9][0-9][0-9]\\)") # check if the compiled executable will print the help message @@ -15,7 +15,7 @@ add_test(NAME HelpMessage COMMAND $ -h WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(HelpMessage PROPERTIES - ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1" + ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=1" PASS_REGULAR_EXPRESSION ".*Large-scale Atomic/Molecular Massively Parallel Simulator -.*Usage example:.*") # check if the compiled executable will error out on an invalid command line flag @@ -23,7 +23,7 @@ add_test(NAME InvalidFlag COMMAND $ -xxx WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) set_tests_properties(InvalidFlag PROPERTIES - ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1" + ENVIRONMENT "TSAN_OPTIONS=ignore_noninstrumented_modules=1;HWLOC_HIDE_ERRORS=1" PASS_REGULAR_EXPRESSION "ERROR: Invalid command-line argument.*") if(BUILD_MPI)