more changes to doc pages and CMakeLists.txt
This commit is contained in:
@ -371,8 +371,8 @@ if(PKG_USER-NETCDF)
|
||||
endif()
|
||||
|
||||
if(PKG_USER-SMD)
|
||||
option(DOWNLOAD_Eigen3 "Download Eigen3 (instead of using the system's one)" OFF)
|
||||
if(DOWNLOAD_Eigen3)
|
||||
option(DOWNLOAD_EIGEN3 "Download Eigen3 (instead of using the system's one)" OFF)
|
||||
if(DOWNLOAD_EIGEN3)
|
||||
include(ExternalProject)
|
||||
ExternalProject_Add(Eigen3_build
|
||||
URL http://bitbucket.org/eigen/eigen/get/3.3.4.tar.gz
|
||||
@ -385,7 +385,7 @@ if(PKG_USER-SMD)
|
||||
else()
|
||||
find_package(Eigen3)
|
||||
if(NOT Eigen3_FOUND)
|
||||
message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_Eigen3=ON to download it")
|
||||
message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_EIGEN3=ON to download it")
|
||||
endif()
|
||||
endif()
|
||||
include_directories(${EIGEN3_INCLUDE_DIR})
|
||||
@ -807,16 +807,26 @@ if(PKG_GPU)
|
||||
${GPU_SOURCES_DIR}/fix_gpu.h
|
||||
${GPU_SOURCES_DIR}/fix_gpu.cpp)
|
||||
|
||||
set(GPU_API "OpenCL" CACHE STRING "API used by GPU package")
|
||||
set_property(CACHE GPU_API PROPERTY STRINGS OpenCL CUDA)
|
||||
set(GPU_API "opencl" CACHE STRING "API used by GPU package")
|
||||
set_property(CACHE GPU_API PROPERTY STRINGS opencl cuda)
|
||||
string(TOUPPER ${GPU_API} GPU_API_DEFINE)
|
||||
|
||||
set(GPU_PREC "SINGLE_DOUBLE" CACHE STRING "LAMMPS GPU precision size")
|
||||
set_property(CACHE GPU_PREC PROPERTY STRINGS SINGLE_DOUBLE SINGLE_SINGLE DOUBLE_DOUBLE)
|
||||
set(GPU_PREC "mixed" CACHE STRING "LAMMPS GPU precision")
|
||||
set_property(CACHE GPU_PREC PROPERTY STRINGS double mixed single)
|
||||
string(TOUPPER ${GPU_PREC} GPU_PREC_DEFINE)
|
||||
|
||||
if(GPU_PREC_DEFINE STREQUAL "DOUBLE")
|
||||
set(GPU_PREC_SETTING "DOUBLE_DOUBLE")
|
||||
elseif(GPU_PREC_DEFINE STREQUAL "MIXED")
|
||||
set(GPU_PREC_SETTING "SINGLE_DOUBLE")
|
||||
elseif(GPU_PREC_DEFINE STREQUAL "SINGLE")
|
||||
set(GPU_PREC_SETTING "SINGLE_SINGLE")
|
||||
endif()
|
||||
|
||||
file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cpp)
|
||||
file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu)
|
||||
|
||||
if(GPU_API STREQUAL "CUDA")
|
||||
if(GPU_API_DEFINE STREQUAL "CUDA")
|
||||
find_package(CUDA REQUIRED)
|
||||
find_program(BIN2C bin2c)
|
||||
if(NOT BIN2C)
|
||||
@ -824,7 +834,7 @@ if(PKG_GPU)
|
||||
endif()
|
||||
option(CUDPP_OPT "Enable CUDPP_OPT" ON)
|
||||
|
||||
set(GPU_ARCH "sm_30" CACHE STRING "LAMMPS GPU CUDA SM architecture (e.g. sm_60)")
|
||||
set(GPU_ARCH "30" CACHE STRING "LAMMPS GPU CUDA SM architecture (e.g. 60)")
|
||||
|
||||
file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/*.cu)
|
||||
list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)
|
||||
@ -838,10 +848,10 @@ if(PKG_GPU)
|
||||
endif()
|
||||
|
||||
cuda_compile_cubin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS
|
||||
-DUNIX -O3 -Xptxas -v --use_fast_math -DNV_KERNEL -DUCL_CUDADR -arch=${GPU_ARCH} -D_${GPU_PREC})
|
||||
-DUNIX -O3 -Xptxas -v --use_fast_math -DNV_KERNEL -DUCL_CUDADR -arch=sm_${GPU_ARCH} -D_${GPU_PREC_SETTING})
|
||||
|
||||
cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS $<$<BOOL:${BUILD_SHARED_LIBS}>:-Xcompiler=-fPIC>
|
||||
-DUNIX -O3 -Xptxas -v --use_fast_math -DUCL_CUDADR -arch=${GPU_ARCH} -D_${GPU_PREC})
|
||||
-DUNIX -O3 -Xptxas -v --use_fast_math -DUCL_CUDADR -arch=sm_${GPU_ARCH} -D_${GPU_PREC_SETTING})
|
||||
|
||||
foreach(CU_OBJ ${GPU_GEN_OBJS})
|
||||
get_filename_component(CU_NAME ${CU_OBJ} NAME_WE)
|
||||
@ -858,7 +868,7 @@ if(PKG_GPU)
|
||||
add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS})
|
||||
target_link_libraries(gpu ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
|
||||
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS})
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC} -DMPI_GERYON -DUCL_NO_EXIT)
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT)
|
||||
if(CUDPP_OPT)
|
||||
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
|
||||
target_compile_definitions(gpu PRIVATE -DUSE_CUDPP)
|
||||
@ -872,10 +882,11 @@ if(PKG_GPU)
|
||||
target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
|
||||
|
||||
|
||||
elseif(GPU_API STREQUAL "OpenCL")
|
||||
elseif(GPU_API_DEFINE STREQUAL "OPENCL")
|
||||
find_package(OpenCL REQUIRED)
|
||||
set(OCL_TUNE "GENERIC" CACHE STRING "OpenCL Device Tuning")
|
||||
set_property(CACHE OCL_TUNE PROPERTY STRINGS INTEL FERMI KEPLER CYPRESS GENERIC)
|
||||
set(OCL_TUNE "generic" CACHE STRING "OpenCL Device Tuning")
|
||||
set_property(CACHE OCL_TUNE PROPERTY STRINGS intel fermi kepler cypress generic)
|
||||
string(TOUPPER ${OCL_TUNE} OCL_TUNE_DEFINE)
|
||||
|
||||
include(OpenCLUtils)
|
||||
set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
|
||||
@ -897,7 +908,7 @@ if(PKG_GPU)
|
||||
add_library(gpu STATIC ${GPU_LIB_SOURCES})
|
||||
target_link_libraries(gpu ${OpenCL_LIBRARIES})
|
||||
target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS})
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
|
||||
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE_DEFINE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
|
||||
target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)
|
||||
|
||||
list(APPEND LAMMPS_LINK_LIBS gpu)
|
||||
@ -1148,9 +1159,9 @@ if(BUILD_MPI)
|
||||
endif()
|
||||
if(PKG_GPU)
|
||||
message(STATUS "GPU Api: ${GPU_API}")
|
||||
if(GPU_API STREQUAL "CUDA")
|
||||
message(STATUS "GPU Arch: ${GPU_ARCH}")
|
||||
elseif(GPU_API STREQUAL "OpenCL")
|
||||
if(GPU_API_DEFINE STREQUAL "CUDA")
|
||||
message(STATUS "GPU Arch: sm_${GPU_ARCH}")
|
||||
elseif(GPU_API_DEFINE STREQUAL "OPENCL")
|
||||
message(STATUS "OCL Tune: ${OCL_TUNE}")
|
||||
endif()
|
||||
message(STATUS "GPU Precision: ${GPU_PREC}")
|
||||
|
||||
@ -19,18 +19,21 @@ as described on the "Install"_Install.html doc page.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Build_cmake
|
||||
Build_make
|
||||
Build_link
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Build_basics
|
||||
Build_settings
|
||||
Build_package
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Build_extras
|
||||
|
||||
|
||||
@ -18,7 +18,6 @@ CMake and make:
|
||||
"Build the LAMMPS documentation"_#doc
|
||||
"Install LAMMPS after a build"_#install :ul
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
Serial vs parallel build :h3,link(serial)
|
||||
|
||||
@ -49,7 +49,6 @@ This is the list of packages that may require additional steps.
|
||||
"USER-SMD"_#user-smd,
|
||||
"USER-VTK"_#user-vtk :tb(c=6,ea=c)
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
COMPRESS package :h4,link(compress)
|
||||
@ -81,15 +80,15 @@ which GPU hardware to build for.
|
||||
|
||||
-D GPU_API=value # value = opencl (default) or cuda
|
||||
-D GPU_PREC=value # precision setting
|
||||
# value = single or mixed (default) or double
|
||||
# value = double or mixed (default) or single
|
||||
-D OCL_TUNE=value # hardware choice for GPU_API=opencl
|
||||
# generic (default) or intel (Intel CPU) or phi (Intel Xeon Phi) or fermi, kepler, cypress (NVIDIA)
|
||||
# generic (default) or intel (Intel CPU) or fermi, kepler, cypress (NVIDIA)
|
||||
-D GPU_ARCH=value # hardware choice for GPU_API=cuda
|
||||
# value = sm20 (Fermi) or sm30 (Kepler) or sm50 (Maxwell) or sm60 (Pascal) or sm70 (Volta)
|
||||
# value = 20 (Fermi) or 30 (Kepler) or 50 (Maxwell) or 60 (Pascal) or 70 (Volta)
|
||||
# default is 30 (Kepler)
|
||||
-D CUDPP_OPT=value # optimization setting for GPU_API=cuda
|
||||
# enables CUDA Performance Primitives Optimizations
|
||||
# on (default) or off :pre
|
||||
# yes (default) or no :pre
|
||||
|
||||
[Traditional make]:
|
||||
|
||||
@ -119,7 +118,7 @@ Makefile.machine you start from via the -h, -a, -p, -e switches, and
|
||||
also save a copy of the new Makefile if desired:
|
||||
|
||||
CUDA_HOME = where NVIDIA CUDA software is installed on your system
|
||||
CUDA_ARCH = what GPU hardware you have (see help message for details)
|
||||
CUDA_ARCH = what GPU hardware you have (same as CMake, see help message for details)
|
||||
CUDA_PRECISION = precision (double, mixed, single)
|
||||
EXTRAMAKE = which Makefile.lammps.* file to copy to Makefile.lammps :ul
|
||||
|
||||
@ -163,7 +162,7 @@ package?" page.
|
||||
|
||||
[CMake build]:
|
||||
|
||||
-D DOWNLOAD_KIM=value # download OpenKIM API v1 for build, value = off (default) or on
|
||||
-D DOWNLOAD_KIM=value # download OpenKIM API v1 for build, value = no (default) or yes
|
||||
-D KIM_LIBRARY=path # path to KIM shared library (only needed if a custom location)
|
||||
-D KIM_INCLUDE_DIR=path # path to KIM include directory (only needed if a custom location) :pre
|
||||
|
||||
@ -183,17 +182,65 @@ make lib-kim args="-p /usr/local/kim-api" # use an existing KIM API installation
|
||||
make lib-kim args="-p /usr/local/kim-api -a EAM_Dynamo_Ackland_W__MO_141627196590_002" # ditto but add one model or driver :pre
|
||||
|
||||
:line
|
||||
|
||||
|
||||
KOKKOS package :h4,link(kokkos)
|
||||
|
||||
To build with this package, you must choose which hardware you want to
|
||||
build for, either CPUs (multi-threading via OpenMP) or KNLs (OpenMP)
|
||||
or GPUs (Cuda).
|
||||
or GPUs (NVIDIA Cuda).
|
||||
|
||||
For a CMake or make build, these are the possible choices for the
|
||||
KOKKOS_ARCH settings described below. Note that for CMake, these are
|
||||
really Kokkos variables, not LAMMPS variables. Hence you must use
|
||||
case-sensitive values, e.g. BDW, not bdw.
|
||||
|
||||
ARMv80 = ARMv8.0 Compatible CPU
|
||||
ARMv81 = ARMv8.1 Compatible CPU
|
||||
ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU
|
||||
BGQ = IBM Blue Gene/Q CPUs
|
||||
Power8 = IBM POWER8 CPUs
|
||||
Power9 = IBM POWER9 CPUs
|
||||
SNB = Intel Sandy/Ivy Bridge CPUs
|
||||
HSW = Intel Haswell CPUs
|
||||
BDW = Intel Broadwell Xeon E-class CPUs
|
||||
SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)
|
||||
KNC = Intel Knights Corner Xeon Phi
|
||||
KNL = Intel Knights Landing Xeon Phi
|
||||
Kepler30 = NVIDIA Kepler generation CC 3.0
|
||||
Kepler32 = NVIDIA Kepler generation CC 3.2
|
||||
Kepler35 = NVIDIA Kepler generation CC 3.5
|
||||
Kepler37 = NVIDIA Kepler generation CC 3.7
|
||||
Maxwell50 = NVIDIA Maxwell generation CC 5.0
|
||||
Maxwell52 = NVIDIA Maxwell generation CC 5.2
|
||||
Maxwell53 = NVIDIA Maxwell generation CC 5.3
|
||||
Pascal60 = NVIDIA Pascal generation CC 6.0
|
||||
Pascal61 = NVIDIA Pascal generation CC 6.1 :ul
|
||||
|
||||
[CMake build]:
|
||||
|
||||
TODO: how to do this, how to select CPU vs KNL vs GPU, and specify
|
||||
the particular flavor of hardware: e.g. HSW vs BDW
|
||||
For multicore CPUs using OpenMP, set these 2 variables:
|
||||
|
||||
-D KOKKOS_ARCH=archCPU # archCPU = CPU from list above
|
||||
-D KOKKOS_ENABLE_OPENMP=yes :pre
|
||||
|
||||
For Intel KNLs using OpenMP, set these 2 variables:
|
||||
|
||||
-D KOKKOS_ARCH=KNL
|
||||
-D KOKKOS_ENABLE_OPENMP=yes :pre
|
||||
|
||||
For NVIDIA GPUs using CUDA, set these 4 variables:
|
||||
|
||||
-D KOKKOS_ARCH="archCPU;archGPU" # archCPU = CPU from list above that is hosting the GPU
|
||||
# archGPU = GPU from list above
|
||||
-D KOKKOS_ENABLE_CUDA=yes
|
||||
-D KOKKOS_ENABLE_OPENMP=yes
|
||||
-D CMAKE_CXX_COMPILER=wrapper # wrapper = full path to Cuda nvcc wrapper :pre
|
||||
|
||||
The wrapper value is the Cuda nvcc compiler wrapper provided in the
|
||||
Kokkos library: lib/kokkos/bin/nvcc_wrapper. The setting should
|
||||
include the full path name to the wrapper, e.g.
|
||||
|
||||
-D CMAKE_CXX_COMPILER=/home/username/lammps/lib/kokkos/bin/nvcc_wrapper :pre
|
||||
|
||||
[Traditional make]:
|
||||
|
||||
@ -204,16 +251,7 @@ src/MAKE/OPTIONS/Makefile.kokkos* files for examples.
|
||||
For multicore CPUs using OpenMP:
|
||||
|
||||
KOKKOS_DEVICES = OpenMP
|
||||
KOKKOS_ARCH = HSW :pre
|
||||
|
||||
Possible values are:
|
||||
|
||||
HSW for Intel Haswell
|
||||
SNB for Intel SandyBridge
|
||||
BDW for Intel Broadwell
|
||||
BGQ for IBM BlueGene Q
|
||||
Power7 for IBM
|
||||
Power8 for IBM :ul
|
||||
KOKKOS_ARCH = archCPU # archCPU = CPU from list above :pre
|
||||
|
||||
For Intel KNLs using OpenMP:
|
||||
|
||||
@ -223,8 +261,8 @@ KOKKOS_ARCH = KNL :pre
|
||||
For NVIDIA GPUs using CUDA:
|
||||
|
||||
KOKKOS_DEVICES = Cuda
|
||||
KOKKOS_ARCH = Pascal60,Power8 # P100 hosted by an IBM Power8, etc
|
||||
KOKKOS_ARCH = Kepler37,Power8 # K80 hosted by an IBM Power8, etc :pre
|
||||
KOKKOS_ARCH = archCPU,archGPU # archCPU = CPU from list above that is hosting the GPU
|
||||
# archGPU = GPU from list above :pre
|
||||
|
||||
For GPUs, you also need these 2 lines in your Makefile.machine before
|
||||
the CC line is defined, in this case for use with OpenMPI mpicxx. The
|
||||
@ -245,7 +283,7 @@ library.
|
||||
|
||||
[CMake build]:
|
||||
|
||||
-D DOWNLOAD_LATTE=value # download LATTE for build, value = off (default) or on
|
||||
-D DOWNLOAD_LATTE=value # download LATTE for build, value = no (default) or yes
|
||||
-D LATTE_LIBRARY=path # path to LATTE shared library (only needed if a custom location) :pre
|
||||
|
||||
[Traditional make]:
|
||||
@ -320,7 +358,7 @@ lib/mscg/README and MSCG/Install files for more details.
|
||||
|
||||
[CMake build]:
|
||||
|
||||
-D DOWNLOAD_MSCG=value # download MSCG for build, value = off (default) or on
|
||||
-D DOWNLOAD_MSCG=value # download MSCG for build, value = no (default) or yes
|
||||
-D MSCG_LIBRARY=path # path to MSCG shared library (only needed if a custom location)
|
||||
-D MSCG_INCLUDE_DIR=path # path to MSCG include directory (only needed if a custom location) :pre
|
||||
|
||||
@ -400,12 +438,12 @@ lib/python/README for more details.
|
||||
|
||||
-D PYTHON_EXECUTABLE=path # path to Python executable to use :pre
|
||||
|
||||
Without this setting, CMake will use your system default Python. To
|
||||
use a different Python version, you can either create a virtualenv,
|
||||
activate it and then run cmake. Or you can set the PYTHON_EXECUTABLE
|
||||
variable to specify which Python interpreter should be used. Note
|
||||
that you will also need to have the development headers installed
|
||||
for this version, e.g. python2-devel.
|
||||
Without this setting, CMake will use the default Python on your
|
||||
system. To use a different Python version, you can either create a
|
||||
virtualenv, activate it and then run cmake. Or you can set the
|
||||
PYTHON_EXECUTABLE variable to specify which Python interpreter should
|
||||
be used. Note that you will also need to have the development
|
||||
headers installed for this version, e.g. python2-devel.
|
||||
|
||||
[Traditional make]:
|
||||
|
||||
@ -464,7 +502,7 @@ library"_voro_home.
|
||||
|
||||
[CMake build]:
|
||||
|
||||
-D DOWNLOAD_VORO=value # download Voro++ for build, value = off (default) or on
|
||||
-D DOWNLOAD_VORO=value # download Voro++ for build, value = no (default) or yes
|
||||
-D VORO_LIBRARY=path # (only needed if at custom location) path to VORO shared library
|
||||
-D VORO_INCLUDE_DIR=path # (only needed if at custom location) path to VORO include directory :pre
|
||||
|
||||
@ -486,7 +524,6 @@ created in lib/voronoi to point to the Voro++ src dir. When LAMMPS
|
||||
builds in src it will use these links. You should not need to edit
|
||||
the lib/voronoi/Makefile.lammps file.
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
USER-ATC package :h4,link(user-atc)
|
||||
@ -642,15 +679,16 @@ USER-INTEL package :h4,link(user-intel)
|
||||
|
||||
To build with this package, you must choose which hardware you want to
|
||||
build for, either Intel CPUs or Intel KNLs. You should also typically
|
||||
install the USER-OMP package, as it can be used in tandem with the
|
||||
USER-INTEL package to good effect, as explained on the "Speed
|
||||
"install the USER-OMP package"_#user-omp, as it can be used in tandem
|
||||
with the USER-INTEL package to good effect, as explained on the "Speed
|
||||
intel"_Speed_intel.html doc page.
|
||||
|
||||
[CMake build]:
|
||||
|
||||
-D INTEL_ARCH=value # value = cpu (default) or knl
|
||||
-D BUILD_OMP=yes # also required to build with the USER-INTEL package :pre
|
||||
|
||||
Requires an Intel compiler, Intel TBB and MKL and has to be built with "-D BUILD_OMP=on".
|
||||
Requires an Intel compiler as well as the Intel TBB and MKL libraries.
|
||||
|
||||
[Traditional make]:
|
||||
|
||||
@ -821,22 +859,19 @@ successfully build on your system.
|
||||
|
||||
USER-SMD package :h4,link(user-smd)
|
||||
|
||||
To build with this package, you must download the Eigen library.
|
||||
Eigen is a template library, so you do not need to build it.
|
||||
To build with this package, you must download the Eigen3 library.
|
||||
Eigen3 is a template library, so you do not need to build it.
|
||||
|
||||
[CMake build]:
|
||||
|
||||
-D EIGEN3_INCLUDE_DIR=path # path to Eigen library :pre
|
||||
-D DOWNLOAD_EIGEN3=value # download Eigen3, value = no (default) or yes
|
||||
-D EIGEN3_INCLUDE_DIR=path # path to Eigen library (only needed if a custom location) :pre
|
||||
|
||||
TODO: there is no download option for the Eigen lib?
|
||||
|
||||
CMake will not download the Eigen library. But once you have done
|
||||
that, a CMake build of LAMMPS with "-D PKG_USER-SMD=yes" should work.
|
||||
Set EIGEN3_INCLUDE_DIR if CMake cannot find the Eigen library.
|
||||
Set EIGEN3_INCLUDE_DIR if CMake cannot find the Eigen3 library.
|
||||
|
||||
[Traditional make]:
|
||||
|
||||
You can download the Eigen library manually if you prefer; follow the
|
||||
You can download the Eigen3 library manually if you prefer; follow the
|
||||
instructions in lib/smd/README. You can also do it in one step from
|
||||
the lammps/src dir, using a command like these, which simply invoke
|
||||
the lib/smd/Install.py script with the specified args:
|
||||
|
||||
@ -130,16 +130,16 @@ the Git or SVN repositories, no packages are pre-installed.
|
||||
|
||||
[CMake shortcuts for installing many packages]:
|
||||
|
||||
Instead of specifying all the CMake options via the command-line, CMake allows
|
||||
initializing the variable cache using script files. These are regular CMake
|
||||
files which can manipulate and set variables, and can also contain control flow
|
||||
constructs.
|
||||
Instead of specifying all the CMake options via the command-line,
|
||||
CMake allows initializing the variable cache using script files. These
|
||||
are regular CMake files which can manipulate and set variables, and
|
||||
can also contain control flow constructs.
|
||||
|
||||
LAMMPS includes several of these files to define configuration "presets",
|
||||
similar to the options that exist for the Make based system. Using these files
|
||||
you can enable/disable portions of the available packages in LAMMPS. If you need a
|
||||
custom preset you can take one of them as a starting point and customize it to your
|
||||
needs.
|
||||
LAMMPS includes several of these files to define configuration
|
||||
"presets", similar to the options that exist for the Make based
|
||||
system. Using these files you can enable/disable portions of the
|
||||
available packages in LAMMPS. If you need a custom preset you can take
|
||||
one of them as a starting point and customize it to your needs.
|
||||
|
||||
cmake -C ../cmake/presets/all_on.cmake \[OPTIONS\] ../cmake | enable all packages
|
||||
cmake -C ../cmake/presets/all_off.cmake \[OPTIONS\] ../cmake | disable all packages
|
||||
@ -149,8 +149,9 @@ cmake -C ../cmake/presets/std_nolib.cmake \[OPTIONS\] ../cmake | enable standard
|
||||
cmake -C ../cmake/presets/nolib.cmake \[OPTIONS\] ../cmake | disable all packages that do not require extra libraries
|
||||
cmake -C ../cmake/presets/manual_selection.cmake \[OPTIONS\] ../cmake | example of how to create a manual selection of packages :tb(s=|,a=l)
|
||||
|
||||
NOTE: Running cmake this way manipulates the variable cache in your current
|
||||
build directory. You can combine presets and options with multiple cmake runs.
|
||||
NOTE: Running cmake this way manipulates the variable cache in your
|
||||
current build directory. You can combine presets and options with
|
||||
multiple cmake runs.
|
||||
|
||||
[Example:]
|
||||
|
||||
|
||||
@ -21,7 +21,6 @@ explain how to do this for building both with CMake and make.
|
||||
"Workaround for long long integers"_#longlong
|
||||
"Error handling exceptions"_#exceptions when using LAMMPS as a library :all(b)
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
FFT library :h3,link(fft)
|
||||
@ -38,6 +37,10 @@ LAMMPS can use them if they are available on your system.
|
||||
-D FFT_SINGLE=value # yes or no (default), no = double precision
|
||||
-D FFT_PACK=value # array (default) or pointer or memcpy :pre
|
||||
|
||||
NOTE: The values for the FFT variable must be in upper-case.
|
||||
This is an exception to the rule that all CMake variables can
|
||||
be specified with lower-case values.
|
||||
|
||||
Usually these settings are all that is needed. If CMake cannot find
|
||||
the FFT library, you can set these variables:
|
||||
|
||||
@ -50,10 +53,11 @@ the FFT library, you can set these variables:
|
||||
|
||||
[Makefile.machine settings]:
|
||||
|
||||
FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW2, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISSFFT
|
||||
FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW2, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS
|
||||
# default is KISS if not specified
|
||||
FFT_INC = -DFFT_SINGLE # do not specify for double precision
|
||||
FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY :pre
|
||||
# default is FFT_PACK_ARRAY if not specified
|
||||
|
||||
FFT_INC = -I/usr/local/include
|
||||
FFT_PATH = -L/usr/local/lib
|
||||
@ -84,9 +88,10 @@ pppm"_kspace_style.html command. The "Run output"_doc page gives more
|
||||
details.
|
||||
|
||||
FFTW is a fast, portable FFT library that should also work on any
|
||||
platform and can be faster than KISS FFT. You can download it from
|
||||
"www.fftw.org"_http://www.fftw.org. Both the (obsolete) legacy version
|
||||
2.1.X and the newer 3.X versions are supported.
|
||||
platform and can be faster than the KISS FFT library. You can
|
||||
download it from "www.fftw.org"_http://www.fftw.org. Both the
|
||||
(obsolete) legacy version 2.1.X and the newer 3.X versions are
|
||||
supported.
|
||||
|
||||
NOTE: FFTW2 has not been updated since 1999 and has been declared
|
||||
obsolete by its developers.
|
||||
@ -148,7 +153,7 @@ adequate.
|
||||
[Makefile.machine setting]:
|
||||
|
||||
LMP_INC = -DLAMMPS_SMALLBIG # or -DLAMMPS_BIGBIG or -DLAMMPS_SMALLSMALL :pre
|
||||
|
||||
# default is LAMMPS_SMALLBIG if not specified
|
||||
[CMake and make info]:
|
||||
|
||||
The default "smallbig" setting allows for simulations with:
|
||||
@ -298,10 +303,10 @@ aligned on 64-byte boundaries.
|
||||
|
||||
[CMake variable]:
|
||||
|
||||
-D LAMMPS_MEMALIGN=value # 8, 16, 32, 64 (default) :pre
|
||||
-D LAMMPS_MEMALIGN=value # 0, 8, 16, 32, 64 (default) :pre
|
||||
|
||||
Use a LAMMPS_MEMALIGN value of 0 to disable using posix_memalign()
|
||||
and revert to using the malloc() C-library function instead. When
|
||||
and revert to using the malloc() C-library function instead. When
|
||||
compiling LAMMPS for Windows systems, malloc() will always be used
|
||||
and this setting ignored.
|
||||
|
||||
|
||||
@ -16,6 +16,7 @@ commands in it are used to define a LAMMPS simulation.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Commands_input
|
||||
Commands_parse
|
||||
@ -23,6 +24,7 @@ commands in it are used to define a LAMMPS simulation.
|
||||
Commands_category
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Commands_all
|
||||
Commands_fix
|
||||
|
||||
@ -19,6 +19,7 @@ additional details for many of them.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Errors_common
|
||||
Errors_bugs
|
||||
|
||||
@ -20,6 +20,7 @@ need the source code.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Install_linux
|
||||
Install_mac
|
||||
|
||||
@ -15,7 +15,6 @@ Binaries are available for many different versions of Linux:
|
||||
"Pre-built Ubuntu Linux executables"_#ubuntu
|
||||
"Pre-built Gentoo Linux executable"_#gentoo :all(b)
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
Pre-built binary RPMs for Fedora/RedHat/CentOS/openSUSE :h4,link(rpm)
|
||||
|
||||
@ -15,6 +15,7 @@ These pages provide a brief introduction to LAMMPS.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Intro_overview
|
||||
Manual_version
|
||||
|
||||
@ -58,7 +58,6 @@ Terry Stouch (Lexicon Pharmaceuticals, formerly at Bristol Myers Squibb)
|
||||
Steve Lustig (Dupont)
|
||||
Jim Belak and Roy Pollock (LLNL) :ul
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
Here is a timeline for when various individuals contributed to a new
|
||||
@ -239,7 +238,7 @@ Aug11 : angle_style cosine/shift and cosine/shift/exp : Carsten Svaneborg
|
||||
Aug11 : dihedral_style cosine/shift/exp : Carsten Svaneborg
|
||||
Aug11 : pair_style dipole/sf : Mario Orsi
|
||||
Aug11 : fix addtorque and compute temp/rotate : Laurent Joly (U Lyon)
|
||||
Aug11 : FFT support via FFTW3, MKL, ACML, KISSFFT libraries : \
|
||||
Aug11 : FFT support via FFTW3, MKL, ACML, KISS FFT libraries : \
|
||||
Axel Kohlmeyer (Temple U)
|
||||
Jun11 : pair_style adp : Chris Weinberger (Sandia), Stephen Foiles (Sandia), \
|
||||
Chandra Veer Singh (Cornell)
|
||||
|
||||
@ -84,7 +84,7 @@ every LAMMPS command.
|
||||
Modify
|
||||
Python
|
||||
Errors
|
||||
Build_manual
|
||||
Manual_build
|
||||
|
||||
.. toctree::
|
||||
:caption: Index
|
||||
|
||||
@ -122,4 +122,3 @@ software installed. "http://calibre-ebook.com/"_http://calibre-ebook.com/
|
||||
You first create the ePUB file with 'make epub' and then do:
|
||||
|
||||
ebook-convert LAMMPS.epub LAMMPS.mobi :pre
|
||||
|
||||
@ -24,11 +24,13 @@ contribute"_Modify_contribute.html doc page.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Modify_overview
|
||||
Modify_contribute
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Modify_atom
|
||||
Modify_pair
|
||||
@ -38,6 +40,7 @@ contribute"_Modify_contribute.html doc page.
|
||||
Modify_command
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Modify_dump
|
||||
Modify_kspace
|
||||
@ -46,6 +49,7 @@ contribute"_Modify_contribute.html doc page.
|
||||
Modify_body
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Modify_thermo
|
||||
Modify_variable
|
||||
|
||||
@ -23,6 +23,7 @@ LAMMPS build process.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Packages_standard
|
||||
Packages_user
|
||||
|
||||
@ -99,7 +99,6 @@ as contained in the file name.
|
||||
"USER-UEF"_#PKG-USER-UEF,
|
||||
"USER-VTK"_#PKG-USER-VTK :tb(c=6,ea=c)
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
ASPHERE package :link(PKG-ASPHERE),h4
|
||||
@ -1007,7 +1006,6 @@ lib/voronoi/README
|
||||
"compute voronoi/atom"_compute_voronoi_atom.html
|
||||
examples/voronoi :ul
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
USER-ATC package :link(PKG-USER-ATC),h4
|
||||
|
||||
@ -16,10 +16,12 @@ used together.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Python_overview
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Python_run
|
||||
Python_shlib
|
||||
@ -31,6 +33,7 @@ used together.
|
||||
Python_examples
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Python_call
|
||||
|
||||
|
||||
@ -19,6 +19,7 @@ they can contain.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Run_basics
|
||||
Run_options
|
||||
|
||||
@ -34,7 +34,6 @@ For example, the lmp_mpi executable might be launched as follows:
|
||||
mpirun -np 16 lmp_mpi -v f tmp.out -l my.log -sc none -i in.alloy
|
||||
mpirun -np 16 lmp_mpi -var f tmp.out -log my.log -screen none -in in.alloy :pre
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
[-echo style] :link(echo)
|
||||
|
||||
@ -31,15 +31,18 @@ hardware platforms.
|
||||
<!-- RST
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Speed_bench
|
||||
Speed_measure
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Speed_tips
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
Speed_packages
|
||||
Speed_compare
|
||||
|
||||
@ -43,89 +43,22 @@ same functionality can eventually be supported on a variety of GPU
|
||||
hardware. :l
|
||||
:ule
|
||||
|
||||
Here is a quick overview of how to enable and use the GPU package:
|
||||
|
||||
build the library in lib/gpu for your GPU hardware with the desired precision settings
|
||||
install the GPU package and build LAMMPS as usual
|
||||
use the mpirun command to set the number of MPI tasks/node which determines the number of MPI tasks/GPU
|
||||
specify the # of GPUs per node
|
||||
use GPU styles in your input script :ul
|
||||
|
||||
The latter two steps can be done using the "-pk gpu" and "-sf gpu"
|
||||
"command-line switches"_Run_options.html respectively. Or the effect
|
||||
of the "-pk" or "-sf" switches can be duplicated by adding the
|
||||
"package gpu"_package.html or "suffix gpu"_suffix.html commands
|
||||
respectively to your input script.
|
||||
|
||||
[Required hardware/software:]
|
||||
|
||||
To use this package, you currently need to have an NVIDIA GPU and
|
||||
install the NVIDIA CUDA software on your system:
|
||||
|
||||
Check if you have an NVIDIA GPU: cat /proc/driver/nvidia/gpus/0/information
|
||||
Go to http://www.nvidia.com/object/cuda_get.html
|
||||
Install a driver and toolkit appropriate for your system (SDK is not necessary)
|
||||
Run lammps/lib/gpu/nvc_get_devices (after building the GPU library, see below) to list supported devices and properties :ul
|
||||
Check if you have an NVIDIA GPU: cat /proc/driver/nvidia/gpus/0/information
|
||||
Go to http://www.nvidia.com/object/cuda_get.html
|
||||
Install a driver and toolkit appropriate for your system (SDK is not necessary)
|
||||
Run lammps/lib/gpu/nvc_get_devices (after building the GPU library, see below) to list supported devices and properties :ul
|
||||
|
||||
[Building LAMMPS with the GPU package:]
|
||||
|
||||
This requires two steps (a,b): build the GPU library, then build
|
||||
LAMMPS with the GPU package. You can do both these steps in one line
|
||||
as described on the "Packages details"_Packages_details.html#GPU doc
|
||||
page.
|
||||
|
||||
Or you can follow these two (a,b) steps:
|
||||
|
||||
(a) Build the GPU library
|
||||
|
||||
The GPU library is in lammps/lib/gpu. Select a Makefile.machine (in
|
||||
lib/gpu) appropriate for your system. You should pay special
|
||||
attention to 3 settings in this makefile.
|
||||
|
||||
CUDA_HOME = needs to be where NVIDIA CUDA software is installed on your system
|
||||
CUDA_ARCH = needs to be appropriate to your GPUs
|
||||
CUDA_PREC = precision (double, mixed, single) you desire :ul
|
||||
|
||||
See lib/gpu/Makefile.linux.double for examples of the ARCH settings
|
||||
for different GPU choices, e.g. Fermi vs Kepler. It also lists the
|
||||
possible precision settings:
|
||||
|
||||
CUDA_PREC = -D_SINGLE_SINGLE # single precision for all calculations
|
||||
CUDA_PREC = -D_DOUBLE_DOUBLE # double precision for all calculations
|
||||
CUDA_PREC = -D_SINGLE_DOUBLE # accumulation of forces, etc, in double :pre
|
||||
|
||||
The last setting is the mixed mode referred to above. Note that your
|
||||
GPU must support double precision to use either the 2nd or 3rd of
|
||||
these settings.
|
||||
|
||||
To build the library, type:
|
||||
|
||||
make -f Makefile.machine :pre
|
||||
|
||||
If successful, it will produce the files libgpu.a and Makefile.lammps.
|
||||
|
||||
The latter file has 3 settings that need to be appropriate for the
|
||||
paths and settings for the CUDA system software on your machine.
|
||||
Makefile.lammps is a copy of the file specified by the EXTRAMAKE
|
||||
setting in Makefile.machine. You can change EXTRAMAKE or create your
|
||||
own Makefile.lammps.machine if needed.
|
||||
|
||||
Note that to change the precision of the GPU library, you need to
|
||||
re-build the entire library. Do a "clean" first, e.g. "make -f
|
||||
Makefile.linux clean", followed by the make command above.
|
||||
|
||||
(b) Build LAMMPS with the GPU package
|
||||
|
||||
cd lammps/src
|
||||
make yes-gpu
|
||||
make machine :pre
|
||||
|
||||
No additional compile/link flags are needed in Makefile.machine.
|
||||
|
||||
Note that if you change the GPU library precision (discussed above)
|
||||
and rebuild the GPU library, then you also need to re-install the GPU
|
||||
package and re-build LAMMPS, so that all affected files are
|
||||
re-compiled and linked to the new GPU library.
|
||||
See the "Build extras"_Build_extras.html#gpu doc page for
|
||||
instructions.
|
||||
|
||||
[Run with the GPU package from the command line:]
|
||||
|
||||
|
||||
@ -203,16 +203,12 @@ cat /proc/cpuinfo :pre
|
||||
|
||||
[Building LAMMPS with the USER-INTEL package:]
|
||||
|
||||
NOTE: See the src/USER-INTEL/README file for additional flags that
|
||||
might be needed for best performance on Intel server processors
|
||||
code-named "Skylake".
|
||||
See the "Build extras"_Build_extras.html#user-intel doc page for
|
||||
instructions. Some additional details are covered here.
|
||||
|
||||
The USER-INTEL package must be installed into the source directory:
|
||||
|
||||
make yes-user-intel :pre
|
||||
|
||||
Several example Makefiles for building with the Intel compiler are
|
||||
included with LAMMPS in the src/MAKE/OPTIONS/ directory:
|
||||
For building with make, several example Makefiles for building with
|
||||
the Intel compiler are included with LAMMPS in the src/MAKE/OPTIONS/
|
||||
directory:
|
||||
|
||||
Makefile.intel_cpu_intelmpi # Intel Compiler, Intel MPI, No Offload
|
||||
Makefile.knl # Intel Compiler, Intel MPI, No Offload
|
||||
@ -221,20 +217,16 @@ Makefile.intel_cpu_openpmi # Intel Compiler, OpenMPI, No Offload
|
||||
Makefile.intel_coprocessor # Intel Compiler, Intel MPI, Offload :pre
|
||||
|
||||
Makefile.knl is identical to Makefile.intel_cpu_intelmpi except that
|
||||
it explicitly specifies that vectorization should be for Intel
|
||||
Xeon Phi x200 processors making it easier to cross-compile. For
|
||||
users with recent installations of Intel Parallel Studio, the
|
||||
process can be as simple as:
|
||||
it explicitly specifies that vectorization should be for Intel Xeon
|
||||
Phi x200 processors making it easier to cross-compile. For users with
|
||||
recent installations of Intel Parallel Studio, the process can be as
|
||||
simple as:
|
||||
|
||||
make yes-user-intel
|
||||
source /opt/intel/parallel_studio_xe_2016.3.067/psxevars.sh
|
||||
# or psxevars.csh for C-shell
|
||||
make intel_cpu_intelmpi :pre
|
||||
|
||||
Alternatively this can be done as a single command with suitable make
|
||||
command invocations, as described on the "Packages
|
||||
details"_Packages_details.html#USER-INTEL doc page.
|
||||
|
||||
Note that if you build with support for a Phi coprocessor, the same
|
||||
binary can be used on nodes with or without coprocessors installed.
|
||||
However, if you do not have coprocessors on your system, building
|
||||
@ -253,6 +245,10 @@ required for CCFLAGS and "-qoffload" is required for LINKFLAGS. Other
|
||||
recommended CCFLAG options for best performance are "-O2 -fno-alias
|
||||
-ansi-alias -qoverride-limits -fp-model fast=2 -no-prec-div".
|
||||
|
||||
NOTE: See the src/USER-INTEL/README file for additional flags that
|
||||
might be needed for best performance on Intel server processors
|
||||
code-named "Skylake".
|
||||
|
||||
NOTE: The vectorization and math capabilities can differ depending on
|
||||
the CPU. For Intel compilers, the "-x" flag specifies the type of
|
||||
processor for which to optimize. "-xHost" specifies that the compiler
|
||||
|
||||
@ -37,101 +37,29 @@ task). These are Serial (MPI-only for CPUs and Intel Phi), OpenMP
|
||||
GPUs). You choose the mode at build time to produce an executable
|
||||
compatible with specific hardware.
|
||||
|
||||
[Building LAMMPS with the KOKKOS package:]
|
||||
|
||||
NOTE: Kokkos support within LAMMPS must be built with a C++11 compatible
|
||||
compiler. This means GCC version 4.7.2 or later, Intel 14.0.4 or later, or
|
||||
Clang 3.5.2 or later is required.
|
||||
|
||||
The recommended method of building the KOKKOS package is to start with
|
||||
the provided Kokkos Makefiles in /src/MAKE/OPTIONS/. You may need to
|
||||
modify the KOKKOS_ARCH variable in the Makefile to match your specific
|
||||
hardware. For example:
|
||||
|
||||
for Sandy Bridge CPUs, set KOKKOS_ARCH=SNB
|
||||
for Broadwell CPUs, set KOKKOS_ARCH=BWD
|
||||
for K80 GPUs, set KOKKOS_ARCH=Kepler37
|
||||
for P100 GPUs and Power8 CPUs, set KOKKOS_ARCH=Pascal60,Power8 :ul
|
||||
|
||||
See the [Advanced Kokkos Options] section below for a listing of all
|
||||
KOKKOS_ARCH options.
|
||||
|
||||
[Compile for CPU-only (MPI only, no threading):]
|
||||
|
||||
Use a C++11 compatible compiler and set KOKKOS_ARCH variable in
|
||||
/src/MAKE/OPTIONS/Makefile.kokkos_mpi_only as described above. Then do the
|
||||
following:
|
||||
|
||||
cd lammps/src
|
||||
make yes-kokkos
|
||||
make kokkos_mpi_only :pre
|
||||
|
||||
[Compile for CPU-only (MPI plus OpenMP threading):]
|
||||
|
||||
NOTE: To build with Kokkos support for OpenMP threading, your compiler
|
||||
must support the OpenMP interface. You should have one or more
|
||||
multi-core CPUs so that multiple threads can be launched by each MPI
|
||||
task running on a CPU.
|
||||
|
||||
Use a C++11 compatible compiler and set KOKKOS_ARCH variable in
|
||||
/src/MAKE/OPTIONS/Makefile.kokkos_omp as described above. Then do the
|
||||
following:
|
||||
|
||||
cd lammps/src
|
||||
make yes-kokkos
|
||||
make kokkos_omp :pre
|
||||
|
||||
[Compile for Intel KNL Xeon Phi (Intel Compiler, OpenMPI):]
|
||||
|
||||
Use a C++11 compatible compiler and do the following:
|
||||
|
||||
cd lammps/src
|
||||
make yes-kokkos
|
||||
make kokkos_phi :pre
|
||||
|
||||
[Compile for CPUs and GPUs (with OpenMPI or MPICH):]
|
||||
|
||||
NOTE: To build with Kokkos support for NVIDIA GPUs, NVIDIA CUDA
|
||||
software version 7.5 or later must be installed on your system. See
|
||||
the discussion for the "GPU package"_Speed_gpu.html for details of how
|
||||
to check and do this.
|
||||
|
||||
NOTE: Kokkos with CUDA currently implicitly assumes that the MPI
|
||||
library is CUDA-aware and has support for GPU-direct. This is not always
|
||||
the case, especially when using pre-compiled MPI libraries provided by
|
||||
a Linux distribution. This is not a problem when using only a single
|
||||
GPU and a single MPI rank on a desktop. When running with multiple
|
||||
MPI ranks, you may see segmentation faults without GPU-direct support.
|
||||
These can be avoided by adding the flags
|
||||
"-pk kokkos gpu/direct off"_Run_options.html
|
||||
to the LAMMPS command line or by using the command
|
||||
"package kokkos gpu/direct off"_package.html in the input file.
|
||||
library is CUDA-aware and has support for GPU-direct. This is not
|
||||
always the case, especially when using pre-compiled MPI libraries
|
||||
provided by a Linux distribution. This is not a problem when using
|
||||
only a single GPU and a single MPI rank on a desktop. When running
|
||||
with multiple MPI ranks, you may see segmentation faults without
|
||||
GPU-direct support. These can be avoided by adding the flags "-pk
|
||||
kokkos gpu/direct off"_Run_options.html to the LAMMPS command line or
|
||||
by using the command "package kokkos gpu/direct off"_package.html in
|
||||
the input file.
|
||||
|
||||
Use a C++11 compatible compiler and set KOKKOS_ARCH variable in
|
||||
/src/MAKE/OPTIONS/Makefile.kokkos_cuda_mpi for both GPU and CPU as
|
||||
described above. Then do the following:
|
||||
[Building LAMMPS with the KOKKOS package:]
|
||||
|
||||
cd lammps/src
|
||||
make yes-kokkos
|
||||
make kokkos_cuda_mpi :pre
|
||||
|
||||
[Alternative Methods of Compiling:]
|
||||
|
||||
Alternatively, the KOKKOS package can be built by specifying Kokkos variables
|
||||
on the make command line. For example:
|
||||
|
||||
make mpi KOKKOS_DEVICES=OpenMP KOKKOS_ARCH=SNB # set the KOKKOS_DEVICES and KOKKOS_ARCH variable explicitly
|
||||
make kokkos_cuda_mpi KOKKOS_ARCH=Pascal60,Power8 # set the KOKKOS_ARCH variable explicitly :pre
|
||||
|
||||
Setting the KOKKOS_DEVICES and KOKKOS_ARCH variables on the make
|
||||
command line requires a GNU-compatible make command. Try "gmake" if
|
||||
your system's standard make complains.
|
||||
|
||||
NOTE: If you build using make line variables and re-build LAMMPS twice
|
||||
with different KOKKOS options and the *same* target, then you *must*
|
||||
perform a "make clean-all" or "make clean-machine" before each
|
||||
build. This is to force all the KOKKOS-dependent files to be
|
||||
re-compiled with the new options.
|
||||
See the "Build extras"_Build_extras.html#kokkos doc page for instructions.
|
||||
|
||||
[Running LAMMPS with the KOKKOS package:]
|
||||
|
||||
@ -411,50 +339,18 @@ hardware.
|
||||
[Advanced Kokkos options:]
|
||||
|
||||
There are other allowed options when building with the KOKKOS package.
|
||||
As above, they can be set either as variables on the make command line
|
||||
or in Makefile.machine. This is the full list of options, including
|
||||
those discussed above. Each takes a value shown below. The default
|
||||
value is listed, which is set in the /lib/kokkos/Makefile.kokkos file.
|
||||
As explained on the "Build extras"_Build_extras.html#kokkos doc page,
|
||||
they can be set either as variables on the make command line or in
|
||||
Makefile.machine, or they can be specified as CMake variables. Each
|
||||
takes a value shown below. The default value is listed, which is set
|
||||
in the lib/kokkos/Makefile.kokkos file.
|
||||
|
||||
KOKKOS_DEVICES, values = {Serial}, {OpenMP}, {Pthreads}, {Cuda}, default = {OpenMP}
|
||||
KOKKOS_ARCH, values = {KNC}, {SNB}, {HSW}, {Kepler30}, {Kepler32}, {Kepler35}, {Kepler37}, {Maxwell50}, {Maxwell52}, {Maxwell53}, {Pascal60}, {Pascal61}, {ARMv80}, {ARMv81}, {ARMv8-ThunderX}, {BGQ}, {Power7}, {Power8}, {Power9}, {KNL}, {BDW}, {SKX}, default = {none}
|
||||
KOKKOS_DEBUG, values = {yes}, {no}, default = {no}
|
||||
KOKKOS_USE_TPLS, values = {hwloc}, {librt}, {experimental_memkind}, default = {none}
|
||||
KOKKOS_CXX_STANDARD, values = {c++11}, {c++1z}, default = {c++11}
|
||||
KOKKOS_OPTIONS, values = {aggressive_vectorization}, {disable_profiling}, default = {none}
|
||||
KOKKOS_CUDA_OPTIONS, values = {force_uvm}, {use_ldg}, {rdc}, {enable_lambda}, default = {enable_lambda} :ul
|
||||
|
||||
KOKKOS_DEVICES sets the parallelization method used for Kokkos code
|
||||
(within LAMMPS). KOKKOS_DEVICES=Serial means that no threading will be used.
|
||||
KOKKOS_DEVICES=OpenMP means that OpenMP threading will be
|
||||
used. KOKKOS_DEVICES=Pthreads means that pthreads will be used.
|
||||
KOKKOS_DEVICES=Cuda means an NVIDIA GPU running CUDA will be used.
|
||||
|
||||
KOKKOS_ARCH enables compiler switches needed when compiling for a
|
||||
specific hardware:
|
||||
|
||||
ARMv80 = ARMv8.0 Compatible CPU
|
||||
ARMv81 = ARMv8.1 Compatible CPU
|
||||
ARMv8-ThunderX = ARMv8 Cavium ThunderX CPU
|
||||
SNB = Intel Sandy/Ivy Bridge CPUs
|
||||
HSW = Intel Haswell CPUs
|
||||
BDW = Intel Broadwell Xeon E-class CPUs
|
||||
SKX = Intel Sky Lake Xeon E-class HPC CPUs (AVX512)
|
||||
KNC = Intel Knights Corner Xeon Phi
|
||||
KNL = Intel Knights Landing Xeon Phi
|
||||
Kepler30 = NVIDIA Kepler generation CC 3.0
|
||||
Kepler32 = NVIDIA Kepler generation CC 3.2
|
||||
Kepler35 = NVIDIA Kepler generation CC 3.5
|
||||
Kepler37 = NVIDIA Kepler generation CC 3.7
|
||||
Maxwell50 = NVIDIA Maxwell generation CC 5.0
|
||||
Maxwell52 = NVIDIA Maxwell generation CC 5.2
|
||||
Maxwell53 = NVIDIA Maxwell generation CC 5.3
|
||||
Pascal60 = NVIDIA Pascal generation CC 6.0
|
||||
Pascal61 = NVIDIA Pascal generation CC 6.1
|
||||
BGQ = IBM Blue Gene/Q CPUs
|
||||
Power8 = IBM POWER8 CPUs
|
||||
Power9 = IBM POWER9 CPUs :ul
|
||||
|
||||
KOKKOS_USE_TPLS=hwloc binds threads to hardware cores, so they do not
|
||||
migrate during a simulation. KOKKOS_USE_TPLS=hwloc should always be
|
||||
used if running with KOKKOS_DEVICES=Pthreads for pthreads. It is not
|
||||
|
||||
@ -16,18 +16,6 @@ improper), several Kspace styles, and a few fix styles. It uses
|
||||
the OpenMP interface for multi-threading, but can also be compiled
|
||||
without OpenMP support, providing optimized serial styles in that case.
|
||||
|
||||
Here is a quick overview of how to use the USER-OMP package, assuming
|
||||
one or more 16-core nodes. More details follow.
|
||||
|
||||
make yes-user-omp
|
||||
make omp # Makefile.omp already has OpenMP settings for GNU compilers
|
||||
make mpi # or build with USER-OMP package without OpenMP :pre
|
||||
|
||||
env OMP_NUM_THREADS=16 lmp_omp -sf omp -in in.script # 1 MPI task, 16 threads according to OMP_NUM_THREADS
|
||||
lmp_mpi -sf omp -in in.script # 1 MPI task, no threads, optimized kernels
|
||||
mpirun -np 4 lmp_omp -sf omp -pk omp 4 -in in.script # 4 MPI tasks, 4 threads/task
|
||||
mpirun -np 32 -ppn 4 lmp_omp -sf omp -pk omp 4 -in in.script # 8 nodes, 4 MPI tasks/node, 4 threads/task :pre
|
||||
|
||||
[Required hardware/software:]
|
||||
|
||||
To enable multi-threading, your compiler must support the OpenMP interface.
|
||||
@ -36,18 +24,18 @@ launched by each MPI task on the local node (using shared memory).
|
||||
|
||||
[Building LAMMPS with the USER-OMP package:]
|
||||
|
||||
The lines above illustrate how to include/build with the USER-OMP
|
||||
package in two steps, using the "make" command. Or how to do it with
|
||||
one command as described on the "Packages
|
||||
details"_Packages_details.html#USER-OMP doc page.
|
||||
|
||||
Note that the CCFLAGS and LINKFLAGS settings in Makefile.machine must
|
||||
include "-fopenmp" for the GNU compilers. If you use an Intel compiler,
|
||||
the corresponding flag is "-qopenmp" and the CCFLAGS setting must also
|
||||
include "-restrict".
|
||||
See the "Build extras"_Build_extras.html#user-omp doc page for
|
||||
instructions.
|
||||
|
||||
[Run with the USER-OMP package from the command line:]
|
||||
|
||||
These examples assume one or more 16-core nodes.
|
||||
|
||||
env OMP_NUM_THREADS=16 lmp_omp -sf omp -in in.script # 1 MPI task, 16 threads according to OMP_NUM_THREADS
|
||||
lmp_mpi -sf omp -in in.script # 1 MPI task, no threads, optimized kernels
|
||||
mpirun -np 4 lmp_omp -sf omp -pk omp 4 -in in.script # 4 MPI tasks, 4 threads/task
|
||||
mpirun -np 32 -ppn 4 lmp_omp -sf omp -pk omp 4 -in in.script # 8 nodes, 4 MPI tasks/node, 4 threads/task :pre
|
||||
|
||||
The mpirun or mpiexec command sets the total number of MPI tasks used
|
||||
by LAMMPS (one or multiple per compute node) and the number of MPI
|
||||
tasks used per node. E.g. the mpirun command in MPICH does this via
|
||||
|
||||
@ -15,34 +15,21 @@ Technologies). It contains a handful of pair styles whose compute()
|
||||
methods were rewritten in C++ templated form to reduce the overhead
|
||||
due to if tests and other conditional code.
|
||||
|
||||
Here is a quick overview of how to use the OPT package. More details
|
||||
follow.
|
||||
|
||||
make yes-opt
|
||||
make mpi # build with the OPT package :pre
|
||||
|
||||
lmp_mpi -sf opt -in in.script # run in serial
|
||||
mpirun -np 4 lmp_mpi -sf opt -in in.script # run in parallel :pre
|
||||
|
||||
[Required hardware/software:]
|
||||
|
||||
None.
|
||||
|
||||
[Building LAMMPS with the OPT package:]
|
||||
|
||||
The lines above illustrate how to build LAMMPS with the OPT package in
|
||||
two steps, using the "make" command. Or how to do it with one command
|
||||
as described on the "Packages details"_Packages_details.html#OPT doc
|
||||
page.
|
||||
|
||||
Note that if you use an Intel compiler to build with the OPT package,
|
||||
the CCFLAGS setting in your Makefile.machine must include "-restrict".
|
||||
See the "Build extras"_Build_extras.html#opt doc page for instructions.
|
||||
|
||||
[Run with the OPT package from the command line:]
|
||||
|
||||
As in the lines above, use the "-sf opt" "command-line
|
||||
switch"_Run_options.html, which will automatically append "opt" to
|
||||
styles that support it.
|
||||
lmp_mpi -sf opt -in in.script # run in serial
|
||||
mpirun -np 4 lmp_mpi -sf opt -in in.script # run in parallel :pre
|
||||
|
||||
Use the "-sf opt" "command-line switch"_Run_options.html, which will
|
||||
automatically append "opt" to styles that support it.
|
||||
|
||||
[Or run with the OPT package by editing an input script:]
|
||||
|
||||
|
||||
@ -74,7 +74,6 @@ own sub-directories with their own Makefiles and/or README files.
|
||||
"vim"_#vim
|
||||
"xmgrace"_#xmgrace :ul
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
amber2lmp tool :h3,link(amber)
|
||||
|
||||
@ -134,7 +134,6 @@ timesteps it specifies, while it accumulates per-chunk averages.
|
||||
|
||||
The details are described below.
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
The different chunk styles operate as follows. For each style, how it
|
||||
@ -294,7 +293,6 @@ invoke other computes, fixes, or variables when they are evaluated, so
|
||||
this is a very general means of generating per-atom quantities to
|
||||
treat as a chunk ID.
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
Normally, {Nchunk} = the number of chunks, is re-calculated every time
|
||||
@ -322,7 +320,6 @@ the same compute chunk/atom compute. However, the time windows they
|
||||
induce for holding {Nchunk} constant must be identical, else an error
|
||||
will be generated.
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
The various optional keywords operate as follows. Note that some of
|
||||
|
||||
@ -133,7 +133,6 @@ dump_modify option below is valid for the {atom} style, it is also
|
||||
valid for the {atom/mpiio} style, and similarly for the other styles
|
||||
which allow for use of MPI-IO.
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
These keywords apply to various dump styles, including the "dump
|
||||
@ -629,7 +628,6 @@ the coordinate would be if it had not been wrapped back into the
|
||||
periodic box. Note that these coordinates may thus be far outside the
|
||||
box size stored with the snapshot.
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
These keywords apply only to the "dump image"_dump_image.html and
|
||||
@ -894,7 +892,6 @@ frame rate higher than 24 is not recommended, as it will result in
|
||||
simply dropping the rendered images. It is more efficient to dump
|
||||
images less frequently.
|
||||
|
||||
:line
|
||||
:line
|
||||
|
||||
[Restrictions:] none
|
||||
|
||||
@ -126,8 +126,6 @@ minimizer from the new adjusted box size/shape, since that creates a
|
||||
new objective function valid for the new box size/shape. Repeat as
|
||||
necessary until the box size/shape has reached its new equilibrium.
|
||||
|
||||
:line
|
||||
:line
|
||||
:line
|
||||
|
||||
The {couple} keyword allows two or three of the diagonal components of
|
||||
|
||||
@ -26,12 +26,13 @@ optionally copies Makefile.auto to a new Makefile.osuffix
|
||||
-h = set CUDA_HOME variable in Makefile.auto to hdir
|
||||
hdir = path to NVIDIA Cuda software, e.g. /usr/local/cuda
|
||||
-a = set CUDA_ARCH variable in Makefile.auto to arch
|
||||
use arch = 20 for Tesla C2050/C2070 (Fermi) (deprecated as of CUDA 8.0)
|
||||
use arch = 20 for Fermi (C2050/C2070, deprecated as of CUDA 8.0)
|
||||
or GeForce GTX 580 or similar
|
||||
use arch = 30 for Tesla K10 (Kepler)
|
||||
use arch = 35 for Tesla K40 (Kepler) or GeForce GTX Titan or similar
|
||||
use arch = 37 for Tesla dual K80 (Kepler)
|
||||
use arch = 60 for Tesla P100 (Pascal)
|
||||
use arch = 30 for Kepler (K10)
|
||||
use arch = 35 for Kepler (K40) or GeForce GTX Titan or similar
|
||||
use arch = 37 for Kepler (dual K80)
|
||||
use arch = 60 for Pascal (P100)
|
||||
use arch = 70 for Volta
|
||||
-p = set CUDA_PRECISION variable in Makefile.auto to precision
|
||||
use precision = double or mixed or single
|
||||
-e = set EXTRAMAKE variable in Makefile.auto to Makefile.lammps.esuffix
|
||||
|
||||
@ -14,7 +14,7 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing authors: Jim Shepherd (GA Tech) added SGI SCSL support
|
||||
Axel Kohlmeyer (Temple U) added support for
|
||||
FFTW3, KISSFFT, Dfti/MKL, and ACML.
|
||||
FFTW3, KISS FFT, Dfti/MKL, and ACML.
|
||||
Phil Blood (PSC) added single precision FFT.
|
||||
Paul Coffman (IBM) added MPI collectives remap
|
||||
------------------------------------------------------------------------- */
|
||||
@ -26,7 +26,7 @@
|
||||
#include "fft3d.h"
|
||||
#include "remap.h"
|
||||
|
||||
#ifdef FFT_KISSFFT
|
||||
#ifdef FFT_KISS
|
||||
/* include kissfft implementation */
|
||||
#include "kissfft.h"
|
||||
#endif
|
||||
|
||||
@ -24,8 +24,8 @@ typedef float FFT_SCALAR;
|
||||
typedef double FFT_SCALAR;
|
||||
#endif
|
||||
|
||||
|
||||
// set default fftw library. switch to FFT_FFTW3 when convenient.
|
||||
|
||||
#ifdef FFT_FFTW
|
||||
#define FFT_FFTW3
|
||||
#endif
|
||||
@ -57,8 +57,9 @@ typedef fftwf_complex FFT_DATA;
|
||||
#else
|
||||
|
||||
/* use a stripped down version of kiss fft as default fft */
|
||||
#ifndef FFT_KISSFFT
|
||||
#define FFT_KISSFFT
|
||||
|
||||
#ifndef FFT_KISS
|
||||
#define FFT_KISS
|
||||
#endif
|
||||
#define kiss_fft_scalar float
|
||||
typedef struct {
|
||||
@ -97,8 +98,8 @@ typedef fftw_complex FFT_DATA;
|
||||
#else
|
||||
|
||||
/* use a stripped down version of kiss fft as default fft */
|
||||
#ifndef FFT_KISSFFT
|
||||
#define FFT_KISSFFT
|
||||
#ifndef FFT_KISS
|
||||
#define FFT_KISS
|
||||
#endif
|
||||
#define kiss_fft_scalar double
|
||||
typedef struct {
|
||||
@ -152,7 +153,7 @@ struct fft_plan_3d {
|
||||
FFTW_API(plan) plan_mid_backward;
|
||||
FFTW_API(plan) plan_slow_forward;
|
||||
FFTW_API(plan) plan_slow_backward;
|
||||
#elif defined(FFT_KISSFFT)
|
||||
#elif defined(FFT_KISS)
|
||||
kiss_fft_cfg cfg_fast_forward;
|
||||
kiss_fft_cfg cfg_fast_backward;
|
||||
kiss_fft_cfg cfg_mid_forward;
|
||||
|
||||
@ -13,6 +13,7 @@
|
||||
|
||||
changes 2008-2011 by Axel Kohlmeyer <akohlmey@gmail.com>
|
||||
*/
|
||||
|
||||
#ifndef LMP_FFT_KISSFFT
|
||||
#define LMP_FFT_KISSFFT
|
||||
|
||||
|
||||
11
src/pack.h
11
src/pack.h
@ -22,9 +22,8 @@ struct pack_plan_3d {
|
||||
int nqty; // # of values/element
|
||||
};
|
||||
|
||||
|
||||
#if !defined(PACK_POINTER) && !defined(PACK_MEMCPY)
|
||||
#define PACK_ARRAY
|
||||
#if !defined(FFT_PACK_POINTER) && !defined(FFT_PACK_MEMCPY)
|
||||
#define FFT_PACK_ARRAY
|
||||
#endif
|
||||
|
||||
#ifndef PACK_DATA
|
||||
@ -47,7 +46,7 @@ struct pack_plan_3d {
|
||||
pack/unpack with array indices
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_ARRAY
|
||||
#ifdef FFT_PACK_ARRAY
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
@ -274,7 +273,7 @@ static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_pl
|
||||
pack/unpack with pointers
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_POINTER
|
||||
#ifdef FFT_PACK_POINTER
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
@ -523,7 +522,7 @@ static void unpack_3d_permute2_n(PACK_DATA *buf, PACK_DATA *data, struct pack_pl
|
||||
just use PACK_POINTER versions
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef PACK_MEMCPY
|
||||
#ifdef FFT_PACK_MEMCPY
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
pack from data -> buf
|
||||
|
||||
Reference in New Issue
Block a user