diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index c7b3edaca3..26d1cc6af4 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -738,9 +738,12 @@ if(PKG_KSPACE) else() message(STATUS "Using double precision FFTs") endif() - if(FFT_THREADS) + if(FFT_FFTW_THREADS) message(STATUS "Using threaded FFTs") else() message(STATUS "Using non-threaded FFTs") endif() + if(PKG_KOKKOS AND KOKKOS_ENABLE_CUDA) + message(STATUS "Kokkos FFT: cuFFT") + endif() endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 23ff6cd1ff..13a751f18d 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -42,6 +42,10 @@ if(PKG_KOKKOS) list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/fft3d_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp ${KOKKOS_PKG_SOURCES_DIR}/remap_kokkos.cpp) + if(KOKKOS_ENABLE_CUDA) + add_definitions(-DFFT_CUFFT) + list(APPEND LAMMPS_LINK_LIBS -lcufft) + endif() endif() set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}") diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 8451eef91c..5786d7cb8a 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -22,12 +22,12 @@ if(PKG_KSPACE) include_directories(${${FFTW}_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES}) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) - option(FFT_THREADS "Use threaded FFT library" ON) + option(FFT_FFTW_THREADS "Use threaded FFT library" ON) else() - option(FFT_THREADS "Use threaded FFT library" OFF) + option(FFT_FFTW_THREADS "Use threaded FFT library" OFF) endif() - if(FFT_THREADS) + if(FFT_FFTW_THREADS) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) add_definitions(-DFFT_FFTW_THREADS) list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_OMP_LIBRARIES}) @@ -37,20 +37,12 @@ if(PKG_KSPACE) endif() elseif(FFT STREQUAL "MKL") find_package(MKL REQUIRED) - option(FFT_THREADS "Use threaded FFT library" 
OFF) add_definitions(-DFFT_MKL) include_directories(${MKL_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) - if (FFT_THREADS) - message(FATAL_ERROR "FFT_THREADS not supported with FFT from MKL") - endif() else() # last option is KISSFFT - option(FFT_THREADS "Use threaded FFT library" OFF) add_definitions(-DFFT_KISS) - if (FFT_THREADS) - message(FATAL_ERROR "FFT_THREADS not supported with KISSFFT") - endif() endif() set(FFT_PACK "array" CACHE STRING "Optimization for FFT") diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index 07d427db49..b76e902033 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -49,13 +49,19 @@ through the CMAKE\_CXX\_FLAGS variable. Example for CentOS 7: -D CMAKE_CXX_FLAGS="-O3 -g -fopenmp -DNDEBUG -std=c++11" -**Makefile.machine setting**\ : +**Makefile.machine setting**\ to bypass the C++11 test and compile in C++98 mode: .. parsed-literal:: LMP_INC = -DLAMMPS_CXX98 +**Makefile.machine setting**\ to enable C++11 with an older (but not too old) GNU C++ compiler (e.g. on CentOS 7): + + +.. parsed-literal:: + + CCFLAGS = -g -O3 -std=c++11 ---------- @@ -86,14 +92,19 @@ LAMMPS can use them if they are available on your system. an exception to the rule that all CMake variables can be specified with lower-case values. -Usually these settings are all that is needed. If CMake cannot find -the FFT library, you can set these variables: +Usually these settings are all that is needed. If FFTW3 is selected, +then CMake will try to detect if threaded FFTW libraries are available +and enable them by default. This setting is independent of whether +OpenMP threads are enabled and a package like KOKKOS or USER-OMP is +used. If CMake cannot detect the FFT library, you can set these variables +to assist: .. 
parsed-literal:: -D FFTW3_INCLUDE_DIRS=path # path to FFTW3 include files -D FFTW3_LIBRARIES=path # path to FFTW3 libraries + -D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries -D MKL_INCLUDE_DIRS=path # ditto for Intel MKL library -D MKL_LIBRARIES=path @@ -105,6 +116,7 @@ the FFT library, you can set these variables: FFT_INC = -DFFT_FFTW3 # -DFFT_FFTW3, -DFFT_FFTW (same as -DFFT_FFTW3), -DFFT_MKL, or -DFFT_KISS # default is KISS if not specified FFT_INC = -DFFT_SINGLE # do not specify for double precision + FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY # default is FFT\_PACK\_ARRAY if not specified @@ -115,6 +127,7 @@ the FFT library, you can set these variables: FFT_INC = -I/usr/local/include FFT_PATH = -L/usr/local/lib FFT_LIB = -lfftw3 # FFTW3 double precision + FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS) FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier @@ -126,16 +139,19 @@ FFT\_LIB with the appropriate FFT libraries to include in the link. **CMake and make info**\ : The `KISS FFT library `_ is included in the LAMMPS -distribution. It is portable across all platforms. Depending on the -size of the FFTs and the number of processors used, the other -libraries listed here can be faster. +distribution. It is portable across all platforms. Depending on the size +of the FFTs and the number of processors used, the other libraries listed +here can be faster. However, note that long-range Coulombics are only a portion of the per-timestep CPU cost, FFTs are only a portion of long-range Coulombics, and 1d FFTs are only a portion of the FFT cost (parallel communication can be costly). 
A breakdown of these timings is printed -to the screen at the end of a run using the :doc:`kspace\_style pppm <kspace_style>` command. The :doc:`Run output <Run_output>` -doc page gives more details. +to the screen at the end of a run when using the +:doc:`kspace_style pppm <kspace_style>` command. The :doc:`Run output <Run_output>` +doc page gives more details. A more detailed (and time-consuming) +report of the FFT performance is generated with the +:doc:`kspace_modify fftbench yes <kspace_modify>` command. FFTW is a fast, portable FFT library that should also work on any platform and can be faster than the KISS FFT library. You can @@ -166,7 +182,7 @@ When using -DFFT\_SINGLE with FFTW3 you may need to build the FFTW library a second time with support for single-precision. For FFTW3, do the following, which should produce the additional -library libfftw3f.a +library libfftw3f.a or libfftw3f.so. .. parsed-literal::