Merge pull request #3237 from emilyviolet/hipfft
Add support for hipFFT with PPPMKokkos
This commit is contained in:
@ -947,6 +947,12 @@ if(PKG_KSPACE)
|
||||
else()
|
||||
message(STATUS "Kokkos FFT: cuFFT")
|
||||
endif()
|
||||
elseif(Kokkos_ENABLE_HIP)
|
||||
if(FFT STREQUAL "KISS")
|
||||
message(STATUS "Kokkos FFT: KISS")
|
||||
else()
|
||||
message(STATUS "Kokkos FFT: hipFFT")
|
||||
endif()
|
||||
else()
|
||||
message(STATUS "Kokkos FFT: ${FFT}")
|
||||
endif()
|
||||
|
||||
@ -130,6 +130,11 @@ if(PKG_KSPACE)
|
||||
target_compile_definitions(lammps PRIVATE -DFFT_CUFFT)
|
||||
target_link_libraries(lammps PRIVATE cufft)
|
||||
endif()
|
||||
elseif(Kokkos_ENABLE_HIP)
|
||||
if(NOT (FFT STREQUAL "KISS"))
|
||||
target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT)
|
||||
target_link_libraries(lammps PRIVATE hipfft)
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
@ -641,6 +641,20 @@ This list was last updated for version 3.5.0 of the Kokkos library.
|
||||
|
||||
-D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper
|
||||
|
||||
For AMD or NVIDIA GPUs using HIP, set these variables:
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
|
||||
-D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above
|
||||
-D Kokkos_ENABLE_HIP=yes
|
||||
-D Kokkos_ENABLE_OPENMP=yes
|
||||
|
||||
This will enable FFTs on the GPU, either with the internal KISSFFT library
|
||||
or with the hipFFT wrapper library, which will call out to the
|
||||
platform-appropriate vendor library: rocFFT on AMD GPUs or cuFFT on
|
||||
NVIDIA GPUs.
|
||||
|
||||
To simplify compilation, four preset files are included in the
|
||||
``cmake/presets`` folder, ``kokkos-serial.cmake``,
|
||||
``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, and
|
||||
@ -707,6 +721,15 @@ This list was last updated for version 3.5.0 of the Kokkos library.
|
||||
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
|
||||
CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
|
||||
|
||||
For AMD or NVIDIA GPUs using HIP:
|
||||
|
||||
.. code-block:: make
|
||||
|
||||
KOKKOS_DEVICES = HIP
|
||||
KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU
|
||||
# GPUARCH = GPU from list above
|
||||
FFT_INC = -DFFT_HIPFFT # enable use of hipFFT (optional)
|
||||
FFT_LIB = -lhipfft # link to hipFFT library
|
||||
|
||||
Advanced KOKKOS compilation settings
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
|
||||
1
src/.gitignore
vendored
1
src/.gitignore
vendored
@ -627,6 +627,7 @@
|
||||
/ewald.h
|
||||
/ewald_cg.cpp
|
||||
/ewald_cg.h
|
||||
/ewald_const.h
|
||||
/ewald_dipole.cpp
|
||||
/ewald_dipole.h
|
||||
/ewald_dipole_spin.cpp
|
||||
|
||||
@ -46,13 +46,17 @@ FFT3dKokkos<DeviceType>::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int
|
||||
|
||||
#if defined(FFT_MKL)
|
||||
if (ngpus > 0 && execution_space == Device)
|
||||
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs");
|
||||
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs");
|
||||
#elif defined(FFT_FFTW3)
|
||||
if (ngpus > 0 && execution_space == Device)
|
||||
lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA on GPUs");
|
||||
lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs");
|
||||
#elif defined(FFT_CUFFT)
|
||||
if (ngpus > 0 && execution_space == Host)
|
||||
lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs");
|
||||
lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs");
|
||||
#elif defined(FFT_HIPFFT)
|
||||
if (ngpus > 0 && execution_space == Host)
|
||||
lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs");
|
||||
|
||||
#elif defined(FFT_KISSFFT)
|
||||
// The compiler can't statically determine the stack size needed for
|
||||
// recursive function calls in KISS FFT and the default per-thread
|
||||
@ -145,7 +149,7 @@ public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int &i) const {
|
||||
#if defined(FFT_FFTW3) || defined(FFT_CUFFT)
|
||||
#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT)
|
||||
FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i);
|
||||
*(out_ptr++) *= norm;
|
||||
*(out_ptr++) *= norm;
|
||||
@ -227,6 +231,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
#elif defined(FFT_CUFFT)
|
||||
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
|
||||
#elif defined(FFT_HIPFFT)
|
||||
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
typename FFT_AT::t_FFT_DATA_1d d_tmp =
|
||||
typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
|
||||
@ -271,6 +277,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
#elif defined(FFT_CUFFT)
|
||||
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
|
||||
#elif defined(FFT_HIPFFT)
|
||||
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
|
||||
if (flag == 1)
|
||||
@ -313,6 +321,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
#elif defined(FFT_CUFFT)
|
||||
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
|
||||
#elif defined(FFT_HIPFFT)
|
||||
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
|
||||
if (flag == 1)
|
||||
@ -699,6 +709,23 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
||||
&nslow,1,plan->length3,
|
||||
CUFFT_TYPE,plan->total3/plan->length3);
|
||||
|
||||
#elif defined(FFT_HIPFFT)
|
||||
|
||||
hipfftPlanMany(&(plan->plan_fast), 1, &nfast,
|
||||
&nfast,1,plan->length1,
|
||||
&nfast,1,plan->length1,
|
||||
HIPFFT_TYPE,plan->total1/plan->length1);
|
||||
|
||||
hipfftPlanMany(&(plan->plan_mid), 1, &nmid,
|
||||
&nmid,1,plan->length2,
|
||||
&nmid,1,plan->length2,
|
||||
HIPFFT_TYPE,plan->total2/plan->length2);
|
||||
|
||||
hipfftPlanMany(&(plan->plan_slow), 1, &nslow,
|
||||
&nslow,1,plan->length3,
|
||||
&nslow,1,plan->length3,
|
||||
HIPFFT_TYPE,plan->total3/plan->length3);
|
||||
|
||||
#else /* FFT_KISS */
|
||||
|
||||
kissfftKK = new KissFFTKokkos<DeviceType>();
|
||||
@ -863,6 +890,10 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
|
||||
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
|
||||
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
|
||||
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
|
||||
#elif defined(FFT_HIPFFT)
|
||||
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
|
||||
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
|
||||
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
kiss_fft_functor<DeviceType> f;
|
||||
typename FFT_AT::t_FFT_DATA_1d d_tmp =
|
||||
|
||||
@ -60,6 +60,10 @@ struct fft_plan_3d_kokkos {
|
||||
cufftHandle plan_fast;
|
||||
cufftHandle plan_mid;
|
||||
cufftHandle plan_slow;
|
||||
#elif defined(FFT_HIPFFT)
|
||||
hipfftHandle plan_fast;
|
||||
hipfftHandle plan_mid;
|
||||
hipfftHandle plan_slow;
|
||||
#else
|
||||
kiss_fft_state_kokkos<DeviceType> cfg_fast_forward;
|
||||
kiss_fft_state_kokkos<DeviceType> cfg_fast_backward;
|
||||
|
||||
@ -49,8 +49,8 @@ typedef double FFT_SCALAR;
|
||||
#endif
|
||||
|
||||
|
||||
// with KOKKOS in CUDA mode we can only have
|
||||
// CUFFT or KISSFFT, thus undefine all other
|
||||
// with KOKKOS in CUDA or HIP mode we can only have
|
||||
// CUFFT/HIPFFT or KISSFFT, thus undefine all other
|
||||
// FFTs here, since they may be valid in fft3d.cpp
|
||||
|
||||
#ifdef KOKKOS_ENABLE_CUDA
|
||||
@ -66,10 +66,26 @@ typedef double FFT_SCALAR;
|
||||
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
|
||||
# define FFT_KISSFFT
|
||||
# endif
|
||||
#elif defined(KOKKOS_ENABLE_HIP)
|
||||
# if defined(FFT_FFTW)
|
||||
# undef FFT_FFTW
|
||||
# endif
|
||||
# if defined(FFT_FFTW3)
|
||||
# undef FFT_FFTW3
|
||||
# endif
|
||||
# if defined(FFT_MKL)
|
||||
# undef FFT_MKL
|
||||
# endif
|
||||
# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT)
|
||||
# define FFT_KISSFFT
|
||||
# endif
|
||||
#else
|
||||
# if defined(FFT_CUFFT)
|
||||
# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
|
||||
# endif
|
||||
# if defined(FFT_HIPFFT)
|
||||
# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT"
|
||||
# endif
|
||||
// if user set FFTW, it means FFTW3
|
||||
# ifdef FFT_FFTW
|
||||
# define FFT_FFTW3
|
||||
@ -110,6 +126,17 @@ typedef double FFT_SCALAR;
|
||||
#define CUFFT_TYPE CUFFT_Z2Z
|
||||
typedef cufftDoubleComplex FFT_DATA;
|
||||
#endif
|
||||
#elif defined(FFT_HIPFFT)
|
||||
#include "hipfft.h"
|
||||
#if defined(FFT_SINGLE)
|
||||
#define hipfftExec hipfftExecC2C
|
||||
#define HIPFFT_TYPE HIPFFT_C2C
|
||||
typedef hipfftComplex FFT_DATA;
|
||||
#else
|
||||
#define hipfftExec hipfftExecZ2Z
|
||||
#define HIPFFT_TYPE HIPFFT_Z2Z
|
||||
typedef hipfftDoubleComplex FFT_DATA;
|
||||
#endif
|
||||
#else
|
||||
#if defined(FFT_SINGLE)
|
||||
#define kiss_fft_scalar float
|
||||
|
||||
@ -28,6 +28,8 @@ KSpaceStyle(pppm,PPPM);
|
||||
#define LMP_FFT_LIB "MKL FFT"
|
||||
#elif defined(FFT_CUFFT)
|
||||
#define LMP_FFT_LIB "cuFFT"
|
||||
#elif defined(FFT_HIPFFT)
|
||||
#define LMP_FFT_LIB "hipFFT"
|
||||
#else
|
||||
#define LMP_FFT_LIB "KISS FFT"
|
||||
#endif
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler
|
||||
# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC =
|
||||
MY_HIP_EXE = $(shell which hipcc)
|
||||
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
|
||||
|
||||
FFT_INC = -DFFT_HIPFFT
|
||||
FFT_PATH =
|
||||
FFT_LIB =
|
||||
FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 3.5.4 of manual
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler
|
||||
# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler, hipFFT
|
||||
|
||||
SHELL = /bin/sh
|
||||
|
||||
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
|
||||
# PATH = path for FFT library
|
||||
# LIB = name of FFT library
|
||||
|
||||
FFT_INC =
|
||||
MY_HIP_EXE = $(shell which hipcc)
|
||||
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
|
||||
|
||||
FFT_INC = -DFFT_HIPFFT
|
||||
FFT_PATH =
|
||||
FFT_LIB =
|
||||
FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
|
||||
|
||||
# JPEG and/or PNG library
|
||||
# see discussion in Section 3.5.4 of manual
|
||||
|
||||
Reference in New Issue
Block a user