Merge pull request #3237 from emilyviolet/hipfft

Add support for hipFFT with PPPMKokkos
This commit is contained in:
Axel Kohlmeyer
2022-05-05 19:39:52 -04:00
committed by GitHub
10 changed files with 117 additions and 12 deletions

View File

@ -947,6 +947,12 @@ if(PKG_KSPACE)
else()
message(STATUS "Kokkos FFT: cuFFT")
endif()
elseif(Kokkos_ENABLE_HIP)
if(FFT STREQUAL "KISS")
message(STATUS "Kokkos FFT: KISS")
else()
message(STATUS "Kokkos FFT: hipFFT")
endif()
else()
message(STATUS "Kokkos FFT: ${FFT}")
endif()

View File

@ -130,6 +130,11 @@ if(PKG_KSPACE)
target_compile_definitions(lammps PRIVATE -DFFT_CUFFT)
target_link_libraries(lammps PRIVATE cufft)
endif()
elseif(Kokkos_ENABLE_HIP)
if(NOT (FFT STREQUAL "KISS"))
target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT)
target_link_libraries(lammps PRIVATE hipfft)
endif()
endif()
endif()

View File

@ -641,6 +641,20 @@ This list was last updated for version 3.5.0 of the Kokkos library.
-D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper
For AMD or NVIDIA GPUs using HIP, set these variables:
.. code-block:: bash
-D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above
-D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above
-D Kokkos_ENABLE_HIP=yes
-D Kokkos_ENABLE_OPENMP=yes
This enables FFTs on the GPU, either through the internal KISSFFT library
or through the hipFFT wrapper library, which calls the
platform-appropriate vendor library: rocFFT on AMD GPUs or cuFFT on
NVIDIA GPUs. hipFFT is used unless ``FFT`` is set to ``KISS``.
To simplify compilation, four preset files are included in the
``cmake/presets`` folder, ``kokkos-serial.cmake``,
``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, and
@ -707,6 +721,15 @@ This list was last updated for version 3.5.0 of the Kokkos library.
KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd)
CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper
For AMD or NVIDIA GPUs using HIP:
.. code-block:: make
KOKKOS_DEVICES = HIP
KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU
# GPUARCH = GPU from list above
FFT_INC = -DFFT_HIPFFT # enable use of hipFFT (optional)
FFT_LIB = -lhipfft # link to hipFFT library
Advanced KOKKOS compilation settings
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

1
src/.gitignore vendored
View File

@ -627,6 +627,7 @@
/ewald.h
/ewald_cg.cpp
/ewald_cg.h
/ewald_const.h
/ewald_dipole.cpp
/ewald_dipole.h
/ewald_dipole_spin.cpp

View File

@ -46,13 +46,17 @@ FFT3dKokkos<DeviceType>::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int
#if defined(FFT_MKL)
if (ngpus > 0 && execution_space == Device)
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs");
lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs");
#elif defined(FFT_FFTW3)
if (ngpus > 0 && execution_space == Device)
lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA on GPUs");
lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs");
#elif defined(FFT_CUFFT)
if (ngpus > 0 && execution_space == Host)
lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs");
lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs");
#elif defined(FFT_HIPFFT)
if (ngpus > 0 && execution_space == Host)
lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs");
#elif defined(FFT_KISSFFT)
// The compiler can't statically determine the stack size needed for
// recursive function calls in KISS FFT and the default per-thread
@ -145,7 +149,7 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
#if defined(FFT_FFTW3) || defined(FFT_CUFFT)
#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT)
FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i);
*(out_ptr++) *= norm;
*(out_ptr++) *= norm;
@ -227,6 +231,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
#else
typename FFT_AT::t_FFT_DATA_1d d_tmp =
typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
@ -271,6 +277,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
#else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == 1)
@ -313,6 +321,8 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == 1)
@ -699,6 +709,23 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
&nslow,1,plan->length3,
CUFFT_TYPE,plan->total3/plan->length3);
#elif defined(FFT_HIPFFT)
hipfftPlanMany(&(plan->plan_fast), 1, &nfast,
&nfast,1,plan->length1,
&nfast,1,plan->length1,
HIPFFT_TYPE,plan->total1/plan->length1);
hipfftPlanMany(&(plan->plan_mid), 1, &nmid,
&nmid,1,plan->length2,
&nmid,1,plan->length2,
HIPFFT_TYPE,plan->total2/plan->length2);
hipfftPlanMany(&(plan->plan_slow), 1, &nslow,
&nslow,1,plan->length3,
&nslow,1,plan->length3,
HIPFFT_TYPE,plan->total3/plan->length3);
#else /* FFT_KISS */
kissfftKK = new KissFFTKokkos<DeviceType>();
@ -863,6 +890,10 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#elif defined(FFT_HIPFFT)
hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else
kiss_fft_functor<DeviceType> f;
typename FFT_AT::t_FFT_DATA_1d d_tmp =

View File

@ -60,6 +60,10 @@ struct fft_plan_3d_kokkos {
cufftHandle plan_fast;
cufftHandle plan_mid;
cufftHandle plan_slow;
#elif defined(FFT_HIPFFT)
hipfftHandle plan_fast;
hipfftHandle plan_mid;
hipfftHandle plan_slow;
#else
kiss_fft_state_kokkos<DeviceType> cfg_fast_forward;
kiss_fft_state_kokkos<DeviceType> cfg_fast_backward;

View File

@ -49,8 +49,8 @@ typedef double FFT_SCALAR;
#endif
// with KOKKOS in CUDA mode we can only have
// CUFFT or KISSFFT, thus undefine all other
// with KOKKOS in CUDA or HIP mode we can only have
// CUFFT/HIPFFT or KISSFFT, thus undefine all other
// FFTs here, since they may be valid in fft3d.cpp
#ifdef KOKKOS_ENABLE_CUDA
@ -66,10 +66,26 @@ typedef double FFT_SCALAR;
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
# define FFT_KISSFFT
# endif
#elif defined(KOKKOS_ENABLE_HIP)
# if defined(FFT_FFTW)
# undef FFT_FFTW
# endif
# if defined(FFT_FFTW3)
# undef FFT_FFTW3
# endif
# if defined(FFT_MKL)
# undef FFT_MKL
# endif
# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT)
# define FFT_KISSFFT
# endif
#else
# if defined(FFT_CUFFT)
# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
# endif
# if defined(FFT_HIPFFT)
# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT"
# endif
// if user set FFTW, it means FFTW3
# ifdef FFT_FFTW
# define FFT_FFTW3
@ -110,6 +126,17 @@ typedef double FFT_SCALAR;
#define CUFFT_TYPE CUFFT_Z2Z
typedef cufftDoubleComplex FFT_DATA;
#endif
#elif defined(FFT_HIPFFT)
#include "hipfft.h"
#if defined(FFT_SINGLE)
#define hipfftExec hipfftExecC2C
#define HIPFFT_TYPE HIPFFT_C2C
typedef hipfftComplex FFT_DATA;
#else
#define hipfftExec hipfftExecZ2Z
#define HIPFFT_TYPE HIPFFT_Z2Z
typedef hipfftDoubleComplex FFT_DATA;
#endif
#else
#if defined(FFT_SINGLE)
#define kiss_fft_scalar float

View File

@ -28,6 +28,8 @@ KSpaceStyle(pppm,PPPM);
#define LMP_FFT_LIB "MKL FFT"
#elif defined(FFT_CUFFT)
#define LMP_FFT_LIB "cuFFT"
#elif defined(FFT_HIPFFT)
#define LMP_FFT_LIB "hipFFT"
#else
#define LMP_FFT_LIB "KISS FFT"
#endif

View File

@ -1,4 +1,4 @@
# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler
# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT
SHELL = /bin/sh
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC =
MY_HIP_EXE = $(shell which hipcc)
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
FFT_INC = -DFFT_HIPFFT
FFT_PATH =
FFT_LIB =
FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
# JPEG and/or PNG library
# see discussion in Section 3.5.4 of manual

View File

@ -1,4 +1,4 @@
# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler
# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler, hipFFT
SHELL = /bin/sh
@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa
# PATH = path for FFT library
# LIB = name of FFT library
FFT_INC =
MY_HIP_EXE = $(shell which hipcc)
MY_HIP_PATH = $(dir ${MY_HIP_EXE})
FFT_INC = -DFFT_HIPFFT
FFT_PATH =
FFT_LIB =
FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft
# JPEG and/or PNG library
# see discussion in Section 3.5.4 of manual