From 8cdbf380cf2ff3c08a4b99c4e528769b9717658d Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 20 Apr 2022 12:06:38 +1000 Subject: [PATCH] Add support for hipFFT backend in PPPMKokkos. --- cmake/CMakeLists.txt | 6 +++++ cmake/Modules/Packages/KOKKOS.cmake | 5 ++++ src/KOKKOS/fft3d_kokkos.cpp | 40 +++++++++++++++++++++++++++-- src/KOKKOS/fft3d_kokkos.h | 4 +++ src/KOKKOS/fftdata_kokkos.h | 31 ++++++++++++++++++++-- src/KSPACE/pppm.h | 2 ++ 6 files changed, 84 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 99d51614c8..233392c9f8 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -941,6 +941,12 @@ if(PKG_KSPACE) else() message(STATUS "Kokkos FFT: cuFFT") endif() + elseif(Kokkos_ENABLE_HIP) + if(FFT STREQUAL "KISS") + message(STATUS "Kokkos FFT: KISS") + else() + message(STATUS "Kokkos FFT: hipFFT") + endif() else() message(STATUS "Kokkos FFT: ${FFT}") endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 4e35e6dcc0..6fa5892e78 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -130,6 +130,11 @@ if(PKG_KSPACE) target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) target_link_libraries(lammps PRIVATE cufft) endif() + elseif(Kokkos_ENABLE_HIP) + if(NOT (FFT STREQUAL "KISS")) + target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT) + target_link_libraries(lammps PRIVATE hipfft) + endif() endif() endif() diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 737c2f20b5..7e03648357 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -49,10 +49,14 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs"); #elif defined(FFT_FFTW3) if (ngpus > 0 && execution_space == Device) - lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA on GPUs"); + lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA/HIP on GPUs"); #elif defined(FFT_CUFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs"); +#elif defined(FFT_HIPFFT) + if (ngpus > 0 && execution_space == Host) + lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos HIP on the host CPUs"); + #elif defined(FFT_KISSFFT) // The compiler can't statically determine the stack size needed for // recursive function calls in KISS FFT and the default per-thread @@ -63,6 +67,11 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int cudaDeviceGetLimit(&stack_size,cudaLimitStackSize); if (stack_size < 2048) cudaDeviceSetLimit(cudaLimitStackSize,2048); +// #elif defined (KOKKOS_ENABLE_HIP) +// size_t stack_size; +// hipDeviceGetLimit(&stack_size,hipLimitStackSize); +// if (stack_size < 2048) +// hipDeviceSetLimit(hipLimitStackSize,2048); #endif #endif @@ -145,7 +154,7 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) +#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; @@ -227,6 +236,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -271,6 +282,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) @@ -313,6 +326,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) @@ -699,6 +714,23 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); +#elif defined(FFT_HIPFFT) + + hipfftPlanMany(&(plan->plan_fast), 1, &nfast, + &nfast,1,plan->length1, + &nfast,1,plan->length1, + HIPFFT_TYPE,plan->total1/plan->length1); + + hipfftPlanMany(&(plan->plan_mid), 1, &nmid, + &nmid,1,plan->length2, + &nmid,1,plan->length2, + HIPFFT_TYPE,plan->total2/plan->length2); + + hipfftPlanMany(&(plan->plan_slow), 1, &nslow, + &nslow,1,plan->length3, + &nslow,1,plan->length3, + HIPFFT_TYPE,plan->total3/plan->length3); + #else /* FFT_KISS */ kissfftKK = new KissFFTKokkos(); @@ -863,6 +895,10 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); +#elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); + hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); + hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; typename FFT_AT::t_FFT_DATA_1d d_tmp = diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 12f0f787d1..f4bc3fe58a 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -60,6 +60,10 @@ struct fft_plan_3d_kokkos { cufftHandle plan_fast; cufftHandle plan_mid; cufftHandle plan_slow; +#elif defined(FFT_HIPFFT) + hipfftHandle plan_fast; + hipfftHandle plan_mid; + hipfftHandle plan_slow; #else kiss_fft_state_kokkos cfg_fast_forward; kiss_fft_state_kokkos cfg_fast_backward; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 8a853c33af..a82e9279f9 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -49,8 +49,8 @@ typedef double FFT_SCALAR; #endif -// with KOKKOS in CUDA mode we can only have -// CUFFT or KISSFFT, thus undefine all other +// with KOKKOS in CUDA or HIP mode we can only have +// CUFFT/HIPFFT or KISSFFT, thus undefine all other // FFTs here, since they may be valid in fft3d.cpp #ifdef KOKKOS_ENABLE_CUDA @@ -66,10 +66,26 @@ typedef double FFT_SCALAR; # if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) # define FFT_KISSFFT # endif +#elif defined(KOKKOS_ENABLE_HIP) +# if defined(FFT_FFTW) +# undef FFT_FFTW +# endif +# if defined(FFT_FFTW3) +# undef FFT_FFTW3 +# endif +# if defined(FFT_MKL) +# undef FFT_MKL +# endif +# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) +# define FFT_KISSFFT +# endif #else # if defined(FFT_CUFFT) # error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" # endif +# if defined(FFT_HIPFFT) +# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT" +# endif // if user set FFTW, it means FFTW3 # ifdef FFT_FFTW # define FFT_FFTW3 @@ -110,6 +126,17 @@ typedef double FFT_SCALAR; #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_DATA; #endif +#elif defined(FFT_HIPFFT) + #include "hipfft.h" + #if defined(FFT_SINGLE) + #define hipfftExec hipfftExecC2C + #define HIPFFT_TYPE HIPFFT_C2C + typedef hipfftComplex FFT_DATA; + #else + #define hipfftExec hipfftExecZ2Z + #define HIPFFT_TYPE HIPFFT_Z2Z + typedef hipfftDoubleComplex FFT_DATA; + #endif #else #if defined(FFT_SINGLE) #define kiss_fft_scalar float diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index 5df8e5e2bc..f982b5ff6c 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -28,6 +28,8 @@ KSpaceStyle(pppm,PPPM); #define LMP_FFT_LIB "MKL FFT" #elif defined(FFT_CUFFT) #define LMP_FFT_LIB "cuFFT" +#elif defined(FFT_HIPFFT) +#define LMP_FFT_LIB "hipFFT" #else #define LMP_FFT_LIB "KISS FFT" #endif