From 8cdbf380cf2ff3c08a4b99c4e528769b9717658d Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 20 Apr 2022 12:06:38 +1000 Subject: [PATCH 1/7] Add support for hipFFT backend in PPPMKokkos. --- cmake/CMakeLists.txt | 6 +++++ cmake/Modules/Packages/KOKKOS.cmake | 5 ++++ src/KOKKOS/fft3d_kokkos.cpp | 40 +++++++++++++++++++++++++++-- src/KOKKOS/fft3d_kokkos.h | 4 +++ src/KOKKOS/fftdata_kokkos.h | 31 ++++++++++++++++++++-- src/KSPACE/pppm.h | 2 ++ 6 files changed, 84 insertions(+), 4 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 99d51614c8..233392c9f8 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -941,6 +941,12 @@ if(PKG_KSPACE) else() message(STATUS "Kokkos FFT: cuFFT") endif() + elseif(Kokkos_ENABLE_HIP) + if(FFT STREQUAL "KISS") + message(STATUS "Kokkos FFT: KISS") + else() + message(STATUS "Kokkos FFT: hipFFT") + endif() else() message(STATUS "Kokkos FFT: ${FFT}") endif() diff --git a/cmake/Modules/Packages/KOKKOS.cmake b/cmake/Modules/Packages/KOKKOS.cmake index 4e35e6dcc0..6fa5892e78 100644 --- a/cmake/Modules/Packages/KOKKOS.cmake +++ b/cmake/Modules/Packages/KOKKOS.cmake @@ -130,6 +130,11 @@ if(PKG_KSPACE) target_compile_definitions(lammps PRIVATE -DFFT_CUFFT) target_link_libraries(lammps PRIVATE cufft) endif() + elseif(Kokkos_ENABLE_HIP) + if(NOT (FFT STREQUAL "KISS")) + target_compile_definitions(lammps PRIVATE -DFFT_HIPFFT) + target_link_libraries(lammps PRIVATE hipfft) + endif() endif() endif() diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 737c2f20b5..7e03648357 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -49,10 +49,14 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs"); #elif defined(FFT_FFTW3) if (ngpus > 0 && execution_space == Device) - lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA on GPUs"); + lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA/HIP on GPUs"); #elif defined(FFT_CUFFT) if (ngpus > 0 && execution_space == Host) lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs"); +#elif defined(FFT_HIPFFT) + if (ngpus > 0 && execution_space == Host) + lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos HIP on the host CPUs"); + #elif defined(FFT_KISSFFT) // The compiler can't statically determine the stack size needed for // recursive function calls in KISS FFT and the default per-thread @@ -63,6 +67,11 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int cudaDeviceGetLimit(&stack_size,cudaLimitStackSize); if (stack_size < 2048) cudaDeviceSetLimit(cudaLimitStackSize,2048); +// #elif defined (KOKKOS_ENABLE_HIP) +// size_t stack_size; +// hipDeviceGetLimit(&stack_size,hipLimitStackSize); +// if (stack_size < 2048) +// hipDeviceSetLimit(hipLimitStackSize,2048); #endif #endif @@ -145,7 +154,7 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) +#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_HIPFFT) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; @@ -227,6 +236,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); @@ -271,6 +282,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) @@ -313,6 +326,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); + #elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0)); if (flag == 1) @@ -699,6 +714,23 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); +#elif defined(FFT_HIPFFT) + + hipfftPlanMany(&(plan->plan_fast), 1, &nfast, + &nfast,1,plan->length1, + &nfast,1,plan->length1, + HIPFFT_TYPE,plan->total1/plan->length1); + + hipfftPlanMany(&(plan->plan_mid), 1, &nmid, + &nmid,1,plan->length2, + &nmid,1,plan->length2, + HIPFFT_TYPE,plan->total2/plan->length2); + + hipfftPlanMany(&(plan->plan_slow), 1, &nslow, + &nslow,1,plan->length3, + &nslow,1,plan->length3, + HIPFFT_TYPE,plan->total3/plan->length3); + #else /* FFT_KISS */ kissfftKK = new KissFFTKokkos(); @@ -863,6 +895,10 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); +#elif defined(FFT_HIPFFT) + hipfftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag); + hipfftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag); + hipfftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag); #else kiss_fft_functor f; typename FFT_AT::t_FFT_DATA_1d d_tmp = diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 12f0f787d1..f4bc3fe58a 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -60,6 +60,10 @@ struct fft_plan_3d_kokkos { cufftHandle plan_fast; cufftHandle plan_mid; cufftHandle plan_slow; +#elif defined(FFT_HIPFFT) + hipfftHandle plan_fast; + hipfftHandle plan_mid; + hipfftHandle plan_slow; #else kiss_fft_state_kokkos cfg_fast_forward; kiss_fft_state_kokkos cfg_fast_backward; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 8a853c33af..a82e9279f9 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -49,8 +49,8 @@ typedef double FFT_SCALAR; #endif -// with KOKKOS in CUDA mode we can only have -// CUFFT or KISSFFT, thus undefine all other +// with KOKKOS in CUDA or HIP mode we can only have +// CUFFT/HIPFFT or KISSFFT, thus undefine all other // FFTs here, since they may be valid in fft3d.cpp #ifdef KOKKOS_ENABLE_CUDA @@ -66,10 +66,26 @@ typedef double FFT_SCALAR; # if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) # define FFT_KISSFFT # endif +#elif defined(KOKKOS_ENABLE_HIP) +# if defined(FFT_FFTW) +# undef FFT_FFTW +# endif +# if defined(FFT_FFTW3) +# undef FFT_FFTW3 +# endif +# if defined(FFT_MKL) +# undef FFT_MKL +# endif +# if !defined(FFT_HIPFFT) && !defined(FFT_KISSFFT) +# define FFT_KISSFFT +# endif #else # if defined(FFT_CUFFT) # error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" # endif +# if defined(FFT_HIPFFT) +# error "Must enable HIP with KOKKOS to use -DFFT_HIPFFT" +# endif // if user set FFTW, it means FFTW3 # ifdef FFT_FFTW # define FFT_FFTW3 @@ -110,6 +126,17 @@ typedef double FFT_SCALAR; #define CUFFT_TYPE CUFFT_Z2Z typedef cufftDoubleComplex FFT_DATA; #endif +#elif defined(FFT_HIPFFT) + #include "hipfft.h" + #if defined(FFT_SINGLE) + #define hipfftExec hipfftExecC2C + #define HIPFFT_TYPE HIPFFT_C2C + typedef hipfftComplex FFT_DATA; + #else + #define hipfftExec hipfftExecZ2Z + #define HIPFFT_TYPE HIPFFT_Z2Z + typedef hipfftDoubleComplex FFT_DATA; + #endif #else #if defined(FFT_SINGLE) #define kiss_fft_scalar float diff --git a/src/KSPACE/pppm.h b/src/KSPACE/pppm.h index 5df8e5e2bc..f982b5ff6c 100644 --- a/src/KSPACE/pppm.h +++ b/src/KSPACE/pppm.h @@ -28,6 +28,8 @@ KSpaceStyle(pppm,PPPM); #define LMP_FFT_LIB "MKL FFT" #elif defined(FFT_CUFFT) #define LMP_FFT_LIB "cuFFT" +#elif defined(FFT_HIPFFT) +#define LMP_FFT_LIB "hipFFT" #else #define LMP_FFT_LIB "KISS FFT" #endif From 385f0c6c954ed13be9281c6ba2667144a0ddcfb5 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 27 Apr 2022 10:13:03 +1000 Subject: [PATCH 2/7] Update Kokkos build documentation to include hipFFT options. --- doc/src/Build_extras.rst | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/doc/src/Build_extras.rst b/doc/src/Build_extras.rst index d2d12b48db..a6b03916f5 100644 --- a/doc/src/Build_extras.rst +++ b/doc/src/Build_extras.rst @@ -641,6 +641,20 @@ This list was last updated for version 3.5.0 of the Kokkos library. -D CMAKE_CXX_COMPILER=${HOME}/lammps/lib/kokkos/bin/nvcc_wrapper + For AMD or NVIDIA GPUs using HIP, set these variables: + + .. code-block:: bash + + -D Kokkos_ARCH_HOSTARCH=yes # HOSTARCH = HOST from list above + -D Kokkos_ARCH_GPUARCH=yes # GPUARCH = GPU from list above + -D Kokkos_ENABLE_HIP=yes + -D Kokkos_ENABLE_OPENMP=yes + + This will enable FFTs on the GPU, either by the internal KISSFFT library + or with the hipFFT wrapper library, which will call out to the + platform-appropriate vendor library: rocFFT on AMD GPUs or cuFFT on + NVIDIA GPUs. + To simplify compilation, four preset files are included in the ``cmake/presets`` folder, ``kokkos-serial.cmake``, ``kokkos-openmp.cmake``, ``kokkos-cuda.cmake``, and @@ -707,6 +721,15 @@ This list was last updated for version 3.5.0 of the Kokkos library. KOKKOS_ABSOLUTE_PATH = $(shell cd $(KOKKOS_PATH); pwd) CC = mpicxx -cxx=$(KOKKOS_ABSOLUTE_PATH)/config/nvcc_wrapper + For AMD or NVIDIA GPUs using HIP: + + .. code-block:: make + + KOKKOS_DEVICES = HIP + KOKKOS_ARCH = HOSTARCH,GPUARCH # HOSTARCH = HOST from list above that is hosting the GPU + # GPUARCH = GPU from list above + FFT_INC = -DFFT_HIPFFT # enable use of hipFFT (optional) + FFT_LIB = -lhipfft # link to hipFFT library Advanced KOKKOS compilation settings ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ From c43bc000d978561d4844079e3197cdc97e41e714 Mon Sep 17 00:00:00 2001 From: Emily Kahl Date: Wed, 27 Apr 2022 19:46:18 +1000 Subject: [PATCH 3/7] Removed unnecessary commented out code section. --- src/KOKKOS/fft3d_kokkos.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 7e03648357..82b1a19265 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -67,11 +67,6 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int cudaDeviceGetLimit(&stack_size,cudaLimitStackSize); if (stack_size < 2048) cudaDeviceSetLimit(cudaLimitStackSize,2048); -// #elif defined (KOKKOS_ENABLE_HIP) -// size_t stack_size; -// hipDeviceGetLimit(&stack_size,hipLimitStackSize); -// if (stack_size < 2048) -// hipDeviceSetLimit(hipLimitStackSize,2048); #endif #endif From 24b94551a2a2bf6a2a4c7885ef621a1b52e4823e Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 5 May 2022 11:59:32 -0400 Subject: [PATCH 4/7] Update Kokkos AMD Makefiles --- src/MAKE/MACHINES/Makefile.crusher_kokkos | 7 +++++-- src/MAKE/MACHINES/Makefile.spock_kokkos | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.crusher_kokkos b/src/MAKE/MACHINES/Makefile.crusher_kokkos index 7dc1447d4e..f3f6e02a0e 100644 --- a/src/MAKE/MACHINES/Makefile.crusher_kokkos +++ b/src/MAKE/MACHINES/Makefile.crusher_kokkos @@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = +MY_HIP_EXE = $(shell which hipcc) +MY_HIP_PATH = $(dir ${MY_HIP_EXE}) + +FFT_INC = -DFFT_HIPFFT FFT_PATH = -FFT_LIB = +FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft # JPEG and/or PNG library # see discussion in Section 3.5.4 of manual diff --git a/src/MAKE/MACHINES/Makefile.spock_kokkos b/src/MAKE/MACHINES/Makefile.spock_kokkos index a85ebb3039..3c0dca4f95 100644 --- a/src/MAKE/MACHINES/Makefile.spock_kokkos +++ b/src/MAKE/MACHINES/Makefile.spock_kokkos @@ -54,9 +54,12 @@ MPI_LIB = -L${MPICH_DIR}/lib -lmpi -L${CRAY_MPICH_ROOTDIR}/gtl/lib -lmpi_gtl_hsa # PATH = path for FFT library # LIB = name of FFT library -FFT_INC = +MY_HIP_EXE = $(shell which hipcc) +MY_HIP_PATH = $(dir ${MY_HIP_EXE}) + +FFT_INC = -DFFT_HIPFFT FFT_PATH = -FFT_LIB = +FFT_LIB = -L${MY_HIP_PATH}../lib -lhipfft # JPEG and/or PNG library # see discussion in Section 3.5.4 of manual From 2c95f84accb58f9963ddece47efb64ee9a1ca182 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 5 May 2022 12:17:00 -0400 Subject: [PATCH 5/7] Tweak error messages --- src/KOKKOS/fft3d_kokkos.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 82b1a19265..acaed71bd9 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -46,16 +46,16 @@ FFT3dKokkos::FFT3dKokkos(LAMMPS *lmp, MPI_Comm comm, int nfast, int #if defined(FFT_MKL) if (ngpus > 0 && execution_space == Device) - lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos CUDA on GPUs"); + lmp->error->all(FLERR,"Cannot use the MKL library with Kokkos on GPUs"); #elif defined(FFT_FFTW3) if (ngpus > 0 && execution_space == Device) - lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos CUDA/HIP on GPUs"); + lmp->error->all(FLERR,"Cannot use the FFTW library with Kokkos on GPUs"); #elif defined(FFT_CUFFT) if (ngpus > 0 && execution_space == Host) - lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos CUDA on the host CPUs"); + lmp->error->all(FLERR,"Cannot use the cuFFT library with Kokkos on the host CPUs"); #elif defined(FFT_HIPFFT) if (ngpus > 0 && execution_space == Host) - lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos HIP on the host CPUs"); + lmp->error->all(FLERR,"Cannot use the hipFFT library with Kokkos on the host CPUs"); #elif defined(FFT_KISSFFT) // The compiler can't statically determine the stack size needed for From 3b9389e86c13928e47d3981cd0ba7e1ba30edf35 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 5 May 2022 12:23:52 -0400 Subject: [PATCH 6/7] Update .gitignore --- src/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/src/.gitignore b/src/.gitignore index f4db0fc27a..decadd20ff 100644 --- a/src/.gitignore +++ b/src/.gitignore @@ -627,6 +627,7 @@ /ewald.h /ewald_cg.cpp /ewald_cg.h +/ewald_const.h /ewald_dipole.cpp /ewald_dipole.h /ewald_dipole_spin.cpp From 3b68c0ea24f5d104883f870ad60dac885cf43203 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 5 May 2022 14:02:29 -0400 Subject: [PATCH 7/7] Update Makefile comment --- src/MAKE/MACHINES/Makefile.crusher_kokkos | 2 +- src/MAKE/MACHINES/Makefile.spock_kokkos | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/MAKE/MACHINES/Makefile.crusher_kokkos b/src/MAKE/MACHINES/Makefile.crusher_kokkos index f3f6e02a0e..5744f2d9bf 100644 --- a/src/MAKE/MACHINES/Makefile.crusher_kokkos +++ b/src/MAKE/MACHINES/Makefile.crusher_kokkos @@ -1,4 +1,4 @@ -# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler +# crusher_kokkos = KOKKOS/HIP, AMD MI250X GPU and AMD EPYC 7A53 "Optimized 3rd Gen EPYC" CPU, Cray MPICH, hipcc compiler, hipFFT SHELL = /bin/sh diff --git a/src/MAKE/MACHINES/Makefile.spock_kokkos b/src/MAKE/MACHINES/Makefile.spock_kokkos index 3c0dca4f95..5771184287 100644 --- a/src/MAKE/MACHINES/Makefile.spock_kokkos +++ b/src/MAKE/MACHINES/Makefile.spock_kokkos @@ -1,4 +1,4 @@ -# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler +# spock_kokkos = KOKKOS/HIP, AMD MI100 GPU and AMD EPYC 7662 "Rome" CPU, Cray MPICH, hipcc compiler, hipFFT SHELL = /bin/sh