diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 0d8cc61a7b..92d18f2702 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -739,7 +739,7 @@ if(PKG_KSPACE) else() message(STATUS "Using double precision FFTs") endif() - if(FFT_FFTW_THREADS) + if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) message(STATUS "Using threaded FFTs") else() message(STATUS "Using non-threaded FFTs") diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 5786d7cb8a..07612447f9 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -22,7 +22,7 @@ if(PKG_KSPACE) include_directories(${${FFTW}_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES}) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) - option(FFT_FFTW_THREADS "Use threaded FFT library" ON) + option(FFT_FFTW_THREADS "Use threaded FFTW library" ON) else() option(FFT_FFTW_THREADS "Use threaded FFT library" OFF) endif() @@ -38,6 +38,10 @@ if(PKG_KSPACE) elseif(FFT STREQUAL "MKL") find_package(MKL REQUIRED) add_definitions(-DFFT_MKL) + option(FFT_MKL_THREADS "Use threaded MKL FFT" ON) + if(FFT_MKL_THREADS) + add_definitions(-DFFT_MKL_THREADS) + endif() include_directories(${MKL_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) else() diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index b76e902033..a6cbf7b8ca 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -106,6 +106,7 @@ to assist: -D FFTW3_LIBRARIES=path # path to FFTW3 libraries -D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries -D MKL_INCLUDE_DIRS=path # ditto for Intel MKL library + -D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries -D MKL_LIBRARIES=path **Makefile.machine settings**\ : @@ -117,6 +118,7 @@ to assist: # default is KISS if not specified FFT_INC = -DFFT_SINGLE # do not specify for double precision FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries + FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY # default is FFT\_PACK\_ARRAY if not specified @@ -129,12 +131,14 @@ to assist: FFT_LIB = -lfftw3 # FFTW3 double precision FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS) FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision - FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler - FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier + FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler, serial interface + FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier, serial interface + FFT_LIB = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface + FFT_LIB = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core # MKL with GNU compiler, threaded interface As with CMake, you do not need to set paths in FFT\_INC or FFT\_PATH, if -make can find the FFT header and library files. You must specify -FFT\_LIB with the appropriate FFT libraries to include in the link. +the compiler can find the FFT header and library files in its default search path. +You must specify FFT\_LIB with the appropriate FFT libraries to include in the link. **CMake and make info**\ : diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 9a67ca81f0..5e51c97b54 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -147,12 +147,10 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) +#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_MKL) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; -#elif defined(FFT_MKL) - d_out(i) *= norm; #else d_out(i,0) *= norm; d_out(i,1) *= norm; @@ -607,7 +605,9 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); - //DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_fast); DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -617,7 +617,9 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); - //DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_mid); DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -627,7 +629,9 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); - //DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_slow); if (scaled == 0) @@ -684,6 +688,16 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl NULL,&nslow,1,plan->length3, NULL,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); + + if (scaled == 0) + plan->scaled = 0; + else { + plan->scaled = 1; + plan->norm = 1.0/(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } + #elif defined(FFT_CUFFT) cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -699,6 +713,16 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); + + if (scaled == 0) + plan->scaled = 0; + else { + plan->scaled = 1; + plan->norm = 1.0/(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } + #else kissfftKK = new KissFFTKokkos(); @@ -726,7 +750,6 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->cfg_slow_forward = KissFFTKokkos::kiss_fft_alloc_kokkos(nslow,0,NULL,NULL); plan->cfg_slow_backward = KissFFTKokkos::kiss_fft_alloc_kokkos(nslow,1,NULL,NULL); } -#endif if (scaled == 0) plan->scaled = 0; @@ -737,6 +760,8 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl (out_khi-out_klo+1); } +#endif + return plan; } @@ -839,13 +864,13 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d #if defined(FFT_MKL) if (flag == -1) { - DftiComputeForward(plan->handle_fast,data); - DftiComputeForward(plan->handle_mid,data); - DftiComputeForward(plan->handle_slow,data); + DftiComputeForward(plan->handle_fast,(FFT_DATA*)d_data.data()); + DftiComputeForward(plan->handle_mid,(FFT_DATA*)d_data.data()); + DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data()); } else { - DftiComputeBackward(plan->handle_fast,data); - DftiComputeBackward(plan->handle_mid,data); - DftiComputeBackward(plan->handle_slow,data); + DftiComputeBackward(plan->handle_fast,(FFT_DATA*)d_data.data()); + DftiComputeBackward(plan->handle_mid,(FFT_DATA*)d_data.data()); + DftiComputeBackward(plan->handle_slow,(FFT_DATA*)d_data.data()); } #elif defined(FFT_FFTW3) if (flag == -1) { diff --git a/src/KSPACE/fft3d.cpp b/src/KSPACE/fft3d.cpp index c9cf8df172..d7b3248692 100644 --- a/src/KSPACE/fft3d.cpp +++ b/src/KSPACE/fft3d.cpp @@ -107,13 +107,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_fast,data); else DftiComputeBackward(plan->handle_fast,data); - /* -#elif defined(FFT_FFTW2) - if (flag == -1) - fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0); - else - fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0); - */ #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_fast_forward; @@ -148,13 +141,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_mid,data); else DftiComputeBackward(plan->handle_mid,data); - /* -#elif defined(FFT_FFTW2) - if (flag == -1) - fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0); - else - fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0); - */ #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_mid_forward; @@ -189,13 +175,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_slow,data); else DftiComputeBackward(plan->handle_slow,data); - /* -#elif defined(FFT_FFTW2) - if (flag == -1) - fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0); - else - fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0); - */ #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_slow_forward; @@ -508,6 +487,9 @@ struct fft_plan_3d *fft_3d_create_plan( DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_fast); DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -517,6 +499,9 @@ struct fft_plan_3d *fft_3d_create_plan( DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_mid); DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -526,6 +511,9 @@ struct fft_plan_3d *fft_3d_create_plan( DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_slow); if (scaled == 0) @@ -537,50 +525,6 @@ struct fft_plan_3d *fft_3d_create_plan( (out_khi-out_klo+1); } - /* -#elif defined(FFT_FFTW2) - - plan->plan_fast_forward = - fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - plan->plan_fast_backward = - fftw_create_plan(nfast,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - - if (nmid == nfast) { - plan->plan_mid_forward = plan->plan_fast_forward; - plan->plan_mid_backward = plan->plan_fast_backward; - } - else { - plan->plan_mid_forward = - fftw_create_plan(nmid,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - plan->plan_mid_backward = - fftw_create_plan(nmid,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - } - - if (nslow == nfast) { - plan->plan_slow_forward = plan->plan_fast_forward; - plan->plan_slow_backward = plan->plan_fast_backward; - } - else if (nslow == nmid) { - plan->plan_slow_forward = plan->plan_mid_forward; - plan->plan_slow_backward = plan->plan_mid_backward; - } - else { - plan->plan_slow_forward = - fftw_create_plan(nslow,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - plan->plan_slow_backward = - fftw_create_plan(nslow,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - } - - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } - */ - #elif defined(FFT_FFTW3) #if defined(FFT_FFTW_THREADS) if (nthreads > 1) { @@ -686,20 +630,6 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan) DftiFreeDescriptor(&(plan->handle_fast)); DftiFreeDescriptor(&(plan->handle_mid)); DftiFreeDescriptor(&(plan->handle_slow)); - /* -#elif defined(FFT_FFTW2) - if (plan->plan_slow_forward != plan->plan_fast_forward && - plan->plan_slow_forward != plan->plan_mid_forward) { - fftw_destroy_plan(plan->plan_slow_forward); - fftw_destroy_plan(plan->plan_slow_backward); - } - if (plan->plan_mid_forward != plan->plan_fast_forward) { - fftw_destroy_plan(plan->plan_mid_forward); - fftw_destroy_plan(plan->plan_mid_backward); - } - fftw_destroy_plan(plan->plan_fast_forward); - fftw_destroy_plan(plan->plan_fast_backward); - */ #elif defined(FFT_FFTW3) FFTW_API(destroy_plan)(plan->plan_slow_forward); FFTW_API(destroy_plan)(plan->plan_slow_backward); @@ -840,18 +770,6 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) DftiComputeBackward(plan->handle_mid,data); DftiComputeBackward(plan->handle_slow,data); } - /* -#elif defined(FFT_FFTW2) - if (flag == -1) { - fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0); - fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0); - fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0); - } else { - fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0); - fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0); - fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0); - } - */ #elif defined(FFT_FFTW3) FFTW_API(plan) theplan; if (flag == -1) diff --git a/src/KSPACE/fft3d.h b/src/KSPACE/fft3d.h index 8c3d2f27ad..4b5f36716f 100644 --- a/src/KSPACE/fft3d.h +++ b/src/KSPACE/fft3d.h @@ -43,14 +43,6 @@ typedef double FFT_SCALAR; typedef float _Complex FFT_DATA; #define FFT_MKL_PREC DFTI_SINGLE -//#elif defined(FFT_FFTW2) -//#if defined(FFTW_SIZE) -//#include "sfftw.h" -//#else -//#include "fftw.h" -//#endif -//typedef FFTW_COMPLEX FFT_DATA; - #elif defined(FFT_FFTW3) #include "fftw3.h" typedef fftwf_complex FFT_DATA; @@ -84,14 +76,6 @@ typedef struct kiss_fft_state* kiss_fft_cfg; typedef double _Complex FFT_DATA; #define FFT_MKL_PREC DFTI_DOUBLE -//#elif defined(FFT_FFTW2) -//#if defined(FFTW_SIZE) -//#include "dfftw.h" -//#else -//#include "fftw.h" -//#endif -//typedef FFTW_COMPLEX FFT_DATA; - #elif defined(FFT_FFTW3) #include "fftw3.h" typedef fftw_complex FFT_DATA; @@ -141,13 +125,6 @@ struct fft_plan_3d { DFTI_DESCRIPTOR *handle_fast; DFTI_DESCRIPTOR *handle_mid; DFTI_DESCRIPTOR *handle_slow; -//#elif defined(FFT_FFTW2) -// fftw_plan plan_fast_forward; -// fftw_plan plan_fast_backward; -// fftw_plan plan_mid_forward; -// fftw_plan plan_mid_backward; -//fftw_plan plan_slow_forward; -//fftw_plan plan_slow_backward; #elif defined(FFT_FFTW3) FFTW_API(plan) plan_fast_forward; FFTW_API(plan) plan_fast_backward;