properly support threaded FFTs with MKL and document it. remove commented out FFTW2 code

This commit is contained in:
Axel Kohlmeyer
2020-01-27 11:00:57 -05:00
parent 4134d7fddd
commit 795f2fd894
6 changed files with 61 additions and 133 deletions

View File

@ -739,7 +739,7 @@ if(PKG_KSPACE)
else()
message(STATUS "Using double precision FFTs")
endif()
if(FFT_FFTW_THREADS)
if(FFT_FFTW_THREADS OR FFT_MKL_THREADS)
message(STATUS "Using threaded FFTs")
else()
message(STATUS "Using non-threaded FFTs")

View File

@ -22,7 +22,7 @@ if(PKG_KSPACE)
include_directories(${${FFTW}_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES})
if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
option(FFT_FFTW_THREADS "Use threaded FFT library" ON)
option(FFT_FFTW_THREADS "Use threaded FFTW library" ON)
else()
option(FFT_FFTW_THREADS "Use threaded FFT library" OFF)
endif()
@ -38,6 +38,10 @@ if(PKG_KSPACE)
elseif(FFT STREQUAL "MKL")
find_package(MKL REQUIRED)
add_definitions(-DFFT_MKL)
option(FFT_MKL_THREADS "Use threaded MKL FFT" ON)
if(FFT_MKL_THREADS)
add_definitions(-DFFT_MKL_THREADS)
endif()
include_directories(${MKL_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
else()

View File

@ -106,6 +106,7 @@ to assist:
-D FFTW3_LIBRARIES=path # path to FFTW3 libraries
-D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries
-D MKL_INCLUDE_DIRS=path # ditto for Intel MKL library
-D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries
-D MKL_LIBRARIES=path
**Makefile.machine settings**\ :
@ -117,6 +118,7 @@ to assist:
# default is KISS if not specified
FFT_INC = -DFFT_SINGLE # do not specify for double precision
FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries
FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries
FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY
# default is FFT\_PACK\_ARRAY if not specified
@ -129,12 +131,14 @@ to assist:
FFT_LIB = -lfftw3 # FFTW3 double precision
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision
FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler
FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier
FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler, serial interface
FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier, serial interface
FFT_LIB = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface
FFT_LIB = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core # MKL with GNU compiler, threaded interface
As with CMake, you do not need to set paths in FFT\_INC or FFT\_PATH, if
make can find the FFT header and library files. You must specify
FFT\_LIB with the appropriate FFT libraries to include in the link.
the compiler can find the FFT header and library files in its default search path.
You must specify FFT\_LIB with the appropriate FFT libraries to include in the link.
**CMake and make info**\ :

View File

@ -147,12 +147,10 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (const int &i) const {
#if defined(FFT_FFTW3) || defined(FFT_CUFFT)
#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_MKL)
FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i);
*(out_ptr++) *= norm;
*(out_ptr++) *= norm;
#elif defined(FFT_MKL)
d_out(i) *= norm;
#else
d_out(i,0) *= norm;
d_out(i,1) *= norm;
@ -607,7 +605,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
//DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#if defined(FFT_MKL_THREADS)
DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
DftiCommitDescriptor(plan->handle_fast);
DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@ -617,7 +617,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
//DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#if defined(FFT_MKL_THREADS)
DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
DftiCommitDescriptor(plan->handle_mid);
DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@ -627,7 +629,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
//DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#if defined(FFT_MKL_THREADS)
DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
DftiCommitDescriptor(plan->handle_slow);
if (scaled == 0)
@ -684,6 +688,16 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
NULL,&nslow,1,plan->length3,
NULL,&nslow,1,plan->length3,
FFTW_BACKWARD,FFTW_ESTIMATE);
if (scaled == 0)
plan->scaled = 0;
else {
plan->scaled = 1;
plan->norm = 1.0/(nfast*nmid*nslow);
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
(out_khi-out_klo+1);
}
#elif defined(FFT_CUFFT)
cufftPlanMany(&(plan->plan_fast), 1, &nfast,
&nfast,1,plan->length1,
@ -699,6 +713,16 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
&nslow,1,plan->length3,
&nslow,1,plan->length3,
CUFFT_TYPE,plan->total3/plan->length3);
if (scaled == 0)
plan->scaled = 0;
else {
plan->scaled = 1;
plan->norm = 1.0/(nfast*nmid*nslow);
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
(out_khi-out_klo+1);
}
#else
kissfftKK = new KissFFTKokkos<DeviceType>();
@ -726,7 +750,6 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
plan->cfg_slow_forward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,0,NULL,NULL);
plan->cfg_slow_backward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,1,NULL,NULL);
}
#endif
if (scaled == 0)
plan->scaled = 0;
@ -737,6 +760,8 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
(out_khi-out_klo+1);
}
#endif
return plan;
}
@ -839,13 +864,13 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d
#if defined(FFT_MKL)
if (flag == -1) {
DftiComputeForward(plan->handle_fast,data);
DftiComputeForward(plan->handle_mid,data);
DftiComputeForward(plan->handle_slow,data);
DftiComputeForward(plan->handle_fast,(FFT_DATA*)d_data.data());
DftiComputeForward(plan->handle_mid,(FFT_DATA*)d_data.data());
DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data());
} else {
DftiComputeBackward(plan->handle_fast,data);
DftiComputeBackward(plan->handle_mid,data);
DftiComputeBackward(plan->handle_slow,data);
DftiComputeBackward(plan->handle_fast,(FFT_DATA*)d_data.data());
DftiComputeBackward(plan->handle_mid,(FFT_DATA*)d_data.data());
DftiComputeBackward(plan->handle_slow,(FFT_DATA*)d_data.data());
}
#elif defined(FFT_FFTW3)
if (flag == -1) {

View File

@ -107,13 +107,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
DftiComputeForward(plan->handle_fast,data);
else
DftiComputeBackward(plan->handle_fast,data);
/*
#elif defined(FFT_FFTW2)
if (flag == -1)
fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0);
else
fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0);
*/
#elif defined(FFT_FFTW3)
if (flag == -1)
theplan=plan->plan_fast_forward;
@ -148,13 +141,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
DftiComputeForward(plan->handle_mid,data);
else
DftiComputeBackward(plan->handle_mid,data);
/*
#elif defined(FFT_FFTW2)
if (flag == -1)
fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0);
else
fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0);
*/
#elif defined(FFT_FFTW3)
if (flag == -1)
theplan=plan->plan_mid_forward;
@ -189,13 +175,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
DftiComputeForward(plan->handle_slow,data);
else
DftiComputeBackward(plan->handle_slow,data);
/*
#elif defined(FFT_FFTW2)
if (flag == -1)
fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0);
else
fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0);
*/
#elif defined(FFT_FFTW3)
if (flag == -1)
theplan=plan->plan_slow_forward;
@ -508,6 +487,9 @@ struct fft_plan_3d *fft_3d_create_plan(
DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
#if defined(FFT_MKL_THREADS)
DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
DftiCommitDescriptor(plan->handle_fast);
DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@ -517,6 +499,9 @@ struct fft_plan_3d *fft_3d_create_plan(
DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
#if defined(FFT_MKL_THREADS)
DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
DftiCommitDescriptor(plan->handle_mid);
DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
@ -526,6 +511,9 @@ struct fft_plan_3d *fft_3d_create_plan(
DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
#if defined(FFT_MKL_THREADS)
DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
#endif
DftiCommitDescriptor(plan->handle_slow);
if (scaled == 0)
@ -537,50 +525,6 @@ struct fft_plan_3d *fft_3d_create_plan(
(out_khi-out_klo+1);
}
/*
#elif defined(FFT_FFTW2)
plan->plan_fast_forward =
fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
plan->plan_fast_backward =
fftw_create_plan(nfast,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
if (nmid == nfast) {
plan->plan_mid_forward = plan->plan_fast_forward;
plan->plan_mid_backward = plan->plan_fast_backward;
}
else {
plan->plan_mid_forward =
fftw_create_plan(nmid,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
plan->plan_mid_backward =
fftw_create_plan(nmid,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
}
if (nslow == nfast) {
plan->plan_slow_forward = plan->plan_fast_forward;
plan->plan_slow_backward = plan->plan_fast_backward;
}
else if (nslow == nmid) {
plan->plan_slow_forward = plan->plan_mid_forward;
plan->plan_slow_backward = plan->plan_mid_backward;
}
else {
plan->plan_slow_forward =
fftw_create_plan(nslow,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
plan->plan_slow_backward =
fftw_create_plan(nslow,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
}
if (scaled == 0)
plan->scaled = 0;
else {
plan->scaled = 1;
plan->norm = 1.0/(nfast*nmid*nslow);
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
(out_khi-out_klo+1);
}
*/
#elif defined(FFT_FFTW3)
#if defined(FFT_FFTW_THREADS)
if (nthreads > 1) {
@ -686,20 +630,6 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan)
DftiFreeDescriptor(&(plan->handle_fast));
DftiFreeDescriptor(&(plan->handle_mid));
DftiFreeDescriptor(&(plan->handle_slow));
/*
#elif defined(FFT_FFTW2)
if (plan->plan_slow_forward != plan->plan_fast_forward &&
plan->plan_slow_forward != plan->plan_mid_forward) {
fftw_destroy_plan(plan->plan_slow_forward);
fftw_destroy_plan(plan->plan_slow_backward);
}
if (plan->plan_mid_forward != plan->plan_fast_forward) {
fftw_destroy_plan(plan->plan_mid_forward);
fftw_destroy_plan(plan->plan_mid_backward);
}
fftw_destroy_plan(plan->plan_fast_forward);
fftw_destroy_plan(plan->plan_fast_backward);
*/
#elif defined(FFT_FFTW3)
FFTW_API(destroy_plan)(plan->plan_slow_forward);
FFTW_API(destroy_plan)(plan->plan_slow_backward);
@ -840,18 +770,6 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
DftiComputeBackward(plan->handle_mid,data);
DftiComputeBackward(plan->handle_slow,data);
}
/*
#elif defined(FFT_FFTW2)
if (flag == -1) {
fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0);
fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0);
fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0);
} else {
fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0);
fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0);
fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0);
}
*/
#elif defined(FFT_FFTW3)
FFTW_API(plan) theplan;
if (flag == -1)

View File

@ -43,14 +43,6 @@ typedef double FFT_SCALAR;
typedef float _Complex FFT_DATA;
#define FFT_MKL_PREC DFTI_SINGLE
//#elif defined(FFT_FFTW2)
//#if defined(FFTW_SIZE)
//#include "sfftw.h"
//#else
//#include "fftw.h"
//#endif
//typedef FFTW_COMPLEX FFT_DATA;
#elif defined(FFT_FFTW3)
#include "fftw3.h"
typedef fftwf_complex FFT_DATA;
@ -84,14 +76,6 @@ typedef struct kiss_fft_state* kiss_fft_cfg;
typedef double _Complex FFT_DATA;
#define FFT_MKL_PREC DFTI_DOUBLE
//#elif defined(FFT_FFTW2)
//#if defined(FFTW_SIZE)
//#include "dfftw.h"
//#else
//#include "fftw.h"
//#endif
//typedef FFTW_COMPLEX FFT_DATA;
#elif defined(FFT_FFTW3)
#include "fftw3.h"
typedef fftw_complex FFT_DATA;
@ -141,13 +125,6 @@ struct fft_plan_3d {
DFTI_DESCRIPTOR *handle_fast;
DFTI_DESCRIPTOR *handle_mid;
DFTI_DESCRIPTOR *handle_slow;
//#elif defined(FFT_FFTW2)
// fftw_plan plan_fast_forward;
// fftw_plan plan_fast_backward;
// fftw_plan plan_mid_forward;
// fftw_plan plan_mid_backward;
//fftw_plan plan_slow_forward;
//fftw_plan plan_slow_backward;
#elif defined(FFT_FFTW3)
FFTW_API(plan) plan_fast_forward;
FFTW_API(plan) plan_fast_backward;