From 795f2fd894a8c35274d048aa05c8d1883e87e04b Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 27 Jan 2020 11:00:57 -0500 Subject: [PATCH 01/10] properly support threaded FFTs with MKL and document it. remove commented out FFTW2 code --- cmake/CMakeLists.txt | 2 +- cmake/Modules/Packages/KSPACE.cmake | 6 +- doc/src/Build_settings.rst | 12 ++-- src/KOKKOS/fft3d_kokkos.cpp | 51 ++++++++++---- src/KSPACE/fft3d.cpp | 100 +++------------------------- src/KSPACE/fft3d.h | 23 ------- 6 files changed, 61 insertions(+), 133 deletions(-) diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index 0d8cc61a7b..92d18f2702 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -739,7 +739,7 @@ if(PKG_KSPACE) else() message(STATUS "Using double precision FFTs") endif() - if(FFT_FFTW_THREADS) + if(FFT_FFTW_THREADS OR FFT_MKL_THREADS) message(STATUS "Using threaded FFTs") else() message(STATUS "Using non-threaded FFTs") diff --git a/cmake/Modules/Packages/KSPACE.cmake b/cmake/Modules/Packages/KSPACE.cmake index 5786d7cb8a..07612447f9 100644 --- a/cmake/Modules/Packages/KSPACE.cmake +++ b/cmake/Modules/Packages/KSPACE.cmake @@ -22,7 +22,7 @@ if(PKG_KSPACE) include_directories(${${FFTW}_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES}) if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY) - option(FFT_FFTW_THREADS "Use threaded FFT library" ON) + option(FFT_FFTW_THREADS "Use threaded FFTW library" ON) else() option(FFT_FFTW_THREADS "Use threaded FFT library" OFF) endif() @@ -38,6 +38,10 @@ if(PKG_KSPACE) elseif(FFT STREQUAL "MKL") find_package(MKL REQUIRED) add_definitions(-DFFT_MKL) + option(FFT_MKL_THREADS "Use threaded MKL FFT" ON) + if(FFT_MKL_THREADS) + add_definitions(-DFFT_MKL_THREADS) + endif() include_directories(${MKL_INCLUDE_DIRS}) list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES}) else() diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index b76e902033..a6cbf7b8ca 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -106,6 +106,7 @@ to assist: -D FFTW3_LIBRARIES=path # path to FFTW3 libraries -D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries -D MKL_INCLUDE_DIRS=path # ditto for Intel MKL library + -D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries -D MKL_LIBRARIES=path **Makefile.machine settings**\ : @@ -117,6 +118,7 @@ to assist: # default is KISS if not specified FFT_INC = -DFFT_SINGLE # do not specify for double precision FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries + FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY # default is FFT\_PACK\_ARRAY if not specified @@ -129,12 +131,14 @@ to assist: FFT_LIB = -lfftw3 # FFTW3 double precision FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS) FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision - FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler - FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier + FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler, serial interface + FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier, serial interface + FFT_LIB = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface + FFT_LIB = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core # MKL with GNU compiler, threaded interface As with CMake, you do not need to set paths in FFT\_INC or FFT\_PATH, if -make can find the FFT header and library files. You must specify -FFT\_LIB with the appropriate FFT libraries to include in the link. +the compiler can find the FFT header and library files in its default search path. +You must specify FFT\_LIB with the appropriate FFT libraries to include in the link. **CMake and make info**\ : diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 9a67ca81f0..5e51c97b54 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -147,12 +147,10 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) +#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_MKL) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; -#elif defined(FFT_MKL) - d_out(i) *= norm; #else d_out(i,0) *= norm; d_out(i,1) *= norm; @@ -607,7 +605,9 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); - //DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_fast); DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -617,7 +617,9 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); - //DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_mid); DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -627,7 +629,9 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); - //DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_slow); if (scaled == 0) @@ -684,6 +688,16 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl NULL,&nslow,1,plan->length3, NULL,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); + + if (scaled == 0) + plan->scaled = 0; + else { + plan->scaled = 1; + plan->norm = 1.0/(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } + #elif defined(FFT_CUFFT) cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, @@ -699,6 +713,16 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); + + if (scaled == 0) + plan->scaled = 0; + else { + plan->scaled = 1; + plan->norm = 1.0/(nfast*nmid*nslow); + plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * + (out_khi-out_klo+1); + } + #else kissfftKK = new KissFFTKokkos(); @@ -726,7 +750,6 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->cfg_slow_forward = KissFFTKokkos::kiss_fft_alloc_kokkos(nslow,0,NULL,NULL); plan->cfg_slow_backward = KissFFTKokkos::kiss_fft_alloc_kokkos(nslow,1,NULL,NULL); } -#endif if (scaled == 0) plan->scaled = 0; @@ -737,6 +760,8 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl (out_khi-out_klo+1); } +#endif + return plan; } @@ -839,13 +864,13 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d #if defined(FFT_MKL) if (flag == -1) { - DftiComputeForward(plan->handle_fast,data); - DftiComputeForward(plan->handle_mid,data); - DftiComputeForward(plan->handle_slow,data); + DftiComputeForward(plan->handle_fast,(FFT_DATA*)d_data.data()); + DftiComputeForward(plan->handle_mid,(FFT_DATA*)d_data.data()); + DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data()); } else { - DftiComputeBackward(plan->handle_fast,data); - DftiComputeBackward(plan->handle_mid,data); - DftiComputeBackward(plan->handle_slow,data); + DftiComputeBackward(plan->handle_fast,(FFT_DATA*)d_data.data()); + DftiComputeBackward(plan->handle_mid,(FFT_DATA*)d_data.data()); + DftiComputeBackward(plan->handle_slow,(FFT_DATA*)d_data.data()); } #elif defined(FFT_FFTW3) if (flag == -1) { diff --git a/src/KSPACE/fft3d.cpp b/src/KSPACE/fft3d.cpp index c9cf8df172..d7b3248692 100644 --- a/src/KSPACE/fft3d.cpp +++ b/src/KSPACE/fft3d.cpp @@ -107,13 +107,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_fast,data); else DftiComputeBackward(plan->handle_fast,data); - /* -#elif defined(FFT_FFTW2) - if (flag == -1) - fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0); - else - fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0); - */ #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_fast_forward; @@ -148,13 +141,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_mid,data); else DftiComputeBackward(plan->handle_mid,data); - /* -#elif defined(FFT_FFTW2) - if (flag == -1) - fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0); - else - fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0); - */ #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_mid_forward; @@ -189,13 +175,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) DftiComputeForward(plan->handle_slow,data); else DftiComputeBackward(plan->handle_slow,data); - /* -#elif defined(FFT_FFTW2) - if (flag == -1) - fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0); - else - fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0); - */ #elif defined(FFT_FFTW3) if (flag == -1) theplan=plan->plan_slow_forward; @@ -508,6 +487,9 @@ struct fft_plan_3d *fft_3d_create_plan( DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast); DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_fast); DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -517,6 +499,9 @@ struct fft_plan_3d *fft_3d_create_plan( DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid); DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_mid); DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1, @@ -526,6 +511,9 @@ struct fft_plan_3d *fft_3d_create_plan( DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE); DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow); DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow); +#if defined(FFT_MKL_THREADS) + DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads); +#endif DftiCommitDescriptor(plan->handle_slow); if (scaled == 0) @@ -537,50 +525,6 @@ struct fft_plan_3d *fft_3d_create_plan( (out_khi-out_klo+1); } - /* -#elif defined(FFT_FFTW2) - - plan->plan_fast_forward = - fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - plan->plan_fast_backward = - fftw_create_plan(nfast,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - - if (nmid == nfast) { - plan->plan_mid_forward = plan->plan_fast_forward; - plan->plan_mid_backward = plan->plan_fast_backward; - } - else { - plan->plan_mid_forward = - fftw_create_plan(nmid,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - plan->plan_mid_backward = - fftw_create_plan(nmid,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - } - - if (nslow == nfast) { - plan->plan_slow_forward = plan->plan_fast_forward; - plan->plan_slow_backward = plan->plan_fast_backward; - } - else if (nslow == nmid) { - plan->plan_slow_forward = plan->plan_mid_forward; - plan->plan_slow_backward = plan->plan_mid_backward; - } - else { - plan->plan_slow_forward = - fftw_create_plan(nslow,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - plan->plan_slow_backward = - fftw_create_plan(nslow,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE); - } - - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } - */ - #elif defined(FFT_FFTW3) #if defined(FFT_FFTW_THREADS) if (nthreads > 1) { @@ -686,20 +630,6 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan) DftiFreeDescriptor(&(plan->handle_fast)); DftiFreeDescriptor(&(plan->handle_mid)); DftiFreeDescriptor(&(plan->handle_slow)); - /* -#elif defined(FFT_FFTW2) - if (plan->plan_slow_forward != plan->plan_fast_forward && - plan->plan_slow_forward != plan->plan_mid_forward) { - fftw_destroy_plan(plan->plan_slow_forward); - fftw_destroy_plan(plan->plan_slow_backward); - } - if (plan->plan_mid_forward != plan->plan_fast_forward) { - fftw_destroy_plan(plan->plan_mid_forward); - fftw_destroy_plan(plan->plan_mid_backward); - } - fftw_destroy_plan(plan->plan_fast_forward); - fftw_destroy_plan(plan->plan_fast_backward); - */ #elif defined(FFT_FFTW3) FFTW_API(destroy_plan)(plan->plan_slow_forward); FFTW_API(destroy_plan)(plan->plan_slow_backward); @@ -840,18 +770,6 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan) DftiComputeBackward(plan->handle_mid,data); DftiComputeBackward(plan->handle_slow,data); } - /* -#elif defined(FFT_FFTW2) - if (flag == -1) { - fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0); - fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0); - fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0); - } else { - fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0); - fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0); - fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0); - } - */ #elif defined(FFT_FFTW3) FFTW_API(plan) theplan; if (flag == -1) diff --git a/src/KSPACE/fft3d.h b/src/KSPACE/fft3d.h index 8c3d2f27ad..4b5f36716f 100644 --- a/src/KSPACE/fft3d.h +++ b/src/KSPACE/fft3d.h @@ -43,14 +43,6 @@ typedef double FFT_SCALAR; typedef float _Complex FFT_DATA; #define FFT_MKL_PREC DFTI_SINGLE -//#elif defined(FFT_FFTW2) -//#if defined(FFTW_SIZE) -//#include "sfftw.h" -//#else -//#include "fftw.h" -//#endif -//typedef FFTW_COMPLEX FFT_DATA; - #elif defined(FFT_FFTW3) #include "fftw3.h" typedef fftwf_complex FFT_DATA; @@ -84,14 +76,6 @@ typedef struct kiss_fft_state* kiss_fft_cfg; typedef double _Complex FFT_DATA; #define FFT_MKL_PREC DFTI_DOUBLE -//#elif defined(FFT_FFTW2) -//#if defined(FFTW_SIZE) -//#include "dfftw.h" -//#else -//#include "fftw.h" -//#endif -//typedef FFTW_COMPLEX FFT_DATA; - #elif defined(FFT_FFTW3) #include "fftw3.h" typedef fftw_complex FFT_DATA; @@ -141,13 +125,6 @@ struct fft_plan_3d { DFTI_DESCRIPTOR *handle_fast; DFTI_DESCRIPTOR *handle_mid; DFTI_DESCRIPTOR *handle_slow; -//#elif defined(FFT_FFTW2) -// fftw_plan plan_fast_forward; -// fftw_plan plan_fast_backward; -// fftw_plan plan_mid_forward; -// fftw_plan plan_mid_backward; -//fftw_plan plan_slow_forward; -//fftw_plan plan_slow_backward; #elif defined(FFT_FFTW3) FFTW_API(plan) plan_fast_forward; FFTW_API(plan) plan_fast_backward; From 99cfc9e66895b7f247e97b35f3cea14f535b1d27 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Mon, 27 Jan 2020 11:17:08 -0500 Subject: [PATCH 02/10] small documentation tweak --- doc/src/Build_settings.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/src/Build_settings.rst b/doc/src/Build_settings.rst index a6cbf7b8ca..8c9f525425 100644 --- a/doc/src/Build_settings.rst +++ b/doc/src/Build_settings.rst @@ -135,6 +135,7 @@ to assist: FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier, serial interface FFT_LIB = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface FFT_LIB = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core # MKL with GNU compiler, threaded interface + FFT_LIB = -lmkl_rt # MKL with automatic runtime selection of interface libs As with CMake, you do not need to set paths in FFT\_INC or FFT\_PATH, if the compiler can find the FFT header and library files in its default search path. From 0d0c5e29614055e17243c63d766e790487432ae4 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Wed, 29 Jan 2020 16:37:29 -0500 Subject: [PATCH 03/10] make KOKKOS and KSPACE FFT3d code more consistent and remove redundant code --- src/KOKKOS/fft3d_kokkos.cpp | 37 ++++++------------------------------- src/KSPACE/fft3d.cpp | 26 +++++--------------------- 2 files changed, 11 insertions(+), 52 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 5e51c97b54..a4bf5307c5 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -147,11 +147,11 @@ public: KOKKOS_INLINE_FUNCTION void operator() (const int &i) const { -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) || defined(FFT_MKL) +#if defined(FFT_FFTW3) || defined(FFT_CUFFT) FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; -#else +#else /* FFT_MKL or FFT_KISS */ d_out(i,0) *= norm; d_out(i,1) *= norm; #endif @@ -634,15 +634,6 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl #endif DftiCommitDescriptor(plan->handle_slow); - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } - #elif defined(FFT_FFTW3) #if defined (FFT_FFTW_THREADS) @@ -689,16 +680,8 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl NULL,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } - #elif defined(FFT_CUFFT) + cufftPlanMany(&(plan->plan_fast), 1, &nfast, &nfast,1,plan->length1, &nfast,1,plan->length1, @@ -714,16 +697,8 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl &nslow,1,plan->length3, CUFFT_TYPE,plan->total3/plan->length3); - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } +#else /* FFT_KISS */ -#else kissfftKK = new KissFFTKokkos(); plan->cfg_fast_forward = KissFFTKokkos::kiss_fft_alloc_kokkos(nfast,0,NULL,NULL); @@ -751,6 +726,8 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl plan->cfg_slow_backward = KissFFTKokkos::kiss_fft_alloc_kokkos(nslow,1,NULL,NULL); } +#endif + if (scaled == 0) plan->scaled = 0; else { @@ -760,8 +737,6 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl (out_khi-out_klo+1); } -#endif - return plan; } diff --git a/src/KSPACE/fft3d.cpp b/src/KSPACE/fft3d.cpp index d7b3248692..3e4d131d3a 100644 --- a/src/KSPACE/fft3d.cpp +++ b/src/KSPACE/fft3d.cpp @@ -210,7 +210,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan) *(out_ptr++) *= norm; #elif defined(FFT_MKL) out[i] *= norm; -#else +#else /* FFT_KISS */ out[i].re *= norm; out[i].im *= norm; #endif @@ -516,15 +516,6 @@ struct fft_plan_3d *fft_3d_create_plan( #endif DftiCommitDescriptor(plan->handle_slow); - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } - #elif defined(FFT_FFTW3) #if defined(FFT_FFTW_THREADS) if (nthreads > 1) { @@ -564,15 +555,8 @@ struct fft_plan_3d *fft_3d_create_plan( NULL,&nslow,1,plan->length3, FFTW_BACKWARD,FFTW_ESTIMATE); - if (scaled == 0) - plan->scaled = 0; - else { - plan->scaled = 1; - plan->norm = 1.0/(nfast*nmid*nslow); - plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) * - (out_khi-out_klo+1); - } -#else +#else /* FFT_KISS */ + plan->cfg_fast_forward = kiss_fft_alloc(nfast,0,NULL,NULL); plan->cfg_fast_backward = kiss_fft_alloc(nfast,1,NULL,NULL); @@ -598,6 +582,8 @@ struct fft_plan_3d *fft_3d_create_plan( plan->cfg_slow_backward = kiss_fft_alloc(nslow,1,NULL,NULL); } +#endif + if (scaled == 0) plan->scaled = 0; else { @@ -607,8 +593,6 @@ struct fft_plan_3d *fft_3d_create_plan( (out_khi-out_klo+1); } -#endif - return plan; } From 1851a9f7d47cd59bffd675ee576a861fa83c6652 Mon Sep 17 00:00:00 2001 From: Stan Gerald Moore Date: Wed, 29 Jan 2020 16:21:33 -0700 Subject: [PATCH 04/10] Refactor Kokkos FFT_DATA --- src/KOKKOS/fft3d_kokkos.cpp | 94 +++++++++++++++++++------------------ src/KOKKOS/fft3d_kokkos.h | 67 -------------------------- src/KOKKOS/fftdata_kokkos.h | 78 ++++++++++++++++++++++++++++++ src/KOKKOS/kokkos_type.h | 20 ++------ 4 files changed, 129 insertions(+), 130 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index a4bf5307c5..4cd92cbbb0 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -88,8 +88,8 @@ FFT3dKokkos::~FFT3dKokkos() template void FFT3dKokkos::compute(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, int flag) { - typename AT::t_FFT_DATA_1d d_in_data(d_in.data(),d_in.size()/2); - typename AT::t_FFT_DATA_1d d_out_data(d_out.data(),d_out.size()/2); + typename AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); + typename AT::t_FFT_DATA_1d d_out_data((FFT_DATA*)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -99,7 +99,7 @@ void FFT3dKokkos::compute(typename AT::t_FFT_SCALAR_1d d_in, typenam template void FFT3dKokkos::timing1d(typename AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) { - typename AT::t_FFT_DATA_1d d_in_data(d_in.data(),d_in.size()); + typename AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -151,7 +151,9 @@ public: FFT_SCALAR* out_ptr = (FFT_SCALAR *)(d_out.data()+i); *(out_ptr++) *= norm; *(out_ptr++) *= norm; -#else /* FFT_MKL or FFT_KISS */ +#elif defined(FFT_MKL) + d_out(i) *= norm; +#else // FFT_KISS d_out(i,0) *= norm; d_out(i,1) *= norm; #endif @@ -200,9 +202,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename AT::t_FFT_SCALAR_1d(d_in.data(),d_in.size()); - d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size()); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size()); + d_in_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -217,16 +219,16 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ #if defined(FFT_MKL) if (flag == -1) - DftiComputeForward(plan->handle_fast,(FFT_DATA *)d_data.data()); + DftiComputeForward(plan->handle_fast,d_data.data()); else - DftiComputeBackward(plan->handle_fast,(FFT_DATA *)d_data.data()); + DftiComputeBackward(plan->handle_fast,d_data.data()); #elif defined(FFT_FFTW3) if (flag == -1) - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,d_data.data(),d_data.data()); else - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,d_data.data(),d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_fast,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag); + cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); #else typename AT::t_FFT_DATA_1d d_tmp = typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); @@ -247,9 +249,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size()*2); - d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -263,16 +265,16 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ #if defined(FFT_MKL) if (flag == -1) - DftiComputeForward(plan->handle_mid,(FFT_DATA *)d_data.data()); + DftiComputeForward(plan->handle_mid,d_data.data()); else - DftiComputeBackward(plan->handle_mid,(FFT_DATA *)d_data.data()); + DftiComputeBackward(plan->handle_mid,d_data.data()); #elif defined(FFT_FFTW3) if (flag == -1) - FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,d_data.data(),d_data.data()); else - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,d_data.data(),d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_mid,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag); + cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag); #else if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); @@ -289,9 +291,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size()); - d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size()); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size()); + d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -305,16 +307,16 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ #if defined(FFT_MKL) if (flag == -1) - DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data()); + DftiComputeForward(plan->handle_slow,d_data.data()); else - DftiComputeBackward(plan->handle_slow,(FFT_DATA *)d_data.data()); + DftiComputeBackward(plan->handle_slow,d_data.data()); #elif defined(FFT_FFTW3) if (flag == -1) - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,d_data.data(),d_data.data()); else - FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,d_data.data(),d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_slow,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag); + cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); @@ -329,9 +331,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ // destination is always out if (plan->post_plan) { - d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size()); - d_out_scalar = typename AT::t_FFT_SCALAR_1d(d_out.data(),d_out.size()); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size()); + d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -839,28 +841,28 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d #if defined(FFT_MKL) if (flag == -1) { - DftiComputeForward(plan->handle_fast,(FFT_DATA*)d_data.data()); - DftiComputeForward(plan->handle_mid,(FFT_DATA*)d_data.data()); - DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data()); + DftiComputeForward(plan->handle_fast,d_data.data()); + DftiComputeForward(plan->handle_mid,d_data.data()); + DftiComputeForward(plan->handle_slow,d_data.data()); } else { - DftiComputeBackward(plan->handle_fast,(FFT_DATA*)d_data.data()); - DftiComputeBackward(plan->handle_mid,(FFT_DATA*)d_data.data()); - DftiComputeBackward(plan->handle_slow,(FFT_DATA*)d_data.data()); + DftiComputeBackward(plan->handle_fast,d_data.data()); + DftiComputeBackward(plan->handle_mid,d_data.data()); + DftiComputeBackward(plan->handle_slow,d_data.data()); } #elif defined(FFT_FFTW3) if (flag == -1) { - FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,d_data.data(),d_data.data()); } else { - FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,d_data.data(),d_data.data()); } #elif defined(FFT_CUFFT) - cufftExec(plan->plan_fast,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); - cufftExec(plan->plan_mid,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); - cufftExec(plan->plan_slow,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); + cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else kiss_fft_functor f; typename AT::t_FFT_DATA_1d d_tmp = typename AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0()); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 2de3b641ea..f2a300050e 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -19,73 +19,6 @@ #include "remap_kokkos.h" #include "fftdata_kokkos.h" -// with KOKKOS in CUDA mode we can only have -// CUFFT or KISSFFT, thus undefine all other -// FFTs here, since they may be valid in fft3d.cpp - -#if defined(KOKKOS_ENABLE_CUDA) -# if defined(FFT_FFTW) -# undef FFT_FFTW -# endif -# if defined(FFT_FFTW3) -# undef FFT_FFTW3 -# endif -# if defined(FFT_MKL) -# undef FFT_MKL -# endif -# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) -# define FFT_KISSFFT -# endif -#else -# if defined(FFT_CUFFT) -# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" -# endif -// if user set FFTW, it means FFTW3 -# ifdef FFT_FFTW -# define FFT_FFTW3 -# endif -# ifdef FFT_FFTW_THREADS -# if !defined(FFT_FFTW3) -# error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS" -# endif -# endif -#endif - -#if defined(FFT_MKL) - #include "mkl_dfti.h" - #if defined(FFT_SINGLE) - typedef float _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_SINGLE - #else - typedef double _Complex FFT_DATA; - #define FFT_MKL_PREC DFTI_DOUBLE - #endif -#elif defined(FFT_FFTW3) - #include "fftw3.h" - #if defined(FFT_SINGLE) - typedef fftwf_complex FFT_DATA; - #define FFTW_API(function) fftwf_ ## function - #else - typedef fftw_complex FFT_DATA; - #define FFTW_API(function) fftw_ ## function - #endif -#elif defined(FFT_CUFFT) - #include "cufft.h" - #if defined(FFT_SINGLE) - #define cufftExec cufftExecC2C - #define CUFFT_TYPE CUFFT_C2C - typedef cufftComplex FFT_DATA; - #else - #define cufftExec cufftExecZ2Z - #define CUFFT_TYPE CUFFT_Z2Z - typedef cufftDoubleComplex FFT_DATA; - #endif -#else - #include "kissfft_kokkos.h" - #ifndef FFT_KISSFFT - #define FFT_KISSFFT - #endif -#endif namespace LAMMPS_NS { diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 262b963802..3b36f2f88b 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -43,4 +43,82 @@ typedef double FFT_SCALAR; #error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)" #endif + +// with KOKKOS in CUDA mode we can only have +// CUFFT or KISSFFT, thus undefine all other +// FFTs here, since they may be valid in fft3d.cpp + +#if defined(KOKKOS_ENABLE_CUDA) +# if defined(FFT_FFTW) +# undef FFT_FFTW +# endif +# if defined(FFT_FFTW3) +# undef FFT_FFTW3 +# endif +# if defined(FFT_MKL) +# undef FFT_MKL +# endif +# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT) +# define FFT_KISSFFT +# endif +#else +# if defined(FFT_CUFFT) +# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT" +# endif +// if user set FFTW, it means FFTW3 +# ifdef FFT_FFTW +# define FFT_FFTW3 +# endif +# ifdef FFT_FFTW_THREADS +# if !defined(FFT_FFTW3) +# error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS" +# endif +# endif +#endif + +#if defined(FFT_MKL) + #include "mkl_dfti.h" + #if defined(FFT_SINGLE) + typedef float _Complex FFT_DATA; + #define FFT_MKL_PREC DFTI_SINGLE + #else + typedef double _Complex FFT_DATA; + #define FFT_MKL_PREC DFTI_DOUBLE + #endif +#elif defined(FFT_FFTW3) + #include "fftw3.h" + #if defined(FFT_SINGLE) + typedef fftwf_complex FFT_DATA; + #define FFTW_API(function) fftwf_ ## function + #else + typedef fftw_complex FFT_DATA; + #define FFTW_API(function) fftw_ ## function + #endif +#elif defined(FFT_CUFFT) + #include "cufft.h" + #if defined(FFT_SINGLE) + #define cufftExec cufftExecC2C + #define CUFFT_TYPE CUFFT_C2C + typedef cufftComplex FFT_DATA; + #else + #define cufftExec cufftExecZ2Z + #define CUFFT_TYPE CUFFT_Z2Z + typedef cufftDoubleComplex FFT_DATA; + #endif +#else + #include "kissfft_kokkos.h" + #if defined(FFT_SINGLE) + #define kiss_fft_scalar float + #else + #define kiss_fft_scalar double + typedef struct { + kiss_fft_scalar re; + kiss_fft_scalar im; + } FFT_DATA; + #endif + #ifndef FFT_KISSFFT + #define FFT_KISSFFT + #endif +#endif + #endif diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 3ead8417b9..3215b2ab39 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -15,6 +15,7 @@ #define LMP_LMPTYPE_KOKKOS_H #include "pointers.h" +#include "fftdata_kokkos.h" #include #include @@ -29,21 +30,6 @@ enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u}; #define ISFINITE(x) std::isfinite(x) #endif -// User-settable FFT precision - -// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag) -// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag) - -#ifdef FFT_SINGLE -#define FFT_PRECISION 1 -#define MPI_FFT_SCALAR MPI_FLOAT -typedef float FFT_SCALAR; -#else -#define FFT_PRECISION 2 -#define MPI_FFT_SCALAR MPI_DOUBLE -typedef double FFT_SCALAR; -#endif - #define MAX_TYPES_STACKPARAMS 12 #define NeighClusterSize 8 @@ -769,7 +755,7 @@ typedef Kokkos::DualView tdual_ typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; + DualView tdual_FFT_DATA_1d; typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; @@ -1030,7 +1016,7 @@ typedef Kokkos::DualView tdual_ typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; typedef Kokkos:: - DualView tdual_FFT_DATA_1d; + DualView tdual_FFT_DATA_1d; typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; From f8e2543c448de3d981ee1cb640f9c7a0289713b1 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 29 Jan 2020 17:15:47 -0700 Subject: [PATCH 05/10] Move FFT data types out of kokkos_type.h --- src/KOKKOS/Install.sh | 1 + src/KOKKOS/fft3d_kokkos.cpp | 1 - src/KOKKOS/fft3d_kokkos.h | 6 ++-- src/KOKKOS/fftdata_kokkos.h | 63 ++++++++++++++++++++++++++++++++++ src/KOKKOS/gridcomm_kokkos.cpp | 4 +-- src/KOKKOS/kissfft_kokkos.h | 3 +- src/KOKKOS/kokkos_base.h | 6 ---- src/KOKKOS/kokkos_base_fft.h | 38 ++++++++++++++++++++ src/KOKKOS/kokkos_type.h | 54 ----------------------------- src/KOKKOS/pack_kokkos.h | 4 +-- src/KOKKOS/pppm_kokkos.h | 4 +-- src/KOKKOS/remap_kokkos.h | 5 ++- 12 files changed, 113 insertions(+), 76 deletions(-) create mode 100644 src/KOKKOS/kokkos_base_fft.h diff --git a/src/KOKKOS/Install.sh b/src/KOKKOS/Install.sh index 3ae04384fd..e0b1c92821 100755 --- a/src/KOKKOS/Install.sh +++ b/src/KOKKOS/Install.sh @@ -160,6 +160,7 @@ action kissfft_kokkos.h kissfft.h action kokkos.cpp action kokkos.h action kokkos_base.h +action kokkos_base_fft.h fft3d.h action kokkos_few.h action kokkos_type.h action memory_kokkos.h diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 4cd92cbbb0..6ec19b1d8e 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -21,7 +21,6 @@ #include #include "fft3d_kokkos.h" #include "remap_kokkos.h" -#include "kokkos_type.h" #include "error.h" #include "kokkos.h" diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index f2a300050e..12548fea5e 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -15,11 +15,9 @@ #define LMP_FFT3D_KOKKOS_H #include "pointers.h" -#include "kokkos_type.h" #include "remap_kokkos.h" #include "fftdata_kokkos.h" - namespace LAMMPS_NS { // ------------------------------------------------------------------------- @@ -29,7 +27,7 @@ namespace LAMMPS_NS { template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef ArrayTypes AT; + typedef FFTArrayTypes AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs @@ -75,7 +73,7 @@ template class FFT3dKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef ArrayTypes AT; + typedef FFTArrayTypes AT; FFT3dKokkos(class LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 3b36f2f88b..9afa9ce0e1 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -121,4 +121,67 @@ typedef double FFT_SCALAR; #endif #endif + +template +struct FFTArrayTypes; + +template <> +struct FFTArrayTypes { + +typedef Kokkos:: + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; + +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; + +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; + +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; + +typedef Kokkos:: + DualView tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; + +typedef Kokkos:: + DualView tdual_int_64; +typedef tdual_int_64::t_dev t_int_64; +typedef tdual_int_64::t_dev_um t_int_64_um; + +#ifdef KOKKOS_ENABLE_CUDA +template <> +struct FFTArrayTypes { + +//Kspace + +typedef Kokkos:: + DualView tdual_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; +typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; + +typedef Kokkos::DualView tdual_FFT_SCALAR_2d; +typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; + +typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; +typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; + +typedef Kokkos::DualView tdual_FFT_SCALAR_3d; +typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; + +typedef Kokkos:: + DualView tdual_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; +typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; + +typedef Kokkos:: + DualView tdual_int_64; +typedef tdual_int_64::t_host t_int_64; +typedef tdual_int_64::t_host_um t_int_64_um; + +}; + #endif diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index ac33b15d73..03a738969a 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -517,7 +517,7 @@ void GridCommKokkos::forward_comm(KSpace *kspace, int which) k_packlist.sync(); k_unpacklist.sync(); - KokkosBase* kspaceKKBase = dynamic_cast(kspace); + KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); for (int m = 0; m < nswap; m++) { if (swap[m].sendproc == me) @@ -567,7 +567,7 @@ void GridCommKokkos::reverse_comm(KSpace *kspace, int which) k_packlist.sync(); k_unpacklist.sync(); - KokkosBase* kspaceKKBase = dynamic_cast(kspace); + KokkosBaseFFT* kspaceKKBase = dynamic_cast(kspace); for (int m = nswap-1; m >= 0; m--) { if (swap[m].recvproc == me) diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index ca11c6e6a5..17bdba527d 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -64,7 +64,6 @@ #include #include "kissfft_kokkos.h" #include "fftdata_kokkos.h" -#include "kokkos_type.h" #ifndef M_PI #define M_PI 3.141592653589793238462643383279502884197169399375105820974944 @@ -139,7 +138,7 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef ArrayTypes AT; + typedef FFTArrayTypes AT; int nfft; int inverse; typename AT::t_int_64 d_factors; diff --git a/src/KOKKOS/kokkos_base.h b/src/KOKKOS/kokkos_base.h index 1c7a862f11..1d8f1e93e2 100644 --- a/src/KOKKOS/kokkos_base.h +++ b/src/KOKKOS/kokkos_base.h @@ -22,12 +22,6 @@ class KokkosBase { public: KokkosBase() {} - //Kspace - virtual void pack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - // Pair virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d, int, DAT::tdual_xfloat_1d &, diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h new file mode 100644 index 0000000000..b9b634d495 --- /dev/null +++ b/src/KOKKOS/kokkos_base_fft.h @@ -0,0 +1,38 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + Copyright (2003) Sandia Corporation. Under the terms of Contract + DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains + certain rights in this software. This software is distributed under + the GNU General Public License. + + See the README file in the top-level LAMMPS directory. +------------------------------------------------------------------------- */ + +#ifndef KOKKOS_BASE_FFT_H +#define KOKKOS_BASE_FFT_H + +#include "fftdata_kokkos.h" + +namespace LAMMPS_NS { + +class KokkosBaseFFT { + public: + KokkosBaseFFT() {} + + //Kspace + virtual void pack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; +}; + +} + +#endif + +/* ERROR/WARNING messages: + +*/ diff --git a/src/KOKKOS/kokkos_type.h b/src/KOKKOS/kokkos_type.h index 3215b2ab39..d21b9eecd2 100644 --- a/src/KOKKOS/kokkos_type.h +++ b/src/KOKKOS/kokkos_type.h @@ -15,7 +15,6 @@ #define LMP_LMPTYPE_KOKKOS_H #include "pointers.h" -#include "fftdata_kokkos.h" #include #include @@ -738,32 +737,6 @@ typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um; typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um; typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread; -//Kspace - -typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um; - -typedef Kokkos::DualView tdual_FFT_SCALAR_2d; -typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d; - -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3; - -typedef Kokkos::DualView tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d; - -typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um; - -typedef Kokkos:: - DualView tdual_int_64; -typedef tdual_int_64::t_dev t_int_64; -typedef tdual_int_64::t_dev_um t_int_64_um; - }; #ifdef KOKKOS_ENABLE_CUDA @@ -998,33 +971,6 @@ typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um; typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um; typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread; - -//Kspace - -typedef Kokkos:: - DualView tdual_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d; -typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um; - -typedef Kokkos::DualView tdual_FFT_SCALAR_2d; -typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d; - -typedef Kokkos::DualView tdual_FFT_SCALAR_2d_3; -typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3; - -typedef Kokkos::DualView tdual_FFT_SCALAR_3d; -typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d; - -typedef Kokkos:: - DualView tdual_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d; -typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um; - -typedef Kokkos:: - DualView tdual_int_64; -typedef tdual_int_64::t_host t_int_64; -typedef tdual_int_64::t_host_um t_int_64_um; - }; #endif //default LAMMPS Types diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index e0a11ff4e0..a8fc4024fa 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -29,7 +29,7 @@ pack from data -> buf ------------------------------------------------------------------------- */ -#include "kokkos_type.h" +#include "fftdata_kokkos.h" namespace LAMMPS_NS { @@ -37,7 +37,7 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef ArrayTypes AT; + typedef FFTArrayTypes AT; struct pack_3d_functor { public: diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index c855b1bda8..ca13859e54 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -25,7 +25,7 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos) #include "gridcomm_kokkos.h" #include "remap_kokkos.h" #include "fft3d_kokkos.h" -#include "kokkos_base.h" +#include "kokkos_base_fft.h" #include "kokkos_type.h" // fix up FFT defines for KOKKOS with CUDA @@ -107,7 +107,7 @@ struct TagPPPM_slabcorr4{}; struct TagPPPM_timing_zero{}; template -class PPPMKokkos : public PPPM, public KokkosBase { +class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index f77ce9fb72..711fef04da 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -18,7 +18,6 @@ #include #include "fftdata_kokkos.h" #include "remap.h" -#include "kokkos_type.h" namespace LAMMPS_NS { @@ -27,7 +26,7 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef ArrayTypes AT; + typedef FFTArrayTypes AT; typename AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends typename AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs void (*pack)(typename AT::t_FFT_SCALAR_1d_um, int, typename AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); @@ -58,7 +57,7 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef ArrayTypes AT; + typedef FFTArrayTypes AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int); From ef30d0ed7bad5844b14511ce7d4169ec1abe6fcc Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Wed, 29 Jan 2020 17:32:25 -0700 Subject: [PATCH 06/10] Add missing endif --- src/KOKKOS/fftdata_kokkos.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 9afa9ce0e1..0cb1b0308f 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -183,5 +183,6 @@ typedef tdual_int_64::t_host t_int_64; typedef tdual_int_64::t_host_um t_int_64_um; }; +#endif #endif From 4ea9dea19cbdbe3f107b72e15b947ce82c250fb9 Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 30 Jan 2020 12:04:57 -0700 Subject: [PATCH 07/10] More Kokkos FFT refactor --- src/KOKKOS/fft3d_kokkos.cpp | 72 +++++++------- src/KOKKOS/fft3d_kokkos.h | 16 +-- src/KOKKOS/fftdata_kokkos.h | 13 ++- src/KOKKOS/gridcomm_kokkos.cpp | 6 +- src/KOKKOS/gridcomm_kokkos.h | 6 +- src/KOKKOS/kissfft_kokkos.h | 173 ++++++++++++++++----------------- src/KOKKOS/kokkos_base_fft.h | 8 +- src/KOKKOS/pack_kokkos.h | 52 +++++----- src/KOKKOS/pppm_kokkos.cpp | 28 +++--- src/KOKKOS/pppm_kokkos.h | 34 ++++--- src/KOKKOS/remap_kokkos.cpp | 10 +- src/KOKKOS/remap_kokkos.h | 16 +-- 12 files changed, 224 insertions(+), 210 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 6ec19b1d8e..81066b961d 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -85,10 +85,10 @@ FFT3dKokkos::~FFT3dKokkos() /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::compute(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, int flag) +void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) { - typename AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); - typename AT::t_FFT_DATA_1d d_out_data((FFT_DATA*)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_DATA*)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -96,9 +96,9 @@ void FFT3dKokkos::compute(typename AT::t_FFT_SCALAR_1d d_in, typenam /* ---------------------------------------------------------------------- */ template -void FFT3dKokkos::timing1d(typename AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) +void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) { - typename AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -137,11 +137,11 @@ template struct norm_functor { public: typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_FFT_DATA_1d_um d_out; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_DATA_1d_um d_out; int norm; - norm_functor(typename AT::t_FFT_DATA_1d &d_out_, int norm_): + norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_): d_out(d_out_),norm(norm_) {} KOKKOS_INLINE_FUNCTION @@ -153,8 +153,8 @@ public: #elif defined(FFT_MKL) d_out(i) *= norm; #else // FFT_KISS - d_out(i,0) *= norm; - d_out(i,1) *= norm; + d_out(i).re *= norm; + d_out(i).im *= norm; #endif } }; @@ -164,14 +164,14 @@ template struct kiss_fft_functor { public: typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_FFT_DATA_1d_um d_data,d_tmp; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp; kiss_fft_state_kokkos st; int length; kiss_fft_functor() {} - kiss_fft_functor(typename AT::t_FFT_DATA_1d &d_data_,typename AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): + kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos &st_, int length_): d_data(d_data_), d_tmp(d_tmp_), st(st_) @@ -188,11 +188,11 @@ public: #endif template -void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typename AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) +void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos *plan) { int total,length; - typename AT::t_FFT_DATA_1d d_data,d_copy; - typename AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; + typename FFT_AT::t_FFT_DATA_1d d_data,d_copy; + typename FFT_AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar; // pre-remap to prepare for 1st FFTs if needed // copy = loc for remap result @@ -201,9 +201,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ if (plan->pre_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_in_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); - d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar, d_scratch_scalar, plan->pre_plan); @@ -229,8 +229,8 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ #elif defined(FFT_CUFFT) cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); #else - typename AT::t_FFT_DATA_1d d_tmp = - typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); + typename FFT_AT::t_FFT_DATA_1d d_tmp = + typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); kiss_fft_functor f; if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length); @@ -238,7 +238,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_backward,length); Kokkos::parallel_for(total/length,f); d_data = d_tmp; - d_tmp = typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); #endif @@ -248,9 +248,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ if (plan->mid1_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid1_plan); @@ -281,7 +281,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_backward,length); Kokkos::parallel_for(total/length,f); d_data = d_tmp; - d_tmp = typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); + d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); #endif // 2nd mid-remap to prepare for 3rd FFTs @@ -290,9 +290,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ if (plan->mid2_target == 0) d_copy = d_out; else d_copy = plan->d_copy; - d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_copy_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar, d_scratch_scalar, plan->mid2_plan); @@ -330,9 +330,9 @@ void FFT3dKokkos::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ // destination is always out if (plan->post_plan) { - d_data_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); - d_out_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); - d_scratch_scalar = typename AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); + d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2); + d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2); + d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2); remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar, d_scratch_scalar, plan->post_plan); @@ -588,11 +588,11 @@ struct fft_plan_3d_kokkos* FFT3dKokkos::fft_3d_create_pl *nbuf = copy_size + scratch_size; if (copy_size) { - plan->d_copy = typename AT::t_FFT_DATA_1d("fft3d:copy",copy_size); + plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size); } if (scratch_size) { - plan->d_scratch = typename AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); + plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size); } // system specific pre-computation of 1d FFT coeffs @@ -810,7 +810,7 @@ void FFT3dKokkos::bifactor(int n, int *factor1, int *factor2) ------------------------------------------------------------------------- */ template -void FFT3dKokkos::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d_data, int nsize, int flag, +void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag, struct fft_plan_3d_kokkos *plan) { // total = size of data needed in each dim @@ -864,7 +864,7 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else kiss_fft_functor f; - typename AT::t_FFT_DATA_1d d_tmp = typename AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0()); + typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0()); if (flag == -1) { f = kiss_fft_functor(d_data,d_tmp,plan->cfg_fast_forward,length1); Kokkos::parallel_for(total1/length1,f); diff --git a/src/KOKKOS/fft3d_kokkos.h b/src/KOKKOS/fft3d_kokkos.h index 12548fea5e..ffdd2a38ce 100644 --- a/src/KOKKOS/fft3d_kokkos.h +++ b/src/KOKKOS/fft3d_kokkos.h @@ -27,14 +27,14 @@ namespace LAMMPS_NS { template struct fft_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes AT; + typedef FFTArrayTypes FFT_AT; struct remap_plan_3d_kokkos *pre_plan; // remap from input -> 1st FFTs struct remap_plan_3d_kokkos *mid1_plan; // remap from 1st -> 2nd FFTs struct remap_plan_3d_kokkos *mid2_plan; // remap from 2nd -> 3rd FFTs struct remap_plan_3d_kokkos *post_plan; // remap from 3rd FFTs -> output - typename AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) - typename AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps + typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed) + typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length) int length1,length2,length3; // length of 1st,2nd,3rd FFTs int pre_target; // where to put remap results @@ -73,14 +73,14 @@ template class FFT3dKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes AT; + typedef FFTArrayTypes FFT_AT; FFT3dKokkos(class LAMMPS *, MPI_Comm, int,int,int,int,int,int,int,int,int,int,int,int,int,int,int, int,int,int *,int); ~FFT3dKokkos(); - void compute(typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, int); - void timing1d(typename AT::t_FFT_SCALAR_1d, int, int); + void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int); + void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int); private: struct fft_plan_3d_kokkos *plan; @@ -90,7 +90,7 @@ class FFT3dKokkos : protected Pointers { KissFFTKokkos *kissfftKK; #endif - void fft_3d_kokkos(typename AT::t_FFT_DATA_1d, typename AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); + void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos *); struct fft_plan_3d_kokkos *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, @@ -99,7 +99,7 @@ class FFT3dKokkos : protected Pointers { void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos *); - void fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); + void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos *); void bifactor(int, int *, int *); }; diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 0cb1b0308f..7ec11f4cd6 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -106,7 +106,6 @@ typedef double FFT_SCALAR; typedef cufftDoubleComplex FFT_DATA; #endif #else - #include "kissfft_kokkos.h" #if defined(FFT_SINGLE) #define kiss_fft_scalar float #else @@ -121,6 +120,7 @@ typedef double FFT_SCALAR; #endif #endif +#include "kokkos_type.h" template struct FFTArrayTypes; @@ -152,6 +152,8 @@ typedef Kokkos:: typedef tdual_int_64::t_dev t_int_64; typedef tdual_int_64::t_dev_um t_int_64_um; +}; + #ifdef KOKKOS_ENABLE_CUDA template <> struct FFTArrayTypes { @@ -185,4 +187,13 @@ typedef tdual_int_64::t_host_um t_int_64_um; }; #endif +typedef struct FFTArrayTypes FFT_DAT; +typedef struct FFTArrayTypes FFT_HAT; + + +#if defined(FFT_KISSFFT) +#include "kissfft_kokkos.h" +#endif + + #endif diff --git a/src/KOKKOS/gridcomm_kokkos.cpp b/src/KOKKOS/gridcomm_kokkos.cpp index 03a738969a..f1ccffe20d 100644 --- a/src/KOKKOS/gridcomm_kokkos.cpp +++ b/src/KOKKOS/gridcomm_kokkos.cpp @@ -17,7 +17,7 @@ #include "kspace.h" #include "memory_kokkos.h" #include "error.h" -#include "kokkos_base.h" +#include "kokkos_base_fft.h" #include "kokkos.h" using namespace LAMMPS_NS; @@ -502,9 +502,9 @@ void GridCommKokkos::setup() } nbuf *= MAX(nforward,nreverse); //memory->create(buf1,nbuf,"Commgrid:buf1"); - k_buf1 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf1",nbuf); + k_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("Commgrid:buf1",nbuf); //memory->create(buf2,nbuf,"Commgrid:buf2"); - k_buf2 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf2",nbuf); + k_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("Commgrid:buf2",nbuf); } /* ---------------------------------------------------------------------- diff --git a/src/KOKKOS/gridcomm_kokkos.h b/src/KOKKOS/gridcomm_kokkos.h index b220220d74..086834b467 100644 --- a/src/KOKKOS/gridcomm_kokkos.h +++ b/src/KOKKOS/gridcomm_kokkos.h @@ -16,6 +16,7 @@ #include "pointers.h" #include "kokkos_type.h" +#include "fftdata_kokkos.h" #ifdef FFT_SINGLE typedef float FFT_SCALAR; @@ -32,6 +33,7 @@ class GridCommKokkos : protected Pointers { public: typedef DeviceType device_type; typedef ArrayTypes AT; + typedef FFTArrayTypes FFT_AT; GridCommKokkos(class LAMMPS *, MPI_Comm, int, int, int, int, int, int, int, int, @@ -70,8 +72,8 @@ class GridCommKokkos : protected Pointers { int nbuf; //FFT_SCALAR *buf1,*buf2; - DAT::tdual_FFT_SCALAR_1d k_buf1; - DAT::tdual_FFT_SCALAR_1d k_buf2; + FFT_DAT::tdual_FFT_SCALAR_1d k_buf1; + FFT_DAT::tdual_FFT_SCALAR_1d k_buf2; struct Swap { int sendproc; // proc to send to for forward comm diff --git a/src/KOKKOS/kissfft_kokkos.h b/src/KOKKOS/kissfft_kokkos.h index 17bdba527d..9163366cb9 100644 --- a/src/KOKKOS/kissfft_kokkos.h +++ b/src/KOKKOS/kissfft_kokkos.h @@ -62,7 +62,6 @@ #include #include #include -#include "kissfft_kokkos.h" #include "fftdata_kokkos.h" #ifndef M_PI @@ -83,8 +82,8 @@ #define S_MUL(a,b) ( (a)*(b) ) #define C_MUL(m,a,a_index,b,b_index) \ - do{ (m)[0] = (a)(a_index,0)*(b)(b_index,0) - (a)(a_index,1)*(b)(b_index,1);\ - (m)[1] = (a)(a_index,0)*(b)(b_index,1) + (a)(a_index,1)*(b)(b_index,0); }while(0) + do{ (m)[0] = (a)(a_index).re*(b)(b_index).re - (a)(a_index).im*(b)(b_index).im;\ + (m)[1] = (a)(a_index).re*(b)(b_index).im + (a)(a_index).im*(b)(b_index).re; }while(0) /* #define C_FIXDIV(c,div) // NOOP @@ -125,8 +124,8 @@ #define kf_cexp(x,x_index,phase) \ do{ \ - (x)(x_index,0) = KISS_FFT_COS(phase);\ - (x)(x_index,1) = KISS_FFT_SIN(phase);\ + (x)(x_index).re = KISS_FFT_COS(phase);\ + (x)(x_index).im = KISS_FFT_SIN(phase);\ }while(0) @@ -138,25 +137,25 @@ namespace LAMMPS_NS { template struct kiss_fft_state_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes AT; + typedef FFTArrayTypes FFT_AT; int nfft; int inverse; - typename AT::t_int_64 d_factors; - typename AT::t_FFT_DATA_1d d_twiddles; - typename AT::t_FFT_DATA_1d d_scratch; + typename FFT_AT::t_int_64 d_factors; + typename FFT_AT::t_FFT_DATA_1d d_twiddles; + typename FFT_AT::t_FFT_DATA_1d d_scratch; }; template class KissFFTKokkos { public: typedef DeviceType device_type; - typedef ArrayTypes AT; + typedef FFTArrayTypes FFT_AT; KOKKOS_INLINE_FUNCTION - static void kf_bfly2(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { - typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; FFT_SCALAR t[2]; int Fout2_count; int tw1_count = 0; @@ -168,21 +167,21 @@ class KissFFTKokkos { C_MUL(t,d_Fout,Fout2_count,d_twiddles,tw1_count); tw1_count += fstride; //C_SUB(*Fout2,*Fout,t); - d_Fout(Fout2_count,0) = d_Fout(Fout_count,0) - t[0]; - d_Fout(Fout2_count,1) = d_Fout(Fout_count,1) - t[1]; + d_Fout(Fout2_count).re = d_Fout(Fout_count).re - t[0]; + d_Fout(Fout2_count).im = d_Fout(Fout_count).im - t[1]; //C_ADDTO(d_Fout[Fout_count],t); - d_Fout(Fout_count,0) += t[0]; - d_Fout(Fout_count,1) += t[1]; + d_Fout(Fout_count).re += t[0]; + d_Fout(Fout_count).im += t[1]; ++Fout2_count; ++Fout_count; } while(--m); } KOKKOS_INLINE_FUNCTION - static void kf_bfly4(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly4(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, const size_t m, int Fout_count) { - typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; FFT_SCALAR scratch[6][2]; size_t k=m; const size_t m2=2*m; @@ -199,11 +198,11 @@ class KissFFTKokkos { C_MUL(scratch[2],d_Fout,Fout_count + m3,d_twiddles,tw3_count); //C_SUB(scratch[5],d_Fout[Fout_count],scratch[1] ); - scratch[5][0] = d_Fout(Fout_count,0) - scratch[1][0]; - scratch[5][1] = d_Fout(Fout_count,1) - scratch[1][1]; + scratch[5][0] = d_Fout(Fout_count).re - scratch[1][0]; + scratch[5][1] = d_Fout(Fout_count).im - scratch[1][1]; //C_ADDTO(d_Fout[Fout_count], scratch[1]); - d_Fout(Fout_count,0) += scratch[1][0]; - d_Fout(Fout_count,1) += scratch[1][1]; + d_Fout(Fout_count).re += scratch[1][0]; + d_Fout(Fout_count).im += scratch[1][1]; //C_ADD(scratch[3],scratch[0],scratch[2]); scratch[3][0] = scratch[0][0] + scratch[2][0]; scratch[3][1] = scratch[0][1] + scratch[2][1]; @@ -211,43 +210,43 @@ class KissFFTKokkos { scratch[4][0] = scratch[0][0] - scratch[2][0]; scratch[4][1] = scratch[0][1] - scratch[2][1]; //C_SUB(d_Fout[m2],d_Fout[Fout_count],scratch[3]); - d_Fout(Fout_count + m2,0) = d_Fout(Fout_count,0) - scratch[3][0]; - d_Fout(Fout_count + m2,1) = d_Fout(Fout_count,1) - scratch[3][1]; + d_Fout(Fout_count + m2).re = d_Fout(Fout_count).re - scratch[3][0]; + d_Fout(Fout_count + m2).im = d_Fout(Fout_count).im - scratch[3][1]; tw1_count += fstride; tw2_count += fstride*2; tw3_count += fstride*3; //C_ADDTO(d_Fout[Fout_count],scratch[3]); - d_Fout(Fout_count,0) += scratch[3][0]; - d_Fout(Fout_count,1) += scratch[3][1]; + d_Fout(Fout_count).re += scratch[3][0]; + d_Fout(Fout_count).im += scratch[3][1]; if (st.inverse) { - d_Fout(Fout_count + m,0) = scratch[5][0] - scratch[4][1]; - d_Fout(Fout_count + m,1) = scratch[5][1] + scratch[4][0]; - d_Fout(Fout_count + m3,0) = scratch[5][0] + scratch[4][1]; - d_Fout(Fout_count + m3,1) = scratch[5][1] - scratch[4][0]; + d_Fout(Fout_count + m).re = scratch[5][0] - scratch[4][1]; + d_Fout(Fout_count + m).im = scratch[5][1] + scratch[4][0]; + d_Fout(Fout_count + m3).re = scratch[5][0] + scratch[4][1]; + d_Fout(Fout_count + m3).im = scratch[5][1] - scratch[4][0]; } else{ - d_Fout(Fout_count + m,0) = scratch[5][0] + scratch[4][1]; - d_Fout(Fout_count + m,1) = scratch[5][1] - scratch[4][0]; - d_Fout(Fout_count + m3,0) = scratch[5][0] - scratch[4][1]; - d_Fout(Fout_count + m3,1) = scratch[5][1] + scratch[4][0]; + d_Fout(Fout_count + m).re = scratch[5][0] + scratch[4][1]; + d_Fout(Fout_count + m).im = scratch[5][1] - scratch[4][0]; + d_Fout(Fout_count + m3).re = scratch[5][0] - scratch[4][1]; + d_Fout(Fout_count + m3).im = scratch[5][1] + scratch[4][0]; } ++Fout_count; } while(--k); } KOKKOS_INLINE_FUNCTION - static void kf_bfly3(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, size_t m, int Fout_count) { size_t k=m; const size_t m2 = 2*m; - typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; FFT_SCALAR scratch[5][2]; FFT_SCALAR epi3[2]; //C_EQ(epi3,d_twiddles[fstride*m]); - epi3[0] = d_twiddles(fstride*m,0); - epi3[1] = d_twiddles(fstride*m,1); + epi3[0] = d_twiddles(fstride*m).re; + epi3[1] = d_twiddles(fstride*m).im; int tw1_count,tw2_count; tw1_count = tw2_count = 0; @@ -267,41 +266,41 @@ class KissFFTKokkos { tw1_count += fstride; tw2_count += fstride*2; - d_Fout(Fout_count + m,0) = d_Fout(Fout_count,0) - HALF_OF(scratch[3][0]); - d_Fout(Fout_count + m,1) = d_Fout(Fout_count,1) - HALF_OF(scratch[3][1]); + d_Fout(Fout_count + m).re = d_Fout(Fout_count).re - HALF_OF(scratch[3][0]); + d_Fout(Fout_count + m).im = d_Fout(Fout_count).im - HALF_OF(scratch[3][1]); //C_MULBYSCALAR(scratch[0],epi3[1]); scratch[0][0] *= epi3[1]; scratch[0][1] *= epi3[1]; //C_ADDTO(d_Fout[Fout_count],scratch[3]); - d_Fout(Fout_count,0) += scratch[3][0]; - d_Fout(Fout_count,1) += scratch[3][1]; + d_Fout(Fout_count).re += scratch[3][0]; + d_Fout(Fout_count).im += scratch[3][1]; - d_Fout(Fout_count + m2,0) = d_Fout(Fout_count + m,0) + scratch[0][1]; - d_Fout(Fout_count + m2,1) = d_Fout(Fout_count + m,1) - scratch[0][0]; + d_Fout(Fout_count + m2).re = d_Fout(Fout_count + m).re + scratch[0][1]; + d_Fout(Fout_count + m2).im = d_Fout(Fout_count + m).im - scratch[0][0]; - d_Fout(Fout_count + m,0) -= scratch[0][1]; - d_Fout(Fout_count + m,1) += scratch[0][0]; + d_Fout(Fout_count + m).re -= scratch[0][1]; + d_Fout(Fout_count + m).im += scratch[0][0]; ++Fout_count; } while(--k); } KOKKOS_INLINE_FUNCTION - static void kf_bfly5(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int Fout_count) { int u; FFT_SCALAR scratch[13][2]; - typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; FFT_SCALAR ya[2],yb[2]; //C_EQ(ya,d_twiddles[fstride*m]); - ya[1] = d_twiddles(fstride*m,1); - ya[0] = d_twiddles(fstride*m,0); + ya[1] = d_twiddles(fstride*m).im; + ya[0] = d_twiddles(fstride*m).re; //C_EQ(yb,d_twiddles[fstride*2*m]); - yb[1] = d_twiddles(fstride*2*m,1); - yb[0] = d_twiddles(fstride*2*m,0); + yb[1] = d_twiddles(fstride*2*m).im; + yb[0] = d_twiddles(fstride*2*m).re; int Fout0_count=Fout_count; int Fout1_count=Fout0_count+m; @@ -313,8 +312,8 @@ class KissFFTKokkos { //C_FIXDIV( d_Fout[Fout0_count],5); C_FIXDIV( d_Fout[Fout1_count],5); C_FIXDIV( d_Fout[Fout2_count],5); //C_FIXDIV( d_Fout[Fout3_count],5); C_FIXDIV( d_Fout[Fout4_count],5); //C_EQ(scratch[0],d_Fout[Fout0_count]); - scratch[0][0] = d_Fout(Fout0_count,0); - scratch[0][1] = d_Fout(Fout0_count,1); + scratch[0][0] = d_Fout(Fout0_count).re; + scratch[0][1] = d_Fout(Fout0_count).im; C_MUL(scratch[1],d_Fout,Fout1_count,d_twiddles,u*fstride ); C_MUL(scratch[2],d_Fout,Fout2_count,d_twiddles,2*u*fstride); @@ -334,8 +333,8 @@ class KissFFTKokkos { scratch[9][0] = scratch[2][0] - scratch[3][0]; scratch[9][1] = scratch[2][1] - scratch[3][1]; - d_Fout(Fout0_count,0) += scratch[7][0] + scratch[8][0]; - d_Fout(Fout0_count,1) += scratch[7][1] + scratch[8][1]; + d_Fout(Fout0_count).re += scratch[7][0] + scratch[8][0]; + d_Fout(Fout0_count).im += scratch[7][1] + scratch[8][1]; scratch[5][0] = scratch[0][0] + S_MUL(scratch[7][0],ya[0]) + S_MUL(scratch[8][0],yb[0]); scratch[5][1] = scratch[0][1] + S_MUL(scratch[7][1],ya[0]) + S_MUL(scratch[8][1],yb[0]); @@ -344,11 +343,11 @@ class KissFFTKokkos { scratch[6][1] = -S_MUL(scratch[10][0],ya[1]) - S_MUL(scratch[9][0],yb[1]); //C_SUB(d_Fout[Fout1_count],scratch[5],scratch[6]); - d_Fout(Fout1_count,0) = scratch[5][0] - scratch[6][0]; - d_Fout(Fout1_count,1) = scratch[5][1] - scratch[6][1]; + d_Fout(Fout1_count).re = scratch[5][0] - scratch[6][0]; + d_Fout(Fout1_count).im = scratch[5][1] - scratch[6][1]; //C_ADD(d_Fout[Fout4_count],scratch[5],scratch[6]); - d_Fout(Fout4_count,0) = scratch[5][0] + scratch[6][0]; - d_Fout(Fout4_count,1) = scratch[5][1] + scratch[6][1]; + d_Fout(Fout4_count).re = scratch[5][0] + scratch[6][0]; + d_Fout(Fout4_count).im = scratch[5][1] + scratch[6][1]; scratch[11][0] = scratch[0][0] + S_MUL(scratch[7][0],yb[0]) + S_MUL(scratch[8][0],ya[0]); scratch[11][1] = scratch[0][1] + S_MUL(scratch[7][1],yb[0]) + S_MUL(scratch[8][1],ya[0]); @@ -356,11 +355,11 @@ class KissFFTKokkos { scratch[12][1] = S_MUL(scratch[10][0],yb[1]) - S_MUL(scratch[9][0],ya[1]); //C_ADD(d_Fout[Fout2_count],scratch[11],scratch[12]); - d_Fout(Fout2_count,0) = scratch[11][0] + scratch[12][0]; - d_Fout(Fout2_count,1) = scratch[11][1] + scratch[12][1]; + d_Fout(Fout2_count).re = scratch[11][0] + scratch[12][0]; + d_Fout(Fout2_count).im = scratch[11][1] + scratch[12][1]; //C_SUB(d_Fout3[Fout3_count],scratch[11],scratch[12]); - d_Fout(Fout3_count,0) = scratch[11][0] - scratch[12][0]; - d_Fout(Fout3_count,1) = scratch[11][1] - scratch[12][1]; + d_Fout(Fout3_count).re = scratch[11][0] - scratch[12][0]; + d_Fout(Fout3_count).im = scratch[11][1] - scratch[12][1]; ++Fout0_count;++Fout1_count;++Fout2_count;++Fout3_count;++Fout4_count; } @@ -369,21 +368,21 @@ class KissFFTKokkos { /* perform the butterfly for one stage of a mixed radix FFT */ KOKKOS_INLINE_FUNCTION - static void kf_bfly_generic(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, + static void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride, const kiss_fft_state_kokkos &st, int m, int p, int Fout_count) { int u,k,q1,q; - typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; + typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles; FFT_SCALAR t[2]; int Norig = st.nfft; - typename AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; + typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch; for ( u=0; u=Norig) twidx-=Norig; C_MUL(t,d_scratch,q,d_twiddles,twidx); //C_ADDTO(d_Fout[k],t); - d_Fout(Fout_count + k,0) += t[0]; - d_Fout(Fout_count + k,1) += t[1]; + d_Fout(Fout_count + k).re += t[0]; + d_Fout(Fout_count + k).im += t[1]; } k += m; } @@ -408,9 +407,9 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kf_work(typename AT::t_FFT_DATA_1d_um &d_Fout, const typename AT::t_FFT_DATA_1d_um &d_f, + static void kf_work(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const typename FFT_AT::t_FFT_DATA_1d_um &d_f, const size_t fstride, int in_stride, - const typename AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int Fout_count, int f_count, int factors_count) + const typename FFT_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos &st, int Fout_count, int f_count, int factors_count) { const int beg = Fout_count; const int p = d_factors[factors_count++]; /* the radix */ @@ -420,8 +419,8 @@ class KissFFTKokkos { if (m == 1) { do { //C_EQ(d_Fout[Fout_count],d_f[f_count]); - d_Fout(Fout_count,0) = d_f(f_count,0); - d_Fout(Fout_count,1) = d_f(f_count,1); + d_Fout(Fout_count).re = d_f(f_count).re; + d_Fout(Fout_count).im = d_f(f_count).im; f_count += fstride*in_stride; } while (++Fout_count != end); } else { @@ -452,7 +451,7 @@ class KissFFTKokkos { p[i] * m[i] = m[i-1] m0 = n */ - static int kf_factor(int n, HAT::t_int_64 h_facbuf) + static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf) { int p=4, nf=0; double floor_sqrt; @@ -496,12 +495,12 @@ class KissFFTKokkos { st.nfft = nfft; st.inverse = inverse_fft; - typename AT::tdual_int_64 k_factors = typename AT::tdual_int_64(); - typename AT::tdual_FFT_DATA_1d k_twiddles = typename AT::tdual_FFT_DATA_1d(); + typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64(); + typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d(); if (nfft > 0) { - k_factors = typename AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); - k_twiddles = typename AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); + k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2); + k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft); for (i=0;i(); @@ -524,13 +523,13 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename AT::t_FFT_DATA_1d_um &d_fin, typename AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) + static void kiss_fft_stride(const kiss_fft_state_kokkos &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset) { //if (d_fin.data() == d_fout.data()) { // // NOTE: this is not really an in-place FFT algorithm. // // It just performs an out-of-place FFT into a temp buffer - // typename AT::t_FFT_DATA_1d_um d_tmpbuf = typename AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); - // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset,0); + // typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1)); + // kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re; // Kokkos::deep_copy(d_fout,d_tmpbuf); //} else { kf_work(d_fout,d_fin,1,in_stride,st.d_factors,st,offset,offset,0); @@ -538,7 +537,7 @@ class KissFFTKokkos { } KOKKOS_INLINE_FUNCTION - static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename AT::t_FFT_DATA_1d_um d_fin, typename AT::t_FFT_DATA_1d_um d_fout, int offset) + static void kiss_fft_kokkos(const kiss_fft_state_kokkos &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset) { kiss_fft_stride(cfg,d_fin,d_fout,1,offset); } diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index b9b634d495..1e5cb70e7f 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -23,10 +23,10 @@ class KokkosBaseFFT { KokkosBaseFFT() {} //Kspace - virtual void pack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void pack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; - virtual void unpack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void pack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; + virtual void unpack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {}; }; } diff --git a/src/KOKKOS/pack_kokkos.h b/src/KOKKOS/pack_kokkos.h index a8fc4024fa..62e7960999 100644 --- a/src/KOKKOS/pack_kokkos.h +++ b/src/KOKKOS/pack_kokkos.h @@ -37,13 +37,13 @@ template class PackKokkos { public: typedef DeviceType device_type; - typedef FFTArrayTypes AT; + typedef FFTArrayTypes FFT_AT; struct pack_3d_functor { public: typedef DeviceType device_type; - typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -51,7 +51,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - pack_3d_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -79,7 +79,7 @@ public: } }; -static void pack_3d(typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) +static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -97,7 +97,7 @@ struct unpack_3d_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -105,7 +105,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -133,7 +133,7 @@ public: } }; -static void unpack_3d(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -152,7 +152,7 @@ struct unpack_3d_permute1_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -160,7 +160,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_1_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -188,7 +188,7 @@ public: } }; -static void unpack_3d_permute1_1(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -205,7 +205,7 @@ struct unpack_3d_permute1_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -213,7 +213,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute1_2_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -242,7 +242,7 @@ public: } }; -static void unpack_3d_permute1_2(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -260,7 +260,7 @@ struct unpack_3d_permute1_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -269,7 +269,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute1_n_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -298,7 +298,7 @@ public: } }; -static void unpack_3d_permute1_n(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -316,7 +316,7 @@ struct unpack_3d_permute2_1_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -324,7 +324,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_1_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -351,7 +351,7 @@ public: } }; -static void unpack_3d_permute2_1(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -369,7 +369,7 @@ struct unpack_3d_permute2_2_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -377,7 +377,7 @@ public: int nstride_line; // stride between successive mid indices int nstride_plane; // stride between successive slow indices - unpack_3d_permute2_2_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -405,7 +405,7 @@ public: } }; -static void unpack_3d_permute2_2(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; @@ -422,7 +422,7 @@ struct unpack_3d_permute2_n_functor { public: typedef DeviceType device_type; typedef ArrayTypes AT; - typename AT::t_FFT_SCALAR_1d_um d_buf,d_data; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data; int buf_offset,data_offset; int nfast; // # of elements in fast index int nmid; // # of elements in mid index @@ -431,7 +431,7 @@ public: int nstride_plane; // stride between successive slow indices int nqty; // # of values/element - unpack_3d_permute2_n_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): + unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan): d_buf(d_buf_), d_data(d_data_) { @@ -459,7 +459,7 @@ public: } }; -static void unpack_3d_permute2_n(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) +static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan) { const int nslow = plan->nslow; const int nmid = plan->nmid; diff --git a/src/KOKKOS/pppm_kokkos.cpp b/src/KOKKOS/pppm_kokkos.cpp index 36e48febe8..7085f06101 100644 --- a/src/KOKKOS/pppm_kokkos.cpp +++ b/src/KOKKOS/pppm_kokkos.cpp @@ -669,7 +669,7 @@ void PPPMKokkos::compute(int eflag, int vflag) nmax = atomKK->nmax; //memory->create(part2grid,nmax,3,"pppm:part2grid"); d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax); - d_rho1d = typename AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); } // find grid points for all my particles @@ -799,7 +799,7 @@ void PPPMKokkos::operator()(TagPPPM_self2, const int &i) const template void PPPMKokkos::allocate() { - d_density_brick = typename AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft"); d_density_fft = k_density_fft.view(); @@ -821,17 +821,17 @@ void PPPMKokkos::allocate() d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both); } - d_vdx_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdy_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_vdz_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // summation coeffs order_allocated = order; k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order); d_gf_b = k_gf_b.view(); - d_rho1d = typename AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); - k_rho_coeff = DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); + d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1); + k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1); d_rho_coeff = k_rho_coeff.view(); h_rho_coeff = k_rho_coeff.h_view; @@ -902,14 +902,14 @@ void PPPMKokkos::allocate_peratom() { peratom_allocate_flag = 1; - d_u_brick = typename AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v0_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v1_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v2_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v3_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v4_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); - d_v5_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v3_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); + d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1); // create ghost grid object for rho and electric field communication diff --git a/src/KOKKOS/pppm_kokkos.h b/src/KOKKOS/pppm_kokkos.h index ca13859e54..a07ed7d7c4 100644 --- a/src/KOKKOS/pppm_kokkos.h +++ b/src/KOKKOS/pppm_kokkos.h @@ -26,6 +26,7 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos) #include "remap_kokkos.h" #include "fft3d_kokkos.h" #include "kokkos_base_fft.h" +#include "fftdata_kokkos.h" #include "kokkos_type.h" // fix up FFT defines for KOKKOS with CUDA @@ -111,6 +112,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { public: typedef DeviceType device_type; typedef ArrayTypes AT; + typedef FFTArrayTypes FFT_AT; PPPMKokkos(class LAMMPS *); virtual ~PPPMKokkos(); @@ -308,7 +310,7 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int nx,ny,nz; typename AT::t_int_1d_um d_list_index; - typename AT::t_FFT_SCALAR_1d_um d_buf; + typename FFT_AT::t_FFT_SCALAR_1d_um d_buf; DAT::tdual_int_scalar k_flag; @@ -323,31 +325,31 @@ class PPPMKokkos : public PPPM, public KokkosBaseFFT { int factors[3]; - typename AT::t_FFT_SCALAR_3d d_density_brick; - typename AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; - typename AT::t_FFT_SCALAR_3d d_u_brick; - typename AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; - typename AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_density_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_u_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick; + typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick; typename AT::t_float_1d d_greensfn; typename AT::t_virial_array d_vg; typename AT::t_float_1d d_fkx; typename AT::t_float_1d d_fky; typename AT::t_float_1d d_fkz; - DAT::tdual_FFT_SCALAR_1d k_density_fft; - DAT::tdual_FFT_SCALAR_1d k_work1; - DAT::tdual_FFT_SCALAR_1d k_work2; - typename AT::t_FFT_SCALAR_1d d_density_fft; - typename AT::t_FFT_SCALAR_1d d_work1; - typename AT::t_FFT_SCALAR_1d d_work2; + FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft; + FFT_DAT::tdual_FFT_SCALAR_1d k_work1; + FFT_DAT::tdual_FFT_SCALAR_1d k_work2; + typename FFT_AT::t_FFT_SCALAR_1d d_density_fft; + typename FFT_AT::t_FFT_SCALAR_1d d_work1; + typename FFT_AT::t_FFT_SCALAR_1d d_work2; DAT::tdual_float_1d k_gf_b; typename AT::t_float_1d d_gf_b; //FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff; - typename AT::t_FFT_SCALAR_2d_3 d_rho1d; - DAT::tdual_FFT_SCALAR_2d k_rho_coeff; - typename AT::t_FFT_SCALAR_2d d_rho_coeff; - HAT::t_FFT_SCALAR_2d h_rho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d; + FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff; + typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff; + FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff; //double **acons; typename Kokkos::DualView::t_host acons; diff --git a/src/KOKKOS/remap_kokkos.cpp b/src/KOKKOS/remap_kokkos.cpp index 4d4300daf0..c809aa034c 100644 --- a/src/KOKKOS/remap_kokkos.cpp +++ b/src/KOKKOS/remap_kokkos.cpp @@ -58,7 +58,7 @@ RemapKokkos::~RemapKokkos() /* ---------------------------------------------------------------------- */ template -void RemapKokkos::perform(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, typename AT::t_FFT_SCALAR_1d d_buf) +void RemapKokkos::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf) { remap_3d_kokkos(d_in,d_out,d_buf,plan); } @@ -102,7 +102,7 @@ void RemapKokkos::perform(typename AT::t_FFT_SCALAR_1d d_in, typenam ------------------------------------------------------------------------- */ template -void RemapKokkos::remap_3d_kokkos(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, typename AT::t_FFT_SCALAR_1d d_buf, +void RemapKokkos::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf, struct remap_plan_3d_kokkos *plan) { // collective flag not yet supported @@ -110,7 +110,7 @@ void RemapKokkos::remap_3d_kokkos(typename AT::t_FFT_SCALAR_1d d_in, // use point-to-point communication int i,isend,irecv; - typename AT::t_FFT_SCALAR_1d d_scratch; + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; if (plan->memory == 0) d_scratch = d_buf; @@ -442,7 +442,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat size = MAX(size,plan->send_size[nsend]); if (size) { - plan->d_sendbuf = typename AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); + plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size); if (!plan->d_sendbuf.data()) return NULL; } @@ -452,7 +452,7 @@ struct remap_plan_3d_kokkos* RemapKokkos::remap_3d_creat if (memory == 1) { if (nrecv > 0) { plan->d_scratch = - typename AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); + typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize); if (!plan->d_scratch.data()) return NULL; } } diff --git a/src/KOKKOS/remap_kokkos.h b/src/KOKKOS/remap_kokkos.h index 711fef04da..9f1334be26 100644 --- a/src/KOKKOS/remap_kokkos.h +++ b/src/KOKKOS/remap_kokkos.h @@ -26,12 +26,12 @@ namespace LAMMPS_NS { template struct remap_plan_3d_kokkos { typedef DeviceType device_type; - typedef FFTArrayTypes AT; - typename AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends - typename AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs - void (*pack)(typename AT::t_FFT_SCALAR_1d_um, int, typename AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + typedef FFTArrayTypes FFT_AT; + typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends + typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs + void (*pack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which pack function to use - void (*unpack)(typename AT::t_FFT_SCALAR_1d_um, int, typename AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); + void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *); // which unpack function to use int *send_offset; // extraction loc for each send int *send_size; // size of each send message @@ -57,16 +57,16 @@ template class RemapKokkos : protected Pointers { public: typedef DeviceType device_type; - typedef FFTArrayTypes AT; + typedef FFTArrayTypes FFT_AT; RemapKokkos(class LAMMPS *); RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int, int,int,int,int,int,int,int,int,int,int,int); ~RemapKokkos(); - void perform(typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d); + void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d); struct remap_plan_3d_kokkos *plan; - void remap_3d_kokkos(typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); + void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos *); struct remap_plan_3d_kokkos *remap_3d_create_plan_kokkos(MPI_Comm, int, int, int, int, int, int, int, int, int, int, int, int, From a50563d5821892a74141f1dec6129bba8a5694ae Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 30 Jan 2020 13:03:15 -0700 Subject: [PATCH 08/10] Fix issue with Kokkos FFTW3 --- src/KOKKOS/fft3d_kokkos.cpp | 42 ++++++++++++++++++------------------- src/KOKKOS/fftdata_kokkos.h | 10 ++++++++- 2 files changed, 30 insertions(+), 22 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index 81066b961d..a2112d911b 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -87,8 +87,8 @@ FFT3dKokkos::~FFT3dKokkos() template void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); - typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_DATA*)d_out.data(),d_out.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_DATA_POINTER)d_out.data(),d_out.size()/2); fft_3d_kokkos(d_in_data,d_out_data,flag,plan); } @@ -98,7 +98,7 @@ void FFT3dKokkos::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typ template void FFT3dKokkos::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag) { - typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA*)d_in.data(),d_in.size()/2); + typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2); fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan); } @@ -223,11 +223,11 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_fast,d_data.data()); #elif defined(FFT_FFTW3) if (flag == -1) - FFTW_API(execute_dft)(plan->plan_fast_forward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_fast_backward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_fast,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); @@ -269,11 +269,11 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_mid,d_data.data()); #elif defined(FFT_FFTW3) if (flag == -1) - FFTW_API(execute_dft)(plan->plan_mid_forward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_mid_backward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_mid,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); #else if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); @@ -311,11 +311,11 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, DftiComputeBackward(plan->handle_slow,d_data.data()); #elif defined(FFT_FFTW3) if (flag == -1) - FFTW_API(execute_dft)(plan->plan_slow_forward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); else - FFTW_API(execute_dft)(plan->plan_slow_backward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_slow,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); #else if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); @@ -850,18 +850,18 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ } #elif defined(FFT_FFTW3) if (flag == -1) { - FFTW_API(execute_dft)(plan->plan_fast_forward,d_data.data(),d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_forward,d_data.data(),d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_forward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); } else { - FFTW_API(execute_dft)(plan->plan_fast_backward,d_data.data(),d_data.data()); - FFTW_API(execute_dft)(plan->plan_mid_backward,d_data.data(),d_data.data()); - FFTW_API(execute_dft)(plan->plan_slow_backward,d_data.data(),d_data.data()); + FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); + FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); } #elif defined(FFT_CUFFT) - cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); - cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag); - cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_fast,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); + cufftExec(plan->plan_mid,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); + cufftExec(plan->plan_slow,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); #else kiss_fft_functor f; typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0()); diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 7ec11f4cd6..8ee5967f42 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -120,6 +120,14 @@ typedef double FFT_SCALAR; #endif #endif +// (double[2]*) is not a 1D pointer +#if defined(FFT_FFTW3) || defined(FFT_CUFFT) + typedef FFT_SCALAR* FFT_DATA_POINTER; +#else + typedef FFT_DATA* FFT_DATA_POINTER; +#endif + + #include "kokkos_type.h" template @@ -192,7 +200,7 @@ typedef struct FFTArrayTypes FFT_HAT; #if defined(FFT_KISSFFT) -#include "kissfft_kokkos.h" +#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last #endif From 9fade740fbd542cc4d8546413d2e371e05d7c2ca Mon Sep 17 00:00:00 2001 From: Stan Moore Date: Thu, 30 Jan 2020 13:27:36 -0700 Subject: [PATCH 09/10] Fix issue with Kokkos FFT_CUFFT --- src/KOKKOS/fft3d_kokkos.cpp | 12 ++++++------ src/KOKKOS/fftdata_kokkos.h | 7 ++++--- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/KOKKOS/fft3d_kokkos.cpp b/src/KOKKOS/fft3d_kokkos.cpp index a2112d911b..26e6e93ad9 100644 --- a/src/KOKKOS/fft3d_kokkos.cpp +++ b/src/KOKKOS/fft3d_kokkos.cpp @@ -227,7 +227,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, else FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_fast,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); + cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); #else typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0()); @@ -273,7 +273,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, else FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_mid,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); + cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag); #else if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_mid_forward,length); @@ -315,7 +315,7 @@ void FFT3dKokkos::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, else FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); #elif defined(FFT_CUFFT) - cufftExec(plan->plan_slow,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); + cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else if (flag == -1) f = kiss_fft_functor(d_data,d_tmp,plan->cfg_slow_forward,length); @@ -859,9 +859,9 @@ void FFT3dKokkos::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_ FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data()); } #elif defined(FFT_CUFFT) - cufftExec(plan->plan_fast,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); - cufftExec(plan->plan_mid,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); - cufftExec(plan->plan_slow,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag); + cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag); + cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag); #else kiss_fft_functor f; typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0()); diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 8ee5967f42..6ed3be8da3 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -11,6 +11,9 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ +#include "kokkos_type.h" + + #define MAX(A,B) ((A) > (B) ? (A) : (B)) // data types for 2d/3d FFTs @@ -121,15 +124,13 @@ typedef double FFT_SCALAR; #endif // (double[2]*) is not a 1D pointer -#if defined(FFT_FFTW3) || defined(FFT_CUFFT) +#if defined(FFT_FFTW3) typedef FFT_SCALAR* FFT_DATA_POINTER; #else typedef FFT_DATA* FFT_DATA_POINTER; #endif -#include "kokkos_type.h" - template struct FFTArrayTypes; From 6369ab230e928229ea174439729fcc13576e19c3 Mon Sep 17 00:00:00 2001 From: Axel Kohlmeyer Date: Thu, 30 Jan 2020 16:56:02 -0500 Subject: [PATCH 10/10] make inclusion guard defines consistent with LAMMPS and avoid redefinition of MAX() --- src/KOKKOS/fftdata_kokkos.h | 7 ++++--- src/KOKKOS/kokkos_base_fft.h | 4 ++-- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/KOKKOS/fftdata_kokkos.h b/src/KOKKOS/fftdata_kokkos.h index 6ed3be8da3..cd12753170 100644 --- a/src/KOKKOS/fftdata_kokkos.h +++ b/src/KOKKOS/fftdata_kokkos.h @@ -13,13 +13,14 @@ #include "kokkos_type.h" - +#ifndef MAX #define MAX(A,B) ((A) > (B) ? (A) : (B)) +#endif // data types for 2d/3d FFTs -#ifndef FFT_DATA_KOKKOS_H -#define FFT_DATA_KOKKOS_H +#ifndef LMP_FFT_DATA_KOKKOS_H +#define LMP_FFT_DATA_KOKKOS_H // User-settable FFT precision diff --git a/src/KOKKOS/kokkos_base_fft.h b/src/KOKKOS/kokkos_base_fft.h index 1e5cb70e7f..7d0829e4d6 100644 --- a/src/KOKKOS/kokkos_base_fft.h +++ b/src/KOKKOS/kokkos_base_fft.h @@ -11,8 +11,8 @@ See the README file in the top-level LAMMPS directory. ------------------------------------------------------------------------- */ -#ifndef KOKKOS_BASE_FFT_H -#define KOKKOS_BASE_FFT_H +#ifndef LMP_KOKKOS_BASE_FFT_H +#define LMP_KOKKOS_BASE_FFT_H #include "fftdata_kokkos.h"