Merge pull request #1858 from akohlmey/fft-consistency
Improved consistency and thread support in FFTs
This commit is contained in:
@ -739,7 +739,7 @@ if(PKG_KSPACE)
|
|||||||
else()
|
else()
|
||||||
message(STATUS "Using double precision FFTs")
|
message(STATUS "Using double precision FFTs")
|
||||||
endif()
|
endif()
|
||||||
if(FFT_FFTW_THREADS)
|
if(FFT_FFTW_THREADS OR FFT_MKL_THREADS)
|
||||||
message(STATUS "Using threaded FFTs")
|
message(STATUS "Using threaded FFTs")
|
||||||
else()
|
else()
|
||||||
message(STATUS "Using non-threaded FFTs")
|
message(STATUS "Using non-threaded FFTs")
|
||||||
|
|||||||
@ -22,7 +22,7 @@ if(PKG_KSPACE)
|
|||||||
include_directories(${${FFTW}_INCLUDE_DIRS})
|
include_directories(${${FFTW}_INCLUDE_DIRS})
|
||||||
list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES})
|
list(APPEND LAMMPS_LINK_LIBS ${${FFTW}_LIBRARIES})
|
||||||
if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
|
if(FFTW3_OMP_LIBRARY OR FFTW3F_OMP_LIBRARY)
|
||||||
option(FFT_FFTW_THREADS "Use threaded FFT library" ON)
|
option(FFT_FFTW_THREADS "Use threaded FFTW library" ON)
|
||||||
else()
|
else()
|
||||||
option(FFT_FFTW_THREADS "Use threaded FFT library" OFF)
|
option(FFT_FFTW_THREADS "Use threaded FFT library" OFF)
|
||||||
endif()
|
endif()
|
||||||
@ -38,6 +38,10 @@ if(PKG_KSPACE)
|
|||||||
elseif(FFT STREQUAL "MKL")
|
elseif(FFT STREQUAL "MKL")
|
||||||
find_package(MKL REQUIRED)
|
find_package(MKL REQUIRED)
|
||||||
add_definitions(-DFFT_MKL)
|
add_definitions(-DFFT_MKL)
|
||||||
|
option(FFT_MKL_THREADS "Use threaded MKL FFT" ON)
|
||||||
|
if(FFT_MKL_THREADS)
|
||||||
|
add_definitions(-DFFT_MKL_THREADS)
|
||||||
|
endif()
|
||||||
include_directories(${MKL_INCLUDE_DIRS})
|
include_directories(${MKL_INCLUDE_DIRS})
|
||||||
list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
|
list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
|
||||||
else()
|
else()
|
||||||
|
|||||||
@ -106,6 +106,7 @@ to assist:
|
|||||||
-D FFTW3_LIBRARIES=path # path to FFTW3 libraries
|
-D FFTW3_LIBRARIES=path # path to FFTW3 libraries
|
||||||
-D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries
|
-D FFT_FFTW_THREADS=on # enable using threaded FFTW3 libraries
|
||||||
-D MKL_INCLUDE_DIRS=path # ditto for Intel MKL library
|
-D MKL_INCLUDE_DIRS=path # ditto for Intel MKL library
|
||||||
|
-D FFT_MKL_THREADS=on # enable using threaded FFTs with MKL libraries
|
||||||
-D MKL_LIBRARIES=path
|
-D MKL_LIBRARIES=path
|
||||||
|
|
||||||
**Makefile.machine settings**\ :
|
**Makefile.machine settings**\ :
|
||||||
@ -117,6 +118,7 @@ to assist:
|
|||||||
# default is KISS if not specified
|
# default is KISS if not specified
|
||||||
FFT_INC = -DFFT_SINGLE # do not specify for double precision
|
FFT_INC = -DFFT_SINGLE # do not specify for double precision
|
||||||
FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries
|
FFT_INC = -DFFT_FFTW_THREADS # enable using threaded FFTW3 libraries
|
||||||
|
FFT_INC = -DFFT_MKL_THREADS # enable using threaded FFTs with MKL libraries
|
||||||
FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY
|
FFT_INC = -DFFT_PACK_ARRAY # or -DFFT_PACK_POINTER or -DFFT_PACK_MEMCPY
|
||||||
|
|
||||||
# default is FFT\_PACK\_ARRAY if not specified
|
# default is FFT\_PACK\_ARRAY if not specified
|
||||||
@ -129,12 +131,15 @@ to assist:
|
|||||||
FFT_LIB = -lfftw3 # FFTW3 double precision
|
FFT_LIB = -lfftw3 # FFTW3 double precision
|
||||||
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
|
FFT_LIB = -lfftw3 -lfftw3_omp # FFTW3 double precision with threads (needs -DFFT_FFTW_THREADS)
|
||||||
FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision
|
FFT_LIB = -lfftw3 -lfftw3f # FFTW3 single precision
|
||||||
FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler
|
FFT_LIB = -lmkl_intel_lp64 -lmkl_sequential -lmkl_core # MKL with Intel compiler, serial interface
|
||||||
FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier
|
FFT_LIB = -lmkl_gf_lp64 -lmkl_sequential -lmkl_core # MKL with GNU compier, serial interface
|
||||||
|
FFT_LIB = -lmkl_intel_lp64 -lmkl_intel_thread -lmkl_core # MKL with Intel compiler, threaded interface
|
||||||
|
FFT_LIB = -lmkl_gf_lp64 -lmkl_gnu_thread -lmkl_core # MKL with GNU compiler, threaded interface
|
||||||
|
FFT_LIB = -lmkl_rt # MKL with automatic runtime selection of interface libs
|
||||||
|
|
||||||
As with CMake, you do not need to set paths in FFT\_INC or FFT\_PATH, if
|
As with CMake, you do not need to set paths in FFT\_INC or FFT\_PATH, if
|
||||||
make can find the FFT header and library files. You must specify
|
the compiler can find the FFT header and library files in its default search path.
|
||||||
FFT\_LIB with the appropriate FFT libraries to include in the link.
|
You must specify FFT\_LIB with the appropriate FFT libraries to include in the link.
|
||||||
|
|
||||||
**CMake and make info**\ :
|
**CMake and make info**\ :
|
||||||
|
|
||||||
|
|||||||
@ -160,6 +160,7 @@ action kissfft_kokkos.h kissfft.h
|
|||||||
action kokkos.cpp
|
action kokkos.cpp
|
||||||
action kokkos.h
|
action kokkos.h
|
||||||
action kokkos_base.h
|
action kokkos_base.h
|
||||||
|
action kokkos_base_fft.h fft3d.h
|
||||||
action kokkos_few.h
|
action kokkos_few.h
|
||||||
action kokkos_type.h
|
action kokkos_type.h
|
||||||
action memory_kokkos.h
|
action memory_kokkos.h
|
||||||
|
|||||||
@ -21,7 +21,6 @@
|
|||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "fft3d_kokkos.h"
|
#include "fft3d_kokkos.h"
|
||||||
#include "remap_kokkos.h"
|
#include "remap_kokkos.h"
|
||||||
#include "kokkos_type.h"
|
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "kokkos.h"
|
#include "kokkos.h"
|
||||||
|
|
||||||
@ -86,10 +85,10 @@ FFT3dKokkos<DeviceType>::~FFT3dKokkos()
|
|||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void FFT3dKokkos<DeviceType>::compute(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, int flag)
|
void FFT3dKokkos<DeviceType>::compute(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, int flag)
|
||||||
{
|
{
|
||||||
typename AT::t_FFT_DATA_1d d_in_data(d_in.data(),d_in.size()/2);
|
typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2);
|
||||||
typename AT::t_FFT_DATA_1d d_out_data(d_out.data(),d_out.size()/2);
|
typename FFT_AT::t_FFT_DATA_1d d_out_data((FFT_DATA_POINTER)d_out.data(),d_out.size()/2);
|
||||||
|
|
||||||
fft_3d_kokkos(d_in_data,d_out_data,flag,plan);
|
fft_3d_kokkos(d_in_data,d_out_data,flag,plan);
|
||||||
}
|
}
|
||||||
@ -97,9 +96,9 @@ void FFT3dKokkos<DeviceType>::compute(typename AT::t_FFT_SCALAR_1d d_in, typenam
|
|||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void FFT3dKokkos<DeviceType>::timing1d(typename AT::t_FFT_SCALAR_1d d_in, int nsize, int flag)
|
void FFT3dKokkos<DeviceType>::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, int nsize, int flag)
|
||||||
{
|
{
|
||||||
typename AT::t_FFT_DATA_1d d_in_data(d_in.data(),d_in.size());
|
typename FFT_AT::t_FFT_DATA_1d d_in_data((FFT_DATA_POINTER)d_in.data(),d_in.size()/2);
|
||||||
|
|
||||||
fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan);
|
fft_3d_1d_only_kokkos(d_in_data,nsize,flag,plan);
|
||||||
}
|
}
|
||||||
@ -138,11 +137,11 @@ template<class DeviceType>
|
|||||||
struct norm_functor {
|
struct norm_functor {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
typename AT::t_FFT_DATA_1d_um d_out;
|
typename FFT_AT::t_FFT_DATA_1d_um d_out;
|
||||||
int norm;
|
int norm;
|
||||||
|
|
||||||
norm_functor(typename AT::t_FFT_DATA_1d &d_out_, int norm_):
|
norm_functor(typename FFT_AT::t_FFT_DATA_1d &d_out_, int norm_):
|
||||||
d_out(d_out_),norm(norm_) {}
|
d_out(d_out_),norm(norm_) {}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
@ -153,9 +152,9 @@ public:
|
|||||||
*(out_ptr++) *= norm;
|
*(out_ptr++) *= norm;
|
||||||
#elif defined(FFT_MKL)
|
#elif defined(FFT_MKL)
|
||||||
d_out(i) *= norm;
|
d_out(i) *= norm;
|
||||||
#else
|
#else // FFT_KISS
|
||||||
d_out(i,0) *= norm;
|
d_out(i).re *= norm;
|
||||||
d_out(i,1) *= norm;
|
d_out(i).im *= norm;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@ -165,14 +164,14 @@ template<class DeviceType>
|
|||||||
struct kiss_fft_functor {
|
struct kiss_fft_functor {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
typename AT::t_FFT_DATA_1d_um d_data,d_tmp;
|
typename FFT_AT::t_FFT_DATA_1d_um d_data,d_tmp;
|
||||||
kiss_fft_state_kokkos<DeviceType> st;
|
kiss_fft_state_kokkos<DeviceType> st;
|
||||||
int length;
|
int length;
|
||||||
|
|
||||||
kiss_fft_functor() {}
|
kiss_fft_functor() {}
|
||||||
|
|
||||||
kiss_fft_functor(typename AT::t_FFT_DATA_1d &d_data_,typename AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos<DeviceType> &st_, int length_):
|
kiss_fft_functor(typename FFT_AT::t_FFT_DATA_1d &d_data_,typename FFT_AT::t_FFT_DATA_1d &d_tmp_, kiss_fft_state_kokkos<DeviceType> &st_, int length_):
|
||||||
d_data(d_data_),
|
d_data(d_data_),
|
||||||
d_tmp(d_tmp_),
|
d_tmp(d_tmp_),
|
||||||
st(st_)
|
st(st_)
|
||||||
@ -189,11 +188,11 @@ public:
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typename AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos<DeviceType> *plan)
|
void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in, typename FFT_AT::t_FFT_DATA_1d d_out, int flag, struct fft_plan_3d_kokkos<DeviceType> *plan)
|
||||||
{
|
{
|
||||||
int total,length;
|
int total,length;
|
||||||
typename AT::t_FFT_DATA_1d d_data,d_copy;
|
typename FFT_AT::t_FFT_DATA_1d d_data,d_copy;
|
||||||
typename AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar;
|
typename FFT_AT::t_FFT_SCALAR_1d d_in_scalar,d_data_scalar,d_out_scalar,d_copy_scalar,d_scratch_scalar;
|
||||||
|
|
||||||
// pre-remap to prepare for 1st FFTs if needed
|
// pre-remap to prepare for 1st FFTs if needed
|
||||||
// copy = loc for remap result
|
// copy = loc for remap result
|
||||||
@ -202,9 +201,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
if (plan->pre_target == 0) d_copy = d_out;
|
if (plan->pre_target == 0) d_copy = d_out;
|
||||||
else d_copy = plan->d_copy;
|
else d_copy = plan->d_copy;
|
||||||
|
|
||||||
d_in_scalar = typename AT::t_FFT_SCALAR_1d(d_in.data(),d_in.size());
|
d_in_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_in.data(),d_in.size()*2);
|
||||||
d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size());
|
d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2);
|
||||||
d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size());
|
d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);
|
||||||
|
|
||||||
remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar,
|
remapKK->remap_3d_kokkos(d_in_scalar, d_copy_scalar,
|
||||||
d_scratch_scalar, plan->pre_plan);
|
d_scratch_scalar, plan->pre_plan);
|
||||||
@ -219,19 +218,19 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
|
|
||||||
#if defined(FFT_MKL)
|
#if defined(FFT_MKL)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
DftiComputeForward(plan->handle_fast,(FFT_DATA *)d_data.data());
|
DftiComputeForward(plan->handle_fast,d_data.data());
|
||||||
else
|
else
|
||||||
DftiComputeBackward(plan->handle_fast,(FFT_DATA *)d_data.data());
|
DftiComputeBackward(plan->handle_fast,d_data.data());
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||||
else
|
else
|
||||||
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||||
#elif defined(FFT_CUFFT)
|
#elif defined(FFT_CUFFT)
|
||||||
cufftExec(plan->plan_fast,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag);
|
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
|
||||||
#else
|
#else
|
||||||
typename AT::t_FFT_DATA_1d d_tmp =
|
typename FFT_AT::t_FFT_DATA_1d d_tmp =
|
||||||
typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0());
|
typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0());
|
||||||
kiss_fft_functor<DeviceType> f;
|
kiss_fft_functor<DeviceType> f;
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length);
|
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length);
|
||||||
@ -239,7 +238,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_backward,length);
|
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_backward,length);
|
||||||
Kokkos::parallel_for(total/length,f);
|
Kokkos::parallel_for(total/length,f);
|
||||||
d_data = d_tmp;
|
d_data = d_tmp;
|
||||||
d_tmp = typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0());
|
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
@ -249,9 +248,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
if (plan->mid1_target == 0) d_copy = d_out;
|
if (plan->mid1_target == 0) d_copy = d_out;
|
||||||
else d_copy = plan->d_copy;
|
else d_copy = plan->d_copy;
|
||||||
|
|
||||||
d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size()*2);
|
d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2);
|
||||||
d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size()*2);
|
d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2);
|
||||||
d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size()*2);
|
d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);
|
||||||
|
|
||||||
remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar,
|
remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar,
|
||||||
d_scratch_scalar, plan->mid1_plan);
|
d_scratch_scalar, plan->mid1_plan);
|
||||||
@ -265,16 +264,16 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
|
|
||||||
#if defined(FFT_MKL)
|
#if defined(FFT_MKL)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
DftiComputeForward(plan->handle_mid,(FFT_DATA *)d_data.data());
|
DftiComputeForward(plan->handle_mid,d_data.data());
|
||||||
else
|
else
|
||||||
DftiComputeBackward(plan->handle_mid,(FFT_DATA *)d_data.data());
|
DftiComputeBackward(plan->handle_mid,d_data.data());
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||||
else
|
else
|
||||||
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||||
#elif defined(FFT_CUFFT)
|
#elif defined(FFT_CUFFT)
|
||||||
cufftExec(plan->plan_mid,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag);
|
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
|
||||||
#else
|
#else
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_forward,length);
|
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_forward,length);
|
||||||
@ -282,7 +281,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_backward,length);
|
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_backward,length);
|
||||||
Kokkos::parallel_for(total/length,f);
|
Kokkos::parallel_for(total/length,f);
|
||||||
d_data = d_tmp;
|
d_data = d_tmp;
|
||||||
d_tmp = typename AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0());
|
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_in.dimension_0());
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// 2nd mid-remap to prepare for 3rd FFTs
|
// 2nd mid-remap to prepare for 3rd FFTs
|
||||||
@ -291,9 +290,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
if (plan->mid2_target == 0) d_copy = d_out;
|
if (plan->mid2_target == 0) d_copy = d_out;
|
||||||
else d_copy = plan->d_copy;
|
else d_copy = plan->d_copy;
|
||||||
|
|
||||||
d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size());
|
d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2);
|
||||||
d_copy_scalar = typename AT::t_FFT_SCALAR_1d(d_copy.data(),d_copy.size());
|
d_copy_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_copy.data(),d_copy.size()*2);
|
||||||
d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size());
|
d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);
|
||||||
|
|
||||||
remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar,
|
remapKK->remap_3d_kokkos(d_data_scalar, d_copy_scalar,
|
||||||
d_scratch_scalar, plan->mid2_plan);
|
d_scratch_scalar, plan->mid2_plan);
|
||||||
@ -307,16 +306,16 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
|
|
||||||
#if defined(FFT_MKL)
|
#if defined(FFT_MKL)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
DftiComputeForward(plan->handle_slow,(FFT_DATA *)d_data.data());
|
DftiComputeForward(plan->handle_slow,d_data.data());
|
||||||
else
|
else
|
||||||
DftiComputeBackward(plan->handle_slow,(FFT_DATA *)d_data.data());
|
DftiComputeBackward(plan->handle_slow,d_data.data());
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||||
else
|
else
|
||||||
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||||
#elif defined(FFT_CUFFT)
|
#elif defined(FFT_CUFFT)
|
||||||
cufftExec(plan->plan_slow,(FFT_DATA *)d_data.data(),(FFT_DATA *)d_data.data(),flag);
|
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
|
||||||
#else
|
#else
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_forward,length);
|
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_forward,length);
|
||||||
@ -331,9 +330,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename AT::t_FFT_DATA_1d d_in, typ
|
|||||||
// destination is always out
|
// destination is always out
|
||||||
|
|
||||||
if (plan->post_plan) {
|
if (plan->post_plan) {
|
||||||
d_data_scalar = typename AT::t_FFT_SCALAR_1d(d_data.data(),d_data.size());
|
d_data_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_data.data(),d_data.size()*2);
|
||||||
d_out_scalar = typename AT::t_FFT_SCALAR_1d(d_out.data(),d_out.size());
|
d_out_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)d_out.data(),d_out.size()*2);
|
||||||
d_scratch_scalar = typename AT::t_FFT_SCALAR_1d(plan->d_scratch.data(),plan->d_scratch.size());
|
d_scratch_scalar = typename FFT_AT::t_FFT_SCALAR_1d((FFT_SCALAR*)plan->d_scratch.data(),plan->d_scratch.size()*2);
|
||||||
|
|
||||||
remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar,
|
remapKK->remap_3d_kokkos(d_data_scalar, d_out_scalar,
|
||||||
d_scratch_scalar, plan->post_plan);
|
d_scratch_scalar, plan->post_plan);
|
||||||
@ -589,11 +588,11 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
|||||||
*nbuf = copy_size + scratch_size;
|
*nbuf = copy_size + scratch_size;
|
||||||
|
|
||||||
if (copy_size) {
|
if (copy_size) {
|
||||||
plan->d_copy = typename AT::t_FFT_DATA_1d("fft3d:copy",copy_size);
|
plan->d_copy = typename FFT_AT::t_FFT_DATA_1d("fft3d:copy",copy_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (scratch_size) {
|
if (scratch_size) {
|
||||||
plan->d_scratch = typename AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size);
|
plan->d_scratch = typename FFT_AT::t_FFT_DATA_1d("fft3d:scratch",scratch_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// system specific pre-computation of 1d FFT coeffs
|
// system specific pre-computation of 1d FFT coeffs
|
||||||
@ -607,7 +606,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
|||||||
DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
|
DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
|
||||||
DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
|
DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
|
||||||
DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
|
DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
|
||||||
//DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
#if defined(FFT_MKL_THREADS)
|
||||||
|
DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
||||||
|
#endif
|
||||||
DftiCommitDescriptor(plan->handle_fast);
|
DftiCommitDescriptor(plan->handle_fast);
|
||||||
|
|
||||||
DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
||||||
@ -617,7 +618,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
|||||||
DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
|
DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
|
||||||
DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
|
DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
|
||||||
DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
|
DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
|
||||||
//DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
#if defined(FFT_MKL_THREADS)
|
||||||
|
DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
||||||
|
#endif
|
||||||
DftiCommitDescriptor(plan->handle_mid);
|
DftiCommitDescriptor(plan->handle_mid);
|
||||||
|
|
||||||
DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
||||||
@ -627,18 +630,11 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
|||||||
DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
|
DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
|
||||||
DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
|
DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
|
||||||
DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
|
DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
|
||||||
//DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
#if defined(FFT_MKL_THREADS)
|
||||||
|
DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
||||||
|
#endif
|
||||||
DftiCommitDescriptor(plan->handle_slow);
|
DftiCommitDescriptor(plan->handle_slow);
|
||||||
|
|
||||||
if (scaled == 0)
|
|
||||||
plan->scaled = 0;
|
|
||||||
else {
|
|
||||||
plan->scaled = 1;
|
|
||||||
plan->norm = 1.0/(nfast*nmid*nslow);
|
|
||||||
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
|
|
||||||
(out_khi-out_klo+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
|
|
||||||
#if defined (FFT_FFTW_THREADS)
|
#if defined (FFT_FFTW_THREADS)
|
||||||
@ -684,7 +680,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
|||||||
NULL,&nslow,1,plan->length3,
|
NULL,&nslow,1,plan->length3,
|
||||||
NULL,&nslow,1,plan->length3,
|
NULL,&nslow,1,plan->length3,
|
||||||
FFTW_BACKWARD,FFTW_ESTIMATE);
|
FFTW_BACKWARD,FFTW_ESTIMATE);
|
||||||
|
|
||||||
#elif defined(FFT_CUFFT)
|
#elif defined(FFT_CUFFT)
|
||||||
|
|
||||||
cufftPlanMany(&(plan->plan_fast), 1, &nfast,
|
cufftPlanMany(&(plan->plan_fast), 1, &nfast,
|
||||||
&nfast,1,plan->length1,
|
&nfast,1,plan->length1,
|
||||||
&nfast,1,plan->length1,
|
&nfast,1,plan->length1,
|
||||||
@ -699,7 +697,9 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
|||||||
&nslow,1,plan->length3,
|
&nslow,1,plan->length3,
|
||||||
&nslow,1,plan->length3,
|
&nslow,1,plan->length3,
|
||||||
CUFFT_TYPE,plan->total3/plan->length3);
|
CUFFT_TYPE,plan->total3/plan->length3);
|
||||||
#else
|
|
||||||
|
#else /* FFT_KISS */
|
||||||
|
|
||||||
kissfftKK = new KissFFTKokkos<DeviceType>();
|
kissfftKK = new KissFFTKokkos<DeviceType>();
|
||||||
|
|
||||||
plan->cfg_fast_forward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nfast,0,NULL,NULL);
|
plan->cfg_fast_forward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nfast,0,NULL,NULL);
|
||||||
@ -726,6 +726,7 @@ struct fft_plan_3d_kokkos<DeviceType>* FFT3dKokkos<DeviceType>::fft_3d_create_pl
|
|||||||
plan->cfg_slow_forward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,0,NULL,NULL);
|
plan->cfg_slow_forward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,0,NULL,NULL);
|
||||||
plan->cfg_slow_backward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,1,NULL,NULL);
|
plan->cfg_slow_backward = KissFFTKokkos<DeviceType>::kiss_fft_alloc_kokkos(nslow,1,NULL,NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (scaled == 0)
|
if (scaled == 0)
|
||||||
@ -809,7 +810,7 @@ void FFT3dKokkos<DeviceType>::bifactor(int n, int *factor1, int *factor2)
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d_data, int nsize, int flag,
|
void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d d_data, int nsize, int flag,
|
||||||
struct fft_plan_3d_kokkos<DeviceType> *plan)
|
struct fft_plan_3d_kokkos<DeviceType> *plan)
|
||||||
{
|
{
|
||||||
// total = size of data needed in each dim
|
// total = size of data needed in each dim
|
||||||
@ -839,13 +840,13 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d
|
|||||||
|
|
||||||
#if defined(FFT_MKL)
|
#if defined(FFT_MKL)
|
||||||
if (flag == -1) {
|
if (flag == -1) {
|
||||||
DftiComputeForward(plan->handle_fast,data);
|
DftiComputeForward(plan->handle_fast,d_data.data());
|
||||||
DftiComputeForward(plan->handle_mid,data);
|
DftiComputeForward(plan->handle_mid,d_data.data());
|
||||||
DftiComputeForward(plan->handle_slow,data);
|
DftiComputeForward(plan->handle_slow,d_data.data());
|
||||||
} else {
|
} else {
|
||||||
DftiComputeBackward(plan->handle_fast,data);
|
DftiComputeBackward(plan->handle_fast,d_data.data());
|
||||||
DftiComputeBackward(plan->handle_mid,data);
|
DftiComputeBackward(plan->handle_mid,d_data.data());
|
||||||
DftiComputeBackward(plan->handle_slow,data);
|
DftiComputeBackward(plan->handle_slow,d_data.data());
|
||||||
}
|
}
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
if (flag == -1) {
|
if (flag == -1) {
|
||||||
@ -858,12 +859,12 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d d
|
|||||||
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||||
}
|
}
|
||||||
#elif defined(FFT_CUFFT)
|
#elif defined(FFT_CUFFT)
|
||||||
cufftExec(plan->plan_fast,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag);
|
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
|
||||||
cufftExec(plan->plan_mid,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag);
|
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
|
||||||
cufftExec(plan->plan_slow,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data(),flag);
|
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
|
||||||
#else
|
#else
|
||||||
kiss_fft_functor<DeviceType> f;
|
kiss_fft_functor<DeviceType> f;
|
||||||
typename AT::t_FFT_DATA_1d d_tmp = typename AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0());
|
typename FFT_AT::t_FFT_DATA_1d d_tmp = typename FFT_AT::t_FFT_DATA_1d("fft_3d:tmp",d_data.dimension_0());
|
||||||
if (flag == -1) {
|
if (flag == -1) {
|
||||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length1);
|
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length1);
|
||||||
Kokkos::parallel_for(total1/length1,f);
|
Kokkos::parallel_for(total1/length1,f);
|
||||||
|
|||||||
@ -15,78 +15,9 @@
|
|||||||
#define LMP_FFT3D_KOKKOS_H
|
#define LMP_FFT3D_KOKKOS_H
|
||||||
|
|
||||||
#include "pointers.h"
|
#include "pointers.h"
|
||||||
#include "kokkos_type.h"
|
|
||||||
#include "remap_kokkos.h"
|
#include "remap_kokkos.h"
|
||||||
#include "fftdata_kokkos.h"
|
#include "fftdata_kokkos.h"
|
||||||
|
|
||||||
// with KOKKOS in CUDA mode we can only have
|
|
||||||
// CUFFT or KISSFFT, thus undefine all other
|
|
||||||
// FFTs here, since they may be valid in fft3d.cpp
|
|
||||||
|
|
||||||
#if defined(KOKKOS_ENABLE_CUDA)
|
|
||||||
# if defined(FFT_FFTW)
|
|
||||||
# undef FFT_FFTW
|
|
||||||
# endif
|
|
||||||
# if defined(FFT_FFTW3)
|
|
||||||
# undef FFT_FFTW3
|
|
||||||
# endif
|
|
||||||
# if defined(FFT_MKL)
|
|
||||||
# undef FFT_MKL
|
|
||||||
# endif
|
|
||||||
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
|
|
||||||
# define FFT_KISSFFT
|
|
||||||
# endif
|
|
||||||
#else
|
|
||||||
# if defined(FFT_CUFFT)
|
|
||||||
# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
|
|
||||||
# endif
|
|
||||||
// if user set FFTW, it means FFTW3
|
|
||||||
# ifdef FFT_FFTW
|
|
||||||
# define FFT_FFTW3
|
|
||||||
# endif
|
|
||||||
# ifdef FFT_FFTW_THREADS
|
|
||||||
# if !defined(FFT_FFTW3)
|
|
||||||
# error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS"
|
|
||||||
# endif
|
|
||||||
# endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if defined(FFT_MKL)
|
|
||||||
#include "mkl_dfti.h"
|
|
||||||
#if defined(FFT_SINGLE)
|
|
||||||
typedef float _Complex FFT_DATA;
|
|
||||||
#define FFT_MKL_PREC DFTI_SINGLE
|
|
||||||
#else
|
|
||||||
typedef double _Complex FFT_DATA;
|
|
||||||
#define FFT_MKL_PREC DFTI_DOUBLE
|
|
||||||
#endif
|
|
||||||
#elif defined(FFT_FFTW3)
|
|
||||||
#include "fftw3.h"
|
|
||||||
#if defined(FFT_SINGLE)
|
|
||||||
typedef fftwf_complex FFT_DATA;
|
|
||||||
#define FFTW_API(function) fftwf_ ## function
|
|
||||||
#else
|
|
||||||
typedef fftw_complex FFT_DATA;
|
|
||||||
#define FFTW_API(function) fftw_ ## function
|
|
||||||
#endif
|
|
||||||
#elif defined(FFT_CUFFT)
|
|
||||||
#include "cufft.h"
|
|
||||||
#if defined(FFT_SINGLE)
|
|
||||||
#define cufftExec cufftExecC2C
|
|
||||||
#define CUFFT_TYPE CUFFT_C2C
|
|
||||||
typedef cufftComplex FFT_DATA;
|
|
||||||
#else
|
|
||||||
#define cufftExec cufftExecZ2Z
|
|
||||||
#define CUFFT_TYPE CUFFT_Z2Z
|
|
||||||
typedef cufftDoubleComplex FFT_DATA;
|
|
||||||
#endif
|
|
||||||
#else
|
|
||||||
#include "kissfft_kokkos.h"
|
|
||||||
#ifndef FFT_KISSFFT
|
|
||||||
#define FFT_KISSFFT
|
|
||||||
#endif
|
|
||||||
#endif
|
|
||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
// -------------------------------------------------------------------------
|
// -------------------------------------------------------------------------
|
||||||
@ -96,14 +27,14 @@ namespace LAMMPS_NS {
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
struct fft_plan_3d_kokkos {
|
struct fft_plan_3d_kokkos {
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
|
|
||||||
struct remap_plan_3d_kokkos<DeviceType> *pre_plan; // remap from input -> 1st FFTs
|
struct remap_plan_3d_kokkos<DeviceType> *pre_plan; // remap from input -> 1st FFTs
|
||||||
struct remap_plan_3d_kokkos<DeviceType> *mid1_plan; // remap from 1st -> 2nd FFTs
|
struct remap_plan_3d_kokkos<DeviceType> *mid1_plan; // remap from 1st -> 2nd FFTs
|
||||||
struct remap_plan_3d_kokkos<DeviceType> *mid2_plan; // remap from 2nd -> 3rd FFTs
|
struct remap_plan_3d_kokkos<DeviceType> *mid2_plan; // remap from 2nd -> 3rd FFTs
|
||||||
struct remap_plan_3d_kokkos<DeviceType> *post_plan; // remap from 3rd FFTs -> output
|
struct remap_plan_3d_kokkos<DeviceType> *post_plan; // remap from 3rd FFTs -> output
|
||||||
typename AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed)
|
typename FFT_AT::t_FFT_DATA_1d d_copy; // memory for remap results (if needed)
|
||||||
typename AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps
|
typename FFT_AT::t_FFT_DATA_1d d_scratch; // scratch space for remaps
|
||||||
int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length)
|
int total1,total2,total3; // # of 1st,2nd,3rd FFTs (times length)
|
||||||
int length1,length2,length3; // length of 1st,2nd,3rd FFTs
|
int length1,length2,length3; // length of 1st,2nd,3rd FFTs
|
||||||
int pre_target; // where to put remap results
|
int pre_target; // where to put remap results
|
||||||
@ -142,14 +73,14 @@ template<class DeviceType>
|
|||||||
class FFT3dKokkos : protected Pointers {
|
class FFT3dKokkos : protected Pointers {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
|
|
||||||
FFT3dKokkos(class LAMMPS *, MPI_Comm,
|
FFT3dKokkos(class LAMMPS *, MPI_Comm,
|
||||||
int,int,int,int,int,int,int,int,int,int,int,int,int,int,int,
|
int,int,int,int,int,int,int,int,int,int,int,int,int,int,int,
|
||||||
int,int,int *,int);
|
int,int,int *,int);
|
||||||
~FFT3dKokkos();
|
~FFT3dKokkos();
|
||||||
void compute(typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, int);
|
void compute(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, int);
|
||||||
void timing1d(typename AT::t_FFT_SCALAR_1d, int, int);
|
void timing1d(typename FFT_AT::t_FFT_SCALAR_1d, int, int);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
struct fft_plan_3d_kokkos<DeviceType> *plan;
|
struct fft_plan_3d_kokkos<DeviceType> *plan;
|
||||||
@ -159,7 +90,7 @@ class FFT3dKokkos : protected Pointers {
|
|||||||
KissFFTKokkos<DeviceType> *kissfftKK;
|
KissFFTKokkos<DeviceType> *kissfftKK;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void fft_3d_kokkos(typename AT::t_FFT_DATA_1d, typename AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos<DeviceType> *);
|
void fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d, typename FFT_AT::t_FFT_DATA_1d, int, struct fft_plan_3d_kokkos<DeviceType> *);
|
||||||
|
|
||||||
struct fft_plan_3d_kokkos<DeviceType> *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int,
|
struct fft_plan_3d_kokkos<DeviceType> *fft_3d_create_plan_kokkos(MPI_Comm, int, int, int,
|
||||||
int, int, int, int, int,
|
int, int, int, int, int,
|
||||||
@ -168,7 +99,7 @@ class FFT3dKokkos : protected Pointers {
|
|||||||
|
|
||||||
void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos<DeviceType> *);
|
void fft_3d_destroy_plan_kokkos(struct fft_plan_3d_kokkos<DeviceType> *);
|
||||||
|
|
||||||
void fft_3d_1d_only_kokkos(typename AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos<DeviceType> *);
|
void fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_1d, int, int, struct fft_plan_3d_kokkos<DeviceType> *);
|
||||||
|
|
||||||
void bifactor(int, int *, int *);
|
void bifactor(int, int *, int *);
|
||||||
};
|
};
|
||||||
|
|||||||
@ -11,12 +11,16 @@
|
|||||||
See the README file in the top-level LAMMPS directory.
|
See the README file in the top-level LAMMPS directory.
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include "kokkos_type.h"
|
||||||
|
|
||||||
|
#ifndef MAX
|
||||||
#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
#define MAX(A,B) ((A) > (B) ? (A) : (B))
|
||||||
|
#endif
|
||||||
|
|
||||||
// data types for 2d/3d FFTs
|
// data types for 2d/3d FFTs
|
||||||
|
|
||||||
#ifndef FFT_DATA_KOKKOS_H
|
#ifndef LMP_FFT_DATA_KOKKOS_H
|
||||||
#define FFT_DATA_KOKKOS_H
|
#define LMP_FFT_DATA_KOKKOS_H
|
||||||
|
|
||||||
// User-settable FFT precision
|
// User-settable FFT precision
|
||||||
|
|
||||||
@ -43,4 +47,163 @@ typedef double FFT_SCALAR;
|
|||||||
#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)"
|
#error "FFT_PRECISION needs to be either 1 (=single) or 2 (=double)"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// with KOKKOS in CUDA mode we can only have
|
||||||
|
// CUFFT or KISSFFT, thus undefine all other
|
||||||
|
// FFTs here, since they may be valid in fft3d.cpp
|
||||||
|
|
||||||
|
#if defined(KOKKOS_ENABLE_CUDA)
|
||||||
|
# if defined(FFT_FFTW)
|
||||||
|
# undef FFT_FFTW
|
||||||
|
# endif
|
||||||
|
# if defined(FFT_FFTW3)
|
||||||
|
# undef FFT_FFTW3
|
||||||
|
# endif
|
||||||
|
# if defined(FFT_MKL)
|
||||||
|
# undef FFT_MKL
|
||||||
|
# endif
|
||||||
|
# if !defined(FFT_CUFFT) && !defined(FFT_KISSFFT)
|
||||||
|
# define FFT_KISSFFT
|
||||||
|
# endif
|
||||||
|
#else
|
||||||
|
# if defined(FFT_CUFFT)
|
||||||
|
# error "Must enable CUDA with KOKKOS to use -DFFT_CUFFT"
|
||||||
|
# endif
|
||||||
|
// if user set FFTW, it means FFTW3
|
||||||
|
# ifdef FFT_FFTW
|
||||||
|
# define FFT_FFTW3
|
||||||
|
# endif
|
||||||
|
# ifdef FFT_FFTW_THREADS
|
||||||
|
# if !defined(FFT_FFTW3)
|
||||||
|
# error "Must use -DFFT_FFTW3 with -DFFT_FFTW_THREADS"
|
||||||
|
# endif
|
||||||
|
# endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(FFT_MKL)
|
||||||
|
#include "mkl_dfti.h"
|
||||||
|
#if defined(FFT_SINGLE)
|
||||||
|
typedef float _Complex FFT_DATA;
|
||||||
|
#define FFT_MKL_PREC DFTI_SINGLE
|
||||||
|
#else
|
||||||
|
typedef double _Complex FFT_DATA;
|
||||||
|
#define FFT_MKL_PREC DFTI_DOUBLE
|
||||||
|
#endif
|
||||||
|
#elif defined(FFT_FFTW3)
|
||||||
|
#include "fftw3.h"
|
||||||
|
#if defined(FFT_SINGLE)
|
||||||
|
typedef fftwf_complex FFT_DATA;
|
||||||
|
#define FFTW_API(function) fftwf_ ## function
|
||||||
|
#else
|
||||||
|
typedef fftw_complex FFT_DATA;
|
||||||
|
#define FFTW_API(function) fftw_ ## function
|
||||||
|
#endif
|
||||||
|
#elif defined(FFT_CUFFT)
|
||||||
|
#include "cufft.h"
|
||||||
|
#if defined(FFT_SINGLE)
|
||||||
|
#define cufftExec cufftExecC2C
|
||||||
|
#define CUFFT_TYPE CUFFT_C2C
|
||||||
|
typedef cufftComplex FFT_DATA;
|
||||||
|
#else
|
||||||
|
#define cufftExec cufftExecZ2Z
|
||||||
|
#define CUFFT_TYPE CUFFT_Z2Z
|
||||||
|
typedef cufftDoubleComplex FFT_DATA;
|
||||||
|
#endif
|
||||||
|
#else
|
||||||
|
#if defined(FFT_SINGLE)
|
||||||
|
#define kiss_fft_scalar float
|
||||||
|
#else
|
||||||
|
#define kiss_fft_scalar double
|
||||||
|
typedef struct {
|
||||||
|
kiss_fft_scalar re;
|
||||||
|
kiss_fft_scalar im;
|
||||||
|
} FFT_DATA;
|
||||||
|
#endif
|
||||||
|
#ifndef FFT_KISSFFT
|
||||||
|
#define FFT_KISSFFT
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// (double[2]*) is not a 1D pointer
|
||||||
|
#if defined(FFT_FFTW3)
|
||||||
|
typedef FFT_SCALAR* FFT_DATA_POINTER;
|
||||||
|
#else
|
||||||
|
typedef FFT_DATA* FFT_DATA_POINTER;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
template <class DeviceType>
|
||||||
|
struct FFTArrayTypes;
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct FFTArrayTypes<LMPDeviceType> {
|
||||||
|
|
||||||
|
typedef Kokkos::
|
||||||
|
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
|
||||||
|
typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d;
|
||||||
|
typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um;
|
||||||
|
|
||||||
|
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
|
||||||
|
typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d;
|
||||||
|
|
||||||
|
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
|
||||||
|
typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3;
|
||||||
|
|
||||||
|
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
|
||||||
|
typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d;
|
||||||
|
|
||||||
|
typedef Kokkos::
|
||||||
|
DualView<FFT_DATA*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
|
||||||
|
typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d;
|
||||||
|
typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um;
|
||||||
|
|
||||||
|
typedef Kokkos::
|
||||||
|
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
|
||||||
|
typedef tdual_int_64::t_dev t_int_64;
|
||||||
|
typedef tdual_int_64::t_dev_um t_int_64_um;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
|
#ifdef KOKKOS_ENABLE_CUDA
|
||||||
|
template <>
|
||||||
|
struct FFTArrayTypes<LMPHostType> {
|
||||||
|
|
||||||
|
//Kspace
|
||||||
|
|
||||||
|
typedef Kokkos::
|
||||||
|
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
|
||||||
|
typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d;
|
||||||
|
typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um;
|
||||||
|
|
||||||
|
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
|
||||||
|
typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d;
|
||||||
|
|
||||||
|
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
|
||||||
|
typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3;
|
||||||
|
|
||||||
|
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
|
||||||
|
typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d;
|
||||||
|
|
||||||
|
typedef Kokkos::
|
||||||
|
DualView<FFT_DATA*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
|
||||||
|
typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d;
|
||||||
|
typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um;
|
||||||
|
|
||||||
|
typedef Kokkos::
|
||||||
|
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
|
||||||
|
typedef tdual_int_64::t_host t_int_64;
|
||||||
|
typedef tdual_int_64::t_host_um t_int_64_um;
|
||||||
|
|
||||||
|
};
|
||||||
|
#endif
|
||||||
|
|
||||||
|
typedef struct FFTArrayTypes<LMPDeviceType> FFT_DAT;
|
||||||
|
typedef struct FFTArrayTypes<LMPHostType> FFT_HAT;
|
||||||
|
|
||||||
|
|
||||||
|
#if defined(FFT_KISSFFT)
|
||||||
|
#include "kissfft_kokkos.h" // uses t_FFT_DATA_1d, needs to come last
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
@ -17,7 +17,7 @@
|
|||||||
#include "kspace.h"
|
#include "kspace.h"
|
||||||
#include "memory_kokkos.h"
|
#include "memory_kokkos.h"
|
||||||
#include "error.h"
|
#include "error.h"
|
||||||
#include "kokkos_base.h"
|
#include "kokkos_base_fft.h"
|
||||||
#include "kokkos.h"
|
#include "kokkos.h"
|
||||||
|
|
||||||
using namespace LAMMPS_NS;
|
using namespace LAMMPS_NS;
|
||||||
@ -502,9 +502,9 @@ void GridCommKokkos<DeviceType>::setup()
|
|||||||
}
|
}
|
||||||
nbuf *= MAX(nforward,nreverse);
|
nbuf *= MAX(nforward,nreverse);
|
||||||
//memory->create(buf1,nbuf,"Commgrid:buf1");
|
//memory->create(buf1,nbuf,"Commgrid:buf1");
|
||||||
k_buf1 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf1",nbuf);
|
k_buf1 = FFT_DAT::tdual_FFT_SCALAR_1d("Commgrid:buf1",nbuf);
|
||||||
//memory->create(buf2,nbuf,"Commgrid:buf2");
|
//memory->create(buf2,nbuf,"Commgrid:buf2");
|
||||||
k_buf2 = DAT::tdual_FFT_SCALAR_1d("Commgrid:buf2",nbuf);
|
k_buf2 = FFT_DAT::tdual_FFT_SCALAR_1d("Commgrid:buf2",nbuf);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -517,7 +517,7 @@ void GridCommKokkos<DeviceType>::forward_comm(KSpace *kspace, int which)
|
|||||||
k_packlist.sync<DeviceType>();
|
k_packlist.sync<DeviceType>();
|
||||||
k_unpacklist.sync<DeviceType>();
|
k_unpacklist.sync<DeviceType>();
|
||||||
|
|
||||||
KokkosBase* kspaceKKBase = dynamic_cast<KokkosBase*>(kspace);
|
KokkosBaseFFT* kspaceKKBase = dynamic_cast<KokkosBaseFFT*>(kspace);
|
||||||
|
|
||||||
for (int m = 0; m < nswap; m++) {
|
for (int m = 0; m < nswap; m++) {
|
||||||
if (swap[m].sendproc == me)
|
if (swap[m].sendproc == me)
|
||||||
@ -567,7 +567,7 @@ void GridCommKokkos<DeviceType>::reverse_comm(KSpace *kspace, int which)
|
|||||||
k_packlist.sync<DeviceType>();
|
k_packlist.sync<DeviceType>();
|
||||||
k_unpacklist.sync<DeviceType>();
|
k_unpacklist.sync<DeviceType>();
|
||||||
|
|
||||||
KokkosBase* kspaceKKBase = dynamic_cast<KokkosBase*>(kspace);
|
KokkosBaseFFT* kspaceKKBase = dynamic_cast<KokkosBaseFFT*>(kspace);
|
||||||
|
|
||||||
for (int m = nswap-1; m >= 0; m--) {
|
for (int m = nswap-1; m >= 0; m--) {
|
||||||
if (swap[m].recvproc == me)
|
if (swap[m].recvproc == me)
|
||||||
|
|||||||
@ -16,6 +16,7 @@
|
|||||||
|
|
||||||
#include "pointers.h"
|
#include "pointers.h"
|
||||||
#include "kokkos_type.h"
|
#include "kokkos_type.h"
|
||||||
|
#include "fftdata_kokkos.h"
|
||||||
|
|
||||||
#ifdef FFT_SINGLE
|
#ifdef FFT_SINGLE
|
||||||
typedef float FFT_SCALAR;
|
typedef float FFT_SCALAR;
|
||||||
@ -32,6 +33,7 @@ class GridCommKokkos : protected Pointers {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
|
|
||||||
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
|
GridCommKokkos(class LAMMPS *, MPI_Comm, int, int,
|
||||||
int, int, int, int, int, int,
|
int, int, int, int, int, int,
|
||||||
@ -70,8 +72,8 @@ class GridCommKokkos : protected Pointers {
|
|||||||
|
|
||||||
int nbuf;
|
int nbuf;
|
||||||
//FFT_SCALAR *buf1,*buf2;
|
//FFT_SCALAR *buf1,*buf2;
|
||||||
DAT::tdual_FFT_SCALAR_1d k_buf1;
|
FFT_DAT::tdual_FFT_SCALAR_1d k_buf1;
|
||||||
DAT::tdual_FFT_SCALAR_1d k_buf2;
|
FFT_DAT::tdual_FFT_SCALAR_1d k_buf2;
|
||||||
|
|
||||||
struct Swap {
|
struct Swap {
|
||||||
int sendproc; // proc to send to for forward comm
|
int sendproc; // proc to send to for forward comm
|
||||||
|
|||||||
@ -62,9 +62,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "kissfft_kokkos.h"
|
|
||||||
#include "fftdata_kokkos.h"
|
#include "fftdata_kokkos.h"
|
||||||
#include "kokkos_type.h"
|
|
||||||
|
|
||||||
#ifndef M_PI
|
#ifndef M_PI
|
||||||
#define M_PI 3.141592653589793238462643383279502884197169399375105820974944
|
#define M_PI 3.141592653589793238462643383279502884197169399375105820974944
|
||||||
@ -84,8 +82,8 @@
|
|||||||
#define S_MUL(a,b) ( (a)*(b) )
|
#define S_MUL(a,b) ( (a)*(b) )
|
||||||
|
|
||||||
#define C_MUL(m,a,a_index,b,b_index) \
|
#define C_MUL(m,a,a_index,b,b_index) \
|
||||||
do{ (m)[0] = (a)(a_index,0)*(b)(b_index,0) - (a)(a_index,1)*(b)(b_index,1);\
|
do{ (m)[0] = (a)(a_index).re*(b)(b_index).re - (a)(a_index).im*(b)(b_index).im;\
|
||||||
(m)[1] = (a)(a_index,0)*(b)(b_index,1) + (a)(a_index,1)*(b)(b_index,0); }while(0)
|
(m)[1] = (a)(a_index).re*(b)(b_index).im + (a)(a_index).im*(b)(b_index).re; }while(0)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
#define C_FIXDIV(c,div) // NOOP
|
#define C_FIXDIV(c,div) // NOOP
|
||||||
@ -126,8 +124,8 @@
|
|||||||
|
|
||||||
#define kf_cexp(x,x_index,phase) \
|
#define kf_cexp(x,x_index,phase) \
|
||||||
do{ \
|
do{ \
|
||||||
(x)(x_index,0) = KISS_FFT_COS(phase);\
|
(x)(x_index).re = KISS_FFT_COS(phase);\
|
||||||
(x)(x_index,1) = KISS_FFT_SIN(phase);\
|
(x)(x_index).im = KISS_FFT_SIN(phase);\
|
||||||
}while(0)
|
}while(0)
|
||||||
|
|
||||||
|
|
||||||
@ -139,25 +137,25 @@ namespace LAMMPS_NS {
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
struct kiss_fft_state_kokkos {
|
struct kiss_fft_state_kokkos {
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
int nfft;
|
int nfft;
|
||||||
int inverse;
|
int inverse;
|
||||||
typename AT::t_int_64 d_factors;
|
typename FFT_AT::t_int_64 d_factors;
|
||||||
typename AT::t_FFT_DATA_1d d_twiddles;
|
typename FFT_AT::t_FFT_DATA_1d d_twiddles;
|
||||||
typename AT::t_FFT_DATA_1d d_scratch;
|
typename FFT_AT::t_FFT_DATA_1d d_scratch;
|
||||||
};
|
};
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
class KissFFTKokkos {
|
class KissFFTKokkos {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kf_bfly2(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
static void kf_bfly2(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
||||||
const kiss_fft_state_kokkos<DeviceType> &st, int m, int Fout_count)
|
const kiss_fft_state_kokkos<DeviceType> &st, int m, int Fout_count)
|
||||||
{
|
{
|
||||||
typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
||||||
FFT_SCALAR t[2];
|
FFT_SCALAR t[2];
|
||||||
int Fout2_count;
|
int Fout2_count;
|
||||||
int tw1_count = 0;
|
int tw1_count = 0;
|
||||||
@ -169,21 +167,21 @@ class KissFFTKokkos {
|
|||||||
C_MUL(t,d_Fout,Fout2_count,d_twiddles,tw1_count);
|
C_MUL(t,d_Fout,Fout2_count,d_twiddles,tw1_count);
|
||||||
tw1_count += fstride;
|
tw1_count += fstride;
|
||||||
//C_SUB(*Fout2,*Fout,t);
|
//C_SUB(*Fout2,*Fout,t);
|
||||||
d_Fout(Fout2_count,0) = d_Fout(Fout_count,0) - t[0];
|
d_Fout(Fout2_count).re = d_Fout(Fout_count).re - t[0];
|
||||||
d_Fout(Fout2_count,1) = d_Fout(Fout_count,1) - t[1];
|
d_Fout(Fout2_count).im = d_Fout(Fout_count).im - t[1];
|
||||||
//C_ADDTO(d_Fout[Fout_count],t);
|
//C_ADDTO(d_Fout[Fout_count],t);
|
||||||
d_Fout(Fout_count,0) += t[0];
|
d_Fout(Fout_count).re += t[0];
|
||||||
d_Fout(Fout_count,1) += t[1];
|
d_Fout(Fout_count).im += t[1];
|
||||||
++Fout2_count;
|
++Fout2_count;
|
||||||
++Fout_count;
|
++Fout_count;
|
||||||
} while(--m);
|
} while(--m);
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kf_bfly4(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
static void kf_bfly4(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
||||||
const kiss_fft_state_kokkos<DeviceType> &st, const size_t m, int Fout_count)
|
const kiss_fft_state_kokkos<DeviceType> &st, const size_t m, int Fout_count)
|
||||||
{
|
{
|
||||||
typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
||||||
FFT_SCALAR scratch[6][2];
|
FFT_SCALAR scratch[6][2];
|
||||||
size_t k=m;
|
size_t k=m;
|
||||||
const size_t m2=2*m;
|
const size_t m2=2*m;
|
||||||
@ -200,11 +198,11 @@ class KissFFTKokkos {
|
|||||||
C_MUL(scratch[2],d_Fout,Fout_count + m3,d_twiddles,tw3_count);
|
C_MUL(scratch[2],d_Fout,Fout_count + m3,d_twiddles,tw3_count);
|
||||||
|
|
||||||
//C_SUB(scratch[5],d_Fout[Fout_count],scratch[1] );
|
//C_SUB(scratch[5],d_Fout[Fout_count],scratch[1] );
|
||||||
scratch[5][0] = d_Fout(Fout_count,0) - scratch[1][0];
|
scratch[5][0] = d_Fout(Fout_count).re - scratch[1][0];
|
||||||
scratch[5][1] = d_Fout(Fout_count,1) - scratch[1][1];
|
scratch[5][1] = d_Fout(Fout_count).im - scratch[1][1];
|
||||||
//C_ADDTO(d_Fout[Fout_count], scratch[1]);
|
//C_ADDTO(d_Fout[Fout_count], scratch[1]);
|
||||||
d_Fout(Fout_count,0) += scratch[1][0];
|
d_Fout(Fout_count).re += scratch[1][0];
|
||||||
d_Fout(Fout_count,1) += scratch[1][1];
|
d_Fout(Fout_count).im += scratch[1][1];
|
||||||
//C_ADD(scratch[3],scratch[0],scratch[2]);
|
//C_ADD(scratch[3],scratch[0],scratch[2]);
|
||||||
scratch[3][0] = scratch[0][0] + scratch[2][0];
|
scratch[3][0] = scratch[0][0] + scratch[2][0];
|
||||||
scratch[3][1] = scratch[0][1] + scratch[2][1];
|
scratch[3][1] = scratch[0][1] + scratch[2][1];
|
||||||
@ -212,43 +210,43 @@ class KissFFTKokkos {
|
|||||||
scratch[4][0] = scratch[0][0] - scratch[2][0];
|
scratch[4][0] = scratch[0][0] - scratch[2][0];
|
||||||
scratch[4][1] = scratch[0][1] - scratch[2][1];
|
scratch[4][1] = scratch[0][1] - scratch[2][1];
|
||||||
//C_SUB(d_Fout[m2],d_Fout[Fout_count],scratch[3]);
|
//C_SUB(d_Fout[m2],d_Fout[Fout_count],scratch[3]);
|
||||||
d_Fout(Fout_count + m2,0) = d_Fout(Fout_count,0) - scratch[3][0];
|
d_Fout(Fout_count + m2).re = d_Fout(Fout_count).re - scratch[3][0];
|
||||||
d_Fout(Fout_count + m2,1) = d_Fout(Fout_count,1) - scratch[3][1];
|
d_Fout(Fout_count + m2).im = d_Fout(Fout_count).im - scratch[3][1];
|
||||||
|
|
||||||
tw1_count += fstride;
|
tw1_count += fstride;
|
||||||
tw2_count += fstride*2;
|
tw2_count += fstride*2;
|
||||||
tw3_count += fstride*3;
|
tw3_count += fstride*3;
|
||||||
//C_ADDTO(d_Fout[Fout_count],scratch[3]);
|
//C_ADDTO(d_Fout[Fout_count],scratch[3]);
|
||||||
d_Fout(Fout_count,0) += scratch[3][0];
|
d_Fout(Fout_count).re += scratch[3][0];
|
||||||
d_Fout(Fout_count,1) += scratch[3][1];
|
d_Fout(Fout_count).im += scratch[3][1];
|
||||||
|
|
||||||
if (st.inverse) {
|
if (st.inverse) {
|
||||||
d_Fout(Fout_count + m,0) = scratch[5][0] - scratch[4][1];
|
d_Fout(Fout_count + m).re = scratch[5][0] - scratch[4][1];
|
||||||
d_Fout(Fout_count + m,1) = scratch[5][1] + scratch[4][0];
|
d_Fout(Fout_count + m).im = scratch[5][1] + scratch[4][0];
|
||||||
d_Fout(Fout_count + m3,0) = scratch[5][0] + scratch[4][1];
|
d_Fout(Fout_count + m3).re = scratch[5][0] + scratch[4][1];
|
||||||
d_Fout(Fout_count + m3,1) = scratch[5][1] - scratch[4][0];
|
d_Fout(Fout_count + m3).im = scratch[5][1] - scratch[4][0];
|
||||||
} else{
|
} else{
|
||||||
d_Fout(Fout_count + m,0) = scratch[5][0] + scratch[4][1];
|
d_Fout(Fout_count + m).re = scratch[5][0] + scratch[4][1];
|
||||||
d_Fout(Fout_count + m,1) = scratch[5][1] - scratch[4][0];
|
d_Fout(Fout_count + m).im = scratch[5][1] - scratch[4][0];
|
||||||
d_Fout(Fout_count + m3,0) = scratch[5][0] - scratch[4][1];
|
d_Fout(Fout_count + m3).re = scratch[5][0] - scratch[4][1];
|
||||||
d_Fout(Fout_count + m3,1) = scratch[5][1] + scratch[4][0];
|
d_Fout(Fout_count + m3).im = scratch[5][1] + scratch[4][0];
|
||||||
}
|
}
|
||||||
++Fout_count;
|
++Fout_count;
|
||||||
} while(--k);
|
} while(--k);
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kf_bfly3(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
static void kf_bfly3(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
||||||
const kiss_fft_state_kokkos<DeviceType> &st, size_t m, int Fout_count)
|
const kiss_fft_state_kokkos<DeviceType> &st, size_t m, int Fout_count)
|
||||||
{
|
{
|
||||||
size_t k=m;
|
size_t k=m;
|
||||||
const size_t m2 = 2*m;
|
const size_t m2 = 2*m;
|
||||||
typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
||||||
FFT_SCALAR scratch[5][2];
|
FFT_SCALAR scratch[5][2];
|
||||||
FFT_SCALAR epi3[2];
|
FFT_SCALAR epi3[2];
|
||||||
//C_EQ(epi3,d_twiddles[fstride*m]);
|
//C_EQ(epi3,d_twiddles[fstride*m]);
|
||||||
epi3[0] = d_twiddles(fstride*m,0);
|
epi3[0] = d_twiddles(fstride*m).re;
|
||||||
epi3[1] = d_twiddles(fstride*m,1);
|
epi3[1] = d_twiddles(fstride*m).im;
|
||||||
|
|
||||||
int tw1_count,tw2_count;
|
int tw1_count,tw2_count;
|
||||||
tw1_count = tw2_count = 0;
|
tw1_count = tw2_count = 0;
|
||||||
@ -268,41 +266,41 @@ class KissFFTKokkos {
|
|||||||
tw1_count += fstride;
|
tw1_count += fstride;
|
||||||
tw2_count += fstride*2;
|
tw2_count += fstride*2;
|
||||||
|
|
||||||
d_Fout(Fout_count + m,0) = d_Fout(Fout_count,0) - HALF_OF(scratch[3][0]);
|
d_Fout(Fout_count + m).re = d_Fout(Fout_count).re - HALF_OF(scratch[3][0]);
|
||||||
d_Fout(Fout_count + m,1) = d_Fout(Fout_count,1) - HALF_OF(scratch[3][1]);
|
d_Fout(Fout_count + m).im = d_Fout(Fout_count).im - HALF_OF(scratch[3][1]);
|
||||||
|
|
||||||
//C_MULBYSCALAR(scratch[0],epi3[1]);
|
//C_MULBYSCALAR(scratch[0],epi3[1]);
|
||||||
scratch[0][0] *= epi3[1];
|
scratch[0][0] *= epi3[1];
|
||||||
scratch[0][1] *= epi3[1];
|
scratch[0][1] *= epi3[1];
|
||||||
|
|
||||||
//C_ADDTO(d_Fout[Fout_count],scratch[3]);
|
//C_ADDTO(d_Fout[Fout_count],scratch[3]);
|
||||||
d_Fout(Fout_count,0) += scratch[3][0];
|
d_Fout(Fout_count).re += scratch[3][0];
|
||||||
d_Fout(Fout_count,1) += scratch[3][1];
|
d_Fout(Fout_count).im += scratch[3][1];
|
||||||
|
|
||||||
d_Fout(Fout_count + m2,0) = d_Fout(Fout_count + m,0) + scratch[0][1];
|
d_Fout(Fout_count + m2).re = d_Fout(Fout_count + m).re + scratch[0][1];
|
||||||
d_Fout(Fout_count + m2,1) = d_Fout(Fout_count + m,1) - scratch[0][0];
|
d_Fout(Fout_count + m2).im = d_Fout(Fout_count + m).im - scratch[0][0];
|
||||||
|
|
||||||
d_Fout(Fout_count + m,0) -= scratch[0][1];
|
d_Fout(Fout_count + m).re -= scratch[0][1];
|
||||||
d_Fout(Fout_count + m,1) += scratch[0][0];
|
d_Fout(Fout_count + m).im += scratch[0][0];
|
||||||
|
|
||||||
++Fout_count;
|
++Fout_count;
|
||||||
} while(--k);
|
} while(--k);
|
||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kf_bfly5(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
static void kf_bfly5(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
||||||
const kiss_fft_state_kokkos<DeviceType> &st, int m, int Fout_count)
|
const kiss_fft_state_kokkos<DeviceType> &st, int m, int Fout_count)
|
||||||
{
|
{
|
||||||
int u;
|
int u;
|
||||||
FFT_SCALAR scratch[13][2];
|
FFT_SCALAR scratch[13][2];
|
||||||
typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
||||||
FFT_SCALAR ya[2],yb[2];
|
FFT_SCALAR ya[2],yb[2];
|
||||||
//C_EQ(ya,d_twiddles[fstride*m]);
|
//C_EQ(ya,d_twiddles[fstride*m]);
|
||||||
ya[1] = d_twiddles(fstride*m,1);
|
ya[1] = d_twiddles(fstride*m).im;
|
||||||
ya[0] = d_twiddles(fstride*m,0);
|
ya[0] = d_twiddles(fstride*m).re;
|
||||||
//C_EQ(yb,d_twiddles[fstride*2*m]);
|
//C_EQ(yb,d_twiddles[fstride*2*m]);
|
||||||
yb[1] = d_twiddles(fstride*2*m,1);
|
yb[1] = d_twiddles(fstride*2*m).im;
|
||||||
yb[0] = d_twiddles(fstride*2*m,0);
|
yb[0] = d_twiddles(fstride*2*m).re;
|
||||||
|
|
||||||
int Fout0_count=Fout_count;
|
int Fout0_count=Fout_count;
|
||||||
int Fout1_count=Fout0_count+m;
|
int Fout1_count=Fout0_count+m;
|
||||||
@ -314,8 +312,8 @@ class KissFFTKokkos {
|
|||||||
//C_FIXDIV( d_Fout[Fout0_count],5); C_FIXDIV( d_Fout[Fout1_count],5); C_FIXDIV( d_Fout[Fout2_count],5);
|
//C_FIXDIV( d_Fout[Fout0_count],5); C_FIXDIV( d_Fout[Fout1_count],5); C_FIXDIV( d_Fout[Fout2_count],5);
|
||||||
//C_FIXDIV( d_Fout[Fout3_count],5); C_FIXDIV( d_Fout[Fout4_count],5);
|
//C_FIXDIV( d_Fout[Fout3_count],5); C_FIXDIV( d_Fout[Fout4_count],5);
|
||||||
//C_EQ(scratch[0],d_Fout[Fout0_count]);
|
//C_EQ(scratch[0],d_Fout[Fout0_count]);
|
||||||
scratch[0][0] = d_Fout(Fout0_count,0);
|
scratch[0][0] = d_Fout(Fout0_count).re;
|
||||||
scratch[0][1] = d_Fout(Fout0_count,1);
|
scratch[0][1] = d_Fout(Fout0_count).im;
|
||||||
|
|
||||||
C_MUL(scratch[1],d_Fout,Fout1_count,d_twiddles,u*fstride );
|
C_MUL(scratch[1],d_Fout,Fout1_count,d_twiddles,u*fstride );
|
||||||
C_MUL(scratch[2],d_Fout,Fout2_count,d_twiddles,2*u*fstride);
|
C_MUL(scratch[2],d_Fout,Fout2_count,d_twiddles,2*u*fstride);
|
||||||
@ -335,8 +333,8 @@ class KissFFTKokkos {
|
|||||||
scratch[9][0] = scratch[2][0] - scratch[3][0];
|
scratch[9][0] = scratch[2][0] - scratch[3][0];
|
||||||
scratch[9][1] = scratch[2][1] - scratch[3][1];
|
scratch[9][1] = scratch[2][1] - scratch[3][1];
|
||||||
|
|
||||||
d_Fout(Fout0_count,0) += scratch[7][0] + scratch[8][0];
|
d_Fout(Fout0_count).re += scratch[7][0] + scratch[8][0];
|
||||||
d_Fout(Fout0_count,1) += scratch[7][1] + scratch[8][1];
|
d_Fout(Fout0_count).im += scratch[7][1] + scratch[8][1];
|
||||||
|
|
||||||
scratch[5][0] = scratch[0][0] + S_MUL(scratch[7][0],ya[0]) + S_MUL(scratch[8][0],yb[0]);
|
scratch[5][0] = scratch[0][0] + S_MUL(scratch[7][0],ya[0]) + S_MUL(scratch[8][0],yb[0]);
|
||||||
scratch[5][1] = scratch[0][1] + S_MUL(scratch[7][1],ya[0]) + S_MUL(scratch[8][1],yb[0]);
|
scratch[5][1] = scratch[0][1] + S_MUL(scratch[7][1],ya[0]) + S_MUL(scratch[8][1],yb[0]);
|
||||||
@ -345,11 +343,11 @@ class KissFFTKokkos {
|
|||||||
scratch[6][1] = -S_MUL(scratch[10][0],ya[1]) - S_MUL(scratch[9][0],yb[1]);
|
scratch[6][1] = -S_MUL(scratch[10][0],ya[1]) - S_MUL(scratch[9][0],yb[1]);
|
||||||
|
|
||||||
//C_SUB(d_Fout[Fout1_count],scratch[5],scratch[6]);
|
//C_SUB(d_Fout[Fout1_count],scratch[5],scratch[6]);
|
||||||
d_Fout(Fout1_count,0) = scratch[5][0] - scratch[6][0];
|
d_Fout(Fout1_count).re = scratch[5][0] - scratch[6][0];
|
||||||
d_Fout(Fout1_count,1) = scratch[5][1] - scratch[6][1];
|
d_Fout(Fout1_count).im = scratch[5][1] - scratch[6][1];
|
||||||
//C_ADD(d_Fout[Fout4_count],scratch[5],scratch[6]);
|
//C_ADD(d_Fout[Fout4_count],scratch[5],scratch[6]);
|
||||||
d_Fout(Fout4_count,0) = scratch[5][0] + scratch[6][0];
|
d_Fout(Fout4_count).re = scratch[5][0] + scratch[6][0];
|
||||||
d_Fout(Fout4_count,1) = scratch[5][1] + scratch[6][1];
|
d_Fout(Fout4_count).im = scratch[5][1] + scratch[6][1];
|
||||||
|
|
||||||
scratch[11][0] = scratch[0][0] + S_MUL(scratch[7][0],yb[0]) + S_MUL(scratch[8][0],ya[0]);
|
scratch[11][0] = scratch[0][0] + S_MUL(scratch[7][0],yb[0]) + S_MUL(scratch[8][0],ya[0]);
|
||||||
scratch[11][1] = scratch[0][1] + S_MUL(scratch[7][1],yb[0]) + S_MUL(scratch[8][1],ya[0]);
|
scratch[11][1] = scratch[0][1] + S_MUL(scratch[7][1],yb[0]) + S_MUL(scratch[8][1],ya[0]);
|
||||||
@ -357,11 +355,11 @@ class KissFFTKokkos {
|
|||||||
scratch[12][1] = S_MUL(scratch[10][0],yb[1]) - S_MUL(scratch[9][0],ya[1]);
|
scratch[12][1] = S_MUL(scratch[10][0],yb[1]) - S_MUL(scratch[9][0],ya[1]);
|
||||||
|
|
||||||
//C_ADD(d_Fout[Fout2_count],scratch[11],scratch[12]);
|
//C_ADD(d_Fout[Fout2_count],scratch[11],scratch[12]);
|
||||||
d_Fout(Fout2_count,0) = scratch[11][0] + scratch[12][0];
|
d_Fout(Fout2_count).re = scratch[11][0] + scratch[12][0];
|
||||||
d_Fout(Fout2_count,1) = scratch[11][1] + scratch[12][1];
|
d_Fout(Fout2_count).im = scratch[11][1] + scratch[12][1];
|
||||||
//C_SUB(d_Fout3[Fout3_count],scratch[11],scratch[12]);
|
//C_SUB(d_Fout3[Fout3_count],scratch[11],scratch[12]);
|
||||||
d_Fout(Fout3_count,0) = scratch[11][0] - scratch[12][0];
|
d_Fout(Fout3_count).re = scratch[11][0] - scratch[12][0];
|
||||||
d_Fout(Fout3_count,1) = scratch[11][1] - scratch[12][1];
|
d_Fout(Fout3_count).im = scratch[11][1] - scratch[12][1];
|
||||||
|
|
||||||
++Fout0_count;++Fout1_count;++Fout2_count;++Fout3_count;++Fout4_count;
|
++Fout0_count;++Fout1_count;++Fout2_count;++Fout3_count;++Fout4_count;
|
||||||
}
|
}
|
||||||
@ -370,21 +368,21 @@ class KissFFTKokkos {
|
|||||||
/* perform the butterfly for one stage of a mixed radix FFT */
|
/* perform the butterfly for one stage of a mixed radix FFT */
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kf_bfly_generic(typename AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
static void kf_bfly_generic(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const size_t fstride,
|
||||||
const kiss_fft_state_kokkos<DeviceType> &st, int m, int p, int Fout_count)
|
const kiss_fft_state_kokkos<DeviceType> &st, int m, int p, int Fout_count)
|
||||||
{
|
{
|
||||||
int u,k,q1,q;
|
int u,k,q1,q;
|
||||||
typename AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
typename FFT_AT::t_FFT_DATA_1d_um d_twiddles = st.d_twiddles;
|
||||||
FFT_SCALAR t[2];
|
FFT_SCALAR t[2];
|
||||||
int Norig = st.nfft;
|
int Norig = st.nfft;
|
||||||
|
|
||||||
typename AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch;
|
typename FFT_AT::t_FFT_DATA_1d_um d_scratch = st.d_scratch;
|
||||||
for ( u=0; u<m; ++u ) {
|
for ( u=0; u<m; ++u ) {
|
||||||
k=u;
|
k=u;
|
||||||
for ( q1=0 ; q1<p ; ++q1 ) {
|
for ( q1=0 ; q1<p ; ++q1 ) {
|
||||||
//C_EQ(d_scratch[q1],d_Fout[k]);
|
//C_EQ(d_scratch[q1],d_Fout[k]);
|
||||||
d_scratch(q1,0) = d_Fout(Fout_count + k,0);
|
d_scratch(q1).re = d_Fout(Fout_count + k).re;
|
||||||
d_scratch(q1,1) = d_Fout(Fout_count + k,1);
|
d_scratch(q1).im = d_Fout(Fout_count + k).im;
|
||||||
//C_FIXDIV(d_scratch[q1],p);
|
//C_FIXDIV(d_scratch[q1],p);
|
||||||
k += m;
|
k += m;
|
||||||
}
|
}
|
||||||
@ -393,15 +391,15 @@ class KissFFTKokkos {
|
|||||||
for ( q1=0 ; q1<p ; ++q1 ) {
|
for ( q1=0 ; q1<p ; ++q1 ) {
|
||||||
int twidx=0;
|
int twidx=0;
|
||||||
//C_EQ(d_Fout[k],d_scratch[0]);
|
//C_EQ(d_Fout[k],d_scratch[0]);
|
||||||
d_Fout(Fout_count + k,0) = d_scratch(0,0);
|
d_Fout(Fout_count + k).re = d_scratch(0).re;
|
||||||
d_Fout(Fout_count + k,1) = d_scratch(0,1);
|
d_Fout(Fout_count + k).im = d_scratch(0).im;
|
||||||
for (q=1;q<p;++q ) {
|
for (q=1;q<p;++q ) {
|
||||||
twidx += fstride * k;
|
twidx += fstride * k;
|
||||||
if (twidx>=Norig) twidx-=Norig;
|
if (twidx>=Norig) twidx-=Norig;
|
||||||
C_MUL(t,d_scratch,q,d_twiddles,twidx);
|
C_MUL(t,d_scratch,q,d_twiddles,twidx);
|
||||||
//C_ADDTO(d_Fout[k],t);
|
//C_ADDTO(d_Fout[k],t);
|
||||||
d_Fout(Fout_count + k,0) += t[0];
|
d_Fout(Fout_count + k).re += t[0];
|
||||||
d_Fout(Fout_count + k,1) += t[1];
|
d_Fout(Fout_count + k).im += t[1];
|
||||||
}
|
}
|
||||||
k += m;
|
k += m;
|
||||||
}
|
}
|
||||||
@ -409,9 +407,9 @@ class KissFFTKokkos {
|
|||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kf_work(typename AT::t_FFT_DATA_1d_um &d_Fout, const typename AT::t_FFT_DATA_1d_um &d_f,
|
static void kf_work(typename FFT_AT::t_FFT_DATA_1d_um &d_Fout, const typename FFT_AT::t_FFT_DATA_1d_um &d_f,
|
||||||
const size_t fstride, int in_stride,
|
const size_t fstride, int in_stride,
|
||||||
const typename AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos<DeviceType> &st, int Fout_count, int f_count, int factors_count)
|
const typename FFT_AT::t_int_64_um &d_factors, const kiss_fft_state_kokkos<DeviceType> &st, int Fout_count, int f_count, int factors_count)
|
||||||
{
|
{
|
||||||
const int beg = Fout_count;
|
const int beg = Fout_count;
|
||||||
const int p = d_factors[factors_count++]; /* the radix */
|
const int p = d_factors[factors_count++]; /* the radix */
|
||||||
@ -421,8 +419,8 @@ class KissFFTKokkos {
|
|||||||
if (m == 1) {
|
if (m == 1) {
|
||||||
do {
|
do {
|
||||||
//C_EQ(d_Fout[Fout_count],d_f[f_count]);
|
//C_EQ(d_Fout[Fout_count],d_f[f_count]);
|
||||||
d_Fout(Fout_count,0) = d_f(f_count,0);
|
d_Fout(Fout_count).re = d_f(f_count).re;
|
||||||
d_Fout(Fout_count,1) = d_f(f_count,1);
|
d_Fout(Fout_count).im = d_f(f_count).im;
|
||||||
f_count += fstride*in_stride;
|
f_count += fstride*in_stride;
|
||||||
} while (++Fout_count != end);
|
} while (++Fout_count != end);
|
||||||
} else {
|
} else {
|
||||||
@ -453,7 +451,7 @@ class KissFFTKokkos {
|
|||||||
p[i] * m[i] = m[i-1]
|
p[i] * m[i] = m[i-1]
|
||||||
m0 = n */
|
m0 = n */
|
||||||
|
|
||||||
static int kf_factor(int n, HAT::t_int_64 h_facbuf)
|
static int kf_factor(int n, FFT_HAT::t_int_64 h_facbuf)
|
||||||
{
|
{
|
||||||
int p=4, nf=0;
|
int p=4, nf=0;
|
||||||
double floor_sqrt;
|
double floor_sqrt;
|
||||||
@ -497,12 +495,12 @@ class KissFFTKokkos {
|
|||||||
st.nfft = nfft;
|
st.nfft = nfft;
|
||||||
st.inverse = inverse_fft;
|
st.inverse = inverse_fft;
|
||||||
|
|
||||||
typename AT::tdual_int_64 k_factors = typename AT::tdual_int_64();
|
typename FFT_AT::tdual_int_64 k_factors = typename FFT_AT::tdual_int_64();
|
||||||
typename AT::tdual_FFT_DATA_1d k_twiddles = typename AT::tdual_FFT_DATA_1d();
|
typename FFT_AT::tdual_FFT_DATA_1d k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d();
|
||||||
|
|
||||||
if (nfft > 0) {
|
if (nfft > 0) {
|
||||||
k_factors = typename AT::tdual_int_64("kissfft:factors",MAXFACTORS*2);
|
k_factors = typename FFT_AT::tdual_int_64("kissfft:factors",MAXFACTORS*2);
|
||||||
k_twiddles = typename AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft);
|
k_twiddles = typename FFT_AT::tdual_FFT_DATA_1d("kissfft:twiddles",nfft);
|
||||||
|
|
||||||
for (i=0;i<nfft;++i) {
|
for (i=0;i<nfft;++i) {
|
||||||
const double phase = (st.inverse ? 2.0*M_PI:-2.0*M_PI)*i / nfft;
|
const double phase = (st.inverse ? 2.0*M_PI:-2.0*M_PI)*i / nfft;
|
||||||
@ -510,7 +508,7 @@ class KissFFTKokkos {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int p_max = kf_factor(nfft,k_factors.h_view);
|
int p_max = kf_factor(nfft,k_factors.h_view);
|
||||||
st.d_scratch = typename AT::t_FFT_DATA_1d("kissfft:scratch",p_max);
|
st.d_scratch = typename FFT_AT::t_FFT_DATA_1d("kissfft:scratch",p_max);
|
||||||
}
|
}
|
||||||
|
|
||||||
k_factors.template modify<LMPHostType>();
|
k_factors.template modify<LMPHostType>();
|
||||||
@ -525,13 +523,13 @@ class KissFFTKokkos {
|
|||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kiss_fft_stride(const kiss_fft_state_kokkos<DeviceType> &st, const typename AT::t_FFT_DATA_1d_um &d_fin, typename AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset)
|
static void kiss_fft_stride(const kiss_fft_state_kokkos<DeviceType> &st, const typename FFT_AT::t_FFT_DATA_1d_um &d_fin, typename FFT_AT::t_FFT_DATA_1d_um &d_fout, int in_stride, int offset)
|
||||||
{
|
{
|
||||||
//if (d_fin.data() == d_fout.data()) {
|
//if (d_fin.data() == d_fout.data()) {
|
||||||
// // NOTE: this is not really an in-place FFT algorithm.
|
// // NOTE: this is not really an in-place FFT algorithm.
|
||||||
// // It just performs an out-of-place FFT into a temp buffer
|
// // It just performs an out-of-place FFT into a temp buffer
|
||||||
// typename AT::t_FFT_DATA_1d_um d_tmpbuf = typename AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1));
|
// typename FFT_AT::t_FFT_DATA_1d_um d_tmpbuf = typename FFT_AT::t_FFT_DATA_1d("kissfft:tmpbuf",d_fin.extent(1));
|
||||||
// kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset,0);
|
// kf_work(d_tmpbuf,d_fin,1,in_stride,st.d_factors,st,offset,offset).re;
|
||||||
// Kokkos::deep_copy(d_fout,d_tmpbuf);
|
// Kokkos::deep_copy(d_fout,d_tmpbuf);
|
||||||
//} else {
|
//} else {
|
||||||
kf_work(d_fout,d_fin,1,in_stride,st.d_factors,st,offset,offset,0);
|
kf_work(d_fout,d_fin,1,in_stride,st.d_factors,st,offset,offset,0);
|
||||||
@ -539,7 +537,7 @@ class KissFFTKokkos {
|
|||||||
}
|
}
|
||||||
|
|
||||||
KOKKOS_INLINE_FUNCTION
|
KOKKOS_INLINE_FUNCTION
|
||||||
static void kiss_fft_kokkos(const kiss_fft_state_kokkos<DeviceType> &cfg, const typename AT::t_FFT_DATA_1d_um d_fin, typename AT::t_FFT_DATA_1d_um d_fout, int offset)
|
static void kiss_fft_kokkos(const kiss_fft_state_kokkos<DeviceType> &cfg, const typename FFT_AT::t_FFT_DATA_1d_um d_fin, typename FFT_AT::t_FFT_DATA_1d_um d_fout, int offset)
|
||||||
{
|
{
|
||||||
kiss_fft_stride(cfg,d_fin,d_fout,1,offset);
|
kiss_fft_stride(cfg,d_fin,d_fout,1,offset);
|
||||||
}
|
}
|
||||||
|
|||||||
@ -22,12 +22,6 @@ class KokkosBase {
|
|||||||
public:
|
public:
|
||||||
KokkosBase() {}
|
KokkosBase() {}
|
||||||
|
|
||||||
//Kspace
|
|
||||||
virtual void pack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
|
||||||
virtual void unpack_forward_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
|
||||||
virtual void pack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
|
||||||
virtual void unpack_reverse_kspace_kokkos(int, DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
|
||||||
|
|
||||||
// Pair
|
// Pair
|
||||||
virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d,
|
virtual int pack_forward_comm_kokkos(int, DAT::tdual_int_2d,
|
||||||
int, DAT::tdual_xfloat_1d &,
|
int, DAT::tdual_xfloat_1d &,
|
||||||
|
|||||||
38
src/KOKKOS/kokkos_base_fft.h
Normal file
38
src/KOKKOS/kokkos_base_fft.h
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||||
|
http://lammps.sandia.gov, Sandia National Laboratories
|
||||||
|
Steve Plimpton, sjplimp@sandia.gov
|
||||||
|
|
||||||
|
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||||
|
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||||
|
certain rights in this software. This software is distributed under
|
||||||
|
the GNU General Public License.
|
||||||
|
|
||||||
|
See the README file in the top-level LAMMPS directory.
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#ifndef LMP_KOKKOS_BASE_FFT_H
|
||||||
|
#define LMP_KOKKOS_BASE_FFT_H
|
||||||
|
|
||||||
|
#include "fftdata_kokkos.h"
|
||||||
|
|
||||||
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
|
class KokkosBaseFFT {
|
||||||
|
public:
|
||||||
|
KokkosBaseFFT() {}
|
||||||
|
|
||||||
|
//Kspace
|
||||||
|
virtual void pack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||||
|
virtual void unpack_forward_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||||
|
virtual void pack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||||
|
virtual void unpack_reverse_kspace_kokkos(int, FFT_DAT::tdual_FFT_SCALAR_1d &, int, DAT::tdual_int_2d &, int) {};
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* ERROR/WARNING messages:
|
||||||
|
|
||||||
|
*/
|
||||||
@ -29,21 +29,6 @@ enum{FULL=1u,HALFTHREAD=2u,HALF=4u,N2=8u};
|
|||||||
#define ISFINITE(x) std::isfinite(x)
|
#define ISFINITE(x) std::isfinite(x)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// User-settable FFT precision
|
|
||||||
|
|
||||||
// FFT_PRECISION = 1 is single-precision complex (4-byte real, 4-byte imag)
|
|
||||||
// FFT_PRECISION = 2 is double-precision complex (8-byte real, 8-byte imag)
|
|
||||||
|
|
||||||
#ifdef FFT_SINGLE
|
|
||||||
#define FFT_PRECISION 1
|
|
||||||
#define MPI_FFT_SCALAR MPI_FLOAT
|
|
||||||
typedef float FFT_SCALAR;
|
|
||||||
#else
|
|
||||||
#define FFT_PRECISION 2
|
|
||||||
#define MPI_FFT_SCALAR MPI_DOUBLE
|
|
||||||
typedef double FFT_SCALAR;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define MAX_TYPES_STACKPARAMS 12
|
#define MAX_TYPES_STACKPARAMS 12
|
||||||
#define NeighClusterSize 8
|
#define NeighClusterSize 8
|
||||||
|
|
||||||
@ -752,32 +737,6 @@ typedef tdual_neighbors_2d::t_dev_um t_neighbors_2d_um;
|
|||||||
typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
|
typedef tdual_neighbors_2d::t_dev_const_um t_neighbors_2d_const_um;
|
||||||
typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
|
typedef tdual_neighbors_2d::t_dev_const_randomread t_neighbors_2d_randomread;
|
||||||
|
|
||||||
//Kspace
|
|
||||||
|
|
||||||
typedef Kokkos::
|
|
||||||
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
|
|
||||||
typedef tdual_FFT_SCALAR_1d::t_dev t_FFT_SCALAR_1d;
|
|
||||||
typedef tdual_FFT_SCALAR_1d::t_dev_um t_FFT_SCALAR_1d_um;
|
|
||||||
|
|
||||||
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
|
|
||||||
typedef tdual_FFT_SCALAR_2d::t_dev t_FFT_SCALAR_2d;
|
|
||||||
|
|
||||||
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
|
|
||||||
typedef tdual_FFT_SCALAR_2d_3::t_dev t_FFT_SCALAR_2d_3;
|
|
||||||
|
|
||||||
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
|
|
||||||
typedef tdual_FFT_SCALAR_3d::t_dev t_FFT_SCALAR_3d;
|
|
||||||
|
|
||||||
typedef Kokkos::
|
|
||||||
DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
|
|
||||||
typedef tdual_FFT_DATA_1d::t_dev t_FFT_DATA_1d;
|
|
||||||
typedef tdual_FFT_DATA_1d::t_dev_um t_FFT_DATA_1d_um;
|
|
||||||
|
|
||||||
typedef Kokkos::
|
|
||||||
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
|
|
||||||
typedef tdual_int_64::t_dev t_int_64;
|
|
||||||
typedef tdual_int_64::t_dev_um t_int_64_um;
|
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
#ifdef KOKKOS_ENABLE_CUDA
|
#ifdef KOKKOS_ENABLE_CUDA
|
||||||
@ -1012,33 +971,6 @@ typedef tdual_neighbors_2d::t_host_um t_neighbors_2d_um;
|
|||||||
typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
|
typedef tdual_neighbors_2d::t_host_const_um t_neighbors_2d_const_um;
|
||||||
typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
|
typedef tdual_neighbors_2d::t_host_const_randomread t_neighbors_2d_randomread;
|
||||||
|
|
||||||
|
|
||||||
//Kspace
|
|
||||||
|
|
||||||
typedef Kokkos::
|
|
||||||
DualView<FFT_SCALAR*, Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_SCALAR_1d;
|
|
||||||
typedef tdual_FFT_SCALAR_1d::t_host t_FFT_SCALAR_1d;
|
|
||||||
typedef tdual_FFT_SCALAR_1d::t_host_um t_FFT_SCALAR_1d_um;
|
|
||||||
|
|
||||||
typedef Kokkos::DualView<FFT_SCALAR**,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d;
|
|
||||||
typedef tdual_FFT_SCALAR_2d::t_host t_FFT_SCALAR_2d;
|
|
||||||
|
|
||||||
typedef Kokkos::DualView<FFT_SCALAR**[3],Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_2d_3;
|
|
||||||
typedef tdual_FFT_SCALAR_2d_3::t_host t_FFT_SCALAR_2d_3;
|
|
||||||
|
|
||||||
typedef Kokkos::DualView<FFT_SCALAR***,Kokkos::LayoutRight,LMPDeviceType> tdual_FFT_SCALAR_3d;
|
|
||||||
typedef tdual_FFT_SCALAR_3d::t_host t_FFT_SCALAR_3d;
|
|
||||||
|
|
||||||
typedef Kokkos::
|
|
||||||
DualView<FFT_SCALAR*[2], Kokkos::LayoutRight, LMPDeviceType> tdual_FFT_DATA_1d;
|
|
||||||
typedef tdual_FFT_DATA_1d::t_host t_FFT_DATA_1d;
|
|
||||||
typedef tdual_FFT_DATA_1d::t_host_um t_FFT_DATA_1d_um;
|
|
||||||
|
|
||||||
typedef Kokkos::
|
|
||||||
DualView<int*, LMPDeviceType::array_layout, LMPDeviceType> tdual_int_64;
|
|
||||||
typedef tdual_int_64::t_host t_int_64;
|
|
||||||
typedef tdual_int_64::t_host_um t_int_64_um;
|
|
||||||
|
|
||||||
};
|
};
|
||||||
#endif
|
#endif
|
||||||
//default LAMMPS Types
|
//default LAMMPS Types
|
||||||
|
|||||||
@ -29,7 +29,7 @@
|
|||||||
pack from data -> buf
|
pack from data -> buf
|
||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
#include "kokkos_type.h"
|
#include "fftdata_kokkos.h"
|
||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
@ -37,13 +37,13 @@ template<class DeviceType>
|
|||||||
class PackKokkos {
|
class PackKokkos {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
|
|
||||||
struct pack_3d_functor {
|
struct pack_3d_functor {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -51,7 +51,7 @@ public:
|
|||||||
int nstride_line; // stride between successive mid indices
|
int nstride_line; // stride between successive mid indices
|
||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
|
|
||||||
pack_3d_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
pack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -79,7 +79,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void pack_3d(typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan)
|
static void pack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
@ -97,7 +97,7 @@ struct unpack_3d_functor {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -105,7 +105,7 @@ public:
|
|||||||
int nstride_line; // stride between successive mid indices
|
int nstride_line; // stride between successive mid indices
|
||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
|
|
||||||
unpack_3d_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
unpack_3d_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -133,7 +133,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void unpack_3d(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
static void unpack_3d(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
@ -152,7 +152,7 @@ struct unpack_3d_permute1_1_functor {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -160,7 +160,7 @@ public:
|
|||||||
int nstride_line; // stride between successive mid indices
|
int nstride_line; // stride between successive mid indices
|
||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
|
|
||||||
unpack_3d_permute1_1_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
unpack_3d_permute1_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -188,7 +188,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void unpack_3d_permute1_1(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
static void unpack_3d_permute1_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
@ -205,7 +205,7 @@ struct unpack_3d_permute1_2_functor {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -213,7 +213,7 @@ public:
|
|||||||
int nstride_line; // stride between successive mid indices
|
int nstride_line; // stride between successive mid indices
|
||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
|
|
||||||
unpack_3d_permute1_2_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
unpack_3d_permute1_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -242,7 +242,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void unpack_3d_permute1_2(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
static void unpack_3d_permute1_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
@ -260,7 +260,7 @@ struct unpack_3d_permute1_n_functor {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -269,7 +269,7 @@ public:
|
|||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
int nqty; // # of values/element
|
int nqty; // # of values/element
|
||||||
|
|
||||||
unpack_3d_permute1_n_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
unpack_3d_permute1_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -298,7 +298,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void unpack_3d_permute1_n(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
static void unpack_3d_permute1_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
@ -316,7 +316,7 @@ struct unpack_3d_permute2_1_functor {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -324,7 +324,7 @@ public:
|
|||||||
int nstride_line; // stride between successive mid indices
|
int nstride_line; // stride between successive mid indices
|
||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
|
|
||||||
unpack_3d_permute2_1_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
unpack_3d_permute2_1_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -351,7 +351,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void unpack_3d_permute2_1(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
static void unpack_3d_permute2_1(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
@ -369,7 +369,7 @@ struct unpack_3d_permute2_2_functor {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -377,7 +377,7 @@ public:
|
|||||||
int nstride_line; // stride between successive mid indices
|
int nstride_line; // stride between successive mid indices
|
||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
|
|
||||||
unpack_3d_permute2_2_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
unpack_3d_permute2_2_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -405,7 +405,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void unpack_3d_permute2_2(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
static void unpack_3d_permute2_2(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
@ -422,7 +422,7 @@ struct unpack_3d_permute2_n_functor {
|
|||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf,d_data;
|
||||||
int buf_offset,data_offset;
|
int buf_offset,data_offset;
|
||||||
int nfast; // # of elements in fast index
|
int nfast; // # of elements in fast index
|
||||||
int nmid; // # of elements in mid index
|
int nmid; // # of elements in mid index
|
||||||
@ -431,7 +431,7 @@ public:
|
|||||||
int nstride_plane; // stride between successive slow indices
|
int nstride_plane; // stride between successive slow indices
|
||||||
int nqty; // # of values/element
|
int nqty; // # of values/element
|
||||||
|
|
||||||
unpack_3d_permute2_n_functor(typename AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
unpack_3d_permute2_n_functor(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf_, int buf_offset_, typename FFT_AT::t_FFT_SCALAR_1d_um d_data_, int data_offset_, struct pack_plan_3d *plan):
|
||||||
d_buf(d_buf_),
|
d_buf(d_buf_),
|
||||||
d_data(d_data_)
|
d_data(d_data_)
|
||||||
{
|
{
|
||||||
@ -459,7 +459,7 @@ public:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
static void unpack_3d_permute2_n(typename AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
static void unpack_3d_permute2_n(typename FFT_AT::t_FFT_SCALAR_1d_um d_buf, int buf_offset, typename FFT_AT::t_FFT_SCALAR_1d_um d_data, int data_offset, struct pack_plan_3d *plan)
|
||||||
{
|
{
|
||||||
const int nslow = plan->nslow;
|
const int nslow = plan->nslow;
|
||||||
const int nmid = plan->nmid;
|
const int nmid = plan->nmid;
|
||||||
|
|||||||
@ -669,7 +669,7 @@ void PPPMKokkos<DeviceType>::compute(int eflag, int vflag)
|
|||||||
nmax = atomKK->nmax;
|
nmax = atomKK->nmax;
|
||||||
//memory->create(part2grid,nmax,3,"pppm:part2grid");
|
//memory->create(part2grid,nmax,3,"pppm:part2grid");
|
||||||
d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax);
|
d_part2grid = typename AT::t_int_1d_3("pppm:part2grid",nmax);
|
||||||
d_rho1d = typename AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1);
|
d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// find grid points for all my particles
|
// find grid points for all my particles
|
||||||
@ -799,7 +799,7 @@ void PPPMKokkos<DeviceType>::operator()(TagPPPM_self2, const int &i) const
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void PPPMKokkos<DeviceType>::allocate()
|
void PPPMKokkos<DeviceType>::allocate()
|
||||||
{
|
{
|
||||||
d_density_brick = typename AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_density_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:density_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
|
|
||||||
memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft");
|
memoryKK->create_kokkos(k_density_fft,density_fft,nfft_both,"pppm:d_density_fft");
|
||||||
d_density_fft = k_density_fft.view<DeviceType>();
|
d_density_fft = k_density_fft.view<DeviceType>();
|
||||||
@ -821,17 +821,17 @@ void PPPMKokkos<DeviceType>::allocate()
|
|||||||
d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both);
|
d_fkz = typename AT::t_float_1d("pppm:d_fkz",nfft_both);
|
||||||
}
|
}
|
||||||
|
|
||||||
d_vdx_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_vdx_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdx_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
d_vdy_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_vdy_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdy_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
d_vdz_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_vdz_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_vdz_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
|
|
||||||
// summation coeffs
|
// summation coeffs
|
||||||
|
|
||||||
order_allocated = order;
|
order_allocated = order;
|
||||||
k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order);
|
k_gf_b = typename DAT::tdual_float_1d("pppm:gf_b",order);
|
||||||
d_gf_b = k_gf_b.view<DeviceType>();
|
d_gf_b = k_gf_b.view<DeviceType>();
|
||||||
d_rho1d = typename AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1);
|
d_rho1d = typename FFT_AT::t_FFT_SCALAR_2d_3("pppm:rho1d",nmax,order/2+order/2+1);
|
||||||
k_rho_coeff = DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1);
|
k_rho_coeff = FFT_DAT::tdual_FFT_SCALAR_2d("pppm:rho_coeff",order,order/2-(1-order)/2+1);
|
||||||
d_rho_coeff = k_rho_coeff.view<DeviceType>();
|
d_rho_coeff = k_rho_coeff.view<DeviceType>();
|
||||||
h_rho_coeff = k_rho_coeff.h_view;
|
h_rho_coeff = k_rho_coeff.h_view;
|
||||||
|
|
||||||
@ -902,14 +902,14 @@ void PPPMKokkos<DeviceType>::allocate_peratom()
|
|||||||
{
|
{
|
||||||
peratom_allocate_flag = 1;
|
peratom_allocate_flag = 1;
|
||||||
|
|
||||||
d_u_brick = typename AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_u_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:u_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
|
|
||||||
d_v0_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_v0_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v0_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
d_v1_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_v1_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v1_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
d_v2_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_v2_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v2_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
d_v3_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_v3_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v3_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
d_v4_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_v4_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v4_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
d_v5_brick = typename AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
d_v5_brick = typename FFT_AT::t_FFT_SCALAR_3d("pppm:d_v5_brick",nzhi_out-nzlo_out+1,nyhi_out-nylo_out+1,nxhi_out-nxlo_out+1);
|
||||||
|
|
||||||
|
|
||||||
// create ghost grid object for rho and electric field communication
|
// create ghost grid object for rho and electric field communication
|
||||||
|
|||||||
@ -25,7 +25,8 @@ KSpaceStyle(pppm/kk/host,PPPMKokkos<LMPHostType>)
|
|||||||
#include "gridcomm_kokkos.h"
|
#include "gridcomm_kokkos.h"
|
||||||
#include "remap_kokkos.h"
|
#include "remap_kokkos.h"
|
||||||
#include "fft3d_kokkos.h"
|
#include "fft3d_kokkos.h"
|
||||||
#include "kokkos_base.h"
|
#include "kokkos_base_fft.h"
|
||||||
|
#include "fftdata_kokkos.h"
|
||||||
#include "kokkos_type.h"
|
#include "kokkos_type.h"
|
||||||
|
|
||||||
// fix up FFT defines for KOKKOS with CUDA
|
// fix up FFT defines for KOKKOS with CUDA
|
||||||
@ -107,10 +108,11 @@ struct TagPPPM_slabcorr4{};
|
|||||||
struct TagPPPM_timing_zero{};
|
struct TagPPPM_timing_zero{};
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
class PPPMKokkos : public PPPM, public KokkosBase {
|
class PPPMKokkos : public PPPM, public KokkosBaseFFT {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef ArrayTypes<DeviceType> AT;
|
||||||
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
|
|
||||||
PPPMKokkos(class LAMMPS *);
|
PPPMKokkos(class LAMMPS *);
|
||||||
virtual ~PPPMKokkos();
|
virtual ~PPPMKokkos();
|
||||||
@ -308,7 +310,7 @@ class PPPMKokkos : public PPPM, public KokkosBase {
|
|||||||
|
|
||||||
int nx,ny,nz;
|
int nx,ny,nz;
|
||||||
typename AT::t_int_1d_um d_list_index;
|
typename AT::t_int_1d_um d_list_index;
|
||||||
typename AT::t_FFT_SCALAR_1d_um d_buf;
|
typename FFT_AT::t_FFT_SCALAR_1d_um d_buf;
|
||||||
|
|
||||||
DAT::tdual_int_scalar k_flag;
|
DAT::tdual_int_scalar k_flag;
|
||||||
|
|
||||||
@ -323,31 +325,31 @@ class PPPMKokkos : public PPPM, public KokkosBase {
|
|||||||
|
|
||||||
int factors[3];
|
int factors[3];
|
||||||
|
|
||||||
typename AT::t_FFT_SCALAR_3d d_density_brick;
|
typename FFT_AT::t_FFT_SCALAR_3d d_density_brick;
|
||||||
typename AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick;
|
typename FFT_AT::t_FFT_SCALAR_3d d_vdx_brick,d_vdy_brick,d_vdz_brick;
|
||||||
typename AT::t_FFT_SCALAR_3d d_u_brick;
|
typename FFT_AT::t_FFT_SCALAR_3d d_u_brick;
|
||||||
typename AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick;
|
typename FFT_AT::t_FFT_SCALAR_3d d_v0_brick,d_v1_brick,d_v2_brick;
|
||||||
typename AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick;
|
typename FFT_AT::t_FFT_SCALAR_3d d_v3_brick,d_v4_brick,d_v5_brick;
|
||||||
typename AT::t_float_1d d_greensfn;
|
typename AT::t_float_1d d_greensfn;
|
||||||
typename AT::t_virial_array d_vg;
|
typename AT::t_virial_array d_vg;
|
||||||
typename AT::t_float_1d d_fkx;
|
typename AT::t_float_1d d_fkx;
|
||||||
typename AT::t_float_1d d_fky;
|
typename AT::t_float_1d d_fky;
|
||||||
typename AT::t_float_1d d_fkz;
|
typename AT::t_float_1d d_fkz;
|
||||||
DAT::tdual_FFT_SCALAR_1d k_density_fft;
|
FFT_DAT::tdual_FFT_SCALAR_1d k_density_fft;
|
||||||
DAT::tdual_FFT_SCALAR_1d k_work1;
|
FFT_DAT::tdual_FFT_SCALAR_1d k_work1;
|
||||||
DAT::tdual_FFT_SCALAR_1d k_work2;
|
FFT_DAT::tdual_FFT_SCALAR_1d k_work2;
|
||||||
typename AT::t_FFT_SCALAR_1d d_density_fft;
|
typename FFT_AT::t_FFT_SCALAR_1d d_density_fft;
|
||||||
typename AT::t_FFT_SCALAR_1d d_work1;
|
typename FFT_AT::t_FFT_SCALAR_1d d_work1;
|
||||||
typename AT::t_FFT_SCALAR_1d d_work2;
|
typename FFT_AT::t_FFT_SCALAR_1d d_work2;
|
||||||
|
|
||||||
DAT::tdual_float_1d k_gf_b;
|
DAT::tdual_float_1d k_gf_b;
|
||||||
typename AT::t_float_1d d_gf_b;
|
typename AT::t_float_1d d_gf_b;
|
||||||
|
|
||||||
//FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff;
|
//FFT_SCALAR **rho1d,**rho_coeff,**drho1d,**drho_coeff;
|
||||||
typename AT::t_FFT_SCALAR_2d_3 d_rho1d;
|
typename FFT_AT::t_FFT_SCALAR_2d_3 d_rho1d;
|
||||||
DAT::tdual_FFT_SCALAR_2d k_rho_coeff;
|
FFT_DAT::tdual_FFT_SCALAR_2d k_rho_coeff;
|
||||||
typename AT::t_FFT_SCALAR_2d d_rho_coeff;
|
typename FFT_AT::t_FFT_SCALAR_2d d_rho_coeff;
|
||||||
HAT::t_FFT_SCALAR_2d h_rho_coeff;
|
FFT_HAT::t_FFT_SCALAR_2d h_rho_coeff;
|
||||||
//double **acons;
|
//double **acons;
|
||||||
typename Kokkos::DualView<F_FLOAT[8][7],Kokkos::LayoutRight,DeviceType>::t_host acons;
|
typename Kokkos::DualView<F_FLOAT[8][7],Kokkos::LayoutRight,DeviceType>::t_host acons;
|
||||||
|
|
||||||
|
|||||||
@ -58,7 +58,7 @@ RemapKokkos<DeviceType>::~RemapKokkos()
|
|||||||
/* ---------------------------------------------------------------------- */
|
/* ---------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void RemapKokkos<DeviceType>::perform(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, typename AT::t_FFT_SCALAR_1d d_buf)
|
void RemapKokkos<DeviceType>::perform(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf)
|
||||||
{
|
{
|
||||||
remap_3d_kokkos(d_in,d_out,d_buf,plan);
|
remap_3d_kokkos(d_in,d_out,d_buf,plan);
|
||||||
}
|
}
|
||||||
@ -102,7 +102,7 @@ void RemapKokkos<DeviceType>::perform(typename AT::t_FFT_SCALAR_1d d_in, typenam
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
void RemapKokkos<DeviceType>::remap_3d_kokkos(typename AT::t_FFT_SCALAR_1d d_in, typename AT::t_FFT_SCALAR_1d d_out, typename AT::t_FFT_SCALAR_1d d_buf,
|
void RemapKokkos<DeviceType>::remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d d_in, typename FFT_AT::t_FFT_SCALAR_1d d_out, typename FFT_AT::t_FFT_SCALAR_1d d_buf,
|
||||||
struct remap_plan_3d_kokkos<DeviceType> *plan)
|
struct remap_plan_3d_kokkos<DeviceType> *plan)
|
||||||
{
|
{
|
||||||
// collective flag not yet supported
|
// collective flag not yet supported
|
||||||
@ -110,7 +110,7 @@ void RemapKokkos<DeviceType>::remap_3d_kokkos(typename AT::t_FFT_SCALAR_1d d_in,
|
|||||||
// use point-to-point communication
|
// use point-to-point communication
|
||||||
|
|
||||||
int i,isend,irecv;
|
int i,isend,irecv;
|
||||||
typename AT::t_FFT_SCALAR_1d d_scratch;
|
typename FFT_AT::t_FFT_SCALAR_1d d_scratch;
|
||||||
|
|
||||||
if (plan->memory == 0)
|
if (plan->memory == 0)
|
||||||
d_scratch = d_buf;
|
d_scratch = d_buf;
|
||||||
@ -442,7 +442,7 @@ struct remap_plan_3d_kokkos<DeviceType>* RemapKokkos<DeviceType>::remap_3d_creat
|
|||||||
size = MAX(size,plan->send_size[nsend]);
|
size = MAX(size,plan->send_size[nsend]);
|
||||||
|
|
||||||
if (size) {
|
if (size) {
|
||||||
plan->d_sendbuf = typename AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size);
|
plan->d_sendbuf = typename FFT_AT::t_FFT_SCALAR_1d("remap3d:sendbuf",size);
|
||||||
if (!plan->d_sendbuf.data()) return NULL;
|
if (!plan->d_sendbuf.data()) return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -452,7 +452,7 @@ struct remap_plan_3d_kokkos<DeviceType>* RemapKokkos<DeviceType>::remap_3d_creat
|
|||||||
if (memory == 1) {
|
if (memory == 1) {
|
||||||
if (nrecv > 0) {
|
if (nrecv > 0) {
|
||||||
plan->d_scratch =
|
plan->d_scratch =
|
||||||
typename AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize);
|
typename FFT_AT::t_FFT_SCALAR_1d("remap3d:scratch",nqty*out.isize*out.jsize*out.ksize);
|
||||||
if (!plan->d_scratch.data()) return NULL;
|
if (!plan->d_scratch.data()) return NULL;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -18,7 +18,6 @@
|
|||||||
#include <mpi.h>
|
#include <mpi.h>
|
||||||
#include "fftdata_kokkos.h"
|
#include "fftdata_kokkos.h"
|
||||||
#include "remap.h"
|
#include "remap.h"
|
||||||
#include "kokkos_type.h"
|
|
||||||
|
|
||||||
namespace LAMMPS_NS {
|
namespace LAMMPS_NS {
|
||||||
|
|
||||||
@ -27,12 +26,12 @@ namespace LAMMPS_NS {
|
|||||||
template<class DeviceType>
|
template<class DeviceType>
|
||||||
struct remap_plan_3d_kokkos {
|
struct remap_plan_3d_kokkos {
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
typename AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends
|
typename FFT_AT::t_FFT_SCALAR_1d d_sendbuf; // buffer for MPI sends
|
||||||
typename AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs
|
typename FFT_AT::t_FFT_SCALAR_1d d_scratch; // scratch buffer for MPI recvs
|
||||||
void (*pack)(typename AT::t_FFT_SCALAR_1d_um, int, typename AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *);
|
void (*pack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *);
|
||||||
// which pack function to use
|
// which pack function to use
|
||||||
void (*unpack)(typename AT::t_FFT_SCALAR_1d_um, int, typename AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *);
|
void (*unpack)(typename FFT_AT::t_FFT_SCALAR_1d_um, int, typename FFT_AT::t_FFT_SCALAR_1d_um, int, struct pack_plan_3d *);
|
||||||
// which unpack function to use
|
// which unpack function to use
|
||||||
int *send_offset; // extraction loc for each send
|
int *send_offset; // extraction loc for each send
|
||||||
int *send_size; // size of each send message
|
int *send_size; // size of each send message
|
||||||
@ -58,16 +57,16 @@ template<class DeviceType>
|
|||||||
class RemapKokkos : protected Pointers {
|
class RemapKokkos : protected Pointers {
|
||||||
public:
|
public:
|
||||||
typedef DeviceType device_type;
|
typedef DeviceType device_type;
|
||||||
typedef ArrayTypes<DeviceType> AT;
|
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||||
RemapKokkos(class LAMMPS *);
|
RemapKokkos(class LAMMPS *);
|
||||||
RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
|
RemapKokkos(class LAMMPS *, MPI_Comm,int,int,int,int,int,int,
|
||||||
int,int,int,int,int,int,int,int,int,int,int);
|
int,int,int,int,int,int,int,int,int,int,int);
|
||||||
~RemapKokkos();
|
~RemapKokkos();
|
||||||
void perform(typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d);
|
void perform(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d);
|
||||||
|
|
||||||
struct remap_plan_3d_kokkos<DeviceType> *plan;
|
struct remap_plan_3d_kokkos<DeviceType> *plan;
|
||||||
|
|
||||||
void remap_3d_kokkos(typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, typename AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos<DeviceType> *);
|
void remap_3d_kokkos(typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, typename FFT_AT::t_FFT_SCALAR_1d, struct remap_plan_3d_kokkos<DeviceType> *);
|
||||||
struct remap_plan_3d_kokkos<DeviceType> *remap_3d_create_plan_kokkos(MPI_Comm,
|
struct remap_plan_3d_kokkos<DeviceType> *remap_3d_create_plan_kokkos(MPI_Comm,
|
||||||
int, int, int, int, int, int,
|
int, int, int, int, int, int,
|
||||||
int, int, int, int, int, int,
|
int, int, int, int, int, int,
|
||||||
|
|||||||
@ -107,13 +107,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
|
|||||||
DftiComputeForward(plan->handle_fast,data);
|
DftiComputeForward(plan->handle_fast,data);
|
||||||
else
|
else
|
||||||
DftiComputeBackward(plan->handle_fast,data);
|
DftiComputeBackward(plan->handle_fast,data);
|
||||||
/*
|
|
||||||
#elif defined(FFT_FFTW2)
|
|
||||||
if (flag == -1)
|
|
||||||
fftw(plan->plan_fast_forward,total/length,data,1,length,NULL,0,0);
|
|
||||||
else
|
|
||||||
fftw(plan->plan_fast_backward,total/length,data,1,length,NULL,0,0);
|
|
||||||
*/
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
theplan=plan->plan_fast_forward;
|
theplan=plan->plan_fast_forward;
|
||||||
@ -148,13 +141,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
|
|||||||
DftiComputeForward(plan->handle_mid,data);
|
DftiComputeForward(plan->handle_mid,data);
|
||||||
else
|
else
|
||||||
DftiComputeBackward(plan->handle_mid,data);
|
DftiComputeBackward(plan->handle_mid,data);
|
||||||
/*
|
|
||||||
#elif defined(FFT_FFTW2)
|
|
||||||
if (flag == -1)
|
|
||||||
fftw(plan->plan_mid_forward,total/length,data,1,length,NULL,0,0);
|
|
||||||
else
|
|
||||||
fftw(plan->plan_mid_backward,total/length,data,1,length,NULL,0,0);
|
|
||||||
*/
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
theplan=plan->plan_mid_forward;
|
theplan=plan->plan_mid_forward;
|
||||||
@ -189,13 +175,6 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
|
|||||||
DftiComputeForward(plan->handle_slow,data);
|
DftiComputeForward(plan->handle_slow,data);
|
||||||
else
|
else
|
||||||
DftiComputeBackward(plan->handle_slow,data);
|
DftiComputeBackward(plan->handle_slow,data);
|
||||||
/*
|
|
||||||
#elif defined(FFT_FFTW2)
|
|
||||||
if (flag == -1)
|
|
||||||
fftw(plan->plan_slow_forward,total/length,data,1,length,NULL,0,0);
|
|
||||||
else
|
|
||||||
fftw(plan->plan_slow_backward,total/length,data,1,length,NULL,0,0);
|
|
||||||
*/
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
theplan=plan->plan_slow_forward;
|
theplan=plan->plan_slow_forward;
|
||||||
@ -231,7 +210,7 @@ void fft_3d(FFT_DATA *in, FFT_DATA *out, int flag, struct fft_plan_3d *plan)
|
|||||||
*(out_ptr++) *= norm;
|
*(out_ptr++) *= norm;
|
||||||
#elif defined(FFT_MKL)
|
#elif defined(FFT_MKL)
|
||||||
out[i] *= norm;
|
out[i] *= norm;
|
||||||
#else
|
#else /* FFT_KISS */
|
||||||
out[i].re *= norm;
|
out[i].re *= norm;
|
||||||
out[i].im *= norm;
|
out[i].im *= norm;
|
||||||
#endif
|
#endif
|
||||||
@ -508,6 +487,9 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||||||
DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
|
DftiSetValue(plan->handle_fast, DFTI_PLACEMENT,DFTI_INPLACE);
|
||||||
DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
|
DftiSetValue(plan->handle_fast, DFTI_INPUT_DISTANCE, (MKL_LONG)nfast);
|
||||||
DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
|
DftiSetValue(plan->handle_fast, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nfast);
|
||||||
|
#if defined(FFT_MKL_THREADS)
|
||||||
|
DftiSetValue(plan->handle_fast, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
||||||
|
#endif
|
||||||
DftiCommitDescriptor(plan->handle_fast);
|
DftiCommitDescriptor(plan->handle_fast);
|
||||||
|
|
||||||
DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
DftiCreateDescriptor( &(plan->handle_mid), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
||||||
@ -517,6 +499,9 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||||||
DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
|
DftiSetValue(plan->handle_mid, DFTI_PLACEMENT,DFTI_INPLACE);
|
||||||
DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
|
DftiSetValue(plan->handle_mid, DFTI_INPUT_DISTANCE, (MKL_LONG)nmid);
|
||||||
DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
|
DftiSetValue(plan->handle_mid, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nmid);
|
||||||
|
#if defined(FFT_MKL_THREADS)
|
||||||
|
DftiSetValue(plan->handle_mid, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
||||||
|
#endif
|
||||||
DftiCommitDescriptor(plan->handle_mid);
|
DftiCommitDescriptor(plan->handle_mid);
|
||||||
|
|
||||||
DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
DftiCreateDescriptor( &(plan->handle_slow), FFT_MKL_PREC, DFTI_COMPLEX, 1,
|
||||||
@ -526,61 +511,11 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||||||
DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
|
DftiSetValue(plan->handle_slow, DFTI_PLACEMENT,DFTI_INPLACE);
|
||||||
DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
|
DftiSetValue(plan->handle_slow, DFTI_INPUT_DISTANCE, (MKL_LONG)nslow);
|
||||||
DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
|
DftiSetValue(plan->handle_slow, DFTI_OUTPUT_DISTANCE, (MKL_LONG)nslow);
|
||||||
|
#if defined(FFT_MKL_THREADS)
|
||||||
|
DftiSetValue(plan->handle_slow, DFTI_NUMBER_OF_USER_THREADS, nthreads);
|
||||||
|
#endif
|
||||||
DftiCommitDescriptor(plan->handle_slow);
|
DftiCommitDescriptor(plan->handle_slow);
|
||||||
|
|
||||||
if (scaled == 0)
|
|
||||||
plan->scaled = 0;
|
|
||||||
else {
|
|
||||||
plan->scaled = 1;
|
|
||||||
plan->norm = 1.0/(nfast*nmid*nslow);
|
|
||||||
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
|
|
||||||
(out_khi-out_klo+1);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
|
||||||
#elif defined(FFT_FFTW2)
|
|
||||||
|
|
||||||
plan->plan_fast_forward =
|
|
||||||
fftw_create_plan(nfast,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
|
|
||||||
plan->plan_fast_backward =
|
|
||||||
fftw_create_plan(nfast,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
|
|
||||||
|
|
||||||
if (nmid == nfast) {
|
|
||||||
plan->plan_mid_forward = plan->plan_fast_forward;
|
|
||||||
plan->plan_mid_backward = plan->plan_fast_backward;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
plan->plan_mid_forward =
|
|
||||||
fftw_create_plan(nmid,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
|
|
||||||
plan->plan_mid_backward =
|
|
||||||
fftw_create_plan(nmid,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (nslow == nfast) {
|
|
||||||
plan->plan_slow_forward = plan->plan_fast_forward;
|
|
||||||
plan->plan_slow_backward = plan->plan_fast_backward;
|
|
||||||
}
|
|
||||||
else if (nslow == nmid) {
|
|
||||||
plan->plan_slow_forward = plan->plan_mid_forward;
|
|
||||||
plan->plan_slow_backward = plan->plan_mid_backward;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
plan->plan_slow_forward =
|
|
||||||
fftw_create_plan(nslow,FFTW_FORWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
|
|
||||||
plan->plan_slow_backward =
|
|
||||||
fftw_create_plan(nslow,FFTW_BACKWARD,FFTW_ESTIMATE | FFTW_IN_PLACE);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (scaled == 0)
|
|
||||||
plan->scaled = 0;
|
|
||||||
else {
|
|
||||||
plan->scaled = 1;
|
|
||||||
plan->norm = 1.0/(nfast*nmid*nslow);
|
|
||||||
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
|
|
||||||
(out_khi-out_klo+1);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
#if defined(FFT_FFTW_THREADS)
|
#if defined(FFT_FFTW_THREADS)
|
||||||
if (nthreads > 1) {
|
if (nthreads > 1) {
|
||||||
@ -620,15 +555,8 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||||||
NULL,&nslow,1,plan->length3,
|
NULL,&nslow,1,plan->length3,
|
||||||
FFTW_BACKWARD,FFTW_ESTIMATE);
|
FFTW_BACKWARD,FFTW_ESTIMATE);
|
||||||
|
|
||||||
if (scaled == 0)
|
#else /* FFT_KISS */
|
||||||
plan->scaled = 0;
|
|
||||||
else {
|
|
||||||
plan->scaled = 1;
|
|
||||||
plan->norm = 1.0/(nfast*nmid*nslow);
|
|
||||||
plan->normnum = (out_ihi-out_ilo+1) * (out_jhi-out_jlo+1) *
|
|
||||||
(out_khi-out_klo+1);
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
plan->cfg_fast_forward = kiss_fft_alloc(nfast,0,NULL,NULL);
|
plan->cfg_fast_forward = kiss_fft_alloc(nfast,0,NULL,NULL);
|
||||||
plan->cfg_fast_backward = kiss_fft_alloc(nfast,1,NULL,NULL);
|
plan->cfg_fast_backward = kiss_fft_alloc(nfast,1,NULL,NULL);
|
||||||
|
|
||||||
@ -654,6 +582,8 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||||||
plan->cfg_slow_backward = kiss_fft_alloc(nslow,1,NULL,NULL);
|
plan->cfg_slow_backward = kiss_fft_alloc(nslow,1,NULL,NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
if (scaled == 0)
|
if (scaled == 0)
|
||||||
plan->scaled = 0;
|
plan->scaled = 0;
|
||||||
else {
|
else {
|
||||||
@ -663,8 +593,6 @@ struct fft_plan_3d *fft_3d_create_plan(
|
|||||||
(out_khi-out_klo+1);
|
(out_khi-out_klo+1);
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
return plan;
|
return plan;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -686,20 +614,6 @@ void fft_3d_destroy_plan(struct fft_plan_3d *plan)
|
|||||||
DftiFreeDescriptor(&(plan->handle_fast));
|
DftiFreeDescriptor(&(plan->handle_fast));
|
||||||
DftiFreeDescriptor(&(plan->handle_mid));
|
DftiFreeDescriptor(&(plan->handle_mid));
|
||||||
DftiFreeDescriptor(&(plan->handle_slow));
|
DftiFreeDescriptor(&(plan->handle_slow));
|
||||||
/*
|
|
||||||
#elif defined(FFT_FFTW2)
|
|
||||||
if (plan->plan_slow_forward != plan->plan_fast_forward &&
|
|
||||||
plan->plan_slow_forward != plan->plan_mid_forward) {
|
|
||||||
fftw_destroy_plan(plan->plan_slow_forward);
|
|
||||||
fftw_destroy_plan(plan->plan_slow_backward);
|
|
||||||
}
|
|
||||||
if (plan->plan_mid_forward != plan->plan_fast_forward) {
|
|
||||||
fftw_destroy_plan(plan->plan_mid_forward);
|
|
||||||
fftw_destroy_plan(plan->plan_mid_backward);
|
|
||||||
}
|
|
||||||
fftw_destroy_plan(plan->plan_fast_forward);
|
|
||||||
fftw_destroy_plan(plan->plan_fast_backward);
|
|
||||||
*/
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
FFTW_API(destroy_plan)(plan->plan_slow_forward);
|
FFTW_API(destroy_plan)(plan->plan_slow_forward);
|
||||||
FFTW_API(destroy_plan)(plan->plan_slow_backward);
|
FFTW_API(destroy_plan)(plan->plan_slow_backward);
|
||||||
@ -840,18 +754,6 @@ void fft_1d_only(FFT_DATA *data, int nsize, int flag, struct fft_plan_3d *plan)
|
|||||||
DftiComputeBackward(plan->handle_mid,data);
|
DftiComputeBackward(plan->handle_mid,data);
|
||||||
DftiComputeBackward(plan->handle_slow,data);
|
DftiComputeBackward(plan->handle_slow,data);
|
||||||
}
|
}
|
||||||
/*
|
|
||||||
#elif defined(FFT_FFTW2)
|
|
||||||
if (flag == -1) {
|
|
||||||
fftw(plan->plan_fast_forward,total1/length1,data,1,0,NULL,0,0);
|
|
||||||
fftw(plan->plan_mid_forward,total2/length2,data,1,0,NULL,0,0);
|
|
||||||
fftw(plan->plan_slow_forward,total3/length3,data,1,0,NULL,0,0);
|
|
||||||
} else {
|
|
||||||
fftw(plan->plan_fast_backward,total1/length1,data,1,0,NULL,0,0);
|
|
||||||
fftw(plan->plan_mid_backward,total2/length2,data,1,0,NULL,0,0);
|
|
||||||
fftw(plan->plan_slow_backward,total3/length3,data,1,0,NULL,0,0);
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
FFTW_API(plan) theplan;
|
FFTW_API(plan) theplan;
|
||||||
if (flag == -1)
|
if (flag == -1)
|
||||||
|
|||||||
@ -43,14 +43,6 @@ typedef double FFT_SCALAR;
|
|||||||
typedef float _Complex FFT_DATA;
|
typedef float _Complex FFT_DATA;
|
||||||
#define FFT_MKL_PREC DFTI_SINGLE
|
#define FFT_MKL_PREC DFTI_SINGLE
|
||||||
|
|
||||||
//#elif defined(FFT_FFTW2)
|
|
||||||
//#if defined(FFTW_SIZE)
|
|
||||||
//#include "sfftw.h"
|
|
||||||
//#else
|
|
||||||
//#include "fftw.h"
|
|
||||||
//#endif
|
|
||||||
//typedef FFTW_COMPLEX FFT_DATA;
|
|
||||||
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
#include "fftw3.h"
|
#include "fftw3.h"
|
||||||
typedef fftwf_complex FFT_DATA;
|
typedef fftwf_complex FFT_DATA;
|
||||||
@ -84,14 +76,6 @@ typedef struct kiss_fft_state* kiss_fft_cfg;
|
|||||||
typedef double _Complex FFT_DATA;
|
typedef double _Complex FFT_DATA;
|
||||||
#define FFT_MKL_PREC DFTI_DOUBLE
|
#define FFT_MKL_PREC DFTI_DOUBLE
|
||||||
|
|
||||||
//#elif defined(FFT_FFTW2)
|
|
||||||
//#if defined(FFTW_SIZE)
|
|
||||||
//#include "dfftw.h"
|
|
||||||
//#else
|
|
||||||
//#include "fftw.h"
|
|
||||||
//#endif
|
|
||||||
//typedef FFTW_COMPLEX FFT_DATA;
|
|
||||||
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
#include "fftw3.h"
|
#include "fftw3.h"
|
||||||
typedef fftw_complex FFT_DATA;
|
typedef fftw_complex FFT_DATA;
|
||||||
@ -141,13 +125,6 @@ struct fft_plan_3d {
|
|||||||
DFTI_DESCRIPTOR *handle_fast;
|
DFTI_DESCRIPTOR *handle_fast;
|
||||||
DFTI_DESCRIPTOR *handle_mid;
|
DFTI_DESCRIPTOR *handle_mid;
|
||||||
DFTI_DESCRIPTOR *handle_slow;
|
DFTI_DESCRIPTOR *handle_slow;
|
||||||
//#elif defined(FFT_FFTW2)
|
|
||||||
// fftw_plan plan_fast_forward;
|
|
||||||
// fftw_plan plan_fast_backward;
|
|
||||||
// fftw_plan plan_mid_forward;
|
|
||||||
// fftw_plan plan_mid_backward;
|
|
||||||
//fftw_plan plan_slow_forward;
|
|
||||||
//fftw_plan plan_slow_backward;
|
|
||||||
#elif defined(FFT_FFTW3)
|
#elif defined(FFT_FFTW3)
|
||||||
FFTW_API(plan) plan_fast_forward;
|
FFTW_API(plan) plan_fast_forward;
|
||||||
FFTW_API(plan) plan_fast_backward;
|
FFTW_API(plan) plan_fast_backward;
|
||||||
|
|||||||
Reference in New Issue
Block a user