From 46b8b00a4faf716c1bad0139a37461138c572094 Mon Sep 17 00:00:00 2001
From: Trung Nguyen
Date: Mon, 15 Aug 2022 15:51:43 -0500
Subject: [PATCH] Working on fft on the device

---
Review notes (text between the "---" line and the first diff header is not
part of the commit):
 * This patch was recovered from a copy whose line breaks and indentation had
   been stripped.  Blank lines were placed so that every hunk matches its
   "@@" line counts; the indentation of context lines is a best guess, so
   apply with "git apply --ignore-whitespace".
 * The "template <class numtyp, class acctyp>" headers were missing their
   angle-bracket part in the recovered text and are presumed here; confirm
   them against lib/gpu/lal_base_amoeba.cpp before applying.
 * The extern declaration of amoeba_compute_fft1d() in
   src/GPU/amoeba_convolution_gpu.cpp now matches the definition added to
   lib/gpu/lal_amoeba_ext.cpp (void return type, void** buffers).  It used to
   declare "int amoeba_compute_fft1d(FFT_SCALAR*, FFT_SCALAR*, const int)",
   a different overload that nothing defines.  The "index" line of that file
   predates this one-line change.
 * amoeba_setup_fft() is declared in src/GPU/amoeba_convolution_gpu.cpp but
   no definition is part of this patch.

 lib/gpu/lal_amoeba_ext.cpp         |  4 ++++
 lib/gpu/lal_base_amoeba.cpp        | 22 ++++++++++++++++++++++
 lib/gpu/lal_base_amoeba.h          |  5 ++++-
 src/GPU/amoeba_convolution_gpu.cpp |  5 ++++-
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp
index 63ed683833..be183b284d 100644
--- a/lib/gpu/lal_amoeba_ext.cpp
+++ b/lib/gpu/lal_amoeba_ext.cpp
@@ -162,6 +162,10 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
                  eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr);
 }
 
+void amoeba_compute_fft1d(void** in, void** out, const int mode) {
+  AMOEBAMF.compute_fft1d(in, out, mode);
+}
+
 double amoeba_gpu_bytes() {
   return AMOEBAMF.host_memory_usage();
 }
diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp
index 3b67ee31a1..b0d6ecee68 100644
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@@ -568,12 +568,30 @@ void BaseAmoebaT::compute_polar_real(int *host_amtype, int *host_amgroup,
   _tep.update_host(_max_tep_size*4,false);
 }
 
+// ---------------------------------------------------------------------------
+// Return the memory bytes allocated on the host and device
+// ---------------------------------------------------------------------------
+
 template <class numtyp, class acctyp>
 double BaseAmoebaT::host_memory_usage_atomic() const {
   return device->atom.host_memory_usage()+nbor->host_memory_usage()+
          4*sizeof(numtyp)+sizeof(BaseAmoeba);
 }
 
+// ---------------------------------------------------------------------------
+// Compute FFT
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void BaseAmoebaT::compute_fft1d(void** in, void** out, const int mode)
+{
+
+}
+
+// ---------------------------------------------------------------------------
+// Copy the extra data from host to device
+// ---------------------------------------------------------------------------
+
 template <class numtyp, class acctyp>
 void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
                                   double** uind, double** uinp, double* pval) {
@@ -645,6 +663,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
   }
 }
 
+// ---------------------------------------------------------------------------
+// Compile (load) the kernel strings and set the kernels
+// ---------------------------------------------------------------------------
+
 template <class numtyp, class acctyp>
 void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
                                   const char *kname_multipole,
diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h
index f439e2945f..cf767be96e 100644
--- a/lib/gpu/lal_base_amoeba.h
+++ b/lib/gpu/lal_base_amoeba.h
@@ -189,7 +189,10 @@ class BaseAmoeba {
     // _fieldp store both arrays, one after another
     _fieldp.update_host(_max_fieldp_size*8,false);
   }
-
+
+  /// compute forward/backward FFT on the device
+  void compute_fft1d(void** in, void** out, const int mode);
+
   // -------------------------- DEVICE DATA -------------------------
 
   /// Device Properties and Atom and Neighbor storage
diff --git a/src/GPU/amoeba_convolution_gpu.cpp b/src/GPU/amoeba_convolution_gpu.cpp
index 976a115fe1..ad52df3d4b 100644
--- a/src/GPU/amoeba_convolution_gpu.cpp
+++ b/src/GPU/amoeba_convolution_gpu.cpp
@@ -23,7 +23,8 @@ using namespace LAMMPS_NS;
 
 
 // External functions from GPU library
-//int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int size, const int flag);
+int amoeba_setup_fft(const int size);
+void amoeba_compute_fft1d(void** in, void** out, const int mode);
 
 /* ----------------------------------------------------------------------
    partition an FFT grid across processors
@@ -39,6 +40,7 @@ AmoebaConvolutionGPU::AmoebaConvolutionGPU(LAMMPS *lmp, Pair *pair,
   AmoebaConvolution(lmp, pair, nx_caller, ny_caller, nz_caller, order_caller,
                     which_caller)
 {
+
 }
 
 /* ----------------------------------------------------------------------
@@ -81,6 +83,7 @@ FFT_SCALAR *AmoebaConvolutionGPU::pre_convolution_4d()
   debug_scalar(FFT,"PRE Convo / POST Remap");
   debug_file(FFT,"pre.convo.post.remap");
 #endif
+
   // perform forward FFT
 
   fft1->compute(cfft,cfft,FFT3d::FORWARD);