Working on fft on the device

2022-08-15 15:51:43 -05:00
parent f1112ab6b6
commit 46b8b00a4f
4 changed files with 34 additions and 2 deletions
--- a/lib/gpu/lal_amoeba_ext.cpp
+++ b/lib/gpu/lal_amoeba_ext.cpp
@ -162,6 +162,10 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
                              eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr);
 }

+void amoeba_compute_fft1d(void** in, void** out, const int mode) {
+  AMOEBAMF.compute_fft1d(in, out, mode);  // thin wrapper: forwards all three arguments unchanged to the AMOEBAMF instance
+}
+
 double amoeba_gpu_bytes() {
  return AMOEBAMF.host_memory_usage();
 }
--- a/lib/gpu/lal_base_amoeba.cpp
+++ b/lib/gpu/lal_base_amoeba.cpp
@ -568,12 +568,30 @@ void BaseAmoebaT::compute_polar_real(int *host_amtype, int *host_amgroup,
  _tep.update_host(_max_tep_size*4,false);
 }

+// ---------------------------------------------------------------------------
+// Return the host memory usage in bytes (atom storage, neighbor storage, and this object)
+// ---------------------------------------------------------------------------
+
 template <class numtyp, class acctyp>
 double BaseAmoebaT::host_memory_usage_atomic() const {
  return device->atom.host_memory_usage()+nbor->host_memory_usage()+
         4*sizeof(numtyp)+sizeof(BaseAmoeba<numtyp,acctyp>);
 }

+// ---------------------------------------------------------------------------
+// Compute FFT (stub: the body is empty, so in, out and mode are unused and no transform is performed yet)
+// ---------------------------------------------------------------------------
+
+template <class numtyp, class acctyp>
+void BaseAmoebaT::compute_fft1d(void** in, void** out, const int mode)
+{
+
+}
+
+// ---------------------------------------------------------------------------
+// Copy the extra data from host to device
+// ---------------------------------------------------------------------------
+
 template <class numtyp, class acctyp>
 void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
                                  double** uind, double** uinp, double* pval) {
@ -645,6 +663,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
  }
 }

+// ---------------------------------------------------------------------------
+// Compile (load) the kernel strings and set the kernels
+// ---------------------------------------------------------------------------
+
 template <class numtyp, class acctyp>
 void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
                                  const char *kname_multipole,
--- a/lib/gpu/lal_base_amoeba.h
+++ b/lib/gpu/lal_base_amoeba.h
@ -189,7 +189,10 @@ class BaseAmoeba {
     // _fieldp store both arrays, one after another
    _fieldp.update_host(_max_fieldp_size*8,false);
  }
-  
+
+  /// compute forward/backward FFT on the device
+  void compute_fft1d(void** in, void** out, const int mode);
+
  // -------------------------- DEVICE DATA -------------------------

  /// Device Properties and Atom and Neighbor storage
--- a/src/GPU/amoeba_convolution_gpu.cpp
+++ b/src/GPU/amoeba_convolution_gpu.cpp
@ -23,7 +23,8 @@ using namespace LAMMPS_NS;

 // External functions from GPU library

-//int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int size, const int flag);
+int amoeba_setup_fft(const int size);  // NOTE(review): no definition of amoeba_setup_fft appears in this change -- confirm one exists in lib/gpu
+int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int mode);  // NOTE(review): lib/gpu/lal_amoeba_ext.cpp defines void amoeba_compute_fft1d(void**, void**, const int) -- return and parameter types differ, so this declaration will not resolve to that definition; reconcile before calling

 /* ----------------------------------------------------------------------
   partition an FFT grid across processors
@ -39,6 +40,7 @@ AmoebaConvolutionGPU::AmoebaConvolutionGPU(LAMMPS *lmp, Pair *pair,
  AmoebaConvolution(lmp, pair, nx_caller, ny_caller,  nz_caller, order_caller,
                    which_caller)
 {
+
 }

 /* ----------------------------------------------------------------------
@ -81,6 +83,7 @@ FFT_SCALAR *AmoebaConvolutionGPU::pre_convolution_4d()
  debug_scalar(FFT,"PRE Convo / POST Remap");
  debug_file(FFT,"pre.convo.post.remap");
 #endif
+
  // perform forward FFT

  fft1->compute(cfft,cfft,FFT3d::FORWARD);