Working on FFT on the device

This commit is contained in:
Trung Nguyen
2022-08-15 15:51:43 -05:00
parent f1112ab6b6
commit 46b8b00a4f
4 changed files with 34 additions and 2 deletions

View File

@ -162,6 +162,10 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr);
}
// Library-level entry point: forwards the FFT request unchanged to the
// AMOEBAMF object's compute_fft1d().
// NOTE(review): the meaning of `mode` (presumably forward vs. backward) and
// the data layout behind `in`/`out` are not visible here -- confirm with the
// caller before relying on them.
void amoeba_compute_fft1d(void** in, void** out, const int mode) {
AMOEBAMF.compute_fft1d(in, out, mode);
}
// Returns the value reported by AMOEBAMF.host_memory_usage().
double amoeba_gpu_bytes() {
return AMOEBAMF.host_memory_usage();
}

View File

@ -568,12 +568,30 @@ void BaseAmoebaT::compute_polar_real(int *host_amtype, int *host_amgroup,
_tep.update_host(_max_tep_size*4,false);
}
// ---------------------------------------------------------------------------
// Return the host memory usage in bytes: the sum of the atom storage's and
// the neighbor storage's host_memory_usage(), plus 4*sizeof(numtyp) and the
// size of this BaseAmoeba object itself.
// NOTE(review): only host_memory_usage() is queried here; whether device
// allocations are included depends on those callees -- confirm.
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
double BaseAmoebaT::host_memory_usage_atomic() const {
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
4*sizeof(numtyp)+sizeof(BaseAmoeba<numtyp,acctyp>);
}
// ---------------------------------------------------------------------------
// Compute FFT
// NOTE(review): the body is currently empty, so calling this does nothing;
// `in`, `out` and `mode` are accepted but ignored until the device FFT is
// implemented.
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::compute_fft1d(void** in, void** out, const int mode)
{
}
// ---------------------------------------------------------------------------
// Copy the extra data from host to device
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
double** uind, double** uinp, double* pval) {
@ -645,6 +663,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
}
}
// ---------------------------------------------------------------------------
// Compile (load) the kernel strings and set the kernels
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
const char *kname_multipole,

View File

@ -189,7 +189,10 @@ class BaseAmoeba {
// _fieldp store both arrays, one after another
_fieldp.update_host(_max_fieldp_size*8,false);
}
/// Compute forward/backward FFT on the device
/** \param in   input data, passed as an opaque pointer-to-pointer
  * \param out  output data, passed as an opaque pointer-to-pointer
  * \param mode presumably selects forward vs. backward -- TODO confirm encoding **/
void compute_fft1d(void** in, void** out, const int mode);
// -------------------------- DEVICE DATA -------------------------
/// Device Properties and Atom and Neighbor storage

View File

@ -23,7 +23,8 @@ using namespace LAMMPS_NS;
// External functions from GPU library
//int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int size, const int flag);
int amoeba_setup_fft(const int size);
// This prototype must match the GPU library's definition exactly:
//   void amoeba_compute_fft1d(void** in, void** out, const int mode)
// A prototype with a different return type or parameter types names a
// different C++ overload that has no definition, so any call through it
// would fail at link time.
void amoeba_compute_fft1d(void** in, void** out, const int mode);
/* ----------------------------------------------------------------------
partition an FFT grid across processors
@ -39,6 +40,7 @@ AmoebaConvolutionGPU::AmoebaConvolutionGPU(LAMMPS *lmp, Pair *pair,
AmoebaConvolution(lmp, pair, nx_caller, ny_caller, nz_caller, order_caller,
which_caller)
{
}
/* ----------------------------------------------------------------------
@ -81,6 +83,7 @@ FFT_SCALAR *AmoebaConvolutionGPU::pre_convolution_4d()
debug_scalar(FFT,"PRE Convo / POST Remap");
debug_file(FFT,"pre.convo.post.remap");
#endif
// perform forward FFT
fft1->compute(cfft,cfft,FFT3d::FORWARD);