Working on FFT on the device
This commit is contained in:
@ -162,6 +162,10 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
|
||||
eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr);
|
||||
}
|
||||
|
||||
// Library-level entry point for the 1d FFT: hands the request straight to
// AMOEBAMF.compute_fft1d().
//   in, out : passed through unchanged
//   mode    : passed through unchanged
void amoeba_compute_fft1d(void** in, void** out, const int mode)
{
  auto &amoeba = AMOEBAMF;
  amoeba.compute_fft1d(in, out, mode);
}
|
||||
|
||||
double amoeba_gpu_bytes() {
|
||||
return AMOEBAMF.host_memory_usage();
|
||||
}
|
||||
|
||||
@ -568,12 +568,30 @@ void BaseAmoebaT::compute_polar_real(int *host_amtype, int *host_amgroup,
|
||||
_tep.update_host(_max_tep_size*4,false);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Return the memory bytes allocated on the host and device
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
double BaseAmoebaT::host_memory_usage_atomic() const {
|
||||
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
|
||||
4*sizeof(numtyp)+sizeof(BaseAmoeba<numtyp,acctyp>);
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Compute FFT
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
// Device-side 1d FFT hook (the class declaration describes it as
// "compute forward/backward FFT on the device").
//   in, out : not used by the current body
//   mode    : not used by the current body
// NOTE(review): the body is empty, so calling this is currently a no-op and
// neither `in` nor `out` is touched -- implementation still to be added.
template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::compute_fft1d(void** in, void** out, const int mode)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Copy the extra data from host to device
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
||||
double** uind, double** uinp, double* pval) {
|
||||
@ -645,6 +663,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
||||
}
|
||||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Compile (load) the kernel strings and set the kernels
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||
const char *kname_multipole,
|
||||
|
||||
@ -189,7 +189,10 @@ class BaseAmoeba {
|
||||
// _fieldp store both arrays, one after another
|
||||
_fieldp.update_host(_max_fieldp_size*8,false);
|
||||
}
|
||||
|
||||
|
||||
/// compute forward/backward FFT on the device
|
||||
void compute_fft1d(void** in, void** out, const int mode);
|
||||
|
||||
// -------------------------- DEVICE DATA -------------------------
|
||||
|
||||
/// Device Properties and Atom and Neighbor storage
|
||||
|
||||
@ -23,7 +23,8 @@ using namespace LAMMPS_NS;
|
||||
|
||||
// External functions from GPU library
|
||||
|
||||
// Previous four-argument form of the FFT entry point, kept for reference:
//int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int size, const int flag);
|
||||
int amoeba_setup_fft(const int size);
|
||||
// NOTE(review): the amoeba_compute_fft1d definition visible in this change is
// `void amoeba_compute_fft1d(void** in, void** out, const int mode)`. The
// declaration below differs in return type and in pointer types, so it
// presumably declares a different overload than the one defined -- confirm
// which signature is intended before this function is called from here.
int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int mode);
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
partition an FFT grid across processors
|
||||
@ -39,6 +40,7 @@ AmoebaConvolutionGPU::AmoebaConvolutionGPU(LAMMPS *lmp, Pair *pair,
|
||||
AmoebaConvolution(lmp, pair, nx_caller, ny_caller, nz_caller, order_caller,
|
||||
which_caller)
|
||||
{
|
||||
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
@ -81,6 +83,7 @@ FFT_SCALAR *AmoebaConvolutionGPU::pre_convolution_4d()
|
||||
debug_scalar(FFT,"PRE Convo / POST Remap");
|
||||
debug_file(FFT,"pre.convo.post.remap");
|
||||
#endif
|
||||
|
||||
// perform forward FFT
|
||||
|
||||
fft1->compute(cfft,cfft,FFT3d::FORWARD);
|
||||
|
||||
Reference in New Issue
Block a user