Working on fft on the device
This commit is contained in:
@ -162,6 +162,10 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double *
|
|||||||
eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr);
|
eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Library-level entry point for the 1D FFT: passes in, out and mode through
// unchanged to AMOEBAMF.compute_fft1d() and returns nothing.
// NOTE(review): the caller-side declaration listed under
// "External functions from GPU library" reads
//   int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int mode);
// which differs from this definition in return type and pointer types — confirm.
void amoeba_compute_fft1d(void** in, void** out, const int mode) {
|
||||||
|
AMOEBAMF.compute_fft1d(in, out, mode);
|
||||||
|
}
|
||||||
|
|
||||||
double amoeba_gpu_bytes() {
|
double amoeba_gpu_bytes() {
|
||||||
return AMOEBAMF.host_memory_usage();
|
return AMOEBAMF.host_memory_usage();
|
||||||
}
|
}
|
||||||
|
|||||||
@ -568,12 +568,30 @@ void BaseAmoebaT::compute_polar_real(int *host_amtype, int *host_amgroup,
|
|||||||
_tep.update_host(_max_tep_size*4,false);
|
_tep.update_host(_max_tep_size*4,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Return the bytes of host memory in use: atom and neighbor storage plus this object's own footprint
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
double BaseAmoebaT::host_memory_usage_atomic() const {
|
double BaseAmoebaT::host_memory_usage_atomic() const {
|
||||||
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
|
return device->atom.host_memory_usage()+nbor->host_memory_usage()+
|
||||||
4*sizeof(numtyp)+sizeof(BaseAmoeba<numtyp,acctyp>);
|
4*sizeof(numtyp)+sizeof(BaseAmoeba<numtyp,acctyp>);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Compute FFT
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
void BaseAmoebaT::compute_fft1d(void** in, void** out, const int mode)
|
||||||
|
{
// NOTE: empty placeholder — no FFT is performed yet; in, out and mode are
// all unused, so calling this leaves the buffers untouched.
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Copy the extra data from host to device
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
||||||
double** uind, double** uinp, double* pval) {
|
double** uind, double** uinp, double* pval) {
|
||||||
@ -645,6 +663,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Compile (load) the kernel strings and set the kernels
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
|
||||||
const char *kname_multipole,
|
const char *kname_multipole,
|
||||||
|
|||||||
@ -189,7 +189,10 @@ class BaseAmoeba {
|
|||||||
// _fieldp store both arrays, one after another
|
// _fieldp store both arrays, one after another
|
||||||
_fieldp.update_host(_max_fieldp_size*8,false);
|
_fieldp.update_host(_max_fieldp_size*8,false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// compute forward/backward FFT on the device
|
||||||
|
void compute_fft1d(void** in, void** out, const int mode);
|
||||||
|
|
||||||
// -------------------------- DEVICE DATA -------------------------
|
// -------------------------- DEVICE DATA -------------------------
|
||||||
|
|
||||||
/// Device Properties and Atom and Neighbor storage
|
/// Device Properties and Atom and Neighbor storage
|
||||||
|
|||||||
@ -23,7 +23,8 @@ using namespace LAMMPS_NS;
|
|||||||
|
|
||||||
// External functions from GPU library
|
// External functions from GPU library
|
||||||
|
|
||||||
//int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int size, const int flag);
|
// NOTE(review): no definition of amoeba_setup_fft is visible in this change —
// confirm the GPU library actually provides one before relying on it.
int amoeba_setup_fft(const int size);
|
||||||
|
// NOTE(review): the GPU library wrapper in this same change is defined as
//   void amoeba_compute_fft1d(void** in, void** out, const int mode)
// which differs from this declaration in both return type and pointer types.
// As C++ functions these are distinct overloads, so this declaration would
// presumably not resolve to that definition — confirm and reconcile.
int amoeba_compute_fft1d(FFT_SCALAR* in, FFT_SCALAR* out, const int mode);
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
partition an FFT grid across processors
|
partition an FFT grid across processors
|
||||||
@ -39,6 +40,7 @@ AmoebaConvolutionGPU::AmoebaConvolutionGPU(LAMMPS *lmp, Pair *pair,
|
|||||||
AmoebaConvolution(lmp, pair, nx_caller, ny_caller, nz_caller, order_caller,
|
AmoebaConvolution(lmp, pair, nx_caller, ny_caller, nz_caller, order_caller,
|
||||||
which_caller)
|
which_caller)
|
||||||
{
|
{
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
@ -81,6 +83,7 @@ FFT_SCALAR *AmoebaConvolutionGPU::pre_convolution_4d()
|
|||||||
debug_scalar(FFT,"PRE Convo / POST Remap");
|
debug_scalar(FFT,"PRE Convo / POST Remap");
|
||||||
debug_file(FFT,"pre.convo.post.remap");
|
debug_file(FFT,"pre.convo.post.remap");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// perform forward FFT
|
// perform forward FFT
|
||||||
|
|
||||||
fft1->compute(cfft,cfft,FFT3d::FORWARD);
|
fft1->compute(cfft,cfft,FFT3d::FORWARD);
|
||||||
|
|||||||
Reference in New Issue
Block a user