Cleaned up GPU lib functions
This commit is contained in:
@ -173,18 +173,10 @@ void amoeba_gpu_precompute_induce(const int inum_full, const int bsorder,
|
||||
nylo_out, nyhi_out, nxlo_out, nxhi_out);
|
||||
}
|
||||
|
||||
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid, double ****host_grid_brick,
|
||||
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out,
|
||||
bool& first_iteration) {
|
||||
AMOEBAMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
|
||||
host_thetai3, igrid, host_grid_brick, host_fdip_phi1,
|
||||
host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
|
||||
nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
|
||||
void amoeba_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi) {
|
||||
AMOEBAMF.compute_fphi_uind(host_grid_brick, host_fdip_phi1,
|
||||
host_fdip_phi2, host_fdip_sum_phi);
|
||||
}
|
||||
|
||||
void amoeba_setup_fft(const int numel, const int element_type) {
|
||||
|
||||
@ -670,17 +670,10 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
template <class numtyp, class acctyp>
|
||||
void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** host_igrid,
|
||||
double ****host_grid_brick,
|
||||
void BaseAmoebaT::compute_fphi_uind(double ****host_grid_brick,
|
||||
void **host_fdip_phi1,
|
||||
void **host_fdip_phi2,
|
||||
void **host_fdip_sum_phi,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out,
|
||||
bool& first_iteration)
|
||||
void **host_fdip_sum_phi)
|
||||
{
|
||||
// TODO: find out why this (dummy) host alloc helps the cgrid_brick update_device() work correctly
|
||||
UCL_H_Vec<numtyp> hdummy;
|
||||
|
||||
@ -151,13 +151,6 @@ class BaseAmoeba {
|
||||
int **&ilist, int **&numj, const double cpu_time, bool &success,
|
||||
double *charge, double *boxlo, double *prd);
|
||||
|
||||
virtual void precompute_induce(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out);
|
||||
|
||||
/// Compute multipole real-space with device neighboring
|
||||
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
||||
double **host_x, int *host_type, int *host_amtype,
|
||||
@ -180,15 +173,17 @@ class BaseAmoeba {
|
||||
double **host_uind, double **host_uinp, double *host_pval,
|
||||
const double aewald, const double off2_polar, void **fieldp_ptr);
|
||||
|
||||
virtual void compute_fphi_uind(const int inum_full, const int bsorder,
|
||||
/// Allocate/resize per-atom arrays before induce()
|
||||
virtual void precompute_induce(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double ****host_grid_brick,
|
||||
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out,
|
||||
bool& first_iteration);
|
||||
const int nxlo_out, const int nxhi_out);
|
||||
|
||||
virtual void compute_fphi_uind(double ****host_grid_brick,
|
||||
void **host_fdip_phi1, void **host_fdip_phi2,
|
||||
void **host_fdip_sum_phi);
|
||||
|
||||
/// Compute polar real-space with device neighboring
|
||||
virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||
|
||||
@ -204,18 +204,9 @@ void hippo_gpu_precompute_induce(const int inum_full, const int bsorder,
|
||||
nylo_out, nyhi_out, nxlo_out, nxhi_out);
|
||||
}
|
||||
|
||||
void hippo_gpu_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid, double ****host_grid_brick,
|
||||
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out,
|
||||
bool& first_iteration) {
|
||||
HIPPOMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
|
||||
host_thetai3, igrid, host_grid_brick, host_fdip_phi1,
|
||||
host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
|
||||
nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
|
||||
void hippo_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi) {
|
||||
HIPPOMF.compute_fphi_uind(host_grid_brick, host_fdip_phi1, host_fdip_phi2, host_fdip_sum_phi);
|
||||
}
|
||||
|
||||
double hippo_gpu_bytes() {
|
||||
|
||||
@ -95,15 +95,8 @@ void amoeba_gpu_precompute_induce(const int inum_full, const int bsorder,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out);
|
||||
|
||||
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out,
|
||||
bool& first_iteration);
|
||||
void amoeba_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi);
|
||||
|
||||
void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
|
||||
double **host_rpole, double **host_uind, double **host_uinp,
|
||||
@ -299,13 +292,6 @@ void PairAmoebaGPU::induce()
|
||||
|
||||
int debug = 1;
|
||||
|
||||
first_induce_iteration = true;
|
||||
|
||||
amoeba_gpu_precompute_induce(atom->nlocal, bsorder, thetai1,
|
||||
thetai2, thetai3, igrid,
|
||||
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
|
||||
ic_kspace->nylo_out, ic_kspace->nyhi_out,
|
||||
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
|
||||
|
||||
// set cutoffs, taper coeffs, and PME params
|
||||
// create qfac here, free at end of polar()
|
||||
@ -351,6 +337,15 @@ void PairAmoebaGPU::induce()
|
||||
}
|
||||
}
|
||||
|
||||
// allocate memory and make early host-device transfers
|
||||
// must be done before the first ufield0c
|
||||
|
||||
amoeba_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, thetai2,
|
||||
thetai3, igrid,
|
||||
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
|
||||
ic_kspace->nylo_out, ic_kspace->nyhi_out,
|
||||
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
|
||||
|
||||
// get induced dipoles via the OPT extrapolation method
|
||||
// NOTE: any way to rewrite these loops to avoid allocating
|
||||
// uopt,uoptp with a optorder+1 dimension, just optorder ??
|
||||
@ -1160,14 +1155,8 @@ void PairAmoebaGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
||||
void* fdip_phi1_pinned = nullptr;
|
||||
void* fdip_phi2_pinned = nullptr;
|
||||
void* fdip_sum_phi_pinned = nullptr;
|
||||
amoeba_gpu_fphi_uind(atom->nlocal, bsorder, thetai1,
|
||||
thetai2, thetai3, igrid, grid,
|
||||
&fdip_phi1_pinned, &fdip_phi2_pinned,
|
||||
&fdip_sum_phi_pinned,
|
||||
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
|
||||
ic_kspace->nylo_out, ic_kspace->nyhi_out,
|
||||
ic_kspace->nxlo_out, ic_kspace->nxhi_out,
|
||||
first_induce_iteration);
|
||||
amoeba_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
|
||||
&fdip_sum_phi_pinned);
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;
|
||||
|
||||
@ -62,8 +62,6 @@ class PairAmoebaGPU : public PairAmoeba {
|
||||
bool gpu_umutual2b_ready;
|
||||
bool gpu_polar_real_ready;
|
||||
|
||||
bool first_induce_iteration;
|
||||
|
||||
void udirect2b_cpu();
|
||||
|
||||
template<class numtyp>
|
||||
|
||||
@ -112,15 +112,8 @@ void hippo_gpu_precompute_induce(const int inum_full, const int bsorder,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out);
|
||||
|
||||
void hippo_gpu_fphi_uind(const int inum_full, const int bsorder,
|
||||
double ***host_thetai1, double ***host_thetai2,
|
||||
double ***host_thetai3, int** igrid,
|
||||
double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi,
|
||||
const int nzlo_out, const int nzhi_out,
|
||||
const int nylo_out, const int nyhi_out,
|
||||
const int nxlo_out, const int nxhi_out,
|
||||
bool& first_iteration);
|
||||
void hippo_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
|
||||
void **host_fdip_phi2, void **host_fdip_sum_phi);
|
||||
|
||||
void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
|
||||
double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
|
||||
@ -424,14 +417,6 @@ void PairHippoGPU::induce()
|
||||
|
||||
int debug = 1;
|
||||
|
||||
first_induce_iteration = true;
|
||||
|
||||
hippo_gpu_precompute_induce(atom->nlocal, bsorder, thetai1,
|
||||
thetai2, thetai3, igrid,
|
||||
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
|
||||
ic_kspace->nylo_out, ic_kspace->nyhi_out,
|
||||
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
|
||||
|
||||
// set cutoffs, taper coeffs, and PME params
|
||||
// create qfac here, free at end of polar()
|
||||
|
||||
@ -486,6 +471,16 @@ void PairHippoGPU::induce()
|
||||
udirp[i][0], udirp[i][1], udirp[i][2]);
|
||||
}
|
||||
*/
|
||||
|
||||
// allocate memory and make early host-device transfers
|
||||
// must be done before the first ufield0c
|
||||
|
||||
hippo_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, thetai2,
|
||||
thetai3, igrid,
|
||||
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
|
||||
ic_kspace->nylo_out, ic_kspace->nyhi_out,
|
||||
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
|
||||
|
||||
// get induced dipoles via the OPT extrapolation method
|
||||
// NOTE: any way to rewrite these loops to avoid allocating
|
||||
// uopt,uoptp with a optorder+1 dimension, just optorder ??
|
||||
@ -1296,14 +1291,8 @@ void PairHippoGPU::fphi_uind(double ****grid, double **fdip_phi1,
|
||||
void* fdip_phi1_pinned = nullptr;
|
||||
void* fdip_phi2_pinned = nullptr;
|
||||
void* fdip_sum_phi_pinned = nullptr;
|
||||
hippo_gpu_fphi_uind(atom->nlocal, bsorder, thetai1,
|
||||
thetai2, thetai3, igrid, grid,
|
||||
&fdip_phi1_pinned, &fdip_phi2_pinned,
|
||||
&fdip_sum_phi_pinned,
|
||||
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
|
||||
ic_kspace->nylo_out, ic_kspace->nyhi_out,
|
||||
ic_kspace->nxlo_out, ic_kspace->nxhi_out,
|
||||
first_induce_iteration);
|
||||
hippo_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
|
||||
&fdip_sum_phi_pinned);
|
||||
|
||||
int nlocal = atom->nlocal;
|
||||
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;
|
||||
|
||||
@ -62,8 +62,6 @@ class PairHippoGPU : public PairAmoeba {
|
||||
bool gpu_umutual2b_ready;
|
||||
bool gpu_polar_real_ready;
|
||||
|
||||
bool first_induce_iteration;
|
||||
|
||||
void udirect2b_cpu();
|
||||
|
||||
template<class numtyp>
|
||||
|
||||
Reference in New Issue
Block a user