Cleaned up GPU lib functions

This commit is contained in:
Trung Nguyen
2022-09-18 15:54:12 -05:00
parent f9f777b099
commit caa66d904e
8 changed files with 43 additions and 98 deletions

View File

@ -173,18 +173,10 @@ void amoeba_gpu_precompute_induce(const int inum_full, const int bsorder,
nylo_out, nyhi_out, nxlo_out, nxhi_out);
}
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
double ***host_thetai1, double ***host_thetai2,
double ***host_thetai3, int** igrid, double ****host_grid_brick,
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
const int nzlo_out, const int nzhi_out,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out,
bool& first_iteration) {
AMOEBAMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
host_thetai3, igrid, host_grid_brick, host_fdip_phi1,
host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
// Library entry point: forwards the host grid brick and the three
// host_fdip_* pointer arguments unchanged to AMOEBAMF.compute_fphi_uind().
// NOTE(review): the caller shown in this commit passes the addresses of
// null void* variables and reads them after the call, so the host_fdip_*
// arguments appear to be outputs -- confirm against compute_fphi_uind().
void amoeba_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
void **host_fdip_phi2, void **host_fdip_sum_phi) {
AMOEBAMF.compute_fphi_uind(host_grid_brick, host_fdip_phi1,
host_fdip_phi2, host_fdip_sum_phi);
}
void amoeba_setup_fft(const int numel, const int element_type) {

View File

@ -670,17 +670,10 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder,
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder,
double ***host_thetai1, double ***host_thetai2,
double ***host_thetai3, int** host_igrid,
double ****host_grid_brick,
void BaseAmoebaT::compute_fphi_uind(double ****host_grid_brick,
void **host_fdip_phi1,
void **host_fdip_phi2,
void **host_fdip_sum_phi,
const int nzlo_out, const int nzhi_out,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out,
bool& first_iteration)
void **host_fdip_sum_phi)
{
// TODO: find out why this (dummy) host alloc helps the cgrid_brick update_device() work correctly
UCL_H_Vec<numtyp> hdummy;

View File

@ -151,13 +151,6 @@ class BaseAmoeba {
int **&ilist, int **&numj, const double cpu_time, bool &success,
double *charge, double *boxlo, double *prd);
virtual void precompute_induce(const int inum_full, const int bsorder,
double ***host_thetai1, double ***host_thetai2,
double ***host_thetai3, int** igrid,
const int nzlo_out, const int nzhi_out,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out);
/// Compute multipole real-space with device neighboring
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
double **host_x, int *host_type, int *host_amtype,
@ -180,15 +173,17 @@ class BaseAmoeba {
double **host_uind, double **host_uinp, double *host_pval,
const double aewald, const double off2_polar, void **fieldp_ptr);
virtual void compute_fphi_uind(const int inum_full, const int bsorder,
/// Allocate/resize per-atom arrays before induce()
virtual void precompute_induce(const int inum_full, const int bsorder,
double ***host_thetai1, double ***host_thetai2,
double ***host_thetai3, int** igrid,
double ****host_grid_brick,
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
const int nzlo_out, const int nzhi_out,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out,
bool& first_iteration);
const int nxlo_out, const int nxhi_out);
virtual void compute_fphi_uind(double ****host_grid_brick,
void **host_fdip_phi1, void **host_fdip_phi2,
void **host_fdip_sum_phi);
/// Compute polar real-space with device neighboring
virtual void compute_polar_real(int *host_amtype, int *host_amgroup, double **host_rpole,

View File

@ -204,18 +204,9 @@ void hippo_gpu_precompute_induce(const int inum_full, const int bsorder,
nylo_out, nyhi_out, nxlo_out, nxhi_out);
}
void hippo_gpu_fphi_uind(const int inum_full, const int bsorder,
double ***host_thetai1, double ***host_thetai2,
double ***host_thetai3, int** igrid, double ****host_grid_brick,
void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi,
const int nzlo_out, const int nzhi_out,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out,
bool& first_iteration) {
HIPPOMF.compute_fphi_uind(inum_full, bsorder, host_thetai1, host_thetai2,
host_thetai3, igrid, host_grid_brick, host_fdip_phi1,
host_fdip_phi2, host_fdip_sum_phi, nzlo_out, nzhi_out,
nylo_out, nyhi_out, nxlo_out, nxhi_out, first_iteration);
// Library entry point: forwards the host grid brick and the three
// host_fdip_* pointer arguments unchanged to HIPPOMF.compute_fphi_uind().
// NOTE(review): the caller shown in this commit passes the addresses of
// null void* variables and reads them after the call, so the host_fdip_*
// arguments appear to be outputs -- confirm against compute_fphi_uind().
void hippo_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
void **host_fdip_phi2, void **host_fdip_sum_phi) {
HIPPOMF.compute_fphi_uind(host_grid_brick, host_fdip_phi1, host_fdip_phi2, host_fdip_sum_phi);
}
double hippo_gpu_bytes() {

View File

@ -95,15 +95,8 @@ void amoeba_gpu_precompute_induce(const int inum_full, const int bsorder,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out);
void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder,
double ***host_thetai1, double ***host_thetai2,
double ***host_thetai3, int** igrid,
double ****host_grid_brick, void **host_fdip_phi1,
void **host_fdip_phi2, void **host_fdip_sum_phi,
const int nzlo_out, const int nzhi_out,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out,
bool& first_iteration);
void amoeba_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
void **host_fdip_phi2, void **host_fdip_sum_phi);
void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
double **host_rpole, double **host_uind, double **host_uinp,
@ -299,13 +292,6 @@ void PairAmoebaGPU::induce()
int debug = 1;
first_induce_iteration = true;
amoeba_gpu_precompute_induce(atom->nlocal, bsorder, thetai1,
thetai2, thetai3, igrid,
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
ic_kspace->nylo_out, ic_kspace->nyhi_out,
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
// set cutoffs, taper coeffs, and PME params
// create qfac here, free at end of polar()
@ -351,6 +337,15 @@ void PairAmoebaGPU::induce()
}
}
// allocate memory and make early host-device transfers
// must be done before the first ufield0c
amoeba_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, thetai2,
thetai3, igrid,
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
ic_kspace->nylo_out, ic_kspace->nyhi_out,
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
// get induced dipoles via the OPT extrapolation method
// NOTE: any way to rewrite these loops to avoid allocating
// uopt,uoptp with a optorder+1 dimension, just optorder ??
@ -1160,14 +1155,8 @@ void PairAmoebaGPU::fphi_uind(double ****grid, double **fdip_phi1,
void* fdip_phi1_pinned = nullptr;
void* fdip_phi2_pinned = nullptr;
void* fdip_sum_phi_pinned = nullptr;
amoeba_gpu_fphi_uind(atom->nlocal, bsorder, thetai1,
thetai2, thetai3, igrid, grid,
&fdip_phi1_pinned, &fdip_phi2_pinned,
&fdip_sum_phi_pinned,
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
ic_kspace->nylo_out, ic_kspace->nyhi_out,
ic_kspace->nxlo_out, ic_kspace->nxhi_out,
first_induce_iteration);
amoeba_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
&fdip_sum_phi_pinned);
int nlocal = atom->nlocal;
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;

View File

@ -62,8 +62,6 @@ class PairAmoebaGPU : public PairAmoeba {
bool gpu_umutual2b_ready;
bool gpu_polar_real_ready;
bool first_induce_iteration;
void udirect2b_cpu();
template<class numtyp>

View File

@ -112,15 +112,8 @@ void hippo_gpu_precompute_induce(const int inum_full, const int bsorder,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out);
void hippo_gpu_fphi_uind(const int inum_full, const int bsorder,
double ***host_thetai1, double ***host_thetai2,
double ***host_thetai3, int** igrid,
double ****host_grid_brick, void **host_fdip_phi1,
void **host_fdip_phi2, void **host_fdip_sum_phi,
const int nzlo_out, const int nzhi_out,
const int nylo_out, const int nyhi_out,
const int nxlo_out, const int nxhi_out,
bool& first_iteration);
void hippo_gpu_fphi_uind(double ****host_grid_brick, void **host_fdip_phi1,
void **host_fdip_phi2, void **host_fdip_sum_phi);
void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup,
double **host_rpole, double **host_uind, double **host_uinp, double *host_pval,
@ -424,14 +417,6 @@ void PairHippoGPU::induce()
int debug = 1;
first_induce_iteration = true;
hippo_gpu_precompute_induce(atom->nlocal, bsorder, thetai1,
thetai2, thetai3, igrid,
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
ic_kspace->nylo_out, ic_kspace->nyhi_out,
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
// set cutoffs, taper coeffs, and PME params
// create qfac here, free at end of polar()
@ -486,6 +471,16 @@ void PairHippoGPU::induce()
udirp[i][0], udirp[i][1], udirp[i][2]);
}
*/
// allocate memory and make early host-device transfers
// must be done before the first ufield0c
hippo_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, thetai2,
thetai3, igrid,
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
ic_kspace->nylo_out, ic_kspace->nyhi_out,
ic_kspace->nxlo_out, ic_kspace->nxhi_out);
// get induced dipoles via the OPT extrapolation method
// NOTE: any way to rewrite these loops to avoid allocating
// uopt,uoptp with a optorder+1 dimension, just optorder ??
@ -1296,14 +1291,8 @@ void PairHippoGPU::fphi_uind(double ****grid, double **fdip_phi1,
void* fdip_phi1_pinned = nullptr;
void* fdip_phi2_pinned = nullptr;
void* fdip_sum_phi_pinned = nullptr;
hippo_gpu_fphi_uind(atom->nlocal, bsorder, thetai1,
thetai2, thetai3, igrid, grid,
&fdip_phi1_pinned, &fdip_phi2_pinned,
&fdip_sum_phi_pinned,
ic_kspace->nzlo_out, ic_kspace->nzhi_out,
ic_kspace->nylo_out, ic_kspace->nyhi_out,
ic_kspace->nxlo_out, ic_kspace->nxhi_out,
first_induce_iteration);
hippo_gpu_fphi_uind(grid, &fdip_phi1_pinned, &fdip_phi2_pinned,
&fdip_sum_phi_pinned);
int nlocal = atom->nlocal;
double *_fdip_phi1_ptr = (double *)fdip_phi1_pinned;

View File

@ -62,8 +62,6 @@ class PairHippoGPU : public PairAmoeba {
bool gpu_umutual2b_ready;
bool gpu_polar_real_ready;
bool first_induce_iteration;
void udirect2b_cpu();
template<class numtyp>