Fixed bugs in the multipole real-space part on the GPU. Run separately, multipole real and polar real each work correctly (along with udirect2b and umutual2b), but

when run together they conflict because of their use of `ans` to copy forces back from the device to the host. The other two kernels (the induce part) do not touch forces or energies.
This commit is contained in:
Trung Nguyen
2021-09-17 15:24:36 -05:00
parent d926705950
commit 2e6df83b9b
10 changed files with 123 additions and 104 deletions

View File

@ -37,8 +37,8 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
const int nlocal, const int nall, const int max_nbors,
const int maxspecial, const int maxspecial15,
const double cell_size, int &gpu_mode, FILE *screen,
const double aewald, const double polar_dscale,
const double polar_uscale, int& tep_size) {
const double polar_dscale, const double polar_uscale,
int& tep_size) {
AMOEBAMF.clear();
gpu_mode=AMOEBAMF.device->gpu_mode();
double gpu_split=AMOEBAMF.device->particle_split();
@ -67,7 +67,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale,
nlocal, nall, max_nbors, maxspecial, maxspecial15,
cell_size, gpu_split, screen, aewald, polar_dscale, polar_uscale);
cell_size, gpu_split, screen, polar_dscale, polar_uscale);
AMOEBAMF.device->world_barrier();
if (message)
@ -87,7 +87,7 @@ int amoeba_gpu_init(const int ntypes, const int max_amtype,
host_special_mpole, host_special_polar_wscale,
host_special_polar_piscale, host_special_polar_pscale,
nlocal, nall, max_nbors, maxspecial, maxspecial15,
cell_size, gpu_split, screen, aewald, polar_dscale, polar_uscale);
cell_size, gpu_split, screen, polar_dscale, polar_uscale);
AMOEBAMF.device->gpu_barrier();
if (message)
@ -113,13 +113,13 @@ int** amoeba_gpu_compute_multipole_real(const int ago, const int inum_full,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, const double felec, const double off2,
bool &success, const double aewald, const double felec, const double off2,
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
return AMOEBAMF.compute_multipole_real(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, sublo, subhi,
tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
cpu_time, success, felec, off2, host_q, boxlo, prd, tep_ptr);
cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr);
}
int** amoeba_gpu_compute_udirect2b(const int ago, const int inum_full,
@ -131,13 +131,13 @@ int** amoeba_gpu_compute_udirect2b(const int ago, const int inum_full,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, const double off2, double *host_q,
bool &success, const double aewald, const double off2, double *host_q,
double *boxlo, double *prd, void **fieldp_ptr) {
return AMOEBAMF.compute_udirect2b(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
sublo, subhi, tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr);
cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
}
int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full,
@ -149,13 +149,13 @@ int** amoeba_gpu_compute_umutual2b(const int ago, const int inum_full,
const bool eflag, const bool vflag,
const bool eatom, const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, const double off2, double *host_q,
bool &success, const double aewald, const double off2, double *host_q,
double *boxlo, double *prd, void **fieldp_ptr) {
return AMOEBAMF.compute_umutual2b(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
sublo, subhi, tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
cpu_time, success, off2, host_q, boxlo, prd, fieldp_ptr);
cpu_time, success, aewald, off2, host_q, boxlo, prd, fieldp_ptr);
}
int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full,
@ -167,13 +167,13 @@ int** amoeba_gpu_compute_polar_real(const int ago, const int inum_full,
const bool eflag, const bool vflag, const bool eatom,
const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, const double felec, const double off2,
bool &success, const double aewald, const double felec, const double off2,
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
return AMOEBAMF.compute_polar_real(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, host_uind, host_uinp,
sublo, subhi, tag, nspecial, special, nspecial15, special15,
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
cpu_time, success, felec, off2, host_q, boxlo, prd, tep_ptr);
cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr);
}
double amoeba_gpu_bytes() {