diff --git a/lib/gpu/lal_amoeba_ext.cpp b/lib/gpu/lal_amoeba_ext.cpp index f91b76f688..425caaabbb 100644 --- a/lib/gpu/lal_amoeba_ext.cpp +++ b/lib/gpu/lal_amoeba_ext.cpp @@ -162,6 +162,17 @@ void amoeba_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double * eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr); } +void amoeba_gpu_precompute_induce(const int inum_full, const int bsorder, + double ***host_thetai1, double ***host_thetai2, + double ***host_thetai3, int** igrid, + const int nzlo_out, const int nzhi_out, + const int nylo_out, const int nyhi_out, + const int nxlo_out, const int nxhi_out) { /* forwards every argument unchanged to AMOEBAMF.precompute_induce() */ + AMOEBAMF.precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, + host_thetai3, igrid, nzlo_out, nzhi_out, + nylo_out, nyhi_out, nxlo_out, nxhi_out); +} + void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder, double ***host_thetai1, double ***host_thetai2, double ***host_thetai3, int** igrid, double ****host_grid_brick, diff --git a/lib/gpu/lal_base_amoeba.cpp b/lib/gpu/lal_base_amoeba.cpp index eac704fbfc..304e23274f 100644 --- a/lib/gpu/lal_base_amoeba.cpp +++ b/lib/gpu/lal_base_amoeba.cpp @@ -580,7 +580,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder, _thetai1.alloc(_max_thetai_size*bsorder,*(this->ucl_device),UCL_READ_ONLY); _thetai2.alloc(_max_thetai_size*bsorder,*(this->ucl_device),UCL_READ_ONLY); _thetai3.alloc(_max_thetai_size*bsorder,*(this->ucl_device),UCL_READ_ONLY); - _igrid.alloc(_max_thetai_size*4,*(this->ucl_device),UCL_READ_ONLY); + _igrid.alloc(_max_thetai_size*4,*(this->ucl_device),UCL_WRITE_ONLY,UCL_READ_ONLY); _fdip_phi1.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE); _fdip_phi2.alloc(_max_thetai_size*10,*(this->ucl_device),UCL_READ_WRITE); @@ -674,7 +674,7 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder, double ***host_thetai1, double ***host_thetai2, double ***host_thetai3, int** host_igrid, double ****host_grid_brick, - 
void** host_fdip_phi1, + void **host_fdip_phi1, void **host_fdip_phi2, void **host_fdip_sum_phi, const int nzlo_out, const int nzhi_out, @@ -682,16 +682,7 @@ void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder, const int nxlo_out, const int nxhi_out, bool& first_iteration) { - // allocation/resize and transfers before the first iteration - - if (first_iteration) { - precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, - host_thetai3, host_igrid, nzlo_out, nzhi_out, - nylo_out, nyhi_out, nxlo_out, nxhi_out); - first_iteration = false; - } - - // TODO: find out why this host alloc helps the cgrid_brick update_device() work correcly + // TODO: find out why this (dummy) host alloc helps the cgrid_brick update_device() work correctly UCL_H_Vec hdummy; hdummy.alloc(1, *(this->ucl_device), UCL_READ_ONLY); diff --git a/lib/gpu/lal_hippo_ext.cpp b/lib/gpu/lal_hippo_ext.cpp index 6b189defe9..2cc17c6ced 100644 --- a/lib/gpu/lal_hippo_ext.cpp +++ b/lib/gpu/lal_hippo_ext.cpp @@ -193,6 +193,17 @@ void hippo_gpu_compute_polar_real(int *host_amtype, int *host_amgroup, double ** eflag_in, vflag_in, eatom, vatom, aewald, felec, off2, tep_ptr); } +void hippo_gpu_precompute_induce(const int inum_full, const int bsorder, + double ***host_thetai1, double ***host_thetai2, + double ***host_thetai3, int** igrid, + const int nzlo_out, const int nzhi_out, + const int nylo_out, const int nyhi_out, + const int nxlo_out, const int nxhi_out) { /* forwards every argument unchanged to HIPPOMF.precompute_induce() */ + HIPPOMF.precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, + host_thetai3, igrid, nzlo_out, nzhi_out, + nylo_out, nyhi_out, nxlo_out, nxhi_out); +} + void hippo_gpu_fphi_uind(const int inum_full, const int bsorder, double ***host_thetai1, double ***host_thetai2, double ***host_thetai3, int** igrid, double ****host_grid_brick, diff --git a/src/GPU/pair_amoeba_gpu.cpp b/src/GPU/pair_amoeba_gpu.cpp index 267dc666d6..5770d9542d 100644 --- a/src/GPU/pair_amoeba_gpu.cpp +++ b/src/GPU/pair_amoeba_gpu.cpp @@ 
-88,6 +88,13 @@ void amoeba_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, void amoeba_gpu_update_fieldp(void **fieldp_ptr); +void amoeba_gpu_precompute_induce(const int inum_full, const int bsorder, + double ***host_thetai1, double ***host_thetai2, + double ***host_thetai3, int** igrid, + const int nzlo_out, const int nzhi_out, + const int nylo_out, const int nyhi_out, + const int nxlo_out, const int nxhi_out); /* defined in lib/gpu/lal_amoeba_ext.cpp */ + void amoeba_gpu_fphi_uind(const int inum_full, const int bsorder, double ***host_thetai1, double ***host_thetai2, double ***host_thetai3, int** igrid, @@ -294,6 +301,12 @@ void PairAmoebaGPU::induce() first_induce_iteration = true; + amoeba_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, + thetai2, thetai3, igrid, + ic_kspace->nzlo_out, ic_kspace->nzhi_out, + ic_kspace->nylo_out, ic_kspace->nyhi_out, + ic_kspace->nxlo_out, ic_kspace->nxhi_out); /* NOTE(review): no null check on ic_kspace is visible in this hunk - confirm it is always set when induce() runs */ + // set cutoffs, taper coeffs, and PME params // create qfac here, free at end of polar() diff --git a/src/GPU/pair_hippo_gpu.cpp b/src/GPU/pair_hippo_gpu.cpp index 8c1b380f65..9317b11794 100644 --- a/src/GPU/pair_hippo_gpu.cpp +++ b/src/GPU/pair_hippo_gpu.cpp @@ -105,6 +105,13 @@ void hippo_gpu_compute_umutual2b(int *host_amtype, int *host_amgroup, void hippo_gpu_update_fieldp(void **fieldp_ptr); +void hippo_gpu_precompute_induce(const int inum_full, const int bsorder, + double ***host_thetai1, double ***host_thetai2, + double ***host_thetai3, int** igrid, + const int nzlo_out, const int nzhi_out, + const int nylo_out, const int nyhi_out, + const int nxlo_out, const int nxhi_out); /* defined in lib/gpu/lal_hippo_ext.cpp */ + void hippo_gpu_fphi_uind(const int inum_full, const int bsorder, double ***host_thetai1, double ***host_thetai2, double ***host_thetai3, int** igrid, @@ -419,6 +426,12 @@ void PairHippoGPU::induce() first_induce_iteration = true; + hippo_gpu_precompute_induce(atom->nlocal, bsorder, thetai1, + thetai2, thetai3, igrid, + ic_kspace->nzlo_out, ic_kspace->nzhi_out, + ic_kspace->nylo_out, ic_kspace->nyhi_out, + 
ic_kspace->nxlo_out, ic_kspace->nxhi_out); /* NOTE(review): no null check on ic_kspace is visible in this hunk - confirm it is always set when induce() runs */ + // set cutoffs, taper coeffs, and PME params // create qfac here, free at end of polar()