From c5c3c697dfd80c445fc3c310f86b220354bff6a0 Mon Sep 17 00:00:00 2001 From: Trung Nguyen Date: Mon, 29 Aug 2022 00:13:30 -0500 Subject: [PATCH] Adding fphi_uind kernel, working on the arrays allocation --- lib/gpu/Nvidia.makefile | 2 +- lib/gpu/lal_amoeba.cu | 268 ++++++++++++++++++++++++++++++++++++ lib/gpu/lal_base_amoeba.cpp | 13 +- lib/gpu/lal_base_amoeba.h | 2 +- 4 files changed, 279 insertions(+), 6 deletions(-) diff --git a/lib/gpu/Nvidia.makefile b/lib/gpu/Nvidia.makefile index 768daff53a..5f50486e28 100644 --- a/lib/gpu/Nvidia.makefile +++ b/lib/gpu/Nvidia.makefile @@ -62,7 +62,7 @@ $(OBJ_DIR)/pppm_d.cubin: lal_pppm.cu lal_precision.h lal_preprocessor.h \ $(OBJ_DIR)/pppm_d_cubin.h: $(OBJ_DIR)/pppm_d.cubin $(BIN2C) -c -n pppm_d $(OBJ_DIR)/pppm_d.cubin > $(OBJ_DIR)/pppm_d_cubin.h -$(OBJ_DIR)/%_cubin.h: lal_%.cu $(PRE1_H) +$(OBJ_DIR)/%_cubin.h: lal_%.cu $(PRE1_H) $(CUDA) --fatbin -DNV_KERNEL -o $(OBJ_DIR)/$*.cubin $(OBJ_DIR)/lal_$*.cu $(BIN2C) -c -n $* $(OBJ_DIR)/$*.cubin > $@ diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index 1b2900f97f..1239764108 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -1615,6 +1615,274 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_, offset,eflag,vflag,ans,engv,NUM_BLOCKS_X); } +/* ---------------------------------------------------------------------- + fphi_uind = induced potential from grid + fphi_uind extracts the induced dipole potential from the particle mesh Ewald grid +------------------------------------------------------------------------- */ + +__kernel void k_fphi_uind(const __global numtyp4 *restrict x_, + const __global numtyp4 *restrict thetai1, + const __global numtyp4 *restrict thetai2, + const __global numtyp4 *restrict thetai3, + const __global int4 *restrict igrid, + const __global numtyp4 *restrict grid, + __global numtyp4 *restrict fdip_phi1, + __global numtyp4 *restrict fdip_phi2, + __global numtyp4 *restrict fdip_sum_phi, + const int bsorder, const int inum, + const int t_per_atom) +{ + int tid, ii, offset, i, n_stride; + atom_info(t_per_atom,ii,tid,offset); + + if (iiucl_device),UCL_READ_WRITE,UCL_READ_WRITE); _thetai2.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); _thetai3.alloc(_max_thetai_size*bsorder*4,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); - _igrid.alloc(_max_thetai_size*3,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); + _igrid.alloc(_max_thetai_size,*(this->ucl_device),UCL_READ_WRITE,UCL_READ_WRITE); } else { if (inum_full>_max_thetai_size) { _max_thetai_size=static_cast(static_cast(inum_full)*1.10); _thetai1.resize(_max_thetai_size*bsorder*4); _thetai2.resize(_max_thetai_size*bsorder*4); _thetai3.resize(_max_thetai_size*bsorder*4); - _igrid.resize(_max_thetai_size*4); + _igrid.resize(_max_thetai_size); } } @@ -471,7 +471,7 @@ void BaseAmoebaT::precompute_induce(const int inum_full, const int bsorder, _thetai1.update_device(inum_full*bsorder*4,true); _thetai2.update_device(inum_full*bsorder*4,true); _thetai3.update_device(inum_full*bsorder*4,true); - _igrid.update_device(inum_full*4,true); + _igrid.update_device(inum_full,true); } // --------------------------------------------------------------------------- @@ -593,12 +593,17 @@ template void BaseAmoebaT::compute_fphi_uind(const int inum_full, const int bsorder, double **host_thetai1, double **host_thetai2, double **host_thetai3, int** igrid, - double ****host_grid, double **host_fdip_phi1, + double ****host_cgrid_brick, double **host_fdip_phi1, double **host_fdip_phi2, double **host_fdip_sum_phi) { // once allocation and transfers precompute_induce(inum_full, bsorder, host_thetai1, host_thetai2, host_thetai3, igrid); + // resize grid if needed, then copy from host to device + // cgrid_brick.alloc()/resize() + // cgrid_brick.begin() = host_cgrid_brick[0][0][0][0]; + // + const int red_bllocks = fphi_uind(); } diff --git a/lib/gpu/lal_base_amoeba.h b/lib/gpu/lal_base_amoeba.h index 68c3470977..f333bdf9a6 100644 --- a/lib/gpu/lal_base_amoeba.h +++ b/lib/gpu/lal_base_amoeba.h @@ -250,7 +250,7 @@ class BaseAmoeba { int _bsorder; UCL_Vector _thetai1, _thetai2, _thetai3; - UCL_Vector _igrid; + UCL_Vector _igrid; int _max_thetai_size; // ------------------------ FORCE/ENERGY DATA -----------------------