Removed duplicates in the amoeba kernels

This commit is contained in:
Trung Nguyen
2021-10-01 10:19:17 -05:00
parent 3328ac0df2
commit f126f785a4
2 changed files with 24 additions and 18 deletions

View File

@ -353,20 +353,20 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
bool &success, double *host_q, double *boxlo, bool &success, double *host_q, double *boxlo,
double *prd) { double *prd) {
acc_timers(); acc_timers();
int eflag, vflag; //int eflag, vflag;
if (eatom) eflag=2; if (eatom) _eflag=2;
else if (eflag_in) eflag=1; else if (eflag_in) _eflag=1;
else eflag=0; else _eflag=0;
if (vatom) vflag=2; if (vatom) _vflag=2;
else if (vflag_in) vflag=1; else if (vflag_in) _vflag=1;
else vflag=0; else _vflag=0;
#ifdef LAL_NO_BLOCK_REDUCE #ifdef LAL_NO_BLOCK_REDUCE
if (eflag) eflag=2; if (_eflag) _eflag=2;
if (vflag) vflag=2; if (_vflag) _vflag=2;
#endif #endif
set_kernel(eflag,vflag); set_kernel(_eflag,_vflag);
// ------------------- Resize 1-5 neighbor arrays ------------------------ // ------------------- Resize 1-5 neighbor arrays ------------------------
@ -444,6 +444,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
const double aewald, const double felec, const double aewald, const double felec,
const double off2_mpole, double *host_q, const double off2_mpole, double *host_q,
double *boxlo, double *prd, void **tep_ptr) { double *boxlo, double *prd, void **tep_ptr) {
/*
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -459,7 +460,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
#endif #endif
set_kernel(eflag,vflag); set_kernel(eflag,vflag);
*/
// reallocate per-atom arrays, transfer data from the host // reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed // and build the neighbor lists if needed
// NOTE: // NOTE:
@ -486,7 +487,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
_off2_mpole = off2_mpole; _off2_mpole = off2_mpole;
_felec = felec; _felec = felec;
_aewald = aewald; _aewald = aewald;
const int red_blocks=multipole_real(eflag,vflag); const int red_blocks=multipole_real(_eflag,_vflag);
// leave the answers (forces, energies and virial) on the device, // leave the answers (forces, energies and virial) on the device,
// only copy them back in the last kernel (polar_real) // only copy them back in the last kernel (polar_real)
@ -528,6 +529,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full,
const double aewald, const double off2_polar, const double aewald, const double off2_polar,
double *host_q, double *boxlo, double *prd, double *host_q, double *boxlo, double *prd,
void** fieldp_ptr) { void** fieldp_ptr) {
/*
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -543,7 +545,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full,
#endif #endif
set_kernel(eflag,vflag); set_kernel(eflag,vflag);
*/
// reallocate per-atom arrays, transfer data from the host // reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed // and build the neighbor lists if needed
@ -570,7 +572,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full,
_off2_polar = off2_polar; _off2_polar = off2_polar;
_aewald = aewald; _aewald = aewald;
const int red_blocks=udirect2b(eflag,vflag); const int red_blocks=udirect2b(_eflag,_vflag);
// copy field and fieldp from device to host (_fieldp stores both arrays, one after another) // copy field and fieldp from device to host (_fieldp stores both arrays, one after another)
@ -606,6 +608,7 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full,
const double aewald, const double off2_polar, const double aewald, const double off2_polar,
double *host_q, double *boxlo, double *prd, double *host_q, double *boxlo, double *prd,
void** fieldp_ptr) { void** fieldp_ptr) {
/*
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -621,7 +624,7 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full,
#endif #endif
set_kernel(eflag,vflag); set_kernel(eflag,vflag);
*/
// reallocate per-atom arrays, transfer extra data from the host // reallocate per-atom arrays, transfer extra data from the host
// and build the neighbor lists if needed // and build the neighbor lists if needed
@ -648,7 +651,7 @@ int** BaseAmoebaT::compute_umutual2b(const int ago, const int inum_full,
_off2_polar = off2_polar; _off2_polar = off2_polar;
_aewald = aewald; _aewald = aewald;
const int red_blocks=umutual2b(eflag,vflag); const int red_blocks=umutual2b(_eflag,_vflag);
// copy field and fieldp from device to host (_fieldp stores both arrays, one after another) // copy field and fieldp from device to host (_fieldp stores both arrays, one after another)
@ -683,6 +686,7 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
const double aewald, const double felec, const double aewald, const double felec,
const double off2_polar, double *host_q, const double off2_polar, double *host_q,
double *boxlo, double *prd, void **tep_ptr) { double *boxlo, double *prd, void **tep_ptr) {
/*
acc_timers(); acc_timers();
int eflag, vflag; int eflag, vflag;
if (eatom) eflag=2; if (eatom) eflag=2;
@ -698,7 +702,7 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
#endif #endif
set_kernel(eflag,vflag); set_kernel(eflag,vflag);
*/
// reallocate per-atom arrays, transfer data from the host // reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed // and build the neighbor lists if needed
// NOTE: // NOTE:
@ -734,7 +738,7 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
_off2_polar = off2_polar; _off2_polar = off2_polar;
_felec = felec; _felec = felec;
_aewald = aewald; _aewald = aewald;
const int red_blocks=polar_real(eflag,vflag); const int red_blocks=polar_real(_eflag,_vflag);
// only copy answers (forces, energies and virial) back from the device // only copy answers (forces, energies and virial) back from the device
// in the last kernel (which is polar_real here) // in the last kernel (which is polar_real here)

View File

@ -278,6 +278,8 @@ class BaseAmoeba {
numtyp _aewald,_felec; numtyp _aewald,_felec;
numtyp _off2_hal,_off2_repulse,_off2_disp,_off2_mpole,_off2_polar; numtyp _off2_hal,_off2_repulse,_off2_disp,_off2_mpole,_off2_polar;
int _eflag, _vflag;
void compile_kernels(UCL_Device &dev, const void *pair_string, void compile_kernels(UCL_Device &dev, const void *pair_string,
const char *kname_multipole, const char *kname_udirect2b, const char *kname_multipole, const char *kname_udirect2b,
const char *kname_umutual2b, const char *kname_polar, const char *kname_umutual2b, const char *kname_polar,