diff --git a/lib/gpu/lal_amoeba.cpp b/lib/gpu/lal_amoeba.cpp index b92e1bfd55..924a175cfe 100644 --- a/lib/gpu/lal_amoeba.cpp +++ b/lib/gpu/lal_amoeba.cpp @@ -103,30 +103,21 @@ int AmoebaT::init(const int ntypes, const int max_amtype, const int max_amclass, ucl_copy(coeff_amclass,host_write2,false); UCL_H_Vec dview(5, *(this->ucl_device), UCL_WRITE_ONLY); - sp_polar.alloc(5,*(this->ucl_device),UCL_READ_ONLY); + sp_amoeba.alloc(5,*(this->ucl_device),UCL_READ_ONLY); for (int i=0; i<5; i++) { - dview[i].x=host_special_polar_wscale[i]; + dview[i].x=host_special_hal[i]; dview[i].y=host_special_polar_piscale[i]; dview[i].z=host_special_polar_pscale[i]; dview[i].w=host_special_mpole[i]; } - ucl_copy(sp_polar,dview,5,false); - - sp_nonpolar.alloc(5,*(this->ucl_device),UCL_READ_ONLY); - for (int i=0; i<5; i++) { - dview[i].x=host_special_hal[i]; - dview[i].y=host_special_repel[i]; - dview[i].z=host_special_disp[i]; - dview[i].w=(numtyp)0; - } - ucl_copy(sp_nonpolar,dview,5,false); + ucl_copy(sp_amoeba,dview,5,false); _polar_dscale = polar_dscale; _polar_uscale = polar_uscale; _allocated=true; this->_max_bytes=coeff_amtype.row_bytes() + coeff_amclass.row_bytes() - + sp_polar.row_bytes() + sp_nonpolar.row_bytes() + this->_tep.row_bytes(); + + sp_amoeba.row_bytes() + this->_tep.row_bytes(); return 0; } @@ -138,8 +129,7 @@ void AmoebaT::clear() { coeff_amtype.clear(); coeff_amclass.clear(); - sp_polar.clear(); - sp_nonpolar.clear(); + sp_amoeba.clear(); this->clear_atomic(); } @@ -177,13 +167,14 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) { &nbor_pitch, &this->_threads_per_atom); this->k_multipole.set_size(GX,BX); - this->k_multipole.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar, - &this->nbor->dev_nbor, &this->_nbor_data->begin(), - &this->dev_short_nbor, - &this->ans->force, &this->ans->engv, &this->_tep, - &eflag, &vflag, &ainum, &_nall, &nbor_pitch, - &this->_threads_per_atom, &this->_aewald, &this->_felec, - &this->_off2_mpole, 
&_polar_dscale, &_polar_uscale); + this->k_multipole.run(&this->atom->x, &this->atom->extra, + &coeff_amtype, &sp_amoeba, + &this->nbor->dev_nbor, &this->_nbor_data->begin(), + &this->dev_short_nbor, + &this->ans->force, &this->ans->engv, &this->_tep, + &eflag, &vflag, &ainum, &_nall, &nbor_pitch, + &this->_threads_per_atom, &this->_aewald, &this->_felec, + &this->_off2_mpole, &_polar_dscale, &_polar_uscale); this->time_pair.stop(); return GX; @@ -218,7 +209,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) { } this->k_udirect2b.set_size(GX,BX); - this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar, + this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_amoeba, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall, &nbor_pitch, @@ -258,7 +249,7 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) { } this->k_umutual2b.set_size(GX,BX); - this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar, + this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_amoeba, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->_fieldp, &ainum, &_nall, &nbor_pitch, &this->_threads_per_atom, &this->_aewald, @@ -297,7 +288,7 @@ int AmoebaT::polar_real(const int eflag, const int vflag) { } this->k_polar.set_size(GX,BX); - this->k_polar.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar, + this->k_polar.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_amoeba, &this->nbor->dev_nbor, &this->_nbor_data->begin(), &this->dev_short_nbor, &this->ans->force, &this->ans->engv, &this->_tep, diff --git a/lib/gpu/lal_amoeba.cu b/lib/gpu/lal_amoeba.cu index befefa8dd0..f29522084d 100644 --- a/lib/gpu/lal_amoeba.cu +++ b/lib/gpu/lal_amoeba.cu @@ -14,7 +14,7 @@ // *************************************************************************** #if defined(NV_KERNEL) || defined(USE_HIP) 
-#include +//#include #include "lal_aux_fun1.h" #ifdef LAMMPS_SMALLBIG #define tagint int @@ -412,7 +412,7 @@ _texture( q_tex,int2); __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_, const __global numtyp *restrict extra, const __global numtyp4 *restrict coeff, - const __global numtyp4 *restrict sp_polar, + const __global numtyp4 *restrict sp_amoeba, const __global int *dev_nbor, const __global int *dev_packed, const __global int *dev_short_nbor, @@ -518,7 +518,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_, int jtype = pol3j.z; // amtype[j]; int jgroup = pol3j.w; // amgroup[j]; - const numtyp4 sp_pol = sp_polar[sbmask15(jextra)]; + const numtyp4 sp_pol = sp_amoeba[sbmask15(jextra)]; numtyp factor_mpole = sp_pol.w; // sp_mpole[sbmask15(jextra)]; // intermediates involving moments and separation distance @@ -713,7 +713,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_, __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_, const __global numtyp *restrict extra, const __global numtyp4 *restrict coeff, - const __global numtyp4 *restrict sp_polar, + const __global numtyp4 *restrict sp_amoeba, const __global int *dev_nbor, const __global int *dev_packed, const __global int *dev_short_nbor, @@ -824,12 +824,12 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_, int jgroup = pol3j.w; // amgroup[j]; numtyp factor_dscale, factor_pscale; - const numtyp4 sp_pol = sp_polar[sbmask15(jextra)]; + const numtyp4 sp_pol = sp_amoeba[sbmask15(jextra)]; if (igroup == jgroup) { - factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)]; + factor_pscale = sp_pol.y; // sp_amoeba_piscale[sbmask15(jextra)]; factor_dscale = polar_dscale; } else { - factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)]; + factor_pscale = sp_pol.z; // sp_amoeba_pscale[sbmask15(jextra)]; factor_dscale = (numtyp)1.0; } @@ -931,7 +931,7 @@ __kernel void k_amoeba_udirect2b(const __global 
numtyp4 *restrict x_, __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_, const __global numtyp *restrict extra, const __global numtyp4 *restrict coeff, - const __global numtyp4 *restrict sp_polar, + const __global numtyp4 *restrict sp_amoeba, const __global int *dev_nbor, const __global int *dev_packed, const __global int *dev_short_nbor, @@ -1105,7 +1105,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_, __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_, const __global numtyp *restrict extra, const __global numtyp4 *restrict coeff, - const __global numtyp4 *restrict sp_polar, + const __global numtyp4 *restrict sp_amoeba, const __global int *dev_nbor, const __global int *dev_packed, const __global int *dev_short_nbor, @@ -1257,13 +1257,13 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_, numtyp ukzp = pol5j.z; // uinp[j][2]; numtyp factor_dscale, factor_pscale, factor_uscale; - const numtyp4 sp_pol = sp_polar[sbmask15(jextra)]; + const numtyp4 sp_pol = sp_amoeba[sbmask15(jextra)]; if (igroup == jgroup) { - factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)]; + factor_pscale = sp_pol.y; // sp_amoeba_piscale[sbmask15(jextra)]; factor_dscale = polar_dscale; factor_uscale = polar_uscale; } else { - factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)]; + factor_pscale = sp_pol.z; // sp_amoeba_pscale[sbmask15(jextra)]; factor_dscale = factor_uscale = (numtyp)1.0; } diff --git a/lib/gpu/lal_amoeba.h b/lib/gpu/lal_amoeba.h index df72435b81..d12b79719f 100644 --- a/lib/gpu/lal_amoeba.h +++ b/lib/gpu/lal_amoeba.h @@ -70,17 +70,12 @@ class Amoeba : public BaseAmoeba { UCL_D_Vec coeff_amtype; /// csix = coeff_amclass.x; adisp = coeff_amclass.y; UCL_D_Vec coeff_amclass; - /// Special polar values [0-4]: - /// sp_polar.x = special_polar_wscale - /// sp_polar.y special_polar_pscale, - /// sp_polar.z = special_polar_piscale - /// sp_polar.w = special_mpole - UCL_D_Vec sp_polar; - /// 
Special nonpolar values [0-4]: - /// sp_nonpolar.x = special_hal - /// sp_nonpolar.y special_repel - /// sp_nonpolar.z = special_disp - UCL_D_Vec sp_nonpolar; + /// Special amoeba values [0-4]: + /// sp_amoeba.x = special_hal + /// sp_amoeba.y = special_polar_piscale + /// sp_amoeba.z = special_polar_pscale + /// sp_amoeba.w = special_mpole + UCL_D_Vec sp_amoeba; /// If atom type constants fit in shared memory, use fast kernels bool shared_types;