Working on the multipole real-space term of hippo
This commit is contained in:
@ -757,7 +757,7 @@ double BaseAmoebaT::host_memory_usage_atomic() const {
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
||||||
double** uind, double** uinp) {
|
double** uind, double** uinp, double* pval) {
|
||||||
// signal that we need to transfer extra data from the host
|
// signal that we need to transfer extra data from the host
|
||||||
|
|
||||||
atom->extra_data_unavail();
|
atom->extra_data_unavail();
|
||||||
@ -812,6 +812,14 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
|||||||
pextra[idx+2] = uinp[i][2];
|
pextra[idx+2] = uinp[i][2];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pval) {
|
||||||
|
n += nstride*_nall;
|
||||||
|
for (int i = 0; i < _nall; i++) {
|
||||||
|
int idx = n+i*nstride;
|
||||||
|
pextra[idx] = pval[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
|
|||||||
@ -131,7 +131,7 @@ class BaseAmoeba {
|
|||||||
bool &success);
|
bool &success);
|
||||||
|
|
||||||
/// Reallocate per-atom arrays if needed, and build neighbor lists once, if needed
|
/// Reallocate per-atom arrays if needed, and build neighbor lists once, if needed
|
||||||
int** precompute(const int ago, const int inum_full, const int nall,
|
virtual int** precompute(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype,
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
int *host_amgroup, double **host_rpole, double **host_uind,
|
int *host_amgroup, double **host_rpole, double **host_uind,
|
||||||
double **host_uinp, double *sublo, double *subhi,
|
double **host_uinp, double *sublo, double *subhi,
|
||||||
@ -232,7 +232,7 @@ class BaseAmoeba {
|
|||||||
|
|
||||||
/// cast host arrays into a single array for atom->extra
|
/// cast host arrays into a single array for atom->extra
|
||||||
void cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
void cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
||||||
double** uind, double** uinp);
|
double** uind, double** uinp, double* pval=nullptr);
|
||||||
|
|
||||||
/// Per-atom arrays
|
/// Per-atom arrays
|
||||||
UCL_Vector<acctyp,acctyp> _tep, _fieldp;
|
UCL_Vector<acctyp,acctyp> _tep, _fieldp;
|
||||||
|
|||||||
@ -155,6 +155,102 @@ double HippoT::host_memory_usage() const {
|
|||||||
return this->host_memory_usage_atomic()+sizeof(Hippo<numtyp,acctyp>);
|
return this->host_memory_usage_atomic()+sizeof(Hippo<numtyp,acctyp>);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Prepare for multiple kernel calls in a time step:
|
||||||
|
// - reallocate per-atom arrays, if needed
|
||||||
|
// - transfer extra data from host to device
|
||||||
|
// - build the full neighbor lists for use by different kernels
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
int** HippoT::precompute(const int ago, const int inum_full, const int nall,
|
||||||
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
|
int *host_amgroup, double **host_rpole,
|
||||||
|
double **host_uind, double **host_uinp, double *host_pval,
|
||||||
|
double *sublo, double *subhi, tagint *tag,
|
||||||
|
int **nspecial, tagint **special,
|
||||||
|
int *nspecial15, tagint **special15,
|
||||||
|
const bool eflag_in, const bool vflag_in,
|
||||||
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
|
int **&ilist, int **&jnum, const double cpu_time,
|
||||||
|
bool &success, double *host_q, double *boxlo,
|
||||||
|
double *prd) {
|
||||||
|
this->acc_timers();
|
||||||
|
int eflag, vflag;
|
||||||
|
if (eatom) eflag=2;
|
||||||
|
else if (eflag_in) eflag=1;
|
||||||
|
else eflag=0;
|
||||||
|
if (vatom) vflag=2;
|
||||||
|
else if (vflag_in) vflag=1;
|
||||||
|
else vflag=0;
|
||||||
|
|
||||||
|
#ifdef LAL_NO_BLOCK_REDUCE
|
||||||
|
if (eflag) eflag=2;
|
||||||
|
if (vflag) vflag=2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
this->set_kernel(eflag,vflag);
|
||||||
|
|
||||||
|
// ------------------- Resize 1-5 neighbor arrays ------------------------
|
||||||
|
|
||||||
|
if (nall>this->_nmax) {
|
||||||
|
this->_nmax = nall;
|
||||||
|
this->dev_nspecial15.clear();
|
||||||
|
this->dev_special15.clear();
|
||||||
|
this->dev_special15_t.clear();
|
||||||
|
this->dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
|
this->dev_special15.alloc(this->_maxspecial15*nall,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
|
this->dev_special15_t.alloc(nall*this->_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (inum_full==0) {
|
||||||
|
host_start=0;
|
||||||
|
// Make sure textures are correct if realloc by a different hybrid style
|
||||||
|
this->resize_atom(0,nall,success);
|
||||||
|
this->zero_timers();
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->hd_balancer.balance(cpu_time);
|
||||||
|
int inum=this->hd_balancer.get_gpu_count(ago,inum_full);
|
||||||
|
this->ans->inum(inum);
|
||||||
|
host_start=inum;
|
||||||
|
|
||||||
|
// Build neighbor list on GPU if necessary
|
||||||
|
if (ago==0) {
|
||||||
|
this->_max_nbors = this->build_nbor_list(inum, inum_full-inum, nall, host_x, host_type,
|
||||||
|
sublo, subhi, tag, nspecial, special, nspecial15, special15,
|
||||||
|
success);
|
||||||
|
if (!success)
|
||||||
|
return nullptr;
|
||||||
|
this->atom->cast_q_data(host_q);
|
||||||
|
this->cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
||||||
|
this->hd_balancer.start_timer();
|
||||||
|
} else {
|
||||||
|
this->atom->cast_x_data(host_x,host_type);
|
||||||
|
this->atom->cast_q_data(host_q);
|
||||||
|
this->cast_extra_data(host_amtype, host_amgroup, host_rpole, host_uind, host_uinp, host_pval);
|
||||||
|
this->hd_balancer.start_timer();
|
||||||
|
this->atom->add_x_data(host_x,host_type);
|
||||||
|
}
|
||||||
|
this->atom->add_q_data();
|
||||||
|
this->atom->add_extra_data();
|
||||||
|
|
||||||
|
*ilist=this->nbor->host_ilist.begin();
|
||||||
|
*jnum=this->nbor->host_acc.begin();
|
||||||
|
|
||||||
|
this->device->precompute(ago,inum_full,nall,host_x,host_type,success,host_q,
|
||||||
|
boxlo, prd);
|
||||||
|
|
||||||
|
// re-allocate dev_short_nbor if necessary
|
||||||
|
if (inum_full*(2+this->_max_nbors) > this->dev_short_nbor.cols()) {
|
||||||
|
int _nmax=static_cast<int>(static_cast<double>(inum_full)*1.10);
|
||||||
|
this->dev_short_nbor.resize((2+this->_max_nbors)*this->_nmax);
|
||||||
|
}
|
||||||
|
|
||||||
|
return this->nbor->host_jlist.begin()-host_start;
|
||||||
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Reneighbor on GPU if necessary, and then compute dispersion real-space
|
// Reneighbor on GPU if necessary, and then compute dispersion real-space
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -201,9 +297,9 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
|
|||||||
// (x, type, amtype, amgroup, rpole) are ready on the device.
|
// (x, type, amtype, amgroup, rpole) are ready on the device.
|
||||||
|
|
||||||
int** firstneigh = nullptr;
|
int** firstneigh = nullptr;
|
||||||
firstneigh = this->precompute(ago, inum_full, nall, host_x, host_type,
|
firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole,
|
host_amtype, host_amgroup, host_rpole,
|
||||||
nullptr, nullptr, sublo, subhi, tag,
|
nullptr, nullptr, nullptr, sublo, subhi, tag,
|
||||||
nspecial, special, nspecial15, special15,
|
nspecial, special, nspecial15, special15,
|
||||||
eflag_in, vflag_in, eatom, vatom,
|
eflag_in, vflag_in, eatom, vatom,
|
||||||
host_start, ilist, jnum, cpu_time,
|
host_start, ilist, jnum, cpu_time,
|
||||||
@ -270,19 +366,20 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
|
|||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int** HippoT::compute_multipole_real(const int ago, const int inum_full,
|
int** HippoT::compute_multipole_real(const int ago, const int inum_full,
|
||||||
const int nall, double **host_x,
|
const int nall, double **host_x,
|
||||||
int *host_type, int *host_amtype,
|
int *host_type, int *host_amtype,
|
||||||
int *host_amgroup, double **host_rpole,
|
int *host_amgroup, double **host_rpole,
|
||||||
double *sublo, double *subhi, tagint *tag,
|
double* host_pval, double *sublo,
|
||||||
int **nspecial, tagint **special,
|
double *subhi, tagint *tag,
|
||||||
int *nspecial15, tagint **special15,
|
int **nspecial, tagint **special,
|
||||||
const bool eflag_in, const bool vflag_in,
|
int *nspecial15, tagint **special15,
|
||||||
const bool eatom, const bool vatom,
|
const bool eflag_in, const bool vflag_in,
|
||||||
int &host_start, int **ilist, int **jnum,
|
const bool eatom, const bool vatom,
|
||||||
const double cpu_time, bool &success,
|
int &host_start, int **ilist, int **jnum,
|
||||||
const double aewald, const double felec,
|
const double cpu_time, bool &success,
|
||||||
const double off2_mpole, double *host_q,
|
const double aewald, const double felec,
|
||||||
double *boxlo, double *prd, void **tep_ptr) {
|
const double off2_mpole, double *host_q,
|
||||||
|
double *boxlo, double *prd, void **tep_ptr) {
|
||||||
this->acc_timers();
|
this->acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
if (eatom) eflag=2;
|
if (eatom) eflag=2;
|
||||||
@ -311,9 +408,9 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
|
|||||||
// (x, type, amtype, amgroup, rpole) are ready on the device.
|
// (x, type, amtype, amgroup, rpole) are ready on the device.
|
||||||
|
|
||||||
int** firstneigh = nullptr;
|
int** firstneigh = nullptr;
|
||||||
firstneigh = this->precompute(ago, inum_full, nall, host_x, host_type,
|
firstneigh = precompute(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole,
|
host_amtype, host_amgroup, host_rpole,
|
||||||
nullptr, nullptr, sublo, subhi, tag,
|
nullptr, nullptr, host_pval, sublo, subhi, tag,
|
||||||
nspecial, special, nspecial15, special15,
|
nspecial, special, nspecial15, special15,
|
||||||
eflag_in, vflag_in, eatom, vatom,
|
eflag_in, vflag_in, eatom, vatom,
|
||||||
host_start, ilist, jnum, cpu_time,
|
host_start, ilist, jnum, cpu_time,
|
||||||
@ -380,7 +477,7 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
|
|||||||
&nbor_pitch, &this->_threads_per_atom);
|
&nbor_pitch, &this->_threads_per_atom);
|
||||||
|
|
||||||
this->k_multipole.set_size(GX,BX);
|
this->k_multipole.set_size(GX,BX);
|
||||||
this->k_multipole.run(&this->atom->x, &this->atom->extra, &_pval,
|
this->k_multipole.run(&this->atom->x, &this->atom->extra,
|
||||||
&coeff_amtype, &coeff_amclass, &sp_polar,
|
&coeff_amtype, &coeff_amclass, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor,
|
&this->dev_short_nbor,
|
||||||
|
|||||||
@ -908,7 +908,6 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
__kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp *restrict extra,
|
||||||
const __global numtyp *restrict pval,
|
|
||||||
const __global numtyp4 *restrict coeff_amtype,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
const __global numtyp4 *restrict coeff_amclass,
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
@ -945,6 +944,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
|
numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int m;
|
int m;
|
||||||
@ -989,7 +989,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
numtyp corei = coeff_amclass[itype].z; // pcore[iclass];
|
numtyp corei = coeff_amclass[itype].z; // pcore[iclass];
|
||||||
numtyp alphai = coeff_amclass[itype].w; // palpha[iclass];
|
numtyp alphai = coeff_amclass[itype].w; // palpha[iclass];
|
||||||
numtyp vali = pval[i];
|
numtyp vali = polar6[i].x;
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
@ -1030,7 +1030,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
|
|
||||||
numtyp corek = coeff_amclass[jtype].z; // pcore[jclass];
|
numtyp corek = coeff_amclass[jtype].z; // pcore[jclass];
|
||||||
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
numtyp alphak = coeff_amclass[jtype].w; // palpha[jclass];
|
||||||
numtyp valk = pval[j];
|
numtyp valk = polar6[j].x;
|
||||||
|
|
||||||
// intermediates involving moments and separation distance
|
// intermediates involving moments and separation distance
|
||||||
|
|
||||||
@ -1133,29 +1133,56 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
}
|
}
|
||||||
for (m = 0; m < 6; m++) bn[m] *= felec;
|
for (m = 0; m < 6; m++) bn[m] *= felec;
|
||||||
|
|
||||||
term1 = ci*ck;
|
term1 = corei*corek;
|
||||||
term2 = ck*dir - ci*dkr + dik;
|
numtyp term1i = corek*vali;
|
||||||
term3 = ci*qkr + ck*qir - dir*dkr + (numtyp)2.0*(dkqi-diqk+qiqk);
|
numtyp term2i = corek*dir;
|
||||||
term4 = dir*qkr - dkr*qir - (numtyp)4.0*qik;
|
numtyp term3i = corek*qir;
|
||||||
term5 = qir*qkr;
|
numtyp term1k = corei*valk;
|
||||||
numtyp scalek = (numtyp)1.0 - factor_mpole;
|
numtyp term2k = -corei*dkr;
|
||||||
rr1 = bn[0] - scalek*rr1;
|
numtyp term3k = corei*qkr;
|
||||||
rr3 = bn[1] - scalek*rr3;
|
numtyp term1ik = vali*valk;
|
||||||
rr5 = bn[2] - scalek*rr5;
|
numtyp term2ik = valk*dir - vali*dkr + dik;
|
||||||
rr7 = bn[3] - scalek*rr7;
|
numtyp term3ik = vali*qkr + valk*qir - dir*dkr + 2.0*(dkqi-diqk+qiqk);
|
||||||
rr9 = bn[4] - scalek*rr9;
|
numtyp term4ik = dir*qkr - dkr*qir - 4.0*qik;
|
||||||
rr11 = bn[5] - scalek*rr11;
|
numtyp term5ik = qir*qkr;
|
||||||
numtyp e = term1*rr1 + term2*rr3 + term3*rr5 + term4*rr7 + term5*rr9;
|
numtyp dmpi[9],dmpj[9];
|
||||||
|
numtyp dmpij[11];
|
||||||
|
damppole(r,11,alphai,alphak,dmpi,dmpj,dmpij);
|
||||||
|
numtyp scalek = factor_mpole;
|
||||||
|
numtyp rr1i = bn[0] - (1.0-scalek*dmpi[0])*rr1;
|
||||||
|
numtyp rr3i = bn[1] - (1.0-scalek*dmpi[2])*rr3;
|
||||||
|
numtyp rr5i = bn[2] - (1.0-scalek*dmpi[4])*rr5;
|
||||||
|
numtyp rr7i = bn[3] - (1.0-scalek*dmpi[6])*rr7;
|
||||||
|
numtyp rr1k = bn[0] - (1.0-scalek*dmpj[0])*rr1;
|
||||||
|
numtyp rr3k = bn[1] - (1.0-scalek*dmpj[2])*rr3;
|
||||||
|
numtyp rr5k = bn[2] - (1.0-scalek*dmpj[4])*rr5;
|
||||||
|
numtyp rr7k = bn[3] - (1.0-scalek*dmpj[6])*rr7;
|
||||||
|
numtyp rr1ik = bn[0] - (1.0-scalek*dmpij[0])*rr1;
|
||||||
|
numtyp rr3ik = bn[1] - (1.0-scalek*dmpij[2])*rr3;
|
||||||
|
numtyp rr5ik = bn[2] - (1.0-scalek*dmpij[4])*rr5;
|
||||||
|
numtyp rr7ik = bn[3] - (1.0-scalek*dmpij[6])*rr7;
|
||||||
|
numtyp rr9ik = bn[4] - (1.0-scalek*dmpij[8])*rr9;
|
||||||
|
numtyp rr11ik = bn[5] - (1.0-scalek*dmpij[10])*rr11;
|
||||||
|
rr1 = bn[0] - (1.0-scalek)*rr1;
|
||||||
|
rr3 = bn[1] - (1.0-scalek)*rr3;
|
||||||
|
numtyp e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik +
|
||||||
|
term1i*rr1i + term1k*rr1k + term1ik*rr1ik +
|
||||||
|
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
|
||||||
|
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
|
||||||
|
|
||||||
// find standard multipole intermediates for force and torque
|
// find damped multipole intermediates for force and torque
|
||||||
|
|
||||||
numtyp de = term1*rr3 + term2*rr5 + term3*rr7 + term4*rr9 + term5*rr11;
|
numtyp de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
|
||||||
term1 = -ck*rr3 + dkr*rr5 - qkr*rr7;
|
term1i*rr3i + term1k*rr3k + term1ik*rr3ik +
|
||||||
term2 = ci*rr3 + dir*rr5 + qir*rr7;
|
term2i*rr5i + term2k*rr5k + term2ik*rr5ik +
|
||||||
term3 = (numtyp)2.0 * rr5;
|
term3i*rr7i + term3k*rr7k + term3ik*rr7ik;
|
||||||
term4 = (numtyp)2.0 * (-ck*rr5+dkr*rr7-qkr*rr9);
|
term1 = -corek*rr3i - valk*rr3ik + dkr*rr5ik - qkr*rr7ik;
|
||||||
term5 = (numtyp)2.0 * (-ci*rr5-dir*rr7-qir*rr9);
|
term2 = corei*rr3k + vali*rr3ik + dir*rr5ik + qir*rr7ik;
|
||||||
term6 = (numtyp)4.0 * rr7;
|
term3 = 2.0 * rr5ik;
|
||||||
|
term4 = -2.0 * (corek*rr5i+valk*rr5ik - dkr*rr7ik+qkr*rr9ik);
|
||||||
|
term5 = -2.0 * (corei*rr5k+vali*rr5ik + dir*rr7ik+qir*rr9ik);
|
||||||
|
term6 = 4.0 * rr7ik;
|
||||||
|
rr3 = rr3ik;
|
||||||
|
|
||||||
energy += e;
|
energy += e;
|
||||||
|
|
||||||
@ -1209,10 +1236,10 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
store_answers_hippo_tq(tq,ii,inum,tid,t_per_atom,offset,i,tep);
|
store_answers_hippo_tq(tq,ii,inum,tid,t_per_atom,offset,i,tep);
|
||||||
|
|
||||||
// accumate force, energy and virial: use _acc if not the first kernel
|
// accumate force, energy and virial: use _acc if not the first kernel
|
||||||
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
offset,eflag,vflag,ans,engv);
|
//offset,eflag,vflag,ans,engv);
|
||||||
//store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
store_answers_acc(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
|
||||||
// offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
offset,eflag,vflag,ans,engv,NUM_BLOCKS_X);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
|
|||||||
@ -54,6 +54,18 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
const double gpu_split, FILE *_screen,
|
const double gpu_split, FILE *_screen,
|
||||||
const double polar_dscale, const double polar_uscale);
|
const double polar_dscale, const double polar_uscale);
|
||||||
|
|
||||||
|
/// Reallocate per-atom arrays if needed, and build neighbor lists once, if needed
|
||||||
|
int** precompute(const int ago, const int inum_full, const int nall,
|
||||||
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
|
int *host_amgroup, double **host_rpole, double **host_uind,
|
||||||
|
double **host_uinp, double* host_pval, double *sublo, double *subhi,
|
||||||
|
tagint *tag, int **nspecial, tagint **special,
|
||||||
|
int *nspecial15, tagint **special15,
|
||||||
|
const bool eflag, const bool vflag,
|
||||||
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
|
int **&ilist, int **&numj, const double cpu_time, bool &success,
|
||||||
|
double *charge, double *boxlo, double *prd);
|
||||||
|
|
||||||
/// Compute dispersion real-space with device neighboring
|
/// Compute dispersion real-space with device neighboring
|
||||||
int** compute_dispersion_real(const int ago, const int inum_full, const int nall,
|
int** compute_dispersion_real(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype,
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
@ -69,8 +81,8 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
|
|||||||
/// Compute multipole real-space with device neighboring
|
/// Compute multipole real-space with device neighboring
|
||||||
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
virtual int** compute_multipole_real(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype,
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
int *host_amgroup, double **host_rpole, double *sublo, double *subhi,
|
int *host_amgroup, double **host_rpole, double *host_pval,
|
||||||
tagint *tag, int **nspecial, tagint **special,
|
double *sublo, double *subhi, tagint *tag, int **nspecial, tagint **special,
|
||||||
int *nspecial15, tagint **special15,
|
int *nspecial15, tagint **special15,
|
||||||
const bool eflag, const bool vflag,
|
const bool eflag, const bool vflag,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
|
|||||||
@ -140,7 +140,7 @@ int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
|
|||||||
int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full,
|
int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full,
|
||||||
const int nall, double **host_x, int *host_type,
|
const int nall, double **host_x, int *host_type,
|
||||||
int *host_amtype, int *host_amgroup, double **host_rpole,
|
int *host_amtype, int *host_amgroup, double **host_rpole,
|
||||||
double *sublo, double *subhi, tagint *tag, int **nspecial,
|
double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial,
|
||||||
tagint **special, int *nspecial15, tagint** special15,
|
tagint **special, int *nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag, const bool eatom,
|
const bool eflag, const bool vflag, const bool eatom,
|
||||||
const bool vatom, int &host_start,
|
const bool vatom, int &host_start,
|
||||||
@ -148,7 +148,7 @@ int** hippo_gpu_compute_multipole_real(const int ago, const int inum_full,
|
|||||||
bool &success, const double aewald, const double felec, const double off2,
|
bool &success, const double aewald, const double felec, const double off2,
|
||||||
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
|
double *host_q, double *boxlo, double *prd, void **tep_ptr) {
|
||||||
return HIPPOMF.compute_multipole_real(ago, inum_full, nall, host_x, host_type,
|
return HIPPOMF.compute_multipole_real(ago, inum_full, nall, host_x, host_type,
|
||||||
host_amtype, host_amgroup, host_rpole, sublo, subhi,
|
host_amtype, host_amgroup, host_rpole, host_pval, sublo, subhi,
|
||||||
tag, nspecial, special, nspecial15, special15,
|
tag, nspecial, special, nspecial15, special15,
|
||||||
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
eflag, vflag, eatom, vatom, host_start, ilist, jnum,
|
||||||
cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr);
|
cpu_time, success, aewald, felec, off2, host_q, boxlo, prd, tep_ptr);
|
||||||
|
|||||||
@ -79,7 +79,7 @@ int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
|
|||||||
|
|
||||||
int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int nall,
|
int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
|
||||||
double **host_rpole, double *sublo, double *subhi, tagint *tag,
|
double **host_rpole, double *host_pval, double *sublo, double *subhi, tagint *tag,
|
||||||
int **nspecial, tagint **special, int* nspecial15, tagint** special15,
|
int **nspecial, tagint **special, int* nspecial15, tagint** special15,
|
||||||
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
|
||||||
int &host_start, int **ilist, int **jnum, const double cpu_time,
|
int &host_start, int **ilist, int **jnum, const double cpu_time,
|
||||||
@ -135,7 +135,7 @@ PairHippoGPU::PairHippoGPU(LAMMPS *lmp) : PairAmoeba(lmp), gpu_mode(GPU_FORCE)
|
|||||||
gpu_hal_ready = false; // always false for HIPPO
|
gpu_hal_ready = false; // always false for HIPPO
|
||||||
gpu_repulsion_ready = false; // true for HIPPO when ready
|
gpu_repulsion_ready = false; // true for HIPPO when ready
|
||||||
gpu_dispersion_real_ready = true; // true for HIPPO when ready
|
gpu_dispersion_real_ready = true; // true for HIPPO when ready
|
||||||
gpu_multipole_real_ready = false;
|
gpu_multipole_real_ready = true;
|
||||||
gpu_udirect2b_ready = false;
|
gpu_udirect2b_ready = false;
|
||||||
gpu_umutual2b_ready = false;
|
gpu_umutual2b_ready = false;
|
||||||
gpu_polar_real_ready = false;
|
gpu_polar_real_ready = false;
|
||||||
@ -294,14 +294,14 @@ void PairHippoGPU::multipole_real()
|
|||||||
double felec = electric / am_dielectric;
|
double felec = electric / am_dielectric;
|
||||||
|
|
||||||
firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
|
firstneigh = hippo_gpu_compute_multipole_real(neighbor->ago, inum, nall, atom->x,
|
||||||
atom->type, amtype, amgroup, rpole,
|
atom->type, amtype, amgroup, rpole, pval,
|
||||||
sublo, subhi, atom->tag,
|
sublo, subhi, atom->tag,
|
||||||
atom->nspecial, atom->special,
|
atom->nspecial, atom->special,
|
||||||
atom->nspecial15, atom->special15,
|
atom->nspecial15, atom->special15,
|
||||||
eflag, vflag, eflag_atom, vflag_atom,
|
eflag, vflag, eflag_atom, vflag_atom,
|
||||||
host_start, &ilist, &numneigh, cpu_time,
|
host_start, &ilist, &numneigh, cpu_time,
|
||||||
success, aewald, felec, off2, atom->q,
|
success, aewald, felec, off2, atom->q,
|
||||||
domain->boxlo, domain->prd, &tq_pinned);
|
domain->boxlo, domain->prd, &tq_pinned);
|
||||||
|
|
||||||
if (!success)
|
if (!success)
|
||||||
error->one(FLERR,"Insufficient memory on accelerator");
|
error->one(FLERR,"Insufficient memory on accelerator");
|
||||||
|
|||||||
Reference in New Issue
Block a user