Cleaned up and re-arranged the functions to reflect the order of calling in a time step
This commit is contained in:
@ -126,49 +126,6 @@ double AmoebaT::host_memory_usage() const {
|
|||||||
return this->host_memory_usage_atomic()+sizeof(Amoeba<numtyp,acctyp>);
|
return this->host_memory_usage_atomic()+sizeof(Amoeba<numtyp,acctyp>);
|
||||||
}
|
}
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
// Calculate the polar real-space term, returning tep
|
|
||||||
// ---------------------------------------------------------------------------
|
|
||||||
template <class numtyp, class acctyp>
|
|
||||||
int AmoebaT::polar_real(const int eflag, const int vflag) {
|
|
||||||
// Compute the block size and grid size to keep all cores busy
|
|
||||||
const int BX=this->block_size();
|
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
|
||||||
(BX/this->_threads_per_atom)));
|
|
||||||
|
|
||||||
int _nall=this->atom->nall();
|
|
||||||
int ainum=this->ans->inum();
|
|
||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
|
||||||
this->time_pair.start();
|
|
||||||
|
|
||||||
// Build the short neighbor list if needed
|
|
||||||
if (!this->short_nbor_avail) {
|
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
|
||||||
&this->_nbor_data->begin(),
|
|
||||||
&this->dev_short_nbor, &_off2, &ainum,
|
|
||||||
&nbor_pitch, &this->_threads_per_atom);
|
|
||||||
this->short_nbor_avail = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
this->k_polar.set_size(GX,BX);
|
|
||||||
this->k_polar.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
|
||||||
&this->dev_short_nbor,
|
|
||||||
&this->ans->force, &this->ans->engv, &this->_tep,
|
|
||||||
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
|
||||||
&this->_threads_per_atom,
|
|
||||||
&_aewald, &_felec, &_off2, &_polar_dscale, &_polar_uscale);
|
|
||||||
this->time_pair.stop();
|
|
||||||
|
|
||||||
// Signal that short nbor list is not avail for the next time step
|
|
||||||
// do it here because polar_real() is the last kernel in a time step at this point
|
|
||||||
|
|
||||||
this->short_nbor_avail = false;
|
|
||||||
|
|
||||||
return GX;
|
|
||||||
}
|
|
||||||
|
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
// Calculate the real-space permanent field, returning field and fieldp
|
// Calculate the real-space permanent field, returning field and fieldp
|
||||||
// ---------------------------------------------------------------------------
|
// ---------------------------------------------------------------------------
|
||||||
@ -182,13 +139,13 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
|
|||||||
const int BX=this->block_size();
|
const int BX=this->block_size();
|
||||||
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/(BX/this->_threads_per_atom)));
|
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/(BX/this->_threads_per_atom)));
|
||||||
|
|
||||||
// Build the short neighbor list if needed
|
// Build the short neighbor list if not done yet
|
||||||
if (!this->short_nbor_avail) {
|
if (!this->short_nbor_avail) {
|
||||||
this->k_short_nbor.set_size(GX,BX);
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
&this->_nbor_data->begin(),
|
&this->_nbor_data->begin(),
|
||||||
&this->dev_short_nbor, &_off2, &ainum,
|
&this->dev_short_nbor, &_off2, &ainum,
|
||||||
&nbor_pitch, &this->_threads_per_atom);
|
&nbor_pitch, &this->_threads_per_atom);
|
||||||
this->short_nbor_avail = true;
|
this->short_nbor_avail = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -219,9 +176,20 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
|||||||
int nbor_pitch=this->nbor->nbor_pitch();
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
this->time_pair.start();
|
this->time_pair.start();
|
||||||
|
|
||||||
|
// Build the short neighbor list if not done yet
|
||||||
|
if (!this->short_nbor_avail) {
|
||||||
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
|
&this->_nbor_data->begin(),
|
||||||
|
&this->dev_short_nbor, &_off2, &ainum,
|
||||||
|
&nbor_pitch, &this->_threads_per_atom);
|
||||||
|
this->short_nbor_avail = true;
|
||||||
|
}
|
||||||
|
|
||||||
this->k_umutual2b.set_size(GX,BX);
|
this->k_umutual2b.set_size(GX,BX);
|
||||||
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
this->k_umutual2b.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
||||||
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
|
&this->dev_short_nbor,
|
||||||
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
&this->_fieldp, &ainum, &_nall, &nbor_pitch,
|
||||||
&this->_threads_per_atom, &_aewald, &_off2,
|
&this->_threads_per_atom, &_aewald, &_off2,
|
||||||
&_polar_dscale, &_polar_uscale);
|
&_polar_dscale, &_polar_uscale);
|
||||||
@ -230,5 +198,48 @@ int AmoebaT::umutual2b(const int eflag, const int vflag) {
|
|||||||
return GX;
|
return GX;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
// Calculate the polar real-space term, returning tep
|
||||||
|
// ---------------------------------------------------------------------------
|
||||||
|
template <class numtyp, class acctyp>
|
||||||
|
int AmoebaT::polar_real(const int eflag, const int vflag) {
|
||||||
|
// Compute the block size and grid size to keep all cores busy
|
||||||
|
const int BX=this->block_size();
|
||||||
|
int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
|
||||||
|
(BX/this->_threads_per_atom)));
|
||||||
|
|
||||||
|
int _nall=this->atom->nall();
|
||||||
|
int ainum=this->ans->inum();
|
||||||
|
int nbor_pitch=this->nbor->nbor_pitch();
|
||||||
|
this->time_pair.start();
|
||||||
|
|
||||||
|
// Build the short neighbor list if not done yet
|
||||||
|
if (!this->short_nbor_avail) {
|
||||||
|
this->k_short_nbor.set_size(GX,BX);
|
||||||
|
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
|
||||||
|
&this->_nbor_data->begin(),
|
||||||
|
&this->dev_short_nbor, &_off2, &ainum,
|
||||||
|
&nbor_pitch, &this->_threads_per_atom);
|
||||||
|
this->short_nbor_avail = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
this->k_polar.set_size(GX,BX);
|
||||||
|
this->k_polar.run(&this->atom->x, &this->atom->extra, &damping, &sp_polar,
|
||||||
|
&this->nbor->dev_nbor, &this->_nbor_data->begin(),
|
||||||
|
&this->dev_short_nbor,
|
||||||
|
&this->ans->force, &this->ans->engv, &this->_tep,
|
||||||
|
&eflag, &vflag, &ainum, &_nall, &nbor_pitch,
|
||||||
|
&this->_threads_per_atom,
|
||||||
|
&_aewald, &_felec, &_off2, &_polar_dscale, &_polar_uscale);
|
||||||
|
this->time_pair.stop();
|
||||||
|
|
||||||
|
// Signal that short nbor list is not avail for the next time step
|
||||||
|
// do it here because polar_real() is the last kernel in a time step at this point
|
||||||
|
|
||||||
|
this->short_nbor_avail = false;
|
||||||
|
|
||||||
|
return GX;
|
||||||
|
}
|
||||||
|
|
||||||
template class Amoeba<PRECISION,ACC_PRECISION>;
|
template class Amoeba<PRECISION,ACC_PRECISION>;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -185,6 +185,421 @@ _texture( q_tex,int2);
|
|||||||
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
#define MIN(A,B) ((A) < (B) ? (A) : (B))
|
||||||
#define MY_PIS (acctyp)1.77245385090551602729
|
#define MY_PIS (acctyp)1.77245385090551602729
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
udirect2b = Ewald real direct field via list
|
||||||
|
udirect2b computes the real space contribution of the permanent
|
||||||
|
atomic multipole moments to the field via a neighbor list
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
__kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
||||||
|
const __global numtyp *restrict extra,
|
||||||
|
const __global numtyp4 *restrict damping,
|
||||||
|
const __global numtyp4 *restrict sp_polar,
|
||||||
|
const __global int *dev_nbor,
|
||||||
|
const __global int *dev_packed,
|
||||||
|
const __global int *dev_short_nbor,
|
||||||
|
__global numtyp4 *restrict fieldp,
|
||||||
|
const int inum, const int nall,
|
||||||
|
const int nbor_pitch, const int t_per_atom,
|
||||||
|
const numtyp aewald, const numtyp off2,
|
||||||
|
const numtyp polar_dscale, const numtyp polar_uscale)
|
||||||
|
{
|
||||||
|
int tid, ii, offset, i;
|
||||||
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
|
int n_stride;
|
||||||
|
local_allocate_store_charge();
|
||||||
|
|
||||||
|
acctyp _fieldp[6];
|
||||||
|
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
||||||
|
|
||||||
|
numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
||||||
|
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
||||||
|
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
||||||
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
|
|
||||||
|
//numtyp4 xi__;
|
||||||
|
|
||||||
|
if (ii<inum) {
|
||||||
|
int numj, nbor, nbor_end;
|
||||||
|
const __global int* nbor_mem=dev_packed;
|
||||||
|
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||||
|
n_stride,nbor_end,nbor);
|
||||||
|
|
||||||
|
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||||
|
//numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||||
|
//int itype=ix.w;
|
||||||
|
|
||||||
|
// recalculate numj and nbor_end for use of the short nbor list
|
||||||
|
if (dev_packed==dev_nbor) {
|
||||||
|
numj = dev_short_nbor[nbor];
|
||||||
|
nbor += n_stride;
|
||||||
|
nbor_end = nbor+fast_mul(numj,n_stride);
|
||||||
|
nbor_mem = dev_short_nbor;
|
||||||
|
}
|
||||||
|
|
||||||
|
int itype,igroup;
|
||||||
|
numtyp bn[4],bcn[3];
|
||||||
|
numtyp fid[3],fip[3];
|
||||||
|
|
||||||
|
dix = polar1[i].y; // rpole[i][1];
|
||||||
|
diy = polar1[i].z; // rpole[i][2];
|
||||||
|
diz = polar1[i].w; // rpole[i][3];
|
||||||
|
qixx = polar2[i].x; // rpole[i][4];
|
||||||
|
qixy = polar2[i].y; // rpole[i][5];
|
||||||
|
qixz = polar2[i].z; // rpole[i][6];
|
||||||
|
qiyy = polar2[i].w; // rpole[i][8];
|
||||||
|
qiyz = polar3[i].x; // rpole[i][9];
|
||||||
|
qizz = polar3[i].y; // rpole[i][12];
|
||||||
|
itype = polar3[i].z; // amtype[i];
|
||||||
|
igroup = polar3[i].w; // amgroup[i];
|
||||||
|
|
||||||
|
// debug:
|
||||||
|
// xi__ = ix; xi__.w = itype;
|
||||||
|
|
||||||
|
numtyp pdi = damping[itype].x;
|
||||||
|
numtyp pti = damping[itype].y;
|
||||||
|
numtyp ddi = damping[itype].z;
|
||||||
|
|
||||||
|
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
||||||
|
numtyp aesq2n = (numtyp)0.0;
|
||||||
|
if (aewald > (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald);
|
||||||
|
|
||||||
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
|
int jextra=nbor_mem[nbor];
|
||||||
|
int j = jextra & NEIGHMASK15;
|
||||||
|
|
||||||
|
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||||
|
//int jtype=jx.w;
|
||||||
|
|
||||||
|
// Compute r12
|
||||||
|
numtyp xr = jx.x - ix.x;
|
||||||
|
numtyp yr = jx.y - ix.y;
|
||||||
|
numtyp zr = jx.z - ix.z;
|
||||||
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
|
//if (r2>off2) continue;
|
||||||
|
|
||||||
|
numtyp r = ucl_sqrt(r2);
|
||||||
|
numtyp rinv = ucl_recip(r);
|
||||||
|
numtyp r2inv = rinv*rinv;
|
||||||
|
numtyp rr1 = rinv;
|
||||||
|
numtyp rr3 = rr1 * r2inv;
|
||||||
|
numtyp rr5 = (numtyp)3.0 * rr3 * r2inv;
|
||||||
|
numtyp rr7 = (numtyp)5.0 * rr5 * r2inv;
|
||||||
|
|
||||||
|
numtyp ck = polar1[j].x; // rpole[j][0];
|
||||||
|
numtyp dkx = polar1[j].y; // rpole[j][1];
|
||||||
|
numtyp dky = polar1[j].z; // rpole[j][2];
|
||||||
|
numtyp dkz = polar1[j].w; // rpole[j][3];
|
||||||
|
numtyp qkxx = polar2[j].x; // rpole[j][4];
|
||||||
|
numtyp qkxy = polar2[j].y; // rpole[j][5];
|
||||||
|
numtyp qkxz = polar2[j].z; // rpole[j][6];
|
||||||
|
numtyp qkyy = polar2[j].w; // rpole[j][8];
|
||||||
|
numtyp qkyz = polar3[j].x; // rpole[j][9];
|
||||||
|
numtyp qkzz = polar3[j].y; // rpole[j][12];
|
||||||
|
int jtype = polar3[j].z; // amtype[j];
|
||||||
|
int jgroup = polar3[j].w; // amgroup[j];
|
||||||
|
|
||||||
|
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
|
||||||
|
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
||||||
|
factor_wscale = sp_pol.x; // sp_polar_wscale[sbmask15(jextra)];
|
||||||
|
if (igroup == jgroup) {
|
||||||
|
factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
|
||||||
|
factor_dscale = polar_dscale;
|
||||||
|
factor_uscale = polar_uscale;
|
||||||
|
} else {
|
||||||
|
factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)];
|
||||||
|
factor_dscale = factor_uscale = (numtyp)1.0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// intermediates involving moments and separation distance
|
||||||
|
|
||||||
|
numtyp dir = dix*xr + diy*yr + diz*zr;
|
||||||
|
numtyp qix = qixx*xr + qixy*yr + qixz*zr;
|
||||||
|
numtyp qiy = qixy*xr + qiyy*yr + qiyz*zr;
|
||||||
|
numtyp qiz = qixz*xr + qiyz*yr + qizz*zr;
|
||||||
|
numtyp qir = qix*xr + qiy*yr + qiz*zr;
|
||||||
|
numtyp dkr = dkx*xr + dky*yr + dkz*zr;
|
||||||
|
numtyp qkx = qkxx*xr + qkxy*yr + qkxz*zr;
|
||||||
|
numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
|
||||||
|
numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
|
||||||
|
numtyp qkr = qkx*xr + qky*yr + qkz*zr;
|
||||||
|
|
||||||
|
// calculate the real space Ewald error function terms
|
||||||
|
|
||||||
|
numtyp ralpha = aewald * r;
|
||||||
|
numtyp exp2a = ucl_exp(-ralpha*ralpha);
|
||||||
|
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*ralpha);
|
||||||
|
numtyp _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp2a;
|
||||||
|
//bn[0] = erfc(ralpha) / r;
|
||||||
|
bn[0] = _erfc * rinv;
|
||||||
|
|
||||||
|
numtyp aefac = aesq2n;
|
||||||
|
for (int m = 1; m <= 3; m++) {
|
||||||
|
numtyp bfac = (numtyp) (m+m-1);
|
||||||
|
aefac = aesq2 * aefac;
|
||||||
|
bn[m] = (bfac*bn[m-1]+aefac*exp2a) * r2inv;
|
||||||
|
}
|
||||||
|
|
||||||
|
// find the field components for Thole polarization damping
|
||||||
|
|
||||||
|
numtyp scale3 = (numtyp)1.0;
|
||||||
|
numtyp scale5 = (numtyp)1.0;
|
||||||
|
numtyp scale7 = (numtyp)1.0;
|
||||||
|
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
|
||||||
|
if (damp != (numtyp)0.0) {
|
||||||
|
numtyp pgamma = MIN(ddi,damping[jtype].z); // dirdamp[jtype]
|
||||||
|
if (pgamma != (numtyp)0.0) {
|
||||||
|
damp = pgamma * ucl_powr(r/damp,(numtyp)1.5);
|
||||||
|
if (damp < (numtyp)50.0) {
|
||||||
|
numtyp expdamp = ucl_exp(-damp) ;
|
||||||
|
scale3 = (numtyp)1.0 - expdamp ;
|
||||||
|
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.5*damp);
|
||||||
|
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pgamma = MIN(pti,damping[jtype].y); // thole[jtype]
|
||||||
|
damp = pgamma * ucl_powr(r/damp,3.0);
|
||||||
|
if (damp < (numtyp)50.0) {
|
||||||
|
numtyp expdamp = ucl_exp(-damp);
|
||||||
|
scale3 = (numtyp)1.0 - expdamp;
|
||||||
|
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
|
||||||
|
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp + (numtyp)0.6*damp*damp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { // damp == 0: ???
|
||||||
|
}
|
||||||
|
|
||||||
|
numtyp scalek = factor_dscale;
|
||||||
|
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
||||||
|
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
||||||
|
bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
|
||||||
|
fid[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
|
||||||
|
fid[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
|
||||||
|
fid[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
|
||||||
|
|
||||||
|
scalek = factor_pscale;
|
||||||
|
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
||||||
|
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
||||||
|
bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
|
||||||
|
fip[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
|
||||||
|
fip[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
|
||||||
|
fip[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
|
||||||
|
|
||||||
|
_fieldp[0] += fid[0];
|
||||||
|
_fieldp[1] += fid[1];
|
||||||
|
_fieldp[2] += fid[2];
|
||||||
|
_fieldp[3] += fip[0];
|
||||||
|
_fieldp[4] += fip[1];
|
||||||
|
_fieldp[5] += fip[2];
|
||||||
|
} // nbor
|
||||||
|
|
||||||
|
} // ii<inum
|
||||||
|
|
||||||
|
// accumulate field and fieldp
|
||||||
|
|
||||||
|
store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
umutual2b = Ewald real mutual field via list
|
||||||
|
umutual2b computes the real space contribution of the induced
|
||||||
|
atomic dipole moments to the field via a neighbor list
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
__kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
||||||
|
const __global numtyp *restrict extra,
|
||||||
|
const __global numtyp4 *restrict damping,
|
||||||
|
const __global numtyp4 *restrict sp_polar,
|
||||||
|
const __global int *dev_nbor,
|
||||||
|
const __global int *dev_packed,
|
||||||
|
const __global int *dev_short_nbor,
|
||||||
|
__global numtyp4 *restrict fieldp,
|
||||||
|
const int inum, const int nall,
|
||||||
|
const int nbor_pitch, const int t_per_atom,
|
||||||
|
const numtyp aewald, const numtyp off2,
|
||||||
|
const numtyp polar_dscale, const numtyp polar_uscale)
|
||||||
|
{
|
||||||
|
int tid, ii, offset, i;
|
||||||
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
|
int n_stride;
|
||||||
|
local_allocate_store_charge();
|
||||||
|
|
||||||
|
acctyp _fieldp[6];
|
||||||
|
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
||||||
|
|
||||||
|
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
||||||
|
numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
|
||||||
|
numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
|
||||||
|
|
||||||
|
//numtyp4 xi__;
|
||||||
|
|
||||||
|
if (ii<inum) {
|
||||||
|
int numj, nbor, nbor_end;
|
||||||
|
const __global int* nbor_mem=dev_packed;
|
||||||
|
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||||
|
n_stride,nbor_end,nbor);
|
||||||
|
|
||||||
|
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
||||||
|
//numtyp qtmp; fetch(qtmp,i,q_tex);
|
||||||
|
//int itype=ix.w;
|
||||||
|
|
||||||
|
// recalculate numj and nbor_end for use of the short nbor list
|
||||||
|
if (dev_packed==dev_nbor) {
|
||||||
|
numj = dev_short_nbor[nbor];
|
||||||
|
nbor += n_stride;
|
||||||
|
nbor_end = nbor+fast_mul(numj,n_stride);
|
||||||
|
nbor_mem = dev_short_nbor;
|
||||||
|
}
|
||||||
|
|
||||||
|
int itype,igroup;
|
||||||
|
numtyp bn[4],bcn[3];
|
||||||
|
numtyp fid[3],fip[3];
|
||||||
|
|
||||||
|
itype = polar3[i].z; // amtype[i];
|
||||||
|
igroup = polar3[i].w; // amgroup[i];
|
||||||
|
|
||||||
|
numtyp pdi = damping[itype].x;
|
||||||
|
numtyp ddi = damping[itype].z;
|
||||||
|
|
||||||
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
|
int jextra=nbor_mem[nbor];
|
||||||
|
int j = jextra & NEIGHMASK15;
|
||||||
|
|
||||||
|
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
||||||
|
//int jtype=jx.w;
|
||||||
|
|
||||||
|
// Compute r12
|
||||||
|
numtyp xr = jx.x - ix.x;
|
||||||
|
numtyp yr = jx.y - ix.y;
|
||||||
|
numtyp zr = jx.z - ix.z;
|
||||||
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
|
if (r2>off2) continue;
|
||||||
|
|
||||||
|
numtyp r = ucl_sqrt(r2);
|
||||||
|
numtyp rinv = ucl_recip(r);
|
||||||
|
numtyp r2inv = rinv*rinv;
|
||||||
|
numtyp rr1 = rinv;
|
||||||
|
numtyp rr3 = rr1 * r2inv;
|
||||||
|
numtyp rr5 = (numtyp)3.0 * rr3 * r2inv;
|
||||||
|
|
||||||
|
int jtype = polar3[j].z; // amtype[j];
|
||||||
|
int jgroup = polar3[j].w; // amgroup[j];
|
||||||
|
numtyp ukx = polar4[j].x; // uind[j][0];
|
||||||
|
numtyp uky = polar4[j].y; // uind[j][1];
|
||||||
|
numtyp ukz = polar4[j].z; // uind[j][2];
|
||||||
|
numtyp ukxp = polar5[j].x; // uinp[j][0];
|
||||||
|
numtyp ukyp = polar5[j].y; // uinp[j][1];
|
||||||
|
numtyp ukzp = polar5[j].z; // uinp[j][2];
|
||||||
|
|
||||||
|
numtyp factor_uscale;
|
||||||
|
|
||||||
|
// find terms needed later to compute mutual polarization
|
||||||
|
// if (poltyp != DIRECT)
|
||||||
|
numtyp scale3 = (numtyp)1.0;
|
||||||
|
numtyp scale5 = (numtyp)1.0;
|
||||||
|
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
|
||||||
|
if (damp != (numtyp)0.0) {
|
||||||
|
numtyp pgamma = MIN(ddi,damping[jtype].z); // dirdamp[jtype]
|
||||||
|
if (pgamma != (numtyp)0.0) {
|
||||||
|
damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
|
||||||
|
if (damp < (numtyp)50.0) {
|
||||||
|
numtyp expdamp = ucl_exp(-damp);
|
||||||
|
scale3 = (numtyp)1.0 - expdamp;
|
||||||
|
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else { // damp == 0: ???
|
||||||
|
}
|
||||||
|
|
||||||
|
numtyp scalek = factor_uscale;
|
||||||
|
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
||||||
|
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
||||||
|
numtyp tdipdip[6]; // the following tdipdip is incorrect!! needs work to store tdipdip
|
||||||
|
tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
|
||||||
|
tdipdip[1] = bcn[1]*xr*yr;
|
||||||
|
tdipdip[2] = bcn[1]*xr*zr;
|
||||||
|
tdipdip[3] = -bcn[0] + bcn[1]*yr*yr;
|
||||||
|
tdipdip[4] = bcn[1]*yr*zr;
|
||||||
|
tdipdip[5] = -bcn[0] + bcn[1]*zr*zr;
|
||||||
|
|
||||||
|
fid[0] = tdipdip[0]*ukx + tdipdip[1]*uky + tdipdip[2]*ukz;
|
||||||
|
fid[1] = tdipdip[1]*ukx + tdipdip[3]*uky + tdipdip[4]*ukz;
|
||||||
|
fid[2] = tdipdip[2]*ukx + tdipdip[4]*uky + tdipdip[5]*ukz;
|
||||||
|
|
||||||
|
fip[0] = tdipdip[0]*ukxp + tdipdip[1]*ukyp + tdipdip[2]*ukzp;
|
||||||
|
fip[1] = tdipdip[1]*ukxp + tdipdip[3]*ukyp + tdipdip[4]*ukzp;
|
||||||
|
fip[2] = tdipdip[2]*ukxp + tdipdip[4]*ukyp + tdipdip[5]*ukzp;
|
||||||
|
|
||||||
|
_fieldp[0] += fid[0];
|
||||||
|
_fieldp[1] += fid[1];
|
||||||
|
_fieldp[2] += fid[2];
|
||||||
|
_fieldp[3] += fip[0];
|
||||||
|
_fieldp[4] += fip[1];
|
||||||
|
_fieldp[5] += fip[2];
|
||||||
|
} // nbor
|
||||||
|
|
||||||
|
} // ii<inum
|
||||||
|
|
||||||
|
// accumulate field and fieldp
|
||||||
|
|
||||||
|
store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* ----------------------------------------------------------------------
|
||||||
|
scan standard neighbor list and make it compatible with 1-5 neighbors
|
||||||
|
if IJ entry is a 1-2,1-3,1-4 neighbor then adjust offset to SBBITS15
|
||||||
|
else scan special15 to see if a 1-5 neighbor and adjust offset to SBBITS15
|
||||||
|
else do nothing to IJ entry
|
||||||
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
__kernel void k_special15(__global int * dev_nbor,
|
||||||
|
const __global int * dev_packed,
|
||||||
|
const __global tagint *restrict tag,
|
||||||
|
const __global int *restrict nspecial15,
|
||||||
|
const __global tagint *restrict special15,
|
||||||
|
const int inum, const int nall, const int nbor_pitch,
|
||||||
|
const int t_per_atom) {
|
||||||
|
int tid, ii, offset, n_stride, i;
|
||||||
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
|
if (ii<inum) {
|
||||||
|
|
||||||
|
int numj, nbor, nbor_end;
|
||||||
|
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||||
|
n_stride,nbor_end,nbor);
|
||||||
|
|
||||||
|
int n15 = nspecial15[ii];
|
||||||
|
|
||||||
|
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
||||||
|
|
||||||
|
int sj=dev_packed[nbor];
|
||||||
|
int which = sj >> SBBITS & 3;
|
||||||
|
int j = sj & NEIGHMASK;
|
||||||
|
tagint jtag = tag[j];
|
||||||
|
|
||||||
|
if (!which) {
|
||||||
|
int offset=ii;
|
||||||
|
for (int k=0; k<n15; k++) {
|
||||||
|
if (special15[offset] == jtag) {
|
||||||
|
which = 4;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
offset += nall;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (which) dev_nbor[nbor] = j ^ (which << SBBITS15);
|
||||||
|
} // for nbor
|
||||||
|
|
||||||
|
} // if ii
|
||||||
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
/* ----------------------------------------------------------------------
|
||||||
polar_real = real-space portion of induced dipole polarization
|
polar_real = real-space portion of induced dipole polarization
|
||||||
adapted from Tinker epreal1d() routine
|
adapted from Tinker epreal1d() routine
|
||||||
@ -311,7 +726,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
numtyp zr = jx.z - ix.z;
|
numtyp zr = jx.z - ix.z;
|
||||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
if (r2>off2) continue;
|
//if (r2>off2) continue;
|
||||||
|
|
||||||
numtyp r = ucl_sqrt(r2);
|
numtyp r = ucl_sqrt(r2);
|
||||||
|
|
||||||
@ -707,474 +1122,13 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
offset,eflag,vflag,ans,engv);
|
offset,eflag,vflag,ans,engv);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
udirect2b = Ewald real direct field via list
|
|
||||||
udirect2b computes the real space contribution of the permanent
|
|
||||||
atomic multipole moments to the field via a neighbor list
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
__kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
|
||||||
const __global numtyp *restrict extra,
|
|
||||||
const __global numtyp4 *restrict damping,
|
|
||||||
const __global numtyp4 *restrict sp_polar,
|
|
||||||
const __global int *dev_nbor,
|
|
||||||
const __global int *dev_packed,
|
|
||||||
const __global int *dev_short_nbor,
|
|
||||||
__global numtyp4 *restrict fieldp,
|
|
||||||
const int inum, const int nall,
|
|
||||||
const int nbor_pitch, const int t_per_atom,
|
|
||||||
const numtyp aewald, const numtyp off2,
|
|
||||||
const numtyp polar_dscale, const numtyp polar_uscale)
|
|
||||||
{
|
|
||||||
int tid, ii, offset, i;
|
|
||||||
atom_info(t_per_atom,ii,tid,offset);
|
|
||||||
|
|
||||||
int n_stride;
|
|
||||||
local_allocate_store_charge();
|
|
||||||
|
|
||||||
acctyp _fieldp[6];
|
|
||||||
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
|
||||||
|
|
||||||
numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
|
||||||
|
|
||||||
//numtyp4 xi__;
|
|
||||||
|
|
||||||
if (ii<inum) {
|
|
||||||
int numj, nbor, nbor_end;
|
|
||||||
const __global int* nbor_mem=dev_packed;
|
|
||||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
|
||||||
n_stride,nbor_end,nbor);
|
|
||||||
|
|
||||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
|
||||||
//numtyp qtmp; fetch(qtmp,i,q_tex);
|
|
||||||
//int itype=ix.w;
|
|
||||||
|
|
||||||
// recalculate numj and nbor_end for use of the short nbor list
|
|
||||||
if (dev_packed==dev_nbor) {
|
|
||||||
numj = dev_short_nbor[nbor];
|
|
||||||
nbor += n_stride;
|
|
||||||
nbor_end = nbor+fast_mul(numj,n_stride);
|
|
||||||
nbor_mem = dev_short_nbor;
|
|
||||||
}
|
|
||||||
|
|
||||||
int itype,igroup;
|
|
||||||
numtyp bn[4],bcn[3];
|
|
||||||
numtyp fid[3],fip[3];
|
|
||||||
|
|
||||||
dix = polar1[i].y; // rpole[i][1];
|
|
||||||
diy = polar1[i].z; // rpole[i][2];
|
|
||||||
diz = polar1[i].w; // rpole[i][3];
|
|
||||||
qixx = polar2[i].x; // rpole[i][4];
|
|
||||||
qixy = polar2[i].y; // rpole[i][5];
|
|
||||||
qixz = polar2[i].z; // rpole[i][6];
|
|
||||||
qiyy = polar2[i].w; // rpole[i][8];
|
|
||||||
qiyz = polar3[i].x; // rpole[i][9];
|
|
||||||
qizz = polar3[i].y; // rpole[i][12];
|
|
||||||
itype = polar3[i].z; // amtype[i];
|
|
||||||
igroup = polar3[i].w; // amgroup[i];
|
|
||||||
|
|
||||||
// debug:
|
|
||||||
// xi__ = ix; xi__.w = itype;
|
|
||||||
|
|
||||||
numtyp pdi = damping[itype].x;
|
|
||||||
numtyp pti = damping[itype].y;
|
|
||||||
numtyp ddi = damping[itype].z;
|
|
||||||
|
|
||||||
numtyp aesq2 = (numtyp)2.0 * aewald*aewald;
|
|
||||||
numtyp aesq2n = (numtyp)0.0;
|
|
||||||
if (aewald > (numtyp)0.0) aesq2n = (numtyp)1.0 / (MY_PIS*aewald);
|
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
|
||||||
|
|
||||||
int jextra=nbor_mem[nbor];
|
|
||||||
int j = jextra & NEIGHMASK15;
|
|
||||||
|
|
||||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
|
||||||
//int jtype=jx.w;
|
|
||||||
|
|
||||||
// Compute r12
|
|
||||||
numtyp xr = jx.x - ix.x;
|
|
||||||
numtyp yr = jx.y - ix.y;
|
|
||||||
numtyp zr = jx.z - ix.z;
|
|
||||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
|
||||||
|
|
||||||
if (r2>off2) {
|
|
||||||
if (i == 0) printf("i = 0: j = %d: r2 = %f; numj = %d\n", j, r2, numj);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
numtyp r = ucl_sqrt(r2);
|
|
||||||
numtyp rinv = ucl_recip(r);
|
|
||||||
numtyp r2inv = rinv*rinv;
|
|
||||||
numtyp rr1 = rinv;
|
|
||||||
numtyp rr3 = rr1 * r2inv;
|
|
||||||
numtyp rr5 = (numtyp)3.0 * rr3 * r2inv;
|
|
||||||
numtyp rr7 = (numtyp)5.0 * rr5 * r2inv;
|
|
||||||
|
|
||||||
numtyp ck = polar1[j].x; // rpole[j][0];
|
|
||||||
numtyp dkx = polar1[j].y; // rpole[j][1];
|
|
||||||
numtyp dky = polar1[j].z; // rpole[j][2];
|
|
||||||
numtyp dkz = polar1[j].w; // rpole[j][3];
|
|
||||||
numtyp qkxx = polar2[j].x; // rpole[j][4];
|
|
||||||
numtyp qkxy = polar2[j].y; // rpole[j][5];
|
|
||||||
numtyp qkxz = polar2[j].z; // rpole[j][6];
|
|
||||||
numtyp qkyy = polar2[j].w; // rpole[j][8];
|
|
||||||
numtyp qkyz = polar3[j].x; // rpole[j][9];
|
|
||||||
numtyp qkzz = polar3[j].y; // rpole[j][12];
|
|
||||||
int jtype = polar3[j].z; // amtype[j];
|
|
||||||
int jgroup = polar3[j].w; // amgroup[j];
|
|
||||||
|
|
||||||
numtyp factor_wscale, factor_dscale, factor_pscale, factor_uscale;
|
|
||||||
const numtyp4 sp_pol = sp_polar[sbmask15(jextra)];
|
|
||||||
factor_wscale = sp_pol.x; // sp_polar_wscale[sbmask15(jextra)];
|
|
||||||
if (igroup == jgroup) {
|
|
||||||
factor_pscale = sp_pol.y; // sp_polar_piscale[sbmask15(jextra)];
|
|
||||||
factor_dscale = polar_dscale;
|
|
||||||
factor_uscale = polar_uscale;
|
|
||||||
} else {
|
|
||||||
factor_pscale = sp_pol.z; // sp_polar_pscale[sbmask15(jextra)];
|
|
||||||
factor_dscale = factor_uscale = (numtyp)1.0;
|
|
||||||
}
|
|
||||||
|
|
||||||
// intermediates involving moments and separation distance
|
|
||||||
|
|
||||||
numtyp dir = dix*xr + diy*yr + diz*zr;
|
|
||||||
numtyp qix = qixx*xr + qixy*yr + qixz*zr;
|
|
||||||
numtyp qiy = qixy*xr + qiyy*yr + qiyz*zr;
|
|
||||||
numtyp qiz = qixz*xr + qiyz*yr + qizz*zr;
|
|
||||||
numtyp qir = qix*xr + qiy*yr + qiz*zr;
|
|
||||||
numtyp dkr = dkx*xr + dky*yr + dkz*zr;
|
|
||||||
numtyp qkx = qkxx*xr + qkxy*yr + qkxz*zr;
|
|
||||||
numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
|
|
||||||
numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
|
|
||||||
numtyp qkr = qkx*xr + qky*yr + qkz*zr;
|
|
||||||
|
|
||||||
// calculate the real space Ewald error function terms
|
|
||||||
|
|
||||||
numtyp ralpha = aewald * r;
|
|
||||||
numtyp exp2a = ucl_exp(-ralpha*ralpha);
|
|
||||||
numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*ralpha);
|
|
||||||
numtyp _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp2a;
|
|
||||||
//bn[0] = erfc(ralpha) / r;
|
|
||||||
bn[0] = _erfc * rinv;
|
|
||||||
|
|
||||||
numtyp aefac = aesq2n;
|
|
||||||
for (int m = 1; m <= 3; m++) {
|
|
||||||
numtyp bfac = (numtyp) (m+m-1);
|
|
||||||
aefac = aesq2 * aefac;
|
|
||||||
bn[m] = (bfac*bn[m-1]+aefac*exp2a) * r2inv;
|
|
||||||
}
|
|
||||||
|
|
||||||
// find the field components for Thole polarization damping
|
|
||||||
|
|
||||||
numtyp scale3 = (numtyp)1.0;
|
|
||||||
numtyp scale5 = (numtyp)1.0;
|
|
||||||
numtyp scale7 = (numtyp)1.0;
|
|
||||||
numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
|
|
||||||
if (damp != (numtyp)0.0) {
|
|
||||||
numtyp pgamma = MIN(ddi,damping[jtype].z); // dirdamp[jtype]
|
|
||||||
if (pgamma != (numtyp)0.0) {
|
|
||||||
damp = pgamma * ucl_powr(r/damp,(numtyp)1.5);
|
|
||||||
if (damp < (numtyp)50.0) {
|
|
||||||
numtyp expdamp = ucl_exp(-damp) ;
|
|
||||||
scale3 = (numtyp)1.0 - expdamp ;
|
|
||||||
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.5*damp);
|
|
||||||
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+(numtyp)0.65*damp + (numtyp)0.15*damp*damp);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
pgamma = MIN(pti,damping[jtype].y); // thole[jtype]
|
|
||||||
damp = pgamma * ucl_powr(r/damp,3.0);
|
|
||||||
if (damp < (numtyp)50.0) {
|
|
||||||
numtyp expdamp = ucl_exp(-damp);
|
|
||||||
scale3 = (numtyp)1.0 - expdamp;
|
|
||||||
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
|
|
||||||
scale7 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp + (numtyp)0.6*damp*damp);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} else { // damp == 0: ???
|
|
||||||
}
|
|
||||||
|
|
||||||
numtyp scalek = factor_dscale;
|
|
||||||
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
|
||||||
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
|
||||||
bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
|
|
||||||
fid[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
|
|
||||||
fid[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
|
|
||||||
fid[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
|
|
||||||
|
|
||||||
scalek = factor_pscale;
|
|
||||||
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
|
|
||||||
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
|
|
||||||
bcn[2] = bn[3] - ((numtyp)1.0-scalek*scale7)*rr7;
|
|
||||||
fip[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
|
|
||||||
fip[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
|
|
||||||
fip[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
|
|
||||||
|
|
||||||
_fieldp[0] += fid[0];
|
|
||||||
_fieldp[1] += fid[1];
|
|
||||||
_fieldp[2] += fid[2];
|
|
||||||
_fieldp[3] += fip[0];
|
|
||||||
_fieldp[4] += fip[1];
|
|
||||||
_fieldp[5] += fip[2];
|
|
||||||
} // nbor
|
|
||||||
|
|
||||||
} // ii<inum
|
|
||||||
|
|
||||||
// accumulate field and fieldp
|
|
||||||
|
|
||||||
store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
umutual2b = Ewald real mutual field via list
|
|
||||||
umutual2b computes the real space contribution of the induced
|
|
||||||
atomic dipole moments to the field via a neighbor list
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
// Per-atom accumulation of the real-space Ewald field produced by the
// neighbors' induced dipoles.
//
//   x_           positions (fetched through fetch4 / pos_tex)
//   extra        packed per-atom data; this kernel reads three numtyp4 views
//                at offsets 8*nall (type/group in .z/.w), 12*nall (uind) and
//                16*nall (uinp)
//   damping      per-type damping parameters (.x and .z are read here)
//   sp_polar     not read by this kernel
//   dev_nbor,
//   dev_packed   neighbor list, decoded by nbor_info
//   fieldp       output, written by store_answers_fieldp
//   inum, nall   number of atoms handled here / stride of the extra views
//   aewald       Ewald coefficient
//   off2         squared cutoff; pairs with r2 > off2 are skipped
//   polar_dscale not read by this kernel
//   polar_uscale u-scale factor applied when both atoms share a group
//
// The first three entries of the accumulator hold the field from uind, the
// last three the field from uinp.
__kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
                                 const __global numtyp *restrict extra,
                                 const __global numtyp4 *restrict damping,
                                 const __global numtyp4 *restrict sp_polar,
                                 const __global int *dev_nbor,
                                 const __global int *dev_packed,
                                 __global numtyp4 *restrict fieldp,
                                 const int inum, const int nall,
                                 const int nbor_pitch, const int t_per_atom,
                                 const numtyp aewald, const numtyp off2,
                                 const numtyp polar_dscale, const numtyp polar_uscale)
{
  int tid, ii, offset, i;
  atom_info(t_per_atom,ii,tid,offset);

  int n_stride;
  local_allocate_store_charge();

  acctyp _fieldp[6];
  for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;

  numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
  numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
  numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);

  if (ii<inum) {
    int numj, nbor, nbor_end;
    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
              n_stride,nbor_end,nbor);

    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];

    int itype,igroup;
    numtyp bn[3],bcn[2];
    numtyp fid[3],fip[3];

    itype  = polar3[i].z; // amtype[i];
    igroup = polar3[i].w; // amgroup[i];

    numtyp pdi = damping[itype].x;
    numtyp ddi = damping[itype].z;

    // Loop-invariant factors of the Ewald bn recurrence:
    //   aesq2 = 2*aewald^2, aesq2n = 1/(sqrt(pi)*aewald) (0 when aewald == 0)
    const numtyp aesq2 = (numtyp)2.0 * aewald * aewald;
    numtyp aesq2n = (numtyp)0.0;
    if (aewald > (numtyp)0.0)
      aesq2n = ucl_recip((numtyp)1.772453850905516 * aewald);

    for ( ; nbor<nbor_end; nbor+=n_stride) {

      int jextra=dev_packed[nbor];
      int j = jextra & NEIGHMASK15;

      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];

      // Compute r12
      numtyp xr = jx.x - ix.x;
      numtyp yr = jx.y - ix.y;
      numtyp zr = jx.z - ix.z;
      numtyp r2 = xr*xr + yr*yr + zr*zr;

      if (r2>off2) continue;

      numtyp r = ucl_sqrt(r2);
      numtyp rinv = ucl_recip(r);
      numtyp r2inv = rinv*rinv;
      numtyp rr1 = rinv;
      numtyp rr3 = rr1 * r2inv;
      numtyp rr5 = (numtyp)3.0 * rr3 * r2inv;

      int jtype  = polar3[j].z; // amtype[j];
      int jgroup = polar3[j].w; // amgroup[j];
      numtyp ukx = polar4[j].x; // uind[j][0];
      numtyp uky = polar4[j].y; // uind[j][1];
      numtyp ukz = polar4[j].z; // uind[j][2];
      numtyp ukxp = polar5[j].x; // uinp[j][0];
      numtyp ukyp = polar5[j].y; // uinp[j][1];
      numtyp ukzp = polar5[j].z; // uinp[j][2];

      // u-scale: polar_uscale inside a polarization group, 1 otherwise.
      // This is the same selection the direct-field kernel in this file uses
      // for its factor_uscale.
      numtyp factor_uscale;
      if (igroup == jgroup) factor_uscale = polar_uscale;
      else factor_uscale = (numtyp)1.0;

      // calculate the real space Ewald error function terms
      // (bn[0] = erfc(ralpha)/r via the polynomial approximation, then the
      //  upward recurrence; only bn[1] and bn[2] are consumed below)
      numtyp ralpha = aewald * r;
      numtyp exp2a = ucl_exp(-ralpha*ralpha);
      numtyp t = ucl_recip((numtyp)1.0 + EWALD_P*ralpha);
      numtyp _erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp2a;
      bn[0] = _erfc * rinv;

      numtyp aefac = aesq2n;
      for (int m = 1; m <= 2; m++) {
        numtyp bfac = (numtyp) (m+m-1);
        aefac = aesq2 * aefac;
        bn[m] = (bfac*bn[m-1]+aefac*exp2a) * r2inv;
      }

      // find terms needed later to compute mutual polarization
      // if (poltyp != DIRECT)
      // NOTE(review): the exponent and scale3/scale5 expressions below are the
      // ones the direct-field kernel pairs with the damping[].y width, while
      // the width taken here is damping[].z; when that width is zero the pair
      // is left undamped. Confirm which parameter is intended.
      numtyp scale3 = (numtyp)1.0;
      numtyp scale5 = (numtyp)1.0;
      numtyp damp = pdi * damping[jtype].x; // pdamp[jtype]
      if (damp != (numtyp)0.0) {
        numtyp pgamma = MIN(ddi,damping[jtype].z); // dirdamp[jtype]
        if (pgamma != (numtyp)0.0) {
          damp = pgamma * ucl_powr(r/damp,(numtyp)3.0);
          if (damp < (numtyp)50.0) {
            numtyp expdamp = ucl_exp(-damp);
            scale3 = (numtyp)1.0 - expdamp;
            scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
          }
        }
      }

      numtyp scalek = factor_uscale;
      bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
      bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;

      // Symmetric dipole-dipole tensor, upper triangle: xx, xy, xz, yy, yz, zz.
      // TODO (original author's note): the following tdipdip is incorrect!!
      // needs work to store tdipdip
      numtyp tdipdip[6];
      tdipdip[0] = -bcn[0] + bcn[1]*xr*xr;
      tdipdip[1] = bcn[1]*xr*yr;
      tdipdip[2] = bcn[1]*xr*zr;
      tdipdip[3] = -bcn[0] + bcn[1]*yr*yr;
      tdipdip[4] = bcn[1]*yr*zr;
      tdipdip[5] = -bcn[0] + bcn[1]*zr*zr;

      fid[0] = tdipdip[0]*ukx + tdipdip[1]*uky + tdipdip[2]*ukz;
      fid[1] = tdipdip[1]*ukx + tdipdip[3]*uky + tdipdip[4]*ukz;
      fid[2] = tdipdip[2]*ukx + tdipdip[4]*uky + tdipdip[5]*ukz;

      fip[0] = tdipdip[0]*ukxp + tdipdip[1]*ukyp + tdipdip[2]*ukzp;
      fip[1] = tdipdip[1]*ukxp + tdipdip[3]*ukyp + tdipdip[4]*ukzp;
      fip[2] = tdipdip[2]*ukxp + tdipdip[4]*ukyp + tdipdip[5]*ukzp;

      _fieldp[0] += fid[0];
      _fieldp[1] += fid[1];
      _fieldp[2] += fid[2];
      _fieldp[3] += fip[0];
      _fieldp[4] += fip[1];
      _fieldp[5] += fip[2];
    } // nbor

  } // ii<inum

  // accumulate field and fieldp

  store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
}
|
|
||||||
|
|
||||||
/* ----------------------------------------------------------------------
|
|
||||||
scan standard neighbor list and make it compatible with 1-5 neighbors
|
|
||||||
if IJ entry is a 1-2,1-3,1-4 neighbor then adjust offset to SBBITS15
|
|
||||||
else scan special15 to see if a 1-5 neighbor and adjust offset to SBBITS15
|
|
||||||
else do nothing to IJ entry
|
|
||||||
------------------------------------------------------------------------- */
|
|
||||||
|
|
||||||
// Rewrites the special-bond flag of each neighbor entry into the SBBITS15
// position and additionally marks 1-5 partners.
//
// For every neighbor entry read from dev_packed:
//   - the two flag bits above SBBITS are extracted;
//   - if they are zero, the neighbor's tag is looked up among the n15 entries
//     of special15 (read at ii, ii+nall, ii+2*nall, ...); a match sets the
//     flag to 4;
//   - a non-zero flag is written back to dev_nbor as j ^ (flag << SBBITS15).
// Entries whose flag stays zero are not written.
__kernel void k_special15(__global int * dev_nbor,
                          const __global int * dev_packed,
                          const __global tagint *restrict tag,
                          const __global int *restrict nspecial15,
                          const __global tagint *restrict special15,
                          const int inum, const int nall, const int nbor_pitch,
                          const int t_per_atom) {
  int tid, ii, offset, n_stride, i;
  atom_info(t_per_atom,ii,tid,offset);

  // nothing to do for threads past the last atom
  if (ii>=inum) return;

  int numj, nbor, nbor_end;
  nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
            n_stride,nbor_end,nbor);

  // number of 1-5 partners listed for this atom (indexed by ii)
  const int n15 = nspecial15[ii];

  while (nbor<nbor_end) {
    const int packed = dev_packed[nbor];
    int flag = (packed >> SBBITS) & 3;
    const int j = packed & NEIGHMASK;
    const tagint jtag = tag[j];

    if (flag == 0) {
      // walk this atom's special15 entries, which are nall apart
      int slot = ii;
      for (int k=0; k<n15; k++, slot+=nall) {
        if (special15[slot] == jtag) {
          flag = 4;
          break;
        }
      }
    }

    if (flag != 0) dev_nbor[nbor] = j ^ (flag << SBBITS15);

    nbor += n_stride;
  } // while nbor
}
|
|
||||||
/*
|
|
||||||
__kernel void k_amoeba_short_nbor(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_short_nbor(const __global numtyp4 *restrict x_,
|
||||||
const numtyp off2, __global int * dev_nbor,
|
const __global int * dev_nbor,
|
||||||
const __global int * dev_packed,
|
const __global int * dev_packed,
|
||||||
const int inum, const int nbor_pitch,
|
__global int * dev_short_nbor,
|
||||||
const int t_per_atom) {
|
const numtyp off2,
|
||||||
int tid, ii, offset, n_stride, i;
|
const int inum, const int nbor_pitch,
|
||||||
atom_info(t_per_atom,ii,tid,offset);
|
const int t_per_atom) {
|
||||||
|
|
||||||
int new_numj=0;
|
|
||||||
|
|
||||||
if (ii<inum) {
|
|
||||||
int numj, nbor, nbor_end;
|
|
||||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
|
||||||
n_stride,nbor_end,nbor);
|
|
||||||
|
|
||||||
numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
|
|
||||||
if (i == 0) printf("i = 0: numj before = %d\n", numj);
|
|
||||||
__global int *out_list=dev_nbor+nbor;
|
|
||||||
const int out_stride=n_stride;
|
|
||||||
|
|
||||||
for ( ; nbor<nbor_end; nbor+=n_stride) {
|
|
||||||
|
|
||||||
int sj=dev_packed[nbor];
|
|
||||||
int j = sj & NEIGHMASK15;
|
|
||||||
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
|
|
||||||
|
|
||||||
// Compute r12
|
|
||||||
numtyp delx = ix.x-jx.x;
|
|
||||||
numtyp dely = ix.y-jx.y;
|
|
||||||
numtyp delz = ix.z-jx.z;
|
|
||||||
numtyp rsq = delx*delx+dely*dely+delz*delz;
|
|
||||||
|
|
||||||
if (rsq<=off2) {
|
|
||||||
*out_list=sj;
|
|
||||||
out_list+=out_stride;
|
|
||||||
|
|
||||||
new_numj++;
|
|
||||||
if (i == 0 && offset == 0) printf("neighbor of i = 0 within off2: j = %d; rsq = %f; new_numj = %d\n", j, rsq, new_numj);
|
|
||||||
} else {
|
|
||||||
if (i == 0 && offset == 0) printf("neighbor of i = 0 outside off2: j = %d; rsq = %f; new_numj = %d\n", j, rsq, new_numj);
|
|
||||||
}
|
|
||||||
} // for nbor
|
|
||||||
} // if ii
|
|
||||||
|
|
||||||
if (t_per_atom>1) {
|
|
||||||
for (unsigned int s=t_per_atom/2; s>0; s>>=1)
|
|
||||||
new_numj += shfl_down(new_numj, s, t_per_atom);
|
|
||||||
}
|
|
||||||
if (offset==0 && ii<inum) {
|
|
||||||
dev_nbor[ii+nbor_pitch]=new_numj;
|
|
||||||
if (i == 0) printf("i = 0: numj after = %d\n", new_numj);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
__kernel void k_amoeba_short_nbor(const __global numtyp4 *restrict x_,
|
|
||||||
const __global int * dev_nbor,
|
|
||||||
const __global int * dev_packed,
|
|
||||||
__global int * dev_short_nbor,
|
|
||||||
const numtyp off2,
|
|
||||||
const int inum, const int nbor_pitch,
|
|
||||||
const int t_per_atom) {
|
|
||||||
__local int n_stride;
|
__local int n_stride;
|
||||||
int tid, ii, offset;
|
int tid, ii, offset;
|
||||||
atom_info(t_per_atom,ii,tid,offset);
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|||||||
@ -101,9 +101,9 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
|
|||||||
_nbor_data=&(nbor->dev_nbor);
|
_nbor_data=&(nbor->dev_nbor);
|
||||||
}
|
}
|
||||||
|
|
||||||
bool allocate_packed = false;
|
bool alloc_packed=false;
|
||||||
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,
|
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,
|
||||||
_gpu_host,max_nbors,cell_size,allocate_packed,_threads_per_atom);
|
_gpu_host,max_nbors,cell_size,alloc_packed,_threads_per_atom);
|
||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
@ -231,8 +231,6 @@ inline int BaseAmoebaT::build_nbor_list(const int inum, const int host_inum,
|
|||||||
add_onefive_neighbors();
|
add_onefive_neighbors();
|
||||||
}
|
}
|
||||||
|
|
||||||
//nbor->copy_unpacked(inum,mn);
|
|
||||||
|
|
||||||
double bytes=ans->gpu_bytes()+nbor->gpu_bytes();
|
double bytes=ans->gpu_bytes()+nbor->gpu_bytes();
|
||||||
if (bytes>_max_an_bytes)
|
if (bytes>_max_an_bytes)
|
||||||
_max_an_bytes=bytes;
|
_max_an_bytes=bytes;
|
||||||
@ -336,17 +334,17 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
|
|||||||
|
|
||||||
template <class numtyp, class acctyp>
|
template <class numtyp, class acctyp>
|
||||||
int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall,
|
int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall,
|
||||||
double **host_x, int *host_type, int *host_amtype,
|
double **host_x, int *host_type, int *host_amtype,
|
||||||
int *host_amgroup, double **host_rpole,
|
int *host_amgroup, double **host_rpole,
|
||||||
double **host_uind, double **host_uinp,
|
double **host_uind, double **host_uinp,
|
||||||
double *sublo, double *subhi, tagint *tag,
|
double *sublo, double *subhi, tagint *tag,
|
||||||
int **nspecial, tagint **special,
|
int **nspecial, tagint **special,
|
||||||
int *nspecial15, tagint **special15,
|
int *nspecial15, tagint **special15,
|
||||||
const bool eflag_in, const bool vflag_in,
|
const bool eflag_in, const bool vflag_in,
|
||||||
const bool eatom, const bool vatom, int &host_start,
|
const bool eatom, const bool vatom, int &host_start,
|
||||||
int **&ilist, int **&jnum, const double cpu_time,
|
int **&ilist, int **&jnum, const double cpu_time,
|
||||||
bool &success, double *host_q, double *boxlo,
|
bool &success, double *host_q, double *boxlo,
|
||||||
double *prd) {
|
double *prd) {
|
||||||
acc_timers();
|
acc_timers();
|
||||||
int eflag, vflag;
|
int eflag, vflag;
|
||||||
if (eatom) eflag=2;
|
if (eatom) eflag=2;
|
||||||
|
|||||||
Reference in New Issue
Block a user