Removed trailing spaces

This commit is contained in:
Trung Nguyen
2021-09-28 17:28:33 -05:00
parent bf88ab77fa
commit b874feb127
13 changed files with 283 additions and 281 deletions

View File

@ -140,7 +140,7 @@ void AmoebaT::clear() {
coeff_amclass.clear();
sp_polar.clear();
sp_nonpolar.clear();
this->clear_atomic();
}
@ -169,7 +169,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
// Build the short neighbor list for the cutoff off2_mpole,
// at this point mpole is the first kernel in a time step
this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(),
@ -194,7 +194,7 @@ int AmoebaT::multipole_real(const int eflag, const int vflag) {
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int AmoebaT::udirect2b(const int eflag, const int vflag) {
int ainum=this->ans->inum();
int ainum=this->ans->inum();
if (ainum == 0)
return 0;
@ -216,7 +216,7 @@ int AmoebaT::udirect2b(const int eflag, const int vflag) {
&nbor_pitch, &this->_threads_per_atom);
this->short_nbor_polar_avail = true;
}
this->k_udirect2b.set_size(GX,BX);
this->k_udirect2b.run(&this->atom->x, &this->atom->extra, &coeff_amtype, &sp_polar,
&this->nbor->dev_nbor, &this->_nbor_data->begin(),

View File

@ -492,7 +492,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -500,7 +500,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
const numtyp4 pol1j = polar1[j];
numtyp ck = pol1j.x; // rpole[j][0];
@ -533,12 +533,12 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
numtyp qkr = qkx*xr + qky*yr + qkz*zr;
numtyp dik = dix*dkx + diy*dky + diz*dkz;
numtyp qik = qix*qkx + qiy*qky + qiz*qkz;
numtyp diqk = dix*qkx + diy*qky + diz*qkz;
numtyp dkqi = dkx*qix + dky*qiy + dkz*qiz;
numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
qixx*qkxx + qiyy*qkyy + qizz*qkzz;
// additional intermediates involving moments and distance
@ -585,11 +585,11 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
numtyp dkqirx = dkqiz*yr - dkqiy*zr;
numtyp dkqiry = dkqix*zr - dkqiz*xr;
numtyp dkqirz = dkqiy*xr - dkqix*yr;
numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
(numtyp)2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
(numtyp)2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
(numtyp)2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
// get reciprocal distance terms for this interaction
@ -650,20 +650,20 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
// compute the force components for this interaction
numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
term4*qix + term5*qkx + term6*(qixk+qkxi);
numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
term4*qiy + term5*qky + term6*(qiyk+qkyi);
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
term4*qiz + term5*qkz + term6*(qizk+qkzi);
// compute the torque components for this interaction
numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) -
numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) -
term4*qirx - term6*(qikrx+qikx);
numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) -
numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) -
term4*qiry - term6*(qikry+qiky);
numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) -
numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) -
term4*qirz - term6*(qikrz+qikz);
// increment force-based gradient and torque on first site
@ -691,12 +691,12 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
virial[5] += vyz;
}
} // nbor
} // ii<inum
// accumulate tq
store_answers_amoeba_tq(tq,ii,inum,tid,t_per_atom,offset,i,tep);
// accumate force, energy and virial: use _acc if not the first kernel
store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
offset,eflag,vflag,ans,engv);
@ -771,7 +771,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
numtyp qizz = pol3i.y; // rpole[i][12];
int itype = pol3i.z; // amtype[i];
int igroup = pol3i.w; // amgroup[i];
// debug:
// xi__ = ix; xi__.w = itype;
@ -790,7 +790,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -798,7 +798,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
numtyp rinv = ucl_recip(r);
numtyp r2inv = rinv*rinv;
@ -898,7 +898,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
fid[0] = -xr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkx + (numtyp)2.0*bcn[1]*qkx;
fid[1] = -yr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dky + (numtyp)2.0*bcn[1]*qky;
fid[2] = -zr*(bcn[0]*ck-bcn[1]*dkr+bcn[2]*qkr) - bcn[0]*dkz + (numtyp)2.0*bcn[1]*qkz;
scalek = factor_pscale;
bcn[0] = bn[1] - ((numtyp)1.0-scalek*scale3)*rr3;
bcn[1] = bn[2] - ((numtyp)1.0-scalek*scale5)*rr5;
@ -918,7 +918,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
} // ii<inum
// accumulate field and fieldp
store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
}
@ -977,10 +977,10 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
int itype,igroup;
numtyp bn[4],bcn[3];
numtyp fid[3],fip[3];
itype = polar3[i].z; // amtype[i];
igroup = polar3[i].w; // amgroup[i];
numtyp pdi = coeff[itype].x;
numtyp pti = coeff[itype].y;
@ -995,7 +995,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -1003,7 +1003,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
numtyp rinv = ucl_recip(r);
numtyp r2inv = rinv*rinv;
@ -1044,7 +1044,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
}
// find terms needed later to compute mutual polarization
// if (poltyp != DIRECT)
// if (poltyp != DIRECT)
numtyp scale3 = (numtyp)1.0;
numtyp scale5 = (numtyp)1.0;
numtyp damp = pdi * coeff[jtype].x; // pdamp[jtype]
@ -1056,7 +1056,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
scale3 = (numtyp)1.0 - expdamp;
scale5 = (numtyp)1.0 - expdamp*((numtyp)1.0+damp);
}
} else { // damp == 0: ???
}
@ -1071,17 +1071,17 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
tdipdip[3] = -bcn[0] + bcn[1]*yr*yr;
tdipdip[4] = bcn[1]*yr*zr;
tdipdip[5] = -bcn[0] + bcn[1]*zr*zr;
//if (i==0 && j == 10)
//if (i==0 && j == 10)
// printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
// i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
fid[0] = tdipdip[0]*ukx + tdipdip[1]*uky + tdipdip[2]*ukz;
fid[1] = tdipdip[1]*ukx + tdipdip[3]*uky + tdipdip[4]*ukz;
fid[2] = tdipdip[2]*ukx + tdipdip[4]*uky + tdipdip[5]*ukz;
fip[0] = tdipdip[0]*ukxp + tdipdip[1]*ukyp + tdipdip[2]*ukzp;
fip[1] = tdipdip[1]*ukxp + tdipdip[3]*ukyp + tdipdip[4]*ukzp;
fip[2] = tdipdip[2]*ukxp + tdipdip[4]*ukyp + tdipdip[5]*ukzp;
_fieldp[0] += fid[0];
_fieldp[1] += fid[1];
_fieldp[2] += fid[2];
@ -1093,7 +1093,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
} // ii<inum
// accumulate field and fieldp
store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
}
@ -1221,7 +1221,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -1229,9 +1229,9 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
const numtyp4 pol1j = polar1[j];
numtyp ck = pol1j.x; // rpole[j][0];
numtyp dkx = pol1j.y; // rpole[j][1];
@ -1383,7 +1383,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
numtyp tiy3 = psr3*uky + dsr3*ukyp;
numtyp tiz3 = psr3*ukz + dsr3*ukzp;
numtyp tuir = -psr5*ukr - dsr5*ukrp;
ufld[0] += tix3 + xr*tuir;
ufld[1] += tiy3 + yr*tuir;
ufld[2] += tiz3 + zr*tuir;
@ -1394,14 +1394,14 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
numtyp tiy5 = (numtyp)2.0 * (psr5*uky+dsr5*ukyp);
numtyp tiz5 = (numtyp)2.0 * (psr5*ukz+dsr5*ukzp);
tuir = -psr7*ukr - dsr7*ukrp;
dufld[0] += xr*tix5 + xr*xr*tuir;
dufld[1] += xr*tiy5 + yr*tix5 + (numtyp)2.0*xr*yr*tuir;
dufld[2] += yr*tiy5 + yr*yr*tuir;
dufld[3] += xr*tiz5 + zr*tix5 + (numtyp)2.0*xr*zr*tuir;
dufld[4] += yr*tiz5 + zr*tiy5 + (numtyp)2.0*yr*zr*tuir;
dufld[5] += zr*tiz5 + zr*zr*tuir;
// get the dEd/dR terms used for direct polarization force
term1 = bn[2] - dsc3*rr5;
@ -1473,7 +1473,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
numtyp frcz = depz;
// get the dEp/dR terms used for direct polarization force
// tixx and tkxx
term1 = bn[2] - psc3*rr5;
term2 = bn[3] - psc5*rr7;
@ -1550,7 +1550,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
// get the dtau/dr terms used for mutual polarization force
// poltyp == MUTUAL && amoeba
term1 = bn[2] - usc3*rr5;
term2 = bn[3] - usc5*rr7;
term3 = usr5 + term1;
@ -1617,7 +1617,7 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
virial[5] += vyz;
}
} // nbor
} // ii<inum
// accumulate ufld and dufld to compute tep
@ -1648,7 +1648,7 @@ __kernel void k_special15(__global int * dev_nbor,
atom_info(t_per_atom,ii,tid,offset);
if (ii<inum) {
int numj, nbor, nbor_end;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
n_stride,nbor_end,nbor);

View File

@ -70,13 +70,13 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
UCL_D_Vec<numtyp4> coeff_amtype;
/// csix = coeff_amclass.x; adisp = coeff_amclass.y;
UCL_D_Vec<numtyp4> coeff_amclass;
/// Special polar values [0-4]:
/// Special polar values [0-4]:
/// sp_polar.x = special_polar_wscale
/// sp_polar.y special_polar_pscale,
/// sp_polar.z = special_polar_piscale
/// sp_polar.w = special_mpole
UCL_D_Vec<numtyp4> sp_polar;
/// Special nonpolar values [0-4]:
/// Special nonpolar values [0-4]:
/// sp_nonpolar.x = special_hal
/// sp_nonpolar.y special_repel
/// sp_nonpolar.z = special_disp
@ -97,7 +97,7 @@ class Amoeba : public BaseAmoeba<numtyp, acctyp> {
int udirect2b(const int eflag, const int vflag);
int umutual2b(const int eflag, const int vflag);
int polar_real(const int eflag, const int vflag);
};
}

View File

@ -106,7 +106,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
_threads_per_atom);
if (success!=0)
return success;
// Initialize host-device load balancer
hd_balancer.init(device,gpu_nbor,gpu_split);
@ -121,7 +121,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
_maxspecial=maxspecial;
_maxspecial15=maxspecial15;
// allocate per-atom array tep
// allocate per-atom array tep
int ef_nall=nlocal; //nall;
if (ef_nall==0)
@ -250,7 +250,7 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
const bool eflag_in, const bool vflag_in,
const bool eatom, const bool vatom,
int &host_start, const double cpu_time,
bool &success, const double aewald, const double felec,
bool &success, const double aewald, const double felec,
const double off2_polar, double *host_q, const int nlocal,
double *boxlo, double *prd, void **tep_ptr) {
acc_timers();
@ -280,7 +280,7 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
dev_special15_t.clear();
dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
dev_special15.alloc(_maxspecial15*nall,*(this->ucl_device),UCL_READ_ONLY);
dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
}
*tep_ptr=_tep.host.begin();
@ -320,7 +320,7 @@ void BaseAmoebaT::compute_polar_real_host_nbor(const int f_ago, const int inum_f
_off2_polar = off2_polar;
_felec = felec;
const int red_blocks=polar_real(eflag,vflag);
ans->copy_answers(eflag_in,vflag_in,eatom,vatom,ilist,red_blocks);
device->add_ans_object(ans);
hd_balancer.stop_timer();
@ -375,7 +375,7 @@ int** BaseAmoebaT::precompute(const int ago, const int inum_full, const int nall
dev_special15_t.clear();
dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
dev_special15.alloc(_maxspecial15*nall,*(this->ucl_device),UCL_READ_ONLY);
dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
dev_special15_t.alloc(nall*_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
}
if (inum_full==0) {
@ -462,7 +462,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
// reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed
// NOTE:
// NOTE:
// For now we invoke precompute() again here,
// to be able to turn on/off the udirect2b kernel (which comes before this)
// Once all the kernels are ready, precompute() is needed only once
@ -509,7 +509,7 @@ int** BaseAmoebaT::compute_multipole_real(const int ago, const int inum_full,
numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
}
*/
*/
return firstneigh; // nbor->host_jlist.begin()-host_start;
}
@ -560,7 +560,7 @@ int** BaseAmoebaT::compute_udirect2b(const int ago, const int inum_full,
eflag_in, vflag_in, eatom, vatom,
host_start, ilist, jnum, cpu_time,
success, host_q, boxlo, prd);
// ------------------- Resize _fieldp array ------------------------
if (inum_full>_max_fieldp_size) {
@ -698,7 +698,7 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
// reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed
// NOTE:
// NOTE:
// For now we invoke precompute() again here,
// to be able to turn on/off the udirect2b kernel (which comes before this)
// Once all the kernels are ready, precompute() is needed only once
@ -745,7 +745,7 @@ int** BaseAmoebaT::compute_polar_real(const int ago, const int inum_full,
numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
}
*/
*/
return firstneigh; // nbor->host_jlist.begin()-host_start;
}
@ -809,7 +809,7 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
int idx = n+i*nstride;
pextra[idx] = uinp[i][0];
pextra[idx+1] = uinp[i][1];
pextra[idx+2] = uinp[i][2];
pextra[idx+2] = uinp[i][2];
}
}
@ -818,7 +818,7 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
for (int i = 0; i < _nall; i++) {
int idx = n+i*nstride;
pextra[idx] = pval[i];
pextra[idx] = pval[i];
}
}
}
@ -846,7 +846,7 @@ void BaseAmoebaT::compile_kernels(UCL_Device &dev, const void *pair_str,
k_special15.set_function(*pair_program,"k_special15");
pos_tex.get_texture(*pair_program,"pos_tex");
q_tex.get_texture(*pair_program,"q_tex");
_compiled=true;
#if defined(USE_OPENCL) && (defined(CL_VERSION_2_1) || defined(CL_VERSION_3_0))
@ -874,13 +874,13 @@ int BaseAmoebaT::add_onefive_neighbors() {
int _nall=atom->nall();
int ainum=ans->inum();
int nbor_pitch=nbor->nbor_pitch();
k_special15.set_size(GX,BX);
k_special15.run(&nbor->dev_nbor, &_nbor_data->begin(),
&atom->dev_tag, &dev_nspecial15, &dev_special15,
&ainum, &_nall, &nbor_pitch,
&_threads_per_atom);
return GX;
}

View File

@ -287,7 +287,7 @@ class BaseAmoeba {
virtual int udirect2b(const int eflag, const int vflag) = 0;
virtual int umutual2b(const int eflag, const int vflag) = 0;
virtual int polar_real(const int eflag, const int vflag) = 0;
};
}

View File

@ -145,7 +145,7 @@ void HippoT::clear() {
coeff_amclass.clear();
sp_polar.clear();
sp_nonpolar.clear();
this->clear_atomic();
}
@ -199,7 +199,7 @@ int** HippoT::precompute(const int ago, const int inum_full, const int nall,
this->dev_special15_t.clear();
this->dev_nspecial15.alloc(nall,*(this->ucl_device),UCL_READ_ONLY);
this->dev_special15.alloc(this->_maxspecial15*nall,*(this->ucl_device),UCL_READ_ONLY);
this->dev_special15_t.alloc(nall*this->_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
this->dev_special15_t.alloc(nall*this->_maxspecial15,*(this->ucl_device),UCL_READ_ONLY);
}
if (inum_full==0) {
@ -286,7 +286,7 @@ int** HippoT::compute_dispersion_real(const int ago, const int inum_full,
// reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed
// NOTE:
// NOTE:
// For now we invoke precompute() again here,
// to be able to turn on/off the udirect2b kernel (which comes before this)
// Once all the kernels are ready, precompute() is needed only once
@ -339,7 +339,7 @@ int HippoT::dispersion_real(const int eflag, const int vflag) {
// Build the short neighbor list for the cutoff off2_disp,
// at this point mpole is the first kernel in a time step
this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(),
@ -397,7 +397,7 @@ int** HippoT::compute_multipole_real(const int ago, const int inum_full,
// reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed
// NOTE:
// NOTE:
// For now we invoke precompute() again here,
// to be able to turn on/off the udirect2b kernel (which comes before this)
// Once all the kernels are ready, precompute() is needed only once
@ -468,7 +468,7 @@ int HippoT::multipole_real(const int eflag, const int vflag) {
// Build the short neighbor list for the cutoff off2_mpole,
// at this point mpole is the first kernel in a time step
this->k_short_nbor.set_size(GX,BX);
this->k_short_nbor.run(&this->atom->x, &this->nbor->dev_nbor,
&this->_nbor_data->begin(),
@ -537,7 +537,7 @@ int** HippoT::compute_udirect2b(const int ago, const int inum_full,
eflag_in, vflag_in, eatom, vatom,
host_start, ilist, jnum, cpu_time,
success, host_q, boxlo, prd);
// ------------------- Resize _fieldp array ------------------------
if (inum_full>this->_max_fieldp_size) {
@ -569,7 +569,7 @@ int** HippoT::compute_udirect2b(const int ago, const int inum_full,
// ---------------------------------------------------------------------------
template <class numtyp, class acctyp>
int HippoT::udirect2b(const int eflag, const int vflag) {
int ainum=this->ans->inum();
int ainum=this->ans->inum();
if (ainum == 0)
return 0;
@ -591,7 +591,7 @@ int HippoT::udirect2b(const int eflag, const int vflag) {
&nbor_pitch, &this->_threads_per_atom);
this->short_nbor_polar_avail = true;
}
this->k_udirect2b.set_size(GX,BX);
this->k_udirect2b.run(&this->atom->x, &this->atom->extra,
&coeff_amtype, &coeff_amclass, &sp_polar,
@ -756,7 +756,7 @@ int** HippoT::compute_polar_real(const int ago, const int inum_full,
// reallocate per-atom arrays, transfer data from the host
// and build the neighbor lists if needed
// NOTE:
// NOTE:
// For now we invoke precompute() again here,
// to be able to turn on/off the udirect2b kernel (which comes before this)
// Once all the kernels are ready, precompute() is needed only once
@ -803,7 +803,7 @@ int** HippoT::compute_polar_real(const int ago, const int inum_full,
numtyp4* p = (numtyp4*)(&this->_tep[4*i]);
printf("i = %d; tep = %f %f %f\n", i, p->x, p->y, p->z);
}
*/
*/
return firstneigh; // nbor->host_jlist.begin()-host_start;
}

View File

@ -491,7 +491,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = ix.x - jx.x;
numtyp yr = ix.y - jx.y;
@ -499,7 +499,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
if (r2>off2) continue;
const numtyp4 pol1j = polar1[j];
numtyp ck = pol1j.x; // rpole[j][0];
numtyp dkx = pol1j.y; // rpole[j][1];
@ -514,7 +514,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp qkyz = pol3j.x; // rpole[j][9];
numtyp qkzz = pol3j.y; // rpole[j][12];
int jtype = pol3j.z; // amtype[j];
numtyp sizk = coeff[jtype].x; // sizpr[jtype];
numtyp dmpk = coeff[jtype].y; // dmppr[jtype];
numtyp valk = coeff[jtype].z; // elepr[jtype];
@ -534,12 +534,12 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
numtyp qkr = qkx*xr + qky*yr + qkz*zr;
numtyp dik = dix*dkx + diy*dky + diz*dkz;
numtyp qik = qix*qkx + qiy*qky + qiz*qkz;
numtyp diqk = dix*qkx + diy*qky + diz*qkz;
numtyp dkqi = dkx*qix + dky*qiy + dkz*qiz;
numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
qixx*qkxx + qiyy*qkyy + qizz*qkzz;
// additional intermediates involving moments and distance
@ -586,11 +586,11 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp dkqirx = dkqiz*yr - dkqiy*zr;
numtyp dkqiry = dkqix*zr - dkqiz*xr;
numtyp dkqirz = dkqiy*xr - dkqix*yr;
numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
(numtyp)2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
(numtyp)2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
(numtyp)2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
// get reciprocal distance terms for this interaction
@ -616,7 +616,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
numtyp term3 = vali*qkr + valk*qir - dir*dkr + (numtyp)2.0*(dkqi-diqk+qiqk);
numtyp term4 = dir*qkr - dkr*qir - 4.0*qik;
numtyp term5 = qir*qkr;
numtyp eterm = term1*dmpik[0] + term2*dmpik[2] +
numtyp eterm = term1*dmpik[0] + term2*dmpik[2] +
term3*dmpik[4] + term4*dmpik[6] + term5*dmpik[8];
// compute the Pauli repulsion energy for this interaction
@ -626,7 +626,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
// calculate intermediate terms for force and torque
numtyp de = term1*dmpik[2] + term2*dmpik[4] + term3*dmpik[6] +
numtyp de = term1*dmpik[2] + term2*dmpik[4] + term3*dmpik[6] +
term4*dmpik[8] + term5*dmpik[10];
term1 = -valk*dmpik[2] + dkr*dmpik[4] - qkr*dmpik[6];
term2 = vali*dmpik[2] + dir*dmpik[4] + qir*dmpik[6];
@ -637,23 +637,23 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
// compute the force components for this interaction
numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
term4*qix + term5*qkx + term6*(qixk+qkxi);
numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
term4*qiy + term5*qky + term6*(qiyk+qkyi);
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
term4*qiz + term5*qkz + term6*(qizk+qkzi);
frcx = frcx*rr1 + eterm*rr3*xr;
frcy = frcy*rr1 + eterm*rr3*yr;
frcz = frcz*rr1 + eterm*rr3*zr;
// compute the torque components for this interaction
numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
numtyp ttmix = -dmpik[2]*dikx + term1*dirx + term3*(dqikx+dkqirx) -
term4*qirx - term6*(qikrx+qikx);
numtyp ttmiy = -dmpik[2]*diky + term1*diry + term3*(dqiky+dkqiry) -
numtyp ttmiy = -dmpik[2]*diky + term1*diry + term3*(dqiky+dkqiry) -
term4*qiry - term6*(qikry+qiky);
numtyp ttmiz = -dmpik[2]*dikz + term1*dirz + term3*(dqikz+dkqirz) -
numtyp ttmiz = -dmpik[2]*dikz + term1*dirz + term3*(dqikz+dkqirz) -
term4*qirz - term6*(qikrz+qikz);
ttmix = sizik * ttmix * rr1;
ttmiy = sizik * ttmiy * rr1;
@ -706,7 +706,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
virial[5] += vyz;
}
} // nbor
} // ii<inum
// accumulate tq
@ -786,7 +786,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = ix.x - jx.x;
numtyp yr = ix.y - jx.y;
@ -794,7 +794,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
int jtype = polar3[j].z; // amtype[j];
int jclass = coeff_amtype[jtype].w; // amtype2class[jtype];
numtyp ck = coeff_amclass[jclass].x; // csix[jclass];
@ -816,7 +816,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
numtyp dk = ak * r;
numtyp expi = ucl_exp(-di);
numtyp expk = ucl_exp(-dk);
numtyp ai2,ak2;
numtyp di4,di5;
numtyp dk2,dk3;
@ -844,7 +844,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
- tk2*((numtyp)1.0 + dk + (numtyp)0.5*dk2 + dk3/(numtyp)6.0) * expk
- (numtyp)2.0*ti2*tk*((numtyp)1.0 + di + di2/(numtyp)3.0) * expi
- (numtyp)2.0*tk2*ti*((numtyp)1.0 + dk + dk2/(numtyp)3.0) * expk;
ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) +
ddamp = (numtyp)0.25 * di2 * ti2 * ai * expi * (r*ai+(numtyp)4.0*tk - (numtyp)1.0) +
(numtyp)0.25 * dk2 * tk2 * ak * expk * (r*ak+(numtyp)4.0*ti-(numtyp)1.0);
} else {
@ -856,7 +856,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
}
numtyp damp = (numtyp)1.5*damp5 - (numtyp)0.5*damp3;
// apply damping and scaling factors for this interaction
numtyp scale = factor_disp * damp*damp;
@ -892,7 +892,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
virial[4] += vzx;
virial[5] += vzy;
} // nbor
} // ii<inum
// accumate force, energy and virial
@ -997,7 +997,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -1005,7 +1005,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
const numtyp4 pol1j = polar1[j];
numtyp ck = pol1j.x; // rpole[j][0];
@ -1043,12 +1043,12 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp qky = qkxy*xr + qkyy*yr + qkyz*zr;
numtyp qkz = qkxz*xr + qkyz*yr + qkzz*zr;
numtyp qkr = qkx*xr + qky*yr + qkz*zr;
numtyp dik = dix*dkx + diy*dky + diz*dkz;
numtyp qik = qix*qkx + qiy*qky + qiz*qkz;
numtyp diqk = dix*qkx + diy*qky + diz*qkz;
numtyp dkqi = dkx*qix + dky*qiy + dkz*qiz;
numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
numtyp qiqk = (numtyp)2.0*(qixy*qkxy+qixz*qkxz+qiyz*qkyz) +
qixx*qkxx + qiyy*qkyy + qizz*qkzz;
// additional intermediates involving moments and distance
@ -1095,11 +1095,11 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp dkqirx = dkqiz*yr - dkqiy*zr;
numtyp dkqiry = dkqix*zr - dkqiz*xr;
numtyp dkqirz = dkqiy*xr - dkqix*yr;
numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
numtyp dqikx = diy*qkz - diz*qky + dky*qiz - dkz*qiy -
(numtyp)2.0*(qixy*qkxz+qiyy*qkyz+qiyz*qkzz - qixz*qkxy-qiyz*qkyy-qizz*qkyz);
numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
numtyp dqiky = diz*qkx - dix*qkz + dkz*qix - dkx*qiz -
(numtyp)2.0*(qixz*qkxx+qiyz*qkxy+qizz*qkxz - qixx*qkxz-qixy*qkyz-qixz*qkzz);
numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
numtyp dqikz = dix*qky - diy*qkx + dkx*qiy - dky*qix -
(numtyp)2.0*(qixx*qkxy+qixy*qkyy+qixz*qkyz - qixy*qkxx-qiyy*qkxy-qiyz*qkxz);
// get reciprocal distance terms for this interaction
@ -1164,16 +1164,16 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
numtyp rr11ik = bn[5] - ((numtyp)1.0-scalek*dmpij[10])*rr11;
rr1 = bn[0] - ((numtyp)1.0-scalek)*rr1;
rr3 = bn[1] - ((numtyp)1.0-scalek)*rr3;
numtyp e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik +
term1i*rr1i + term1k*rr1k + term1ik*rr1ik +
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
numtyp e = term1*rr1 + term4ik*rr7ik + term5ik*rr9ik +
term1i*rr1i + term1k*rr1k + term1ik*rr1ik +
term2i*rr3i + term2k*rr3k + term2ik*rr3ik +
term3i*rr5i + term3k*rr5k + term3ik*rr5ik;
// find damped multipole intermediates for force and torque
numtyp de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
term1i*rr3i + term1k*rr3k + term1ik*rr3ik +
term2i*rr5i + term2k*rr5k + term2ik*rr5ik +
numtyp de = term1*rr3 + term4ik*rr9ik + term5ik*rr11ik +
term1i*rr3i + term1k*rr3k + term1ik*rr3ik +
term2i*rr5i + term2k*rr5k + term2ik*rr5ik +
term3i*rr7i + term3k*rr7k + term3ik*rr7ik;
term1 = -corek*rr3i - valk*rr3ik + dkr*rr5ik - qkr*rr7ik;
term2 = corei*rr3k + vali*rr3ik + dir*rr5ik + qir*rr7ik;
@ -1187,20 +1187,20 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
// compute the force components for this interaction
numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
numtyp frcx = de*xr + term1*dix + term2*dkx + term3*(diqkx-dkqix) +
term4*qix + term5*qkx + term6*(qixk+qkxi);
numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
numtyp frcy = de*yr + term1*diy + term2*dky + term3*(diqky-dkqiy) +
term4*qiy + term5*qky + term6*(qiyk+qkyi);
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
numtyp frcz = de*zr + term1*diz + term2*dkz + term3*(diqkz-dkqiz) +
term4*qiz + term5*qkz + term6*(qizk+qkzi);
// compute the torque components for this interaction
numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) -
numtyp ttmix = -rr3*dikx + term1*dirx + term3*(dqikx+dkqirx) -
term4*qirx - term6*(qikrx+qikx);
numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) -
numtyp ttmiy = -rr3*diky + term1*diry + term3*(dqiky+dkqiry) -
term4*qiry - term6*(qikry+qiky);
numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) -
numtyp ttmiz = -rr3*dikz + term1*dirz + term3*(dqikz+dkqirz) -
term4*qirz - term6*(qikrz+qikz);
// increment force-based gradient and torque on first site
@ -1228,12 +1228,12 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
virial[5] += vyz;
}
} // nbor
} // ii<inum
// accumulate tq
store_answers_hippo_tq(tq,ii,inum,tid,t_per_atom,offset,i,tep);
// accumate force, energy and virial: use _acc if not the first kernel
//store_answers_q(f,energy,e_coul,virial,ii,inum,tid,t_per_atom,
//offset,eflag,vflag,ans,engv);
@ -1327,7 +1327,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -1335,7 +1335,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
numtyp rinv = ucl_recip(r);
numtyp r2inv = rinv*rinv;
@ -1408,7 +1408,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
// find the field components for charge penetration damping
numtyp dmpi[7],dmpk[7];
dampdir(r,alphai,alphak,dmpi,dmpk);
numtyp scalek = factor_dscale;
numtyp rr3i = bn[1] - ((numtyp)1.0-scalek*dmpi[2])*rr3;
numtyp rr5i = bn[2] - ((numtyp)1.0-scalek*dmpi[4])*rr5;
@ -1439,7 +1439,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
rr3k*dky + (numtyp)2.0*rr5k*qky;
fip[2] = -zr*(rr3*corek + rr3k*valk - rr5k*dkr + rr7k*qkr) -
rr3k*dkz + (numtyp)2.0*rr5k*qkz;
// find terms needed later to compute mutual polarization
_fieldp[0] += fid[0];
@ -1453,7 +1453,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
} // ii<inum
// accumulate field and fieldp
store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
}
@ -1514,7 +1514,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
int itype,igroup;
numtyp bn[4],bcn[3];
numtyp fid[3],fip[3];
itype = polar3[i].z; // amtype[i];
igroup = polar3[i].w; // amgroup[i];
@ -1534,7 +1534,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -1542,7 +1542,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
numtyp rinv = ucl_recip(r);
numtyp r2inv = rinv*rinv;
@ -1595,7 +1595,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
}
// find terms needed later to compute mutual polarization
// if (poltyp != DIRECT)
// if (poltyp != DIRECT)
numtyp dmpik[5];
dampmut(r,alphai,alphak,dmpik);
numtyp scalek = factor_wscale;
@ -1610,17 +1610,17 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
tdipdip[3] = -rr3ik + rr5ik*yr*yr;
tdipdip[4] = rr5ik*yr*zr;
tdipdip[5] = -rr3ik + rr5ik*zr*zr;
//if (i==0 && j == 10)
//if (i==0 && j == 10)
// printf("i = %d: j = %d: tdipdip %f %f %f %f %f %f\n",
// i, j,tdipdip[0],tdipdip[1],tdipdip[2],tdipdip[3],tdipdip[4],tdipdip[5]);
fid[0] = tdipdip[0]*ukx + tdipdip[1]*uky + tdipdip[2]*ukz;
fid[1] = tdipdip[1]*ukx + tdipdip[3]*uky + tdipdip[4]*ukz;
fid[2] = tdipdip[2]*ukx + tdipdip[4]*uky + tdipdip[5]*ukz;
fip[0] = tdipdip[0]*ukxp + tdipdip[1]*ukyp + tdipdip[2]*ukzp;
fip[1] = tdipdip[1]*ukxp + tdipdip[3]*ukyp + tdipdip[4]*ukzp;
fip[2] = tdipdip[2]*ukxp + tdipdip[4]*ukyp + tdipdip[5]*ukzp;
_fieldp[0] += fid[0];
_fieldp[1] += fid[1];
_fieldp[2] += fid[2];
@ -1632,7 +1632,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
} // ii<inum
// accumulate field and fieldp
store_answers_fieldp(_fieldp,ii,inum,tid,t_per_atom,offset,i,fieldp);
}
@ -1754,7 +1754,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
//int jtype=jx.w;
// Compute r12
numtyp xr = jx.x - ix.x;
numtyp yr = jx.y - ix.y;
@ -1762,7 +1762,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
numtyp r2 = xr*xr + yr*yr + zr*zr;
//if (r2>off2) continue;
numtyp r = ucl_sqrt(r2);
const numtyp4 pol1j = polar1[j];
@ -1905,7 +1905,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
// get the field gradient for direct polarization force
numtyp term1i,term2i,term3i,term4i,term5i,term6i,term7i,term8i;
numtyp term1k,term2k,term3k,term4k,term5k,term6k,term7k,term8k;
numtyp term1core;
@ -1987,7 +1987,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
dir*term4i - qixy*term5i + qiy*term6i + qix*term7i - qir*term8i;
tkxy = -valk*term1k - corek*term1core - dky*term2k - dkx*term3k +
dkr*term4k - qkxy*term5k + qky*term6k + qkx*term7k - qkr*term8k;
term2i = rr5i*xr;
term1i = zr * term2i;
term1core = rr5core*xr*zr;
@ -2039,7 +2039,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
numtyp frcx = (numtyp)-2.0 * depx;
numtyp frcy = (numtyp)-2.0 * depy;
numtyp frcz = (numtyp)-2.0 * depz;
// get the dEp/dR terms used for direct polarization force
// poltyp == MUTUAL && hippo
// tixx and tkxx
@ -2108,7 +2108,7 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
virial[5] += vyz;
}
} // nbor
} // ii<inum
// accumulate ufld and dufld to compute tep
@ -2139,7 +2139,7 @@ __kernel void k_special15(__global int * dev_nbor,
atom_info(t_per_atom,ii,tid,offset);
if (ii<inum) {
int numj, nbor, nbor_end;
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
n_stride,nbor_end,nbor);

View File

@ -154,13 +154,13 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
UCL_D_Vec<numtyp4> coeff_amtype;
/// csix = coeff_amclass.x; adisp = coeff_amclass.y;
UCL_D_Vec<numtyp4> coeff_amclass;
/// Special polar values [0-4]:
/// Special polar values [0-4]:
/// sp_polar.x = special_polar_wscale
/// sp_polar.y special_polar_pscale,
/// sp_polar.z = special_polar_piscale
/// sp_polar.w = special_mpole
UCL_D_Vec<numtyp4> sp_polar;
/// Special nonpolar values [0-4]:
/// Special nonpolar values [0-4]:
/// sp_nonpolar.x = special_hal
/// sp_nonpolar.y special_repel
/// sp_nonpolar.z = special_disp
@ -184,7 +184,7 @@ class Hippo : public BaseAmoeba<numtyp, acctyp> {
int udirect2b(const int eflag, const int vflag);
int umutual2b(const int eflag, const int vflag);
int polar_real(const int eflag, const int vflag);
};
}

View File

@ -129,7 +129,7 @@ int** hippo_gpu_compute_dispersion_real(const int ago, const int inum_full,
const bool vatom, int &host_start,
int **ilist, int **jnum, const double cpu_time,
bool &success, const double aewald, const double off2,
double *host_q, double *boxlo, double *prd) {
double *host_q, double *boxlo, double *prd) {
return HIPPOMF.compute_dispersion_real(ago, inum_full, nall, host_x, host_type,
host_amtype, host_amgroup, host_rpole, sublo, subhi,
tag, nspecial, special, nspecial15, special15,
@ -175,7 +175,7 @@ int** hippo_gpu_compute_udirect2b(const int ago, const int inum_full,
int** hippo_gpu_compute_umutual2b(const int ago, const int inum_full,
const int nall, double **host_x, int *host_type,
int *host_amtype, int *host_amgroup, double **host_rpole,
double **host_uind, double **host_uinp, double *host_pval,
double **host_uind, double **host_uinp, double *host_pval,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int *nspecial15, tagint** special15,
const bool eflag, const bool vflag,

View File

@ -116,46 +116,46 @@ ucl_inline void damprep(const numtyp r, const numtyp r2, const numtyp rr1,
tmp = (numtyp)4.0 * dmpi2 * dmpk2 / term;
s = (dampi-tmp)*expk + (dampk+tmp)*expi;
ds = (dmpi2*dmpk2*r2 - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
ds = (dmpi2*dmpk2*r2 - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi2*dmpk2*r2 + (numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0*dmpi2*dmpk2/term) * expi;
d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/(numtyp)3.0 -
((numtyp)4.0/(numtyp)3.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 +
((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
d2s = (dmpi2*dmpk2*r2/3.0 + dmpi2*dmpk22*r3/(numtyp)3.0 -
((numtyp)4.0/(numtyp)3.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi2*dmpk2*r2/(numtyp)3.0 + dmpi22*dmpk2*r3/(numtyp)3.0 +
((numtyp)4.0/(numtyp)3.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
(numtyp)4.0*dmpi2*dmpk2/term) * expi;
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
(4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
(numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
(dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 +
((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term +
d3s = (dmpi2*dmpk23*r4/15.0 + dmpi2*dmpk22*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 -
(4.0/15.0)*dmpi2*dmpk24*r3/term - ((numtyp)8.0/(numtyp)5.0)*dmpi2*dmpk23*r2/term -
(numtyp)4.0*dmpi2*dmpk22*r/term - 4.0/term*dmpi2*dmpk2) * expk +
(dmpi23*dmpk2*r4/(numtyp)15.0 + dmpi22*dmpk2*r3/(numtyp)5.0 + dmpi2*dmpk2*r2/(numtyp)5.0 +
((numtyp)4.0/(numtyp)15.0)*dmpi24*dmpk2*r3/term + ((numtyp)8.0/(numtyp)5.0)*dmpi23*dmpk2*r2/term +
(numtyp)4.0*dmpi22*dmpk2*r/term + (numtyp)4.0/term*dmpi2*dmpk2) * expi;
d4s = (dmpi2*dmpk24*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi2*dmpk23*r4 +
dmpi2*dmpk22*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 -
((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term -
((numtyp)12.0/(numtyp)7.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 +
((numtyp)4.0/(numtyp)105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/(numtyp)21.0)*dmpi24*dmpk2*r3/term +
((numtyp)12.0/(numtyp)7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
d4s = (dmpi2*dmpk24*r5/(numtyp)105.0 + ((numtyp)2.0/(numtyp)35.0)*dmpi2*dmpk23*r4 +
dmpi2*dmpk22*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 -
((numtyp)4.0/(numtyp)105.0)*dmpi2*dmpk25*r4/term - ((numtyp)8.0/21.0)*dmpi2*dmpk24*r3/term -
((numtyp)12.0/(numtyp)7.0)*dmpi2*dmpk23*r2/term - (numtyp)4.0*dmpi2*dmpk22*r/term -
(numtyp)4.0*dmpi2*dmpk2/term) * expk +
(dmpi24*dmpk2*r5/(numtyp)105.0 + (2.0/(numtyp)35.0)*dmpi23*dmpk2*r4 +
dmpi22*dmpk2*r3/(numtyp)7.0 + dmpi2*dmpk2*r2/(numtyp)7.0 +
((numtyp)4.0/(numtyp)105.0)*dmpi25*dmpk2*r4/term + ((numtyp)8.0/(numtyp)21.0)*dmpi24*dmpk2*r3/term +
((numtyp)12.0/(numtyp)7.0)*dmpi23*dmpk2*r2/term + (numtyp)4.0*dmpi22*dmpk2*r/term +
(numtyp)4.0*dmpi2*dmpk2/term) * expi;
if (rorder >= 11) {
r6 = r5 * r;
dmpi26 = dmpi25 * dmpi2;
dmpk26 = dmpk25 * dmpk2;
d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 +
dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 -
(4.0/945.0)*dmpi2*dmpk26*r5/term -
(4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term -
(16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
4.0*dmpi2*dmpk2/term) * expk +
(dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 +
dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 +
(4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term +
(4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term +
d5s = (dmpi2*dmpk25*r6/945.0 + (2.0/189.0)*dmpi2*dmpk24*r5 +
dmpi2*dmpk23*r4/21.0 + dmpi2*dmpk22*r3/9.0 + dmpi2*dmpk2*r2/9.0 -
(4.0/945.0)*dmpi2*dmpk26*r5/term -
(4.0/63.0)*dmpi2*dmpk25*r4/term - (4.0/9.0)*dmpi2*dmpk24*r3/term -
(16.0/9.0)*dmpi2*dmpk23*r2/term - 4.0*dmpi2*dmpk22*r/term -
4.0*dmpi2*dmpk2/term) * expk +
(dmpi25*dmpk2*r6/945.0 + (2.0/189.0)*dmpi24*dmpk2*r5 +
dmpi23*dmpk2*r4/21.0 + dmpi22*dmpk2*r3/9.0 + dmpi2*dmpk2*r2/9.0 +
(4.0/945.0)*dmpi26*dmpk2*r5/term + (4.0/63.0)*dmpi25*dmpk2*r4/term +
(4.0/9.0)*dmpi24*dmpk2*r3/term + (16.0/9.0)*dmpi23*dmpk2*r2/term +
4.0*dmpi22*dmpk2*r/term + 4.0*dmpi2*dmpk2/term) * expi;
}
}
@ -214,7 +214,7 @@ ucl_inline void damppole(const numtyp r, const int rorder,
// compute tolerance and exponential damping factors
eps = (numtyp)0.001;
diff = alphai-alphak;
diff = alphai-alphak;
if (diff < (numtyp)0) diff = -diff;
dampi = alphai * r;
dampk = alphak * r;
@ -231,7 +231,7 @@ ucl_inline void damppole(const numtyp r, const int rorder,
dmpi[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2)*expi;
dmpi[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi;
dmpi[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi;
dmpi[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dmpi[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
(numtyp)4.0*dampi4/(numtyp)105.0 + dampi5/(numtyp)210.0)*expi;
if (diff < eps) {
dmpk[0] = dmpi[0];
@ -248,7 +248,7 @@ ucl_inline void damppole(const numtyp r, const int rorder,
dmpk[2] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2)*expk;
dmpk[4] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk;
dmpk[6] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/(numtyp)30.0)*expk;
dmpk[8] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 +
dmpk[8] = (numtyp)1.0 - ((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 +
(numtyp)4.0*dampk4/(numtyp)105.0 + dampk5/(numtyp)210.0)*expk;
}
@ -257,21 +257,21 @@ ucl_inline void damppole(const numtyp r, const int rorder,
if (diff < eps) {
dampi6 = dampi3 * dampi3;
dampi7 = dampi3 * dampi4;
dmpik[0] = (numtyp)1.0 - ((numtyp)1.0 + (numtyp)11.0*dampi/(numtyp)16.0 + (numtyp)3.0*dampi2/(numtyp)16.0 +
dmpik[0] = (numtyp)1.0 - ((numtyp)1.0 + (numtyp)11.0*dampi/(numtyp)16.0 + (numtyp)3.0*dampi2/(numtyp)16.0 +
dampi3/(numtyp)48.0)*expi;
dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
(numtyp)7.0*dampi3/(numtyp)48.0 + dampi4/(numtyp)48.0)*expi;
dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dampi4/(numtyp)24.0 + dampi5/(numtyp)144.0)*expi;
dmpik[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dmpik[6] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0)*expi;
dmpik[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 +
dmpik[8] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 +
dampi7/(numtyp)5040.0)*expi;
if (rorder >= 11) {
dampi8 = dampi4 * dampi4;
dmpik[10] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 +
dampi4/(numtyp)24.0 + dampi5/(numtyp)120.0 + dampi6/(numtyp)720.0 +
dampi7/(numtyp)5040.0 + dampi8/(numtyp)45360.0)*expi;
}
@ -282,41 +282,41 @@ ucl_inline void damppole(const numtyp r, const int rorder,
termk = alphai2 / (alphai2-alphak2);
termi2 = termi * termi;
termk2 = termk * termk;
dmpik[0] = (numtyp)1.0 - termi2*(1.0 + (numtyp)2.0*termk + (numtyp)0.5*dampi)*expi -
dmpik[0] = (numtyp)1.0 - termi2*(1.0 + (numtyp)2.0*termk + (numtyp)0.5*dampi)*expi -
termk2*((numtyp)1.0 + (numtyp)2.0*termi + (numtyp)0.5*dampk)*expk;
dmpik[2] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2)*expi -
termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0+dampi)*expi -
(numtyp)2.0*termk2*termi*((numtyp)1.0+dampk)*expk;
dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi -
termk2*(1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + dampi2/(numtyp)3.0)*expi -
dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi -
termk2*(1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + dampi2/(numtyp)3.0)*expi -
(numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + dampk2/(numtyp)3.0)*expk;
dmpik[6] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi -
termk2*((numtyp)1.0 + dampk + 0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/(numtyp)30.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)2.0*dampi2/(numtyp)5.0 + dampi3/(numtyp)15.0)*expi -
dmpik[6] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
dampi3/(numtyp)6.0 + dampi4/(numtyp)30.0)*expi -
termk2*((numtyp)1.0 + dampk + 0.5*dampk2 + dampk3/(numtyp)6.0 + dampk4/(numtyp)30.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)2.0*dampi2/(numtyp)5.0 + dampi3/(numtyp)15.0)*expi -
(numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + (numtyp)2.0*dampk2/(numtyp)5.0 + dampk3/(numtyp)15.0)*expk;
dmpik[8] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
(numtyp)4.0*dampi4/(numtyp)105.0 + dampi5/(numtyp)210.0)*expi -
termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 +
(numtyp)4.0*dampk4/105.0 + dampk5/(numtyp)210.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)3.0*dampi2/(numtyp)7.0 +
(numtyp)2.0*dampi3/(numtyp)21.0 + dampi4/(numtyp)105.0)*expi -
(numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + (numtyp)3.0*dampk2/(numtyp)7.0 +
dmpik[8] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
(numtyp)4.0*dampi4/(numtyp)105.0 + dampi5/(numtyp)210.0)*expi -
termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 +
(numtyp)4.0*dampk4/105.0 + dampk5/(numtyp)210.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)3.0*dampi2/(numtyp)7.0 +
(numtyp)2.0*dampi3/(numtyp)21.0 + dampi4/(numtyp)105.0)*expi -
(numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + (numtyp)3.0*dampk2/(numtyp)7.0 +
(numtyp)2.0*dampk3/(numtyp)21.0 + dampk4/(numtyp)105.0)*expk;
if (rorder >= 11) {
dampi6 = dampi3 * dampi3;
dampk6 = dampk3 * dampk3;
dmpik[10] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
(numtyp)5.0*dampi4/(numtyp)126.0 + (numtyp)2.0*dampi5/(numtyp)315.0 +
dampi6/(numtyp)1890.0)*expi -
termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + (numtyp)5.0*dampk4/(numtyp)126.0 +
(numtyp)2.0*dampk5/(numtyp)315.0 + dampk6/(numtyp)1890.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)4.0*dampi2/(numtyp)9.0 + dampi3/(numtyp)9.0 +
dampi4/(numtyp)63.0 + dampi5/(numtyp)945.0)*expi -
(numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + 4.0*dampk2/(numtyp)9.0 + dampk3/(numtyp)9.0 +
dmpik[10] = (numtyp)1.0 - termi2*((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
(numtyp)5.0*dampi4/(numtyp)126.0 + (numtyp)2.0*dampi5/(numtyp)315.0 +
dampi6/(numtyp)1890.0)*expi -
termk2*((numtyp)1.0 + dampk + (numtyp)0.5*dampk2 + dampk3/(numtyp)6.0 + (numtyp)5.0*dampk4/(numtyp)126.0 +
(numtyp)2.0*dampk5/(numtyp)315.0 + dampk6/(numtyp)1890.0)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0 + dampi + (numtyp)4.0*dampi2/(numtyp)9.0 + dampi3/(numtyp)9.0 +
dampi4/(numtyp)63.0 + dampi5/(numtyp)945.0)*expi -
(numtyp)2.0*termk2*termi*((numtyp)1.0 + dampk + 4.0*dampk2/(numtyp)9.0 + dampk3/(numtyp)9.0 +
dampk4/(numtyp)63.0 + dampk5/(numtyp)945.0)*expk;
}
}
@ -404,9 +404,9 @@ ucl_inline void dampmut(numtyp r, numtyp alphai, numtyp alphak, numtyp dmpik[5])
if (diff < eps) {
dampi4 = dampi2 * dampi2;
dampi5 = dampi2 * dampi3;
dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
dmpik[2] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 +
7.0*dampi3/(numtyp)48.0 + dampi4/48.0)*expi;
dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dmpik[4] = (numtyp)1.0 - ((numtyp)1.0 + dampi + (numtyp)0.5*dampi2 + dampi3/(numtyp)6.0 +
dampi4/(numtyp)24.0 + dampi5/(numtyp)144.0)*expi;
} else {
dampk2 = dampk * dampk;
@ -417,12 +417,12 @@ ucl_inline void dampmut(numtyp r, numtyp alphai, numtyp alphak, numtyp dmpik[5])
termk = alphai2 / (alphai2-alphak2);
termi2 = termi * termi;
termk2 = termk * termk;
dmpik[2] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2)*expi -
termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2)*expk -
dmpik[2] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2)*expi -
termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2)*expk -
(numtyp)2.0*termi2*termk*((numtyp)1.0+dampi)*expi - (numtyp)2.0*termk2*termi*((numtyp)1.0+dampk)*expk;
dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi -
termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2 + dampk3/(numtyp)6.00)*expk -
(numtyp)2.0*termi2*termk *((numtyp)1.0+dampi+dampi2/(numtyp)3.0)*expi -
dmpik[4] = (numtyp)1.0 - termi2*((numtyp)1.0+dampi+(numtyp)0.5*dampi2 + dampi3/(numtyp)6.0)*expi -
termk2*((numtyp)1.0+dampk+(numtyp)0.5*dampk2 + dampk3/(numtyp)6.00)*expk -
(numtyp)2.0*termi2*termk *((numtyp)1.0+dampi+dampi2/(numtyp)3.0)*expi -
(numtyp)2.0*termk2*termi *((numtyp)1.0+dampk+dampk2/(numtyp)3.0)*expk;
}
}

View File

@ -91,6 +91,8 @@ action pair_gauss_gpu.cpp pair_gauss.cpp
action pair_gauss_gpu.h pair_gauss.h
action pair_gayberne_gpu.cpp pair_gayberne.cpp
action pair_gayberne_gpu.h pair_gayberne.cpp
action pair_hippo_gpu.cpp pair_hippo.cpp
action pair_hippo_gpu.h pair_hippo.cpp
action pair_lj96_cut_gpu.cpp pair_lj96_cut.cpp
action pair_lj96_cut_gpu.h pair_lj96_cut.h
action pair_lj_charmm_coul_long_gpu.cpp pair_lj_charmm_coul_long.cpp

View File

@ -76,7 +76,7 @@ int ** amoeba_gpu_compute_multipole_real(const int ago, const int inum, const in
int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
double **host_rpole, double **host_uind, double **host_uinp,
double **host_rpole, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int* nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@ -86,7 +86,7 @@ int ** amoeba_gpu_compute_udirect2b(const int ago, const int inum, const int nal
int ** amoeba_gpu_compute_umutual2b(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
double **host_rpole, double **host_uind, double **host_uinp,
double **host_rpole, double **host_uind, double **host_uinp,
double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int* nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@ -170,7 +170,7 @@ void PairAmoebaGPU::init_style()
maxspecial=atom->maxspecial;
maxspecial15=atom->maxspecial15;
}
int tq_size;
int mnf = 5e-2 * neighbor->oneatom;
int success = amoeba_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
@ -207,7 +207,7 @@ void PairAmoebaGPU::multipole_real()
bool success = true;
int *ilist, *numneigh, **firstneigh;
double sublo[3],subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
@ -239,7 +239,7 @@ void PairAmoebaGPU::multipole_real()
host_start, &ilist, &numneigh, cpu_time,
success, aewald, felec, off2, atom->q,
domain->boxlo, domain->prd, &tq_pinned);
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
@ -281,7 +281,7 @@ void PairAmoebaGPU::induce()
// set cutoffs, taper coeffs, and PME params
// create qfac here, free at end of polar()
if (use_ewald) {
choose(POLAR_LONG);
int nmine = p_kspace->nfft_owned;
@ -317,7 +317,7 @@ void PairAmoebaGPU::induce()
memory->create(usump,nlocal,3,"ameoba/induce:usump");
// get the electrostatic field due to permanent multipoles
dfield0c(field,fieldp);
// need reverse_comm_pair if dfield0c (i.e. udirect2b) is CPU-only
@ -345,7 +345,7 @@ void PairAmoebaGPU::induce()
for (i = 0; i < 10; i++) {
printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n",
i, udir[i][0], udir[i][1], udir[i][2],
udirp[i][0], udirp[i][1], udirp[i][2]);
udirp[i][0], udirp[i][1], udirp[i][2]);
}
*/
// get induced dipoles via the OPT extrapolation method
@ -353,7 +353,7 @@ void PairAmoebaGPU::induce()
// uopt,uoptp with a optorder+1 dimension, just optorder ??
// since no need to store optorder+1 values after these loops
if (poltyp == OPT) {
if (poltyp == OPT) {
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
uopt[i][0][j] = udir[i][j];
@ -460,7 +460,7 @@ void PairAmoebaGPU::induce()
crstyle = FIELD;
comm->reverse_comm_pair(this);
}
//error->all(FLERR,"STOP GPU");
// set initial conjugate gradient residual and conjugate vector
@ -486,7 +486,7 @@ void PairAmoebaGPU::induce()
cfstyle = RSD;
comm->forward_comm_pair(this);
uscale0b(BUILD,rsd,rsdp,zrsd,zrsdp);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
crstyle = ZRSD;
comm->reverse_comm_pair(this);
}
@ -574,7 +574,7 @@ void PairAmoebaGPU::induce()
if (pcgprec) {
cfstyle = RSD;
comm->forward_comm_pair(this);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
crstyle = ZRSD;
comm->reverse_comm_pair(this);
}
@ -629,7 +629,7 @@ void PairAmoebaGPU::induce()
if (iter >= politer) done = true;
// apply a "peek" iteration to the mutual induced dipoles
if (done) {
for (i = 0; i < nlocal; i++) {
term = pcgpeek * poli[i];
@ -644,7 +644,7 @@ void PairAmoebaGPU::induce()
// terminate the calculation if dipoles failed to converge
// NOTE: could make this an error
if (iter >= maxiter || eps > epsold)
if (me == 0)
error->warning(FLERR,"AMOEBA induced dipoles did not converge");
@ -652,7 +652,7 @@ void PairAmoebaGPU::induce()
// DEBUG output to dump file
if (uind_flag)
if (uind_flag)
dump6(fp_uind,"id uindx uindy uindz uinpx uinpy uinpz",DEBYE,uind,uinp);
// deallocation of arrays
@ -700,7 +700,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
PairAmoeba::udirect2b(field, fieldp);
return;
}
int eflag=1, vflag=1;
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -753,7 +753,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
int idx = 4*i;
field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2];
field[i][2] += field_ptr[idx+2];
}
double* fieldp_ptr = (double *)fieldp_pinned;
@ -764,7 +764,7 @@ void PairAmoebaGPU::udirect2b(double **field, double **fieldp)
fieldp[i][1] += fieldp_ptr[idx+1];
fieldp[i][2] += fieldp_ptr[idx+2];
}
}
/* ----------------------------------------------------------------------
@ -802,7 +802,7 @@ void PairAmoebaGPU::udirect2b_cpu()
firstneigh = list->firstneigh;
// NOTE: doesn't this have a problem if aewald is tiny ??
aesq2 = 2.0 * aewald * aewald;
aesq2n = 0.0;
if (aewald > 0.0) aesq2n = 1.0 / (MY_PIS*aewald);
@ -829,13 +829,13 @@ void PairAmoebaGPU::udirect2b_cpu()
pdi = pdamp[itype];
pti = thole[itype];
ddi = dirdamp[itype];
// evaluate all sites within the cutoff distance
for (jj = 0; jj < jnum; jj++) {
jextra = jlist[jj];
j = jextra & NEIGHMASK15;
xr = x[j][0] - x[i][0];
yr = x[j][1] - x[i][1];
zr = x[j][2] - x[i][2];
@ -844,7 +844,7 @@ void PairAmoebaGPU::udirect2b_cpu()
jtype = amtype[j];
jgroup = amgroup[j];
factor_wscale = special_polar_wscale[sbmask15(jextra)];
if (igroup == jgroup) {
factor_pscale = special_polar_piscale[sbmask15(jextra)];
@ -872,7 +872,7 @@ void PairAmoebaGPU::udirect2b_cpu()
aefac = aesq2 * aefac;
bn[m] = (bfac*bn[m-1]+aefac*exp2a) * rr2;
}
// find terms needed later to compute mutual polarization
if (poltyp != DIRECT) {
@ -891,7 +891,7 @@ void PairAmoebaGPU::udirect2b_cpu()
scalek = factor_uscale;
bcn[0] = bn[1] - (1.0-scalek*scale3)*rr3;
bcn[1] = bn[2] - (1.0-scalek*scale5)*rr5;
neighptr[n++] = j;
tdipdip[ndip++] = -bcn[0] + bcn[1]*xr*xr;
tdipdip[ndip++] = bcn[1]*xr*yr;
@ -902,7 +902,7 @@ void PairAmoebaGPU::udirect2b_cpu()
} else {
if (comm->me == 0) printf("i = %d: j = %d: poltyp == DIRECT\n", i, j);
}
} // jj
firstneigh_dipole[i] = neighptr;
@ -973,7 +973,7 @@ void PairAmoebaGPU::umutual2b(double **field, double **fieldp)
int idx = 4*i;
field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2];
field[i][2] += field_ptr[idx+2];
}
double* fieldp_ptr = (double *)fieldp_pinned;
@ -1001,7 +1001,7 @@ void PairAmoebaGPU::polar_real()
bool success = true;
int *ilist, *numneigh, **firstneigh;
double sublo[3],subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
@ -1033,7 +1033,7 @@ void PairAmoebaGPU::polar_real()
host_start, &ilist, &numneigh, cpu_time,
success, aewald, felec, off2, atom->q,
domain->boxlo, domain->prd, &tq_pinned);
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
@ -1091,11 +1091,11 @@ void PairAmoebaGPU::compute_force_from_torque(const numtyp* tq_ptr,
vxx = xix*fix[0] + xiy*fiy[0] + xiz*fiz[0];
vyy = yix*fix[1] + yiy*fiy[1] + yiz*fiz[1];
vzz = zix*fix[2] + ziy*fiy[2] + ziz*fiz[2];
vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
xix*fix[1] + xiy*fiy[1] + xiz*fiz[1]);
vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
xix*fix[2] + xiy*fiy[2] + xiz*fiz[2]);
vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
yix*fix[2] + yiy*fiy[2] + yiz*fiz[2]);
virial_comp[0] += vxx;

View File

@ -88,7 +88,7 @@ int ** hippo_gpu_compute_multipole_real(const int ago, const int inum, const int
int ** hippo_gpu_compute_udirect2b(const int ago, const int inum, const int nall,
double **host_x, int *host_type, int *host_amtype, int *host_amgroup,
double **host_rpole, double **host_uind, double **host_uinp,
double **host_rpole, double **host_uind, double **host_uinp,
double *host_pval, double *sublo, double *subhi, tagint *tag, int **nspecial,
tagint **special, int* nspecial15, tagint** special15,
const bool eflag, const bool vflag, const bool eatom, const bool vatom,
@ -185,7 +185,7 @@ void PairHippoGPU::init_style()
maxspecial=atom->maxspecial;
maxspecial15=atom->maxspecial15;
}
int tq_size;
int mnf = 5e-2 * neighbor->oneatom;
int success = hippo_gpu_init(atom->ntypes+1, max_amtype, max_amclass,
@ -222,7 +222,7 @@ void PairHippoGPU::dispersion_real()
bool success = true;
int *ilist, *numneigh, **firstneigh;
double sublo[3],subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
@ -250,7 +250,7 @@ void PairHippoGPU::dispersion_real()
host_start, &ilist, &numneigh, cpu_time,
success, aewald, off2, atom->q,
domain->boxlo, domain->prd);
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
}
@ -270,7 +270,7 @@ void PairHippoGPU::multipole_real()
bool success = true;
int *ilist, *numneigh, **firstneigh;
double sublo[3],subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
@ -302,7 +302,7 @@ void PairHippoGPU::multipole_real()
host_start, &ilist, &numneigh, cpu_time,
success, aewald, felec, off2, atom->q,
domain->boxlo, domain->prd, &tq_pinned);
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
@ -344,7 +344,7 @@ void PairHippoGPU::induce()
// set cutoffs, taper coeffs, and PME params
// create qfac here, free at end of polar()
if (use_ewald) {
choose(POLAR_LONG);
int nmine = p_kspace->nfft_owned;
@ -380,7 +380,7 @@ void PairHippoGPU::induce()
memory->create(usump,nlocal,3,"ameoba/induce:usump");
// get the electrostatic field due to permanent multipoles
dfield0c(field,fieldp);
// need reverse_comm_pair if dfield0c (i.e. udirect2b) is CPU-only
@ -408,7 +408,7 @@ void PairHippoGPU::induce()
for (i = 0; i < 10; i++) {
printf("i = %d: udir = %f %f %f; udirp = %f %f %f\n",
i, udir[i][0], udir[i][1], udir[i][2],
udirp[i][0], udirp[i][1], udirp[i][2]);
udirp[i][0], udirp[i][1], udirp[i][2]);
}
*/
// get induced dipoles via the OPT extrapolation method
@ -416,7 +416,7 @@ void PairHippoGPU::induce()
// uopt,uoptp with a optorder+1 dimension, just optorder ??
// since no need to store optorder+1 values after these loops
if (poltyp == OPT) {
if (poltyp == OPT) {
for (i = 0; i < nlocal; i++) {
for (j = 0; j < 3; j++) {
uopt[i][0][j] = udir[i][j];
@ -523,7 +523,7 @@ void PairHippoGPU::induce()
crstyle = FIELD;
comm->reverse_comm_pair(this);
}
//error->all(FLERR,"STOP GPU");
// set initial conjugate gradient residual and conjugate vector
@ -549,7 +549,7 @@ void PairHippoGPU::induce()
cfstyle = RSD;
comm->forward_comm_pair(this);
uscale0b(BUILD,rsd,rsdp,zrsd,zrsdp);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
crstyle = ZRSD;
comm->reverse_comm_pair(this);
}
@ -637,7 +637,7 @@ void PairHippoGPU::induce()
if (pcgprec) {
cfstyle = RSD;
comm->forward_comm_pair(this);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
uscale0b(APPLY,rsd,rsdp,zrsd,zrsdp);
crstyle = ZRSD;
comm->reverse_comm_pair(this);
}
@ -692,7 +692,7 @@ void PairHippoGPU::induce()
if (iter >= politer) done = true;
// apply a "peek" iteration to the mutual induced dipoles
if (done) {
for (i = 0; i < nlocal; i++) {
term = pcgpeek * poli[i];
@ -707,7 +707,7 @@ void PairHippoGPU::induce()
// terminate the calculation if dipoles failed to converge
// NOTE: could make this an error
if (iter >= maxiter || eps > epsold)
if (me == 0)
error->warning(FLERR,"hippo induced dipoles did not converge");
@ -715,7 +715,7 @@ void PairHippoGPU::induce()
// DEBUG output to dump file
if (uind_flag)
if (uind_flag)
dump6(fp_uind,"id uindx uindy uindz uinpx uinpy uinpz",DEBYE,uind,uinp);
// deallocation of arrays
@ -763,7 +763,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
PairAmoeba::udirect2b(field, fieldp);
return;
}
int eflag=1, vflag=1;
int nall = atom->nlocal + atom->nghost;
int inum, host_start;
@ -816,7 +816,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
int idx = 4*i;
field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2];
field[i][2] += field_ptr[idx+2];
}
double* fieldp_ptr = (double *)fieldp_pinned;
@ -827,7 +827,7 @@ void PairHippoGPU::udirect2b(double **field, double **fieldp)
fieldp[i][1] += fieldp_ptr[idx+1];
fieldp[i][2] += fieldp_ptr[idx+2];
}
}
/* ----------------------------------------------------------------------
@ -865,7 +865,7 @@ void PairHippoGPU::udirect2b_cpu()
firstneigh = list->firstneigh;
// NOTE: doesn't this have a problem if aewald is tiny ??
aesq2 = 2.0 * aewald * aewald;
aesq2n = 0.0;
if (aewald > 0.0) aesq2n = 1.0 / (MY_PIS*aewald);
@ -892,13 +892,13 @@ void PairHippoGPU::udirect2b_cpu()
pdi = pdamp[itype];
pti = thole[itype];
ddi = dirdamp[itype];
// evaluate all sites within the cutoff distance
for (jj = 0; jj < jnum; jj++) {
jextra = jlist[jj];
j = jextra & NEIGHMASK15;
xr = x[j][0] - x[i][0];
yr = x[j][1] - x[i][1];
zr = x[j][2] - x[i][2];
@ -907,7 +907,7 @@ void PairHippoGPU::udirect2b_cpu()
jtype = amtype[j];
jgroup = amgroup[j];
factor_wscale = special_polar_wscale[sbmask15(jextra)];
if (igroup == jgroup) {
factor_pscale = special_polar_piscale[sbmask15(jextra)];
@ -935,7 +935,7 @@ void PairHippoGPU::udirect2b_cpu()
aefac = aesq2 * aefac;
bn[m] = (bfac*bn[m-1]+aefac*exp2a) * rr2;
}
// find terms needed later to compute mutual polarization
if (poltyp != DIRECT) {
@ -954,7 +954,7 @@ void PairHippoGPU::udirect2b_cpu()
scalek = factor_uscale;
bcn[0] = bn[1] - (1.0-scalek*scale3)*rr3;
bcn[1] = bn[2] - (1.0-scalek*scale5)*rr5;
neighptr[n++] = j;
tdipdip[ndip++] = -bcn[0] + bcn[1]*xr*xr;
tdipdip[ndip++] = bcn[1]*xr*yr;
@ -965,7 +965,7 @@ void PairHippoGPU::udirect2b_cpu()
} else {
if (comm->me == 0) printf("i = %d: j = %d: poltyp == DIRECT\n", i, j);
}
} // jj
firstneigh_dipole[i] = neighptr;
@ -1036,7 +1036,7 @@ void PairHippoGPU::umutual2b(double **field, double **fieldp)
int idx = 4*i;
field[i][0] += field_ptr[idx];
field[i][1] += field_ptr[idx+1];
field[i][2] += field_ptr[idx+2];
field[i][2] += field_ptr[idx+2];
}
double* fieldp_ptr = (double *)fieldp_pinned;
@ -1064,7 +1064,7 @@ void PairHippoGPU::polar_real()
bool success = true;
int *ilist, *numneigh, **firstneigh;
double sublo[3],subhi[3];
if (domain->triclinic == 0) {
sublo[0] = domain->sublo[0];
@ -1096,7 +1096,7 @@ void PairHippoGPU::polar_real()
host_start, &ilist, &numneigh, cpu_time,
success, aewald, felec, off2, atom->q,
domain->boxlo, domain->prd, &tq_pinned);
if (!success)
error->one(FLERR,"Insufficient memory on accelerator");
@ -1156,11 +1156,11 @@ void PairHippoGPU::compute_force_from_torque(const numtyp* tq_ptr,
vxx = xix*fix[0] + xiy*fiy[0] + xiz*fiz[0];
vyy = yix*fix[1] + yiy*fiy[1] + yiz*fiz[1];
vzz = zix*fix[2] + ziy*fiy[2] + ziz*fiz[2];
vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
vxy = 0.5 * (yix*fix[0] + yiy*fiy[0] + yiz*fiz[0] +
xix*fix[1] + xiy*fiy[1] + xiz*fiz[1]);
vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
vxz = 0.5 * (zix*fix[0] + ziy*fiy[0] + ziz*fiz[0] +
xix*fix[2] + xiy*fiy[2] + xiz*fiz[2]);
vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
vyz = 0.5 * (zix*fix[1] + ziy*fiy[1] + ziz*fiz[1] +
yix*fix[2] + yiy*fiy[2] + yiz*fiz[2]);
virial_comp[0] += vxx;