Fixed the issues with some OpenCL implementation to avoid errors casting changing the pointer address spaces
This commit is contained in:
@ -410,7 +410,7 @@ _texture( q_tex,int2);
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_amoeba,
|
const __global numtyp4 *restrict sp_amoeba,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
@ -443,9 +443,9 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
|||||||
acctyp4 tq;
|
acctyp4 tq;
|
||||||
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
||||||
|
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
const __global numtyp4* polar1 = &extra[0];
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
const __global numtyp4* polar2 = &extra[nall];
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
@ -490,8 +490,6 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
|||||||
numtyp zr = jx.z - ix.z;
|
numtyp zr = jx.z - ix.z;
|
||||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
//if (r2>off2) continue;
|
|
||||||
|
|
||||||
numtyp r = ucl_sqrt(r2);
|
numtyp r = ucl_sqrt(r2);
|
||||||
const numtyp4 pol1j = polar1[j];
|
const numtyp4 pol1j = polar1[j];
|
||||||
numtyp ck = pol1j.x; // rpole[j][0];
|
numtyp ck = pol1j.x; // rpole[j][0];
|
||||||
@ -691,7 +689,7 @@ __kernel void k_amoeba_multipole(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_amoeba,
|
const __global numtyp4 *restrict sp_amoeba,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
@ -707,14 +705,14 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
atom_info(t_per_atom,ii,tid,offset);
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
int n_stride;
|
int n_stride;
|
||||||
//local_allocate_store_charge();
|
local_allocate_store_ufld();
|
||||||
|
|
||||||
acctyp _fieldp[6];
|
acctyp _fieldp[6];
|
||||||
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
||||||
|
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
const __global numtyp4* polar1 = &extra[0];
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
const __global numtyp4* polar2 = &extra[nall];
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
@ -885,7 +883,7 @@ __kernel void k_amoeba_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_amoeba,
|
const __global numtyp4 *restrict sp_amoeba,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
@ -901,13 +899,14 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
atom_info(t_per_atom,ii,tid,offset);
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
int n_stride;
|
int n_stride;
|
||||||
|
local_allocate_store_ufld();
|
||||||
|
|
||||||
acctyp _fieldp[6];
|
acctyp _fieldp[6];
|
||||||
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
||||||
|
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
|
const __global numtyp4* polar4 = &extra[3*nall];
|
||||||
numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
|
const __global numtyp4* polar5 = &extra[4*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
@ -949,8 +948,6 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
numtyp zr = jx.z - ix.z;
|
numtyp zr = jx.z - ix.z;
|
||||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
//if (r2>off2) continue;
|
|
||||||
|
|
||||||
numtyp r = ucl_sqrt(r2);
|
numtyp r = ucl_sqrt(r2);
|
||||||
numtyp rinv = ucl_rsqrt(r2);
|
numtyp rinv = ucl_rsqrt(r2);
|
||||||
numtyp r2inv = rinv*rinv;
|
numtyp r2inv = rinv*rinv;
|
||||||
@ -1049,7 +1046,7 @@ __kernel void k_amoeba_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
__kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff,
|
const __global numtyp4 *restrict coeff,
|
||||||
const __global numtyp4 *restrict sp_amoeba,
|
const __global numtyp4 *restrict sp_amoeba,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
@ -1068,7 +1065,6 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
atom_info(t_per_atom,ii,tid,offset);
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
int n_stride;
|
int n_stride;
|
||||||
local_allocate_store_ufld();
|
|
||||||
local_allocate_store_charge();
|
local_allocate_store_charge();
|
||||||
|
|
||||||
acctyp4 f;
|
acctyp4 f;
|
||||||
@ -1086,11 +1082,12 @@ __kernel void k_amoeba_polar(const __global numtyp4 *restrict x_,
|
|||||||
for (int l=0; l<6; l++) dufld[l]=(acctyp)0;
|
for (int l=0; l<6; l++) dufld[l]=(acctyp)0;
|
||||||
|
|
||||||
numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
const __global numtyp4* polar1 = &extra[0];
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar2 = &extra[nall];
|
||||||
numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
|
const __global numtyp4* polar4 = &extra[3*nall];
|
||||||
|
const __global numtyp4* polar5 = &extra[4*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int itype,igroup;
|
int itype,igroup;
|
||||||
|
|||||||
@ -49,7 +49,7 @@ int AtomT::bytes_per_atom() const {
|
|||||||
if (_vel)
|
if (_vel)
|
||||||
bytes+=4*sizeof(numtyp);
|
bytes+=4*sizeof(numtyp);
|
||||||
if (_extra_fields>0)
|
if (_extra_fields>0)
|
||||||
bytes+=_extra_fields*sizeof(numtyp);
|
bytes+=_extra_fields*sizeof(numtyp4);
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -516,7 +516,7 @@ class Atom {
|
|||||||
/// Velocities
|
/// Velocities
|
||||||
UCL_Vector<numtyp,numtyp> v;
|
UCL_Vector<numtyp,numtyp> v;
|
||||||
/// Extras
|
/// Extras
|
||||||
UCL_Vector<numtyp,numtyp> extra;
|
UCL_Vector<numtyp4,numtyp4> extra;
|
||||||
|
|
||||||
#ifdef GPU_CAST
|
#ifdef GPU_CAST
|
||||||
UCL_Vector<numtyp,numtyp> x_cast;
|
UCL_Vector<numtyp,numtyp> x_cast;
|
||||||
|
|||||||
@ -90,7 +90,7 @@ int BaseAmoebaT::init_atomic(const int nlocal, const int nall,
|
|||||||
bool vel = false;
|
bool vel = false;
|
||||||
_extra_fields = 24; // round up to accomodate quadruples of numtyp values
|
_extra_fields = 24; // round up to accomodate quadruples of numtyp values
|
||||||
// rpole 13; uind 3; uinp 3; amtype, amgroup; pval
|
// rpole 13; uind 3; uinp 3; amtype, amgroup; pval
|
||||||
int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields);
|
int success=device->init(*ans,charge,rot,nlocal,nall,maxspecial,vel,_extra_fields/4);
|
||||||
if (success!=0)
|
if (success!=0)
|
||||||
return success;
|
return success;
|
||||||
|
|
||||||
@ -820,35 +820,35 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
|||||||
atom->extra_data_unavail();
|
atom->extra_data_unavail();
|
||||||
|
|
||||||
int _nall=atom->nall();
|
int _nall=atom->nall();
|
||||||
numtyp *pextra=reinterpret_cast<numtyp*>(&(atom->extra[0]));
|
numtyp4 *pextra=reinterpret_cast<numtyp4*>(&(atom->extra[0]));
|
||||||
|
|
||||||
int n = 0;
|
int n = 0;
|
||||||
int nstride = 4;
|
int nstride = 1; //4;
|
||||||
if (rpole) {
|
if (rpole) {
|
||||||
for (int i = 0; i < _nall; i++) {
|
for (int i = 0; i < _nall; i++) {
|
||||||
int idx = n+i*nstride;
|
int idx = n+i*nstride;
|
||||||
pextra[idx] = rpole[i][0];
|
pextra[idx].x = rpole[i][0];
|
||||||
pextra[idx+1] = rpole[i][1];
|
pextra[idx].y = rpole[i][1];
|
||||||
pextra[idx+2] = rpole[i][2];
|
pextra[idx].z = rpole[i][2];
|
||||||
pextra[idx+3] = rpole[i][3];
|
pextra[idx].w = rpole[i][3];
|
||||||
}
|
}
|
||||||
|
|
||||||
n += nstride*_nall;
|
n += nstride*_nall;
|
||||||
for (int i = 0; i < _nall; i++) {
|
for (int i = 0; i < _nall; i++) {
|
||||||
int idx = n+i*nstride;
|
int idx = n+i*nstride;
|
||||||
pextra[idx] = rpole[i][4];
|
pextra[idx].x = rpole[i][4];
|
||||||
pextra[idx+1] = rpole[i][5];
|
pextra[idx].y = rpole[i][5];
|
||||||
pextra[idx+2] = rpole[i][6];
|
pextra[idx].z = rpole[i][6];
|
||||||
pextra[idx+3] = rpole[i][8];
|
pextra[idx].w = rpole[i][8];
|
||||||
}
|
}
|
||||||
|
|
||||||
n += nstride*_nall;
|
n += nstride*_nall;
|
||||||
for (int i = 0; i < _nall; i++) {
|
for (int i = 0; i < _nall; i++) {
|
||||||
int idx = n+i*nstride;
|
int idx = n+i*nstride;
|
||||||
pextra[idx] = rpole[i][9];
|
pextra[idx].x = rpole[i][9];
|
||||||
pextra[idx+1] = rpole[i][12];
|
pextra[idx].y = rpole[i][12];
|
||||||
pextra[idx+2] = (numtyp)amtype[i];
|
pextra[idx].z = (numtyp)amtype[i];
|
||||||
pextra[idx+3] = (numtyp)amgroup[i];
|
pextra[idx].w = (numtyp)amgroup[i];
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
n += 2*nstride*_nall;
|
n += 2*nstride*_nall;
|
||||||
@ -858,9 +858,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
|||||||
if (uind) {
|
if (uind) {
|
||||||
for (int i = 0; i < _nall; i++) {
|
for (int i = 0; i < _nall; i++) {
|
||||||
int idx = n+i*nstride;
|
int idx = n+i*nstride;
|
||||||
pextra[idx] = uind[i][0];
|
pextra[idx].x = uind[i][0];
|
||||||
pextra[idx+1] = uind[i][1];
|
pextra[idx].y = uind[i][1];
|
||||||
pextra[idx+2] = uind[i][2];
|
pextra[idx].z = uind[i][2];
|
||||||
|
pextra[idx].w = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -868,9 +869,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
|||||||
if (uinp) {
|
if (uinp) {
|
||||||
for (int i = 0; i < _nall; i++) {
|
for (int i = 0; i < _nall; i++) {
|
||||||
int idx = n+i*nstride;
|
int idx = n+i*nstride;
|
||||||
pextra[idx] = uinp[i][0];
|
pextra[idx].x = uinp[i][0];
|
||||||
pextra[idx+1] = uinp[i][1];
|
pextra[idx].y = uinp[i][1];
|
||||||
pextra[idx+2] = uinp[i][2];
|
pextra[idx].z = uinp[i][2];
|
||||||
|
pextra[idx].w = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -878,7 +880,10 @@ void BaseAmoebaT::cast_extra_data(int* amtype, int* amgroup, double** rpole,
|
|||||||
if (pval) {
|
if (pval) {
|
||||||
for (int i = 0; i < _nall; i++) {
|
for (int i = 0; i < _nall; i++) {
|
||||||
int idx = n+i*nstride;
|
int idx = n+i*nstride;
|
||||||
pextra[idx] = pval[i];
|
pextra[idx].x = pval[i];
|
||||||
|
pextra[idx].y = 0;
|
||||||
|
pextra[idx].z = 0;
|
||||||
|
pextra[idx].w = 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@ -410,7 +410,7 @@ _texture( q_tex,int2);
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff_rep,
|
const __global numtyp4 *restrict coeff_rep,
|
||||||
const __global numtyp4 *restrict sp_nonpolar,
|
const __global numtyp4 *restrict sp_nonpolar,
|
||||||
const __global int *dev_nbor,
|
const __global int *dev_nbor,
|
||||||
@ -444,9 +444,9 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
|||||||
acctyp4 tq;
|
acctyp4 tq;
|
||||||
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
||||||
|
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
const __global numtyp4* polar1 = &extra[0];
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
const __global numtyp4* polar2 = &extra[nall];
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
@ -495,8 +495,6 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
|||||||
numtyp zr = jx.z - ix.z;
|
numtyp zr = jx.z - ix.z;
|
||||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
if (r2>off2) continue;
|
|
||||||
|
|
||||||
const numtyp4 pol1j = polar1[j];
|
const numtyp4 pol1j = polar1[j];
|
||||||
//numtyp ck = pol1j.x; // rpole[j][0];
|
//numtyp ck = pol1j.x; // rpole[j][0];
|
||||||
numtyp dkx = pol1j.y; // rpole[j][1];
|
numtyp dkx = pol1j.y; // rpole[j][1];
|
||||||
@ -712,7 +710,7 @@ __kernel void k_hippo_repulsion(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff_amtype,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
const __global numtyp4 *restrict coeff_amclass,
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_nonpolar,
|
const __global numtyp4 *restrict sp_nonpolar,
|
||||||
@ -741,7 +739,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
|||||||
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
|
for (int l=0; l<6; l++) virial[l]=(acctyp)0;
|
||||||
}
|
}
|
||||||
|
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int itype,iclass;
|
int itype,iclass;
|
||||||
@ -890,7 +888,7 @@ __kernel void k_hippo_dispersion(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff_amtype,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
const __global numtyp4 *restrict coeff_amclass,
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
@ -924,15 +922,12 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
acctyp4 tq;
|
acctyp4 tq;
|
||||||
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
tq.x=(acctyp)0; tq.y=(acctyp)0; tq.z=(acctyp)0;
|
||||||
|
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
const __global numtyp4* polar1 = &extra[0];
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
const __global numtyp4* polar2 = &extra[nall];
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
|
const __global numtyp4* polar6 = &extra[5*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int m;
|
|
||||||
int itype,iclass;
|
|
||||||
|
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
const __global int* nbor_mem=dev_packed;
|
const __global int* nbor_mem=dev_packed;
|
||||||
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
|
||||||
@ -960,9 +955,8 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
const numtyp4 pol3i = polar3[i];
|
const numtyp4 pol3i = polar3[i];
|
||||||
numtyp qiyz = pol3i.x; // rpole[i][9];
|
numtyp qiyz = pol3i.x; // rpole[i][9];
|
||||||
numtyp qizz = pol3i.y; // rpole[i][12];
|
numtyp qizz = pol3i.y; // rpole[i][12];
|
||||||
itype = pol3i.z; // amtype[i];
|
int itype = pol3i.z; // amtype[i];
|
||||||
|
int iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
||||||
iclass = coeff_amtype[itype].w; // amtype2class[itype];
|
|
||||||
numtyp corei = coeff_amclass[iclass].z; // pcore[iclass];
|
numtyp corei = coeff_amclass[iclass].z; // pcore[iclass];
|
||||||
numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
|
numtyp alphai = coeff_amclass[iclass].w; // palpha[iclass];
|
||||||
numtyp vali = polar6[i].x;
|
numtyp vali = polar6[i].x;
|
||||||
@ -1084,6 +1078,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
numtyp alsq2n = (numtyp)0.0;
|
numtyp alsq2n = (numtyp)0.0;
|
||||||
if (aewald > (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald);
|
if (aewald > (numtyp)0.0) alsq2n = (numtyp)1.0 / (MY_PIS*aewald);
|
||||||
|
|
||||||
|
int m;
|
||||||
for (m = 1; m < 6; m++) {
|
for (m = 1; m < 6; m++) {
|
||||||
numtyp bfac = (numtyp) (m+m-1);
|
numtyp bfac = (numtyp) (m+m-1);
|
||||||
alsq2n = alsq2 * alsq2n;
|
alsq2n = alsq2 * alsq2n;
|
||||||
@ -1208,7 +1203,7 @@ __kernel void k_hippo_multipole(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff_amtype,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
const __global numtyp4 *restrict coeff_amclass,
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
@ -1225,15 +1220,15 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
atom_info(t_per_atom,ii,tid,offset);
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
int n_stride;
|
int n_stride;
|
||||||
//local_allocate_store_charge();
|
local_allocate_store_charge();
|
||||||
|
|
||||||
acctyp _fieldp[6];
|
acctyp _fieldp[6];
|
||||||
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
||||||
|
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
const __global numtyp4* polar1 = &extra[0];
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
const __global numtyp4* polar2 = &extra[nall];
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
|
const __global numtyp4* polar6 = &extra[5*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
@ -1388,7 +1383,7 @@ __kernel void k_hippo_udirect2b(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff_amtype,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
const __global numtyp4 *restrict coeff_amclass,
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
@ -1405,14 +1400,14 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
atom_info(t_per_atom,ii,tid,offset);
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
int n_stride;
|
int n_stride;
|
||||||
//local_allocate_store_charge();
|
local_allocate_store_charge();
|
||||||
|
|
||||||
acctyp _fieldp[6];
|
acctyp _fieldp[6];
|
||||||
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
for (int l=0; l<6; l++) _fieldp[l]=(acctyp)0;
|
||||||
|
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
|
const __global numtyp4* polar4 = &extra[3*nall];
|
||||||
numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
|
const __global numtyp4* polar5 = &extra[4*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int numj, nbor, nbor_end;
|
int numj, nbor, nbor_end;
|
||||||
@ -1539,7 +1534,7 @@ __kernel void k_hippo_umutual2b(const __global numtyp4 *restrict x_,
|
|||||||
------------------------------------------------------------------------- */
|
------------------------------------------------------------------------- */
|
||||||
|
|
||||||
__kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
__kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
||||||
const __global numtyp *restrict extra,
|
const __global numtyp4 *restrict extra,
|
||||||
const __global numtyp4 *restrict coeff_amtype,
|
const __global numtyp4 *restrict coeff_amtype,
|
||||||
const __global numtyp4 *restrict coeff_amclass,
|
const __global numtyp4 *restrict coeff_amclass,
|
||||||
const __global numtyp4 *restrict sp_polar,
|
const __global numtyp4 *restrict sp_polar,
|
||||||
@ -1559,7 +1554,6 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
|||||||
atom_info(t_per_atom,ii,tid,offset);
|
atom_info(t_per_atom,ii,tid,offset);
|
||||||
|
|
||||||
int n_stride;
|
int n_stride;
|
||||||
local_allocate_store_ufld();
|
|
||||||
local_allocate_store_charge();
|
local_allocate_store_charge();
|
||||||
|
|
||||||
acctyp4 f;
|
acctyp4 f;
|
||||||
@ -1577,14 +1571,13 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
|||||||
for (int l=0; l<6; l++) dufld[l]=(acctyp)0;
|
for (int l=0; l<6; l++) dufld[l]=(acctyp)0;
|
||||||
|
|
||||||
numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
numtyp dix,diy,diz,qixx,qixy,qixz,qiyy,qiyz,qizz;
|
||||||
numtyp4* polar1 = (numtyp4*)(&extra[0]);
|
|
||||||
numtyp4* polar2 = (numtyp4*)(&extra[4*nall]);
|
|
||||||
numtyp4* polar3 = (numtyp4*)(&extra[8*nall]);
|
|
||||||
numtyp4* polar4 = (numtyp4*)(&extra[12*nall]);
|
|
||||||
numtyp4* polar5 = (numtyp4*)(&extra[16*nall]);
|
|
||||||
numtyp4* polar6 = (numtyp4*)(&extra[20*nall]);
|
|
||||||
|
|
||||||
//numtyp4 xi__;
|
const __global numtyp4* polar1 = &extra[0];
|
||||||
|
const __global numtyp4* polar2 = &extra[nall];
|
||||||
|
const __global numtyp4* polar3 = &extra[2*nall];
|
||||||
|
const __global numtyp4* polar4 = &extra[3*nall];
|
||||||
|
const __global numtyp4* polar5 = &extra[4*nall];
|
||||||
|
const __global numtyp4* polar6 = &extra[5*nall];
|
||||||
|
|
||||||
if (ii<inum) {
|
if (ii<inum) {
|
||||||
int itype,igroup;
|
int itype,igroup;
|
||||||
@ -1644,7 +1637,6 @@ __kernel void k_hippo_polar(const __global numtyp4 *restrict x_,
|
|||||||
numtyp yr = jx.y - ix.y;
|
numtyp yr = jx.y - ix.y;
|
||||||
numtyp zr = jx.z - ix.z;
|
numtyp zr = jx.z - ix.z;
|
||||||
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
numtyp r2 = xr*xr + yr*yr + zr*zr;
|
||||||
|
|
||||||
numtyp r = ucl_sqrt(r2);
|
numtyp r = ucl_sqrt(r2);
|
||||||
|
|
||||||
const numtyp4 pol1j = polar1[j];
|
const numtyp4 pol1j = polar1[j];
|
||||||
|
|||||||
Reference in New Issue
Block a user