tracking down some invalid reads...
This commit is contained in:
@ -28,8 +28,8 @@ action () {
|
||||
|
||||
# force rebuild of files with LMP_KOKKOS switch
|
||||
|
||||
touch ../accelerator_kokkos.h
|
||||
touch ../memory.h
|
||||
#touch ../accelerator_kokkos.h
|
||||
#touch ../memory.h
|
||||
|
||||
# list of files with optional dependencies
|
||||
|
||||
|
||||
@ -94,15 +94,15 @@ void PairTableRXKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
|
||||
template<class DeviceType>
|
||||
template <int NEIGHFLAG, bool STACKPARAMS, int TABSTYLE>
|
||||
PairTableRXKokkos<DeviceType>::Functor<NEIGHFLAG,STACKPARAMS,TABSTYLE>::Functor(
|
||||
PairTableRXKokkos* c_ptr, NeighListKokkos<device_type>* list_ptr):
|
||||
c(*c_ptr),f(c.f),uCG(c.uCG),uCGnew(c.uCGnew),list(*list_ptr)
|
||||
PairTableRXKokkos* c_ptr, NeighListKokkos<device_type>* list_ptr)//:
|
||||
//c(*c_ptr),f(c.f),uCG(c.uCG),uCGnew(c.uCGnew),list(*list_ptr)
|
||||
{}
|
||||
|
||||
template<class DeviceType>
|
||||
template <int NEIGHFLAG, bool STACKPARAMS, int TABSTYLE>
|
||||
PairTableRXKokkos<DeviceType>::Functor<NEIGHFLAG,STACKPARAMS,TABSTYLE>::~Functor() {
|
||||
c.cleanup_copy();
|
||||
list.clean_copy();
|
||||
//c.cleanup_copy();
|
||||
//list.clean_copy();
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -113,89 +113,89 @@ EV_FLOAT
|
||||
PairTableRXKokkos<DeviceType>::Functor<NEIGHFLAG,STACKPARAMS,TABSTYLE>::
|
||||
compute_item(const int& ii) const {
|
||||
EV_FLOAT ev;
|
||||
const int i = list.d_ilist[ii];
|
||||
const X_FLOAT xtmp = c.x(i,0);
|
||||
const X_FLOAT ytmp = c.x(i,1);
|
||||
const X_FLOAT ztmp = c.x(i,2);
|
||||
const int itype = c.type(i);
|
||||
//const int i = list.d_ilist[ii];
|
||||
//const X_FLOAT xtmp = c.x(i,0);
|
||||
//const X_FLOAT ytmp = c.x(i,1);
|
||||
//const X_FLOAT ztmp = c.x(i,2);
|
||||
//const int itype = c.type(i);
|
||||
|
||||
const AtomNeighborsConst jlist = list.get_neighbors_const(i);
|
||||
const int jnum = list.d_numneigh[i];
|
||||
//const AtomNeighborsConst jlist = list.get_neighbors_const(i);
|
||||
//const int jnum = list.d_numneigh[i];
|
||||
|
||||
double uCG_i = 0.0;
|
||||
double uCGnew_i = 0.0;
|
||||
double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0;
|
||||
//double uCG_i = 0.0;
|
||||
//double uCGnew_i = 0.0;
|
||||
//double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0;
|
||||
|
||||
double mixWtSite1old_i = c.mixWtSite1old_(i);
|
||||
double mixWtSite2old_i = c.mixWtSite2old_(i);
|
||||
double mixWtSite1_i = c.mixWtSite1_(i);
|
||||
double mixWtSite2_i = c.mixWtSite2_(i);
|
||||
//double mixWtSite1old_i = c.mixWtSite1old_(i);
|
||||
//double mixWtSite2old_i = c.mixWtSite2old_(i);
|
||||
//double mixWtSite1_i = c.mixWtSite1_(i);
|
||||
//double mixWtSite2_i = c.mixWtSite2_(i);
|
||||
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
int j = jlist(jj);
|
||||
const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
j &= NEIGHMASK;
|
||||
//for (int jj = 0; jj < jnum; jj++) {
|
||||
// int j = jlist(jj);
|
||||
// const F_FLOAT factor_lj = c.special_lj[sbmask(j)];
|
||||
// j &= NEIGHMASK;
|
||||
|
||||
const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
const int jtype = c.type(j);
|
||||
// const X_FLOAT delx = xtmp - c.x(j,0);
|
||||
// const X_FLOAT dely = ytmp - c.x(j,1);
|
||||
// const X_FLOAT delz = ztmp - c.x(j,2);
|
||||
// const F_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
// const int jtype = c.type(j);
|
||||
|
||||
if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
double mixWtSite1old_j = c.mixWtSite1old_(j);
|
||||
double mixWtSite2old_j = c.mixWtSite2old_(j);
|
||||
double mixWtSite1_j = c.mixWtSite1_(j);
|
||||
double mixWtSite2_j = c.mixWtSite2_(j);
|
||||
// if(rsq < (STACKPARAMS?c.m_cutsq[itype][jtype]:c.d_cutsq(itype,jtype))) {
|
||||
// double mixWtSite1old_j = c.mixWtSite1old_(j);
|
||||
// double mixWtSite2old_j = c.mixWtSite2old_(j);
|
||||
// double mixWtSite1_j = c.mixWtSite1_(j);
|
||||
// double mixWtSite2_j = c.mixWtSite2_(j);
|
||||
|
||||
const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,TABSTYLE>(rsq,i,j,itype,jtype);
|
||||
// const F_FLOAT fpair = factor_lj*c.template compute_fpair<STACKPARAMS,TABSTYLE>(rsq,i,j,itype,jtype);
|
||||
|
||||
fx_i += delx*fpair;
|
||||
fy_i += dely*fpair;
|
||||
fz_i += delz*fpair;
|
||||
// fx_i += delx*fpair;
|
||||
// fy_i += dely*fpair;
|
||||
// fz_i += delz*fpair;
|
||||
|
||||
bool do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) &&
|
||||
(NEWTON_PAIR || j < c.nlocal);
|
||||
if (do_half) {
|
||||
f(j,0) -= delx*fpair;
|
||||
f(j,1) -= dely*fpair;
|
||||
f(j,2) -= delz*fpair;
|
||||
}
|
||||
// bool do_half = (NEIGHFLAG==HALF || NEIGHFLAG==HALFTHREAD) &&
|
||||
// (NEWTON_PAIR || j < c.nlocal);
|
||||
// if (do_half) {
|
||||
// f(j,0) -= delx*fpair;
|
||||
// f(j,1) -= dely*fpair;
|
||||
// f(j,2) -= delz*fpair;
|
||||
// }
|
||||
|
||||
auto evdwl = c.template compute_evdwl<STACKPARAMS,TABSTYLE>(rsq,i,j,itype,jtype);
|
||||
// auto evdwl = c.template compute_evdwl<STACKPARAMS,TABSTYLE>(rsq,i,j,itype,jtype);
|
||||
|
||||
double evdwlOld;
|
||||
if (c.isite1 == c.isite2) {
|
||||
evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl;
|
||||
evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl;
|
||||
} else {
|
||||
evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) +
|
||||
sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl;
|
||||
evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) +
|
||||
sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl;
|
||||
}
|
||||
evdwlOld *= factor_lj;
|
||||
evdwl *= factor_lj;
|
||||
// double evdwlOld;
|
||||
// if (c.isite1 == c.isite2) {
|
||||
// evdwlOld = sqrt(mixWtSite1old_i*mixWtSite2old_j)*evdwl;
|
||||
// evdwl = sqrt(mixWtSite1_i*mixWtSite2_j)*evdwl;
|
||||
// } else {
|
||||
// evdwlOld = (sqrt(mixWtSite1old_i*mixWtSite2old_j) +
|
||||
// sqrt(mixWtSite2old_i*mixWtSite1old_j))*evdwl;
|
||||
// evdwl = (sqrt(mixWtSite1_i*mixWtSite2_j) +
|
||||
// sqrt(mixWtSite2_i*mixWtSite1_j))*evdwl;
|
||||
// }
|
||||
// evdwlOld *= factor_lj;
|
||||
// evdwl *= factor_lj;
|
||||
|
||||
uCG_i += 0.5*evdwlOld;
|
||||
if (do_half) uCG(j) += 0.5*evdwlOld;
|
||||
// uCG_i += 0.5*evdwlOld;
|
||||
// if (do_half) uCG(j) += 0.5*evdwlOld;
|
||||
|
||||
uCGnew_i += 0.5*evdwl;
|
||||
if (do_half) uCGnew(j) += 0.5*evdwl;
|
||||
evdwl = evdwlOld;
|
||||
// uCGnew_i += 0.5*evdwl;
|
||||
// if (do_half) uCGnew(j) += 0.5*evdwl;
|
||||
// evdwl = evdwlOld;
|
||||
|
||||
ev.evdwl += (do_half ? 1.0 : 0.5)*evdwl;
|
||||
// ev.evdwl += (do_half ? 1.0 : 0.5)*evdwl;
|
||||
|
||||
if (EVFLAG) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
}
|
||||
}
|
||||
// if (EVFLAG) ev_tally(ev,i,j,evdwl,fpair,delx,dely,delz);
|
||||
// }
|
||||
//}
|
||||
|
||||
uCG(i) += uCG_i;
|
||||
uCGnew(i) += uCGnew_i;
|
||||
//uCG(i) += uCG_i;
|
||||
//uCGnew(i) += uCGnew_i;
|
||||
|
||||
f(i,0) += fx_i;
|
||||
f(i,1) += fy_i;
|
||||
f(i,2) += fz_i;
|
||||
//f(i,0) += fx_i;
|
||||
//f(i,1) += fy_i;
|
||||
//f(i,2) += fz_i;
|
||||
|
||||
return ev;
|
||||
}
|
||||
@ -209,55 +209,55 @@ ev_tally(EV_FLOAT &ev, const int &i, const int &j,
|
||||
const F_FLOAT &epair, const F_FLOAT &fpair, const F_FLOAT &delx,
|
||||
const F_FLOAT &dely, const F_FLOAT &delz) const
|
||||
{
|
||||
const int EFLAG = c.eflag;
|
||||
const int NEWTON_PAIR = c.newton_pair;
|
||||
const int VFLAG = c.vflag_either;
|
||||
//const int EFLAG = c.eflag;
|
||||
//const int NEWTON_PAIR = c.newton_pair;
|
||||
//const int VFLAG = c.vflag_either;
|
||||
|
||||
if (VFLAG) {
|
||||
const E_FLOAT v0 = delx*delx*fpair;
|
||||
const E_FLOAT v1 = dely*dely*fpair;
|
||||
const E_FLOAT v2 = delz*delz*fpair;
|
||||
const E_FLOAT v3 = delx*dely*fpair;
|
||||
const E_FLOAT v4 = delx*delz*fpair;
|
||||
const E_FLOAT v5 = dely*delz*fpair;
|
||||
//if (VFLAG) {
|
||||
// const E_FLOAT v0 = delx*delx*fpair;
|
||||
// const E_FLOAT v1 = dely*dely*fpair;
|
||||
// const E_FLOAT v2 = delz*delz*fpair;
|
||||
// const E_FLOAT v3 = delx*dely*fpair;
|
||||
// const E_FLOAT v4 = delx*delz*fpair;
|
||||
// const E_FLOAT v5 = dely*delz*fpair;
|
||||
|
||||
if (c.vflag_global) {
|
||||
if (NEIGHFLAG!=FULL) {
|
||||
if (NEWTON_PAIR) {
|
||||
ev.v[0] += v0;
|
||||
ev.v[1] += v1;
|
||||
ev.v[2] += v2;
|
||||
ev.v[3] += v3;
|
||||
ev.v[4] += v4;
|
||||
ev.v[5] += v5;
|
||||
} else {
|
||||
if (i < c.nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
if (j < c.nlocal) {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
ev.v[0] += 0.5*v0;
|
||||
ev.v[1] += 0.5*v1;
|
||||
ev.v[2] += 0.5*v2;
|
||||
ev.v[3] += 0.5*v3;
|
||||
ev.v[4] += 0.5*v4;
|
||||
ev.v[5] += 0.5*v5;
|
||||
}
|
||||
}
|
||||
}
|
||||
// if (c.vflag_global) {
|
||||
// if (NEIGHFLAG!=FULL) {
|
||||
// if (NEWTON_PAIR) {
|
||||
// ev.v[0] += v0;
|
||||
// ev.v[1] += v1;
|
||||
// ev.v[2] += v2;
|
||||
// ev.v[3] += v3;
|
||||
// ev.v[4] += v4;
|
||||
// ev.v[5] += v5;
|
||||
// } else {
|
||||
// if (i < c.nlocal) {
|
||||
// ev.v[0] += 0.5*v0;
|
||||
// ev.v[1] += 0.5*v1;
|
||||
// ev.v[2] += 0.5*v2;
|
||||
// ev.v[3] += 0.5*v3;
|
||||
// ev.v[4] += 0.5*v4;
|
||||
// ev.v[5] += 0.5*v5;
|
||||
// }
|
||||
// if (j < c.nlocal) {
|
||||
// ev.v[0] += 0.5*v0;
|
||||
// ev.v[1] += 0.5*v1;
|
||||
// ev.v[2] += 0.5*v2;
|
||||
// ev.v[3] += 0.5*v3;
|
||||
// ev.v[4] += 0.5*v4;
|
||||
// ev.v[5] += 0.5*v5;
|
||||
// }
|
||||
// }
|
||||
// } else {
|
||||
// ev.v[0] += 0.5*v0;
|
||||
// ev.v[1] += 0.5*v1;
|
||||
// ev.v[2] += 0.5*v2;
|
||||
// ev.v[3] += 0.5*v3;
|
||||
// ev.v[4] += 0.5*v4;
|
||||
// ev.v[5] += 0.5*v5;
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -266,8 +266,8 @@ KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
PairTableRXKokkos<DeviceType>::Functor<NEIGHFLAG,STACKPARAMS,TABSTYLE>::
|
||||
operator()(const int i) const {
|
||||
if (c.newton_pair) compute_item<0,1>(i);
|
||||
else compute_item<0,0>(i);
|
||||
//if (c.newton_pair) compute_item<0,1>(i);
|
||||
//else compute_item<0,0>(i);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -276,8 +276,8 @@ KOKKOS_INLINE_FUNCTION
|
||||
void
|
||||
PairTableRXKokkos<DeviceType>::Functor<NEIGHFLAG,STACKPARAMS,TABSTYLE>::
|
||||
operator()(const int i, value_type &energy_virial) const {
|
||||
if (c.newton_pair) energy_virial += compute_item<1,1>(i);
|
||||
else energy_virial += compute_item<1,0>(i);
|
||||
//if (c.newton_pair) energy_virial += compute_item<1,1>(i);
|
||||
//else energy_virial += compute_item<1,0>(i);
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -322,10 +322,10 @@ void PairTableRXKokkos<DeviceType>::compute_style(int eflag_in, int vflag_in)
|
||||
|
||||
typename DAT::t_float_2d_randomread d_dvector = atomKK->k_dvector.view<DeviceType>();
|
||||
|
||||
Kokkos::parallel_for(ntotal, LAMMPS_LAMBDA(int i) {
|
||||
getMixingWeights<DeviceType>(d_dvector, i, mixWtSite1old_(i), mixWtSite2old_(i),
|
||||
mixWtSite1_(i), mixWtSite2_(i));
|
||||
});
|
||||
//Kokkos::parallel_for(ntotal, LAMMPS_LAMBDA(int i) {
|
||||
// getMixingWeights<DeviceType>(d_dvector, i, mixWtSite1old_(i), mixWtSite2old_(i),
|
||||
// mixWtSite1_(i), mixWtSite2_(i));
|
||||
//});
|
||||
|
||||
if (neighflag == N2) error->all(FLERR,"pair table/rx/kk can't handle N2 yet\n");
|
||||
|
||||
@ -971,8 +971,6 @@ void PairTableRXKokkos<DeviceType>::getMixingWeights(
|
||||
nTotal += dvector(ispecies,id);
|
||||
nTotalOld += dvector(ispecies+nspecies,id);
|
||||
}
|
||||
if(nTotal < MY_EPSILON || nTotalOld < MY_EPSILON)
|
||||
error->all(FLERR,"The number of molecules in CG particle is less than 10*DBL_EPSILON.");
|
||||
|
||||
if (isOneFluid(isite1) == false){
|
||||
nMoleculesOld1 = dvector(isite1+nspecies,id);
|
||||
|
||||
@ -132,7 +132,7 @@ class PairTableRXKokkos : public PairTable {
|
||||
struct Functor {
|
||||
using device_type = DeviceType;
|
||||
typedef EV_FLOAT value_type;
|
||||
PairTableRXKokkos c;
|
||||
//PairTableRXKokkos<device_type> c;
|
||||
// arrays are atomic for Half(Thread) neighbor style
|
||||
Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,
|
||||
device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > f;
|
||||
@ -140,7 +140,7 @@ class PairTableRXKokkos : public PairTable {
|
||||
device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > uCG;
|
||||
Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,
|
||||
device_type,Kokkos::MemoryTraits<AtomicF<NEIGHFLAG>::value> > uCGnew;
|
||||
NeighListKokkos<device_type> list;
|
||||
//NeighListKokkos<device_type> list;
|
||||
Functor(PairTableRXKokkos* c_ptr, NeighListKokkos<device_type>* list_ptr);
|
||||
~Functor();
|
||||
KOKKOS_INLINE_FUNCTION int sbmask(const int& j) const {
|
||||
|
||||
Reference in New Issue
Block a user