Remove early exit from blocking BuildLists

This commit is contained in:
Stan Gerald Moore
2022-02-21 14:00:31 -07:00
parent c2cec53701
commit afbcf5e694
4 changed files with 73 additions and 246 deletions

View File

@ -1593,173 +1593,12 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxZeroVAtom, const int &i
/* ---------------------------------------------------------------------- */
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsFull, const int &ii) const {
if (d_resize_bo() || d_resize_hb())
return;
const int i = d_ilist[ii];
const X_FLOAT xtmp = x(i,0);
const X_FLOAT ytmp = x(i,1);
const X_FLOAT ztmp = x(i,2);
const int itype = type(i);
const int jnum = d_numneigh[i];
F_FLOAT C12, C34, C56, BO_s, BO_pi, BO_pi2, BO, delij[3], dBOp_i[3], dln_BOp_pi_i[3], dln_BOp_pi2_i[3];
F_FLOAT total_bo = 0.0;
int j_index = i*maxbo;
d_bo_first[i] = j_index;
const int bo_first_i = j_index;
int ihb = -1;
int jhb = -1;
int hb_index = i*maxhb;
int hb_first_i;
if (cut_hbsq > 0.0) {
ihb = paramssing(itype).p_hbond;
if (ihb == 1) {
d_hb_first[i] = hb_index;
hb_first_i = hb_index;
}
}
for (int jj = 0; jj < jnum; jj++) {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
delij[0] = x(j,0) - xtmp;
delij[1] = x(j,1) - ytmp;
delij[2] = x(j,2) - ztmp;
const F_FLOAT rsq = delij[0]*delij[0] + delij[1]*delij[1] + delij[2]*delij[2];
double cutoffsq;
if (i < nlocal) cutoffsq = MAX(cut_bosq,cut_hbsq);
else cutoffsq = cut_bosq;
if (rsq > cutoffsq) continue;
const int jtype = type(j);
// hbond list
if (i < nlocal && cut_hbsq > 0.0 && (ihb == 1 || ihb == 2) && rsq <= cut_hbsq) {
jhb = paramssing(jtype).p_hbond;
if (ihb == 1 && jhb == 2) {
const int jj_index = hb_index - hb_first_i;
if (jj_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[hb_index] = j;
hb_index++;
}
}
if (rsq > cut_bosq) continue;
// bond_list
const F_FLOAT rij = sqrt(rsq);
const F_FLOAT p_bo1 = paramstwbp(itype,jtype).p_bo1;
const F_FLOAT p_bo2 = paramstwbp(itype,jtype).p_bo2;
const F_FLOAT p_bo3 = paramstwbp(itype,jtype).p_bo3;
const F_FLOAT p_bo4 = paramstwbp(itype,jtype).p_bo4;
const F_FLOAT p_bo5 = paramstwbp(itype,jtype).p_bo5;
const F_FLOAT p_bo6 = paramstwbp(itype,jtype).p_bo6;
const F_FLOAT r_s = paramstwbp(itype,jtype).r_s;
const F_FLOAT r_pi = paramstwbp(itype,jtype).r_pi;
const F_FLOAT r_pi2 = paramstwbp(itype,jtype).r_pi2;
if (paramssing(itype).r_s > 0.0 && paramssing(jtype).r_s > 0.0) {
C12 = p_bo1*pow(rij/r_s,p_bo2);
BO_s = (1.0+bo_cut)*exp(C12);
}
else BO_s = C12 = 0.0;
if (paramssing(itype).r_pi > 0.0 && paramssing(jtype).r_pi > 0.0) {
C34 = p_bo3*pow(rij/r_pi,p_bo4);
BO_pi = exp(C34);
}
else BO_pi = C34 = 0.0;
if (paramssing(itype).r_pi2 > 0.0 && paramssing(jtype).r_pi2 > 0.0) {
C56 = p_bo5*pow(rij/r_pi2,p_bo6);
BO_pi2 = exp(C56);
}
else BO_pi2 = C56 = 0.0;
BO = BO_s + BO_pi + BO_pi2;
if (BO < bo_cut) continue;
const int jj_index = j_index - bo_first_i;
if (jj_index >= maxbo) {
d_resize_bo() = 1;
return;
}
d_bo_list[j_index] = j;
// from BondOrder1
d_BO(i,jj_index) = BO;
d_BO_s(i,jj_index) = BO_s;
d_BO_pi(i,jj_index) = BO_pi;
d_BO_pi2(i,jj_index) = BO_pi2;
F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij;
F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij;
F_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij;
if (nlocal == 0)
Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0;
for (int d = 0; d < 3; d++) dln_BOp_pi_i[d] = -(BO_pi*Cln_BOp_pi)*delij[d];
for (int d = 0; d < 3; d++) dln_BOp_pi2_i[d] = -(BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) d_dDeltap_self(i,d) += dBOp_i[d];
d_dln_BOp_pix(i,jj_index) = dln_BOp_pi_i[0];
d_dln_BOp_piy(i,jj_index) = dln_BOp_pi_i[1];
d_dln_BOp_piz(i,jj_index) = dln_BOp_pi_i[2];
d_dln_BOp_pi2x(i,jj_index) = dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(i,jj_index) = dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(i,jj_index) = dln_BOp_pi2_i[2];
d_dBOpx(i,jj_index) = dBOp_i[0];
d_dBOpy(i,jj_index) = dBOp_i[1];
d_dBOpz(i,jj_index) = dBOp_i[2];
d_BO(i,jj_index) -= bo_cut;
d_BO_s(i,jj_index) -= bo_cut;
total_bo += d_BO(i,jj_index);
j_index++;
}
d_bo_num[i] = j_index - d_bo_first[i];
if (cut_hbsq > 0.0 && ihb == 1) d_hb_num[i] = hb_index - d_hb_first[i];
d_total_bo[i] += total_bo;
const F_FLOAT val_i = paramssing(itype).valency;
d_Deltap[i] = d_total_bo[i] - val_i;
d_Deltap_boc[i] = d_total_bo[i] - paramssing(itype).valency_val;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType>
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfBlocking<NEIGHFLAG>, const int &ii) const {
constexpr int blocksize = PairReaxFFKokkos<DeviceType>::build_lists_half_blocksize;
if (d_resize_bo() || d_resize_hb())
return;
const auto v_dDeltap_self = ScatterViewHelper<NeedDup_v<NEIGHFLAG,DeviceType>,decltype(dup_dDeltap_self),decltype(ndup_dDeltap_self)>::get(dup_dDeltap_self,ndup_dDeltap_self);
const auto a_dDeltap_self = v_dDeltap_self.template access<AtomicDup_v<NEIGHFLAG,DeviceType>>();
@ -1852,29 +1691,23 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfBlocking<
const int jj_index = j_index - hb_first_i;
if (jj_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[j_index] = j;
}
else if (j < nlocal && ihb == 2 && jhb == 1) {
if (jj_index >= maxhb)
d_resize_hb() = MAX(d_resize_hb(),jj_index+1);
else
d_hb_list[j_index] = j;
} else if (j < nlocal && ihb == 2 && jhb == 1) {
if (NEIGHFLAG == HALF) {
i_index = d_hb_first[j] + d_hb_num[j];
d_hb_num[j]++;
} else {
} else
i_index = d_hb_first[j] + Kokkos::atomic_fetch_add(&d_hb_num[j],1);
}
const int ii_index = i_index - d_hb_first[j];
if (ii_index >= maxhb) {
d_resize_hb() = 1;
return;
}
d_hb_list[i_index] = i;
if (ii_index >= maxhb)
d_resize_hb() = MAX(d_resize_hb(),ii_index+1);
else
d_hb_list[i_index] = i;
}
}
@ -1929,73 +1762,72 @@ void PairReaxFFKokkos<DeviceType>::operator()(TagPairReaxBuildListsHalfBlocking<
const int ii_index = i_index - d_bo_first[j];
if (jj_index >= maxbo || ii_index >= maxbo) {
d_resize_bo() = 1;
return;
const int max_val = MAX(ii_index+1,jj_index+1);
d_resize_bo() = MAX(d_resize_bo(),max_val);
} else {
d_bo_list[j_index] = j;
d_bo_list[i_index] = i;
// from BondOrder1
d_BO(i,jj_index) = BO;
d_BO_s(i,jj_index) = BO_s;
d_BO_pi(i,jj_index) = BO_pi;
d_BO_pi2(i,jj_index) = BO_pi2;
d_BO(j,ii_index) = BO;
d_BO_s(j,ii_index) = BO_s;
d_BO_pi(j,ii_index) = BO_pi;
d_BO_pi2(j,ii_index) = BO_pi2;
F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij;
F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij;
F_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij;
if (nlocal == 0)
Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0;
for (int d = 0; d < 3; d++) dln_BOp_pi_i[d] = -(BO_pi*Cln_BOp_pi)*delij[d];
for (int d = 0; d < 3; d++) dln_BOp_pi2_i[d] = -(BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) a_dDeltap_self(i,d) += dBOp_i[d];
for (int d = 0; d < 3; d++) a_dDeltap_self(j,d) += -dBOp_i[d];
d_dln_BOp_pix(i,jj_index) = dln_BOp_pi_i[0];
d_dln_BOp_piy(i,jj_index) = dln_BOp_pi_i[1];
d_dln_BOp_piz(i,jj_index) = dln_BOp_pi_i[2];
d_dln_BOp_pix(j,ii_index) = -dln_BOp_pi_i[0];
d_dln_BOp_piy(j,ii_index) = -dln_BOp_pi_i[1];
d_dln_BOp_piz(j,ii_index) = -dln_BOp_pi_i[2];
d_dln_BOp_pi2x(i,jj_index) = dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(i,jj_index) = dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(i,jj_index) = dln_BOp_pi2_i[2];
d_dln_BOp_pi2x(j,ii_index) = -dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(j,ii_index) = -dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(j,ii_index) = -dln_BOp_pi2_i[2];
d_dBOpx(i,jj_index) = dBOp_i[0];
d_dBOpy(i,jj_index) = dBOp_i[1];
d_dBOpz(i,jj_index) = dBOp_i[2];
d_dBOpx(j,ii_index) = -dBOp_i[0];
d_dBOpy(j,ii_index) = -dBOp_i[1];
d_dBOpz(j,ii_index) = -dBOp_i[2];
d_BO(i,jj_index) -= bo_cut;
d_BO(j,ii_index) -= bo_cut;
d_BO_s(i,jj_index) -= bo_cut;
d_BO_s(j,ii_index) -= bo_cut;
total_bo += d_BO(i,jj_index);
a_total_bo[j] += d_BO(j,ii_index);
}
d_bo_list[j_index] = j;
d_bo_list[i_index] = i;
// from BondOrder1
d_BO(i,jj_index) = BO;
d_BO_s(i,jj_index) = BO_s;
d_BO_pi(i,jj_index) = BO_pi;
d_BO_pi2(i,jj_index) = BO_pi2;
d_BO(j,ii_index) = BO;
d_BO_s(j,ii_index) = BO_s;
d_BO_pi(j,ii_index) = BO_pi;
d_BO_pi2(j,ii_index) = BO_pi2;
F_FLOAT Cln_BOp_s = p_bo2 * C12 / rij / rij;
F_FLOAT Cln_BOp_pi = p_bo4 * C34 / rij / rij;
F_FLOAT Cln_BOp_pi2 = p_bo6 * C56 / rij / rij;
if (nlocal == 0)
Cln_BOp_s = Cln_BOp_pi = Cln_BOp_pi2 = 0.0;
for (int d = 0; d < 3; d++) dln_BOp_pi_i[d] = -(BO_pi*Cln_BOp_pi)*delij[d];
for (int d = 0; d < 3; d++) dln_BOp_pi2_i[d] = -(BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) dBOp_i[d] = -(BO_s*Cln_BOp_s+BO_pi*Cln_BOp_pi+BO_pi2*Cln_BOp_pi2)*delij[d];
for (int d = 0; d < 3; d++) a_dDeltap_self(i,d) += dBOp_i[d];
for (int d = 0; d < 3; d++) a_dDeltap_self(j,d) += -dBOp_i[d];
d_dln_BOp_pix(i,jj_index) = dln_BOp_pi_i[0];
d_dln_BOp_piy(i,jj_index) = dln_BOp_pi_i[1];
d_dln_BOp_piz(i,jj_index) = dln_BOp_pi_i[2];
d_dln_BOp_pix(j,ii_index) = -dln_BOp_pi_i[0];
d_dln_BOp_piy(j,ii_index) = -dln_BOp_pi_i[1];
d_dln_BOp_piz(j,ii_index) = -dln_BOp_pi_i[2];
d_dln_BOp_pi2x(i,jj_index) = dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(i,jj_index) = dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(i,jj_index) = dln_BOp_pi2_i[2];
d_dln_BOp_pi2x(j,ii_index) = -dln_BOp_pi2_i[0];
d_dln_BOp_pi2y(j,ii_index) = -dln_BOp_pi2_i[1];
d_dln_BOp_pi2z(j,ii_index) = -dln_BOp_pi2_i[2];
d_dBOpx(i,jj_index) = dBOp_i[0];
d_dBOpy(i,jj_index) = dBOp_i[1];
d_dBOpz(i,jj_index) = dBOp_i[2];
d_dBOpx(j,ii_index) = -dBOp_i[0];
d_dBOpy(j,ii_index) = -dBOp_i[1];
d_dBOpz(j,ii_index) = -dBOp_i[2];
d_BO(i,jj_index) -= bo_cut;
d_BO(j,ii_index) -= bo_cut;
d_BO_s(i,jj_index) -= bo_cut;
d_BO_s(j,ii_index) -= bo_cut;
total_bo += d_BO(i,jj_index);
a_total_bo[j] += d_BO(j,ii_index);
}
}
a_total_bo[i] += total_bo;
}
/* ---------------------------------------------------------------------- */

View File

@ -57,8 +57,6 @@ struct TagPairReaxComputeLJCoulombShortNeigh{};
template<int NEIGHFLAG, int EVFLAG>
struct TagPairReaxComputeTabulatedLJCoulomb{};
struct TagPairReaxBuildListsFull{};
template<int NEIGHFLAG>
struct TagPairReaxBuildListsHalfBlocking{};
@ -167,9 +165,6 @@ class PairReaxFFKokkos : public PairReaxFF {
KOKKOS_INLINE_FUNCTION
void operator()(TagPairReaxComputeLJCoulombShortNeigh, const int&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagPairReaxBuildListsFull, const int&) const;
template<int NEIGHFLAG>
KOKKOS_INLINE_FUNCTION
void operator()(TagPairReaxBuildListsHalfBlocking<NEIGHFLAG>, const int&) const;

View File

@ -151,7 +151,7 @@ class PairTersoffMODKokkos : public PairTersoffMOD {
KOKKOS_INLINE_FUNCTION
int sbmask(const int& j) const;
struct params_ters{
struct params_ters {
KOKKOS_INLINE_FUNCTION
params_ters() {powerm=0;lam3=0;h=0;powern=0;beta=0;lam2=0;bigb=0;bigr=0;bigd=0;
lam1=0;biga=0;powern_del=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;c5=0;ca1=0;ca4=0;};

View File

@ -150,7 +150,7 @@ class PairTersoffZBLKokkos : public PairTersoffZBL {
KOKKOS_INLINE_FUNCTION
int sbmask(const int& j) const;
struct params_ters{
struct params_ters {
KOKKOS_INLINE_FUNCTION
params_ters() {powerm=0;gamma=0;lam3=0;c=0;d=0;h=0;powern=0;beta=0;lam2=0;bigb=0;
bigr=0;bigd=0;lam1=0;biga=0;cutsq=0;c1=0;c2=0;c3=0;c4=0;Z_i=0;Z_j=0;ZBLcut=0;ZBLexpscale=0;};