diff --git a/src/KSPACE/pair_buck_long_coul_long.cpp b/src/KSPACE/pair_buck_long_coul_long.cpp index 859dac8db0..a40970fff5 100644 --- a/src/KSPACE/pair_buck_long_coul_long.cpp +++ b/src/KSPACE/pair_buck_long_coul_long.cpp @@ -236,7 +236,7 @@ void PairBuckLongCoulLong::init_style() int irequest; - if (update->whichflag == 0 && strstr(update->integrate_style,"respa")) { + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; @@ -627,14 +627,14 @@ void PairBuckLongCoulLong::compute_inner() double qri, *cut_bucksqi, *buck1i, *buck2i, *rhoinvi; vector xi, d; - ineighn = (ineigh = list->ilist) + list->inum; + ineighn = (ineigh = listinner->ilist) + listinner->inum; for (; ineighfirstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; for (; jneighilist)+list->inum; + ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum; for (; ineighfirstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; for (; jneighilist)+list->inum; + ineighn = (ineigh = listouter->ilist)+listouter->inum; for (; ineighfirstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; for (; jneighwhichflag == 0 && strstr(update->integrate_style,"respa")) { + if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) { int respa = 0; if (((Respa *) update->integrate)->level_inner >= 0) respa = 1; if (((Respa *) update->integrate)->level_middle >= 0) respa = 2; @@ -633,13 +633,13 @@ void PairLJLongCoulLong::compute_inner() double qri, *cut_ljsqi, *lj1i, *lj2i; vector xi, d; - ineighn = (ineigh = list->ilist)+list->inum; + ineighn = (ineigh = listinner->ilist)+listinner->inum; for (; ineighfirstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; for (; jneighilist)+list->inum; + ineighn = (ineigh = listmiddle->ilist)+listmiddle->inum; for (; ineighfirstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; for (; jneighilist)+list->inum; + ineighn = (ineigh = listouter->ilist)+listouter->inum; for (; ineighfirstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; for (; jneigh>ndispshiftbits; + register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k]; + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) { + forcelj = (rn*=rn)*lj1i[jtype]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[jtype]; + if (eflag) evdwl = rn*lj3i[jtype]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[jtype]; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + forcelj = f*(rn *= rn)*lj1i[jtype]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[jtype]+t*lj2i[jtype]; + if (eflag) evdwl = f*rn*lj3i[jtype]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[jtype]+t*lj4i[jtype]; + } + } } else { // cut lj register double rn = r2inv*r2inv*r2inv; @@ -437,6 +455,922 @@ void PairLJLongTIP4PLong::compute(int eflag, int vflag) } } +/* --------------------------------------------------------------------- */ + +void PairLJLongTIP4PLong::compute_inner() +{ + int i,j,ii,jj,inum,jnum,itype,jtype,itable; + int iH1,iH2,jH1,jH2; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double r,r2inv,forcecoul,forcelj,cforce; + double factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double xiM[3],xjM[3],fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3];// f1[3]; + double *x1,*x2; + int *ilist,*jlist,*numneigh,**firstneigh; + double rsq, qri; + + double cut_out_on = cut_respa[0]; + double cut_out_off = cut_respa[1]; + + double cut_out_diff = cut_out_off - cut_out_on; + double cut_out_on_sq = cut_out_on*cut_out_on; + double cut_out_off_sq = cut_out_off*cut_out_off; + + // reallocate hneigh & newsite if necessary + // initialize hneigh[0] to -1 on steps when reneighboring occurred + // initialize hneigh[2] to 0 every step + + int nlocal = atom->nlocal; + int nall = nlocal + atom->nghost; + + if (atom->nmax > nmax) { + nmax = atom->nmax; + memory->destroy(hneigh); + memory->create(hneigh,nmax,3,"pair:hneigh"); + memory->destroy(newsite); + memory->create(newsite,nmax,3,"pair:newsite"); + } + if (neighbor->ago == 0) + for (i = 0; i < nall; i++) hneigh[i][0] = -1; + for (i = 0; i < nall; i++) hneigh[i][2] = 0; + + double **f = atom->f; + double **x = atom->x; + double *q = atom->q; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist); + + int order1 = ewald_order&(1<<1); + int ni; + double *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; + + inum = listinner->inum; + ilist = listinner->ilist; + numneigh = listinner->numneigh; + firstneigh = listinner->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + if (itype == typeO && order1) { + if (hneigh[i][0] < 0) { + hneigh[i][0] = iH1 = atom->map(atom->tag[i] + 1); + hneigh[i][1] = iH2 = atom->map(atom->tag[i] + 2); + hneigh[i][2] = 1; + if (iH1 == -1 || iH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + } else { + iH1 = hneigh[i][0]; + iH2 = hneigh[i][1]; + if (hneigh[i][2] == 0) { + hneigh[i][2] = 1; + compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + } + } + x1 = newsite[i]; + } else x1 = x[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + offseti = offset[itype]; + lj1i = lj1[itype]; lj2i = lj2[itype]; lj3i = lj3[itype]; lj4i = lj4[itype]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + ni = sbmask(j); + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cut_ljsq[itype][jtype] && rsq < cut_out_off_sq ) { // lj + r2inv = 1.0/rsq; + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + } + + if (rsq > cut_out_on_sq) { // switching + register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + forcelj *= 1.0 + rsw*rsw*(2.0*rsw-3.0); + } + + forcelj *= r2inv; + f[i][0] += delx*forcelj; + f[i][1] += dely*forcelj; + f[i][2] += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; + } + + + // adjust rsq and delxyz for off-site O charge(s) + // ADDITIONAL REQEUST REQUIRED HERE!!!!! + + if (rsq < cut_coulsqplus && order1) { + if (itype == typeO || jtype == typeO) { + if (jtype == typeO) { + if (hneigh[j][0] < 0) { + hneigh[j][0] = jH1 = atom->map(atom->tag[j] + 1); + hneigh[j][1] = jH2 = atom->map(atom->tag[j] + 2); + hneigh[j][2] = 1; + if (jH1 == -1 || jH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + } else { + jH1 = hneigh[j][0]; + jH2 = hneigh[j][1]; + if (hneigh[j][2] == 0) { + hneigh[j][2] = 1; + compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + } + } + x2 = newsite[j]; + } else x2 = x[j]; + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; + } + + // test current rsq against cutoff and compute Coulombic force + + if (rsq < cut_coulsq && rsq < cut_out_off_sq) { + r2inv = 1.0 / rsq; + qri = qqrd2e*qtmp; + if (ni == 0) forcecoul = qri*q[j]*sqrt(r2inv); + else { + forcecoul = qri*q[j]*sqrt(r2inv)*special_coul[ni]; + } + + if (rsq > cut_out_on_sq) { // switching + register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + forcecoul *= 1.0 + rsw*rsw*(2.0*rsw-3.0); + } + + cforce = forcecoul * r2inv; + + //if (evflag) ev_tally(i,j,nlocal,newton_pair, + // evdwl,0.0,cforce,delx,dely,delz); + + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (itype != typeO) { + f[i][0] += delx * cforce; + f[i][1] += dely * cforce; + f[i][2] += delz * cforce; + + } else { + fd[0] = delx*cforce; + fd[1] = dely*cforce; + fd[2] = delz*cforce; + + fO[0] = fd[0]*(1 - alpha); + fO[1] = fd[1]*(1 - alpha); + fO[2] = fd[2]*(1 - alpha); + + fH[0] = 0.5 * alpha * fd[0]; + fH[1] = 0.5 * alpha * fd[1]; + fH[2] = 0.5 * alpha * fd[2]; + + f[i][0] += fO[0]; + f[i][1] += fO[1]; + f[i][2] += fO[2]; + + f[iH1][0] += fH[0]; + f[iH1][1] += fH[1]; + f[iH1][2] += fH[2]; + + f[iH2][0] += fH[0]; + f[iH2][1] += fH[1]; + f[iH2][2] += fH[2]; + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + } else { + fd[0] = -delx*cforce; + fd[1] = -dely*cforce; + fd[2] = -delz*cforce; + + fO[0] = fd[0]*(1 - alpha); + fO[1] = fd[1]*(1 - alpha); + fO[2] = fd[2]*(1 - alpha); + + fH[0] = 0.5 * alpha * fd[0]; + fH[1] = 0.5 * alpha * fd[1]; + fH[2] = 0.5 * alpha * fd[2]; + + f[j][0] += fO[0]; + f[j][1] += fO[1]; + f[j][2] += fO[2]; + + f[jH1][0] += fH[0]; + f[jH1][1] += fH[1]; + f[jH1][2] += fH[2]; + + f[jH2][0] += fH[0]; + f[jH2][1] += fH[1]; + f[jH2][2] += fH[2]; + } + } + } + } + } +} + +/* --------------------------------------------------------------------- */ + +void PairLJLongTIP4PLong::compute_middle() +{ + int i,j,ii,jj,inum,jnum,itype,jtype,itable; + int iH1,iH2,jH1,jH2; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double r,r2inv,forcecoul,forcelj,cforce; + double factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double xiM[3],xjM[3],fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3];// f1[3]; + double *x1,*x2; + int *ilist,*jlist,*numneigh,**firstneigh; + double rsq,qri; + + double cut_in_off = cut_respa[0]; + double cut_in_on = cut_respa[1]; + double cut_out_on = cut_respa[2]; + double cut_out_off = cut_respa[3]; + + double cut_in_diff = cut_in_on - cut_in_off; + double cut_out_diff = cut_out_off - cut_out_on; + double cut_in_off_sq = cut_in_off*cut_in_off; + double cut_in_on_sq = cut_in_on*cut_in_on; + double cut_out_on_sq = cut_out_on*cut_out_on; + double cut_out_off_sq = cut_out_off*cut_out_off; + + // reallocate hneigh & newsite if necessary + // initialize hneigh[0] to -1 on steps when reneighboring occurred + // initialize hneigh[2] to 0 every step + + int nlocal = atom->nlocal; + + double **f = atom->f; + double **x = atom->x; + double *q = atom->q; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist); + + int order1 = ewald_order&(1<<1); + int ni; + double *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; + double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2; + + inum = listmiddle->inum; + ilist = listmiddle->ilist; + numneigh = listmiddle->numneigh; + firstneigh = listmiddle->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + if (itype == typeO && order1) { + if (hneigh[i][0] < 0) { + hneigh[i][0] = iH1 = atom->map(atom->tag[i] + 1); + hneigh[i][1] = iH2 = atom->map(atom->tag[i] + 2); + hneigh[i][2] = 1; + if (iH1 == -1 || iH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + } else { + iH1 = hneigh[i][0]; + iH2 = hneigh[i][1]; + if (hneigh[i][2] == 0) { + hneigh[i][2] = 1; + compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + } + } + x1 = newsite[i]; + } else x1 = x[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + offseti = offset[itype]; + lj1i = lj1[itype]; lj2i = lj2[itype]; lj3i = lj3[itype]; lj4i = lj4[itype]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + ni = sbmask(j); + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + if (rsq < cut_ljsq[itype][jtype] && rsq >= cut_in_off_sq && rsq <= cut_out_off_sq ) { // lj + r2inv = 1.0/rsq; + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype]); + else { // special case + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype]); + } + + if (rsq < cut_in_on_sq) { // switching + register double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + forcelj *= rsw*rsw*(3.0 - 2.0*rsw); + } + if (rsq > cut_out_on_sq) { + register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + forcelj *= 1.0 + rsw*rsw*(2.0*rsw-3.0); + } + + forcelj *= r2inv; + f[i][0] += delx*forcelj; + f[i][1] += dely*forcelj; + f[i][2] += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; + } + + + // adjust rsq and delxyz for off-site O charge(s) + // ADDITIONAL REQEUST REQUIRED HERE!!!!! + + if (rsq < cut_coulsqplus && order1) { + if (itype == typeO || jtype == typeO) { + if (jtype == typeO) { + if (hneigh[j][0] < 0) { + hneigh[j][0] = jH1 = atom->map(atom->tag[j] + 1); + hneigh[j][1] = jH2 = atom->map(atom->tag[j] + 2); + hneigh[j][2] = 1; + if (jH1 == -1 || jH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + } else { + jH1 = hneigh[j][0]; + jH2 = hneigh[j][1]; + if (hneigh[j][2] == 0) { + hneigh[j][2] = 1; + compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + } + } + x2 = newsite[j]; + } else x2 = x[j]; + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; + } + + // test current rsq against cutoff and compute Coulombic force + + if (rsq < cut_coulsq && rsq >= cut_in_off_sq && rsq <= cut_out_off_sq) { + r2inv = 1.0 / rsq; + qri = qqrd2e*qtmp; + if (ni == 0) forcecoul = qri*q[j]*sqrt(r2inv); + else { + forcecoul = qri*q[j]*sqrt(r2inv)*special_coul[ni]; + } + + if (rsq < cut_in_on_sq) { // switching + register double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + forcecoul *= rsw*rsw*(3.0 - 2.0*rsw); + } + if (rsq > cut_out_on_sq) { + register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + forcecoul *= 1.0 + rsw*rsw*(2.0*rsw-3.0); + } + + cforce = forcecoul * r2inv; + + //if (evflag) ev_tally(i,j,nlocal,newton_pair, + // evdwl,0.0,cforce,delx,dely,delz); + + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + if (itype != typeO) { + f[i][0] += delx * cforce; + f[i][1] += dely * cforce; + f[i][2] += delz * cforce; + + } else { + fd[0] = delx*cforce; + fd[1] = dely*cforce; + fd[2] = delz*cforce; + + fO[0] = fd[0]*(1 - alpha); + fO[1] = fd[1]*(1 - alpha); + fO[2] = fd[2]*(1 - alpha); + + fH[0] = 0.5 * alpha * fd[0]; + fH[1] = 0.5 * alpha * fd[1]; + fH[2] = 0.5 * alpha * fd[2]; + + f[i][0] += fO[0]; + f[i][1] += fO[1]; + f[i][2] += fO[2]; + + f[iH1][0] += fH[0]; + f[iH1][1] += fH[1]; + f[iH1][2] += fH[2]; + + f[iH2][0] += fH[0]; + f[iH2][1] += fH[1]; + f[iH2][2] += fH[2]; + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + } else { + fd[0] = -delx*cforce; + fd[1] = -dely*cforce; + fd[2] = -delz*cforce; + + fO[0] = fd[0]*(1 - alpha); + fO[1] = fd[1]*(1 - alpha); + fO[2] = fd[2]*(1 - alpha); + + fH[0] = 0.5 * alpha * fd[0]; + fH[1] = 0.5 * alpha * fd[1]; + fH[2] = 0.5 * alpha * fd[2]; + + f[j][0] += fO[0]; + f[j][1] += fO[1]; + f[j][2] += fO[2]; + + f[jH1][0] += fH[0]; + f[jH1][1] += fH[1]; + f[jH1][2] += fH[2]; + + f[jH2][0] += fH[0]; + f[jH2][1] += fH[1]; + f[jH2][2] += fH[2]; + } + } + } + } + } +} + +/* --------------------------------------------------------------------- */ + +void PairLJLongTIP4PLong::compute_outer(int eflag, int vflag) +{ + int i,j,ii,jj,inum,jnum,itype,jtype,itable; + int n,vlist[6]; + int key; + int iH1,iH2,jH1,jH2; + double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul; + double fraction,table; + double r,r2inv,forcecoul,forcelj,cforce, respa_coul, respa_lj, frespa,fvirial; + double factor_coul,factor_lj; + double grij,expm2,prefactor,t,erfc; + double xiM[3],xjM[3],fO[3],fH[3],fd[3],v[6],xH1[3],xH2[3];// f1[3]; + double *x1,*x2; + int *ilist,*jlist,*numneigh,**firstneigh; + double rsq,qri; + int respa_flag; + + evdwl = ecoul = 0.0; + if (eflag || vflag) ev_setup(eflag,vflag); + else evflag = vflag_fdotr = 0; + + // reallocate hneigh & newsite if necessary + // initialize hneigh[0] to -1 on steps when reneighboring occurred + // initialize hneigh[2] to 0 every step + + int nlocal = atom->nlocal; + + double **f = atom->f; + double **x = atom->x; + double *q = atom->q; + int *type = atom->type; + double *special_coul = force->special_coul; + double *special_lj = force->special_lj; + int newton_pair = force->newton_pair; + double qqrd2e = force->qqrd2e; + double cut_coulsqplus = (cut_coul+2.0*qdist)*(cut_coul+2.0*qdist); + + int order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6); + int ni; + double *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; + double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2; + + double cut_in_off = cut_respa[2]; + double cut_in_on = cut_respa[3]; + + double cut_in_diff = cut_in_on - cut_in_off; + double cut_in_off_sq = cut_in_off*cut_in_off; + double cut_in_on_sq = cut_in_on*cut_in_on; + + inum = listouter->inum; + ilist = listouter->ilist; + numneigh = listouter->numneigh; + firstneigh = listouter->firstneigh; + + // loop over neighbors of my atoms + + for (ii = 0; ii < inum; ii++) { + i = ilist[ii]; + qtmp = q[i]; + qri = qtmp*qqrd2e; + xtmp = x[i][0]; + ytmp = x[i][1]; + ztmp = x[i][2]; + itype = type[i]; + if (itype == typeO) { + if (hneigh[i][0] < 0) { + hneigh[i][0] = iH1 = atom->map(atom->tag[i] + 1); + hneigh[i][1] = iH2 = atom->map(atom->tag[i] + 2); + hneigh[i][2] = 1; + if (iH1 == -1 || iH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[iH1] != typeH || atom->type[iH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + } else { + iH1 = hneigh[i][0]; + iH2 = hneigh[i][1]; + if (hneigh[i][2] == 0) { + hneigh[i][2] = 1; + compute_newsite(x[i],x[iH1],x[iH2],newsite[i]); + } + } + x1 = newsite[i]; + } else x1 = x[i]; + + jlist = firstneigh[i]; + jnum = numneigh[i]; + offseti = offset[itype]; + lj1i = lj1[itype]; lj2i = lj2[itype]; lj3i = lj3[itype]; lj4i = lj4[itype]; + + for (jj = 0; jj < jnum; jj++) { + j = jlist[jj]; + ni = sbmask(j); + factor_lj = special_lj[sbmask(j)]; + factor_coul = special_coul[sbmask(j)]; + j &= NEIGHMASK; + + delx = xtmp - x[j][0]; + dely = ytmp - x[j][1]; + delz = ztmp - x[j][2]; + rsq = delx*delx + dely*dely + delz*delz; + jtype = type[j]; + + respa_coul = 0; + respa_lj = 0; + if (rsq < cut_ljsq[itype][jtype]) { // lj + frespa = 1.0; // check whether and how to compute respa corrections + respa_flag = rsq < cut_in_on_sq ? 1 : 0; + if (respa_flag && (rsq > cut_in_off_sq)) { + register double rsw = (sqrt(rsq)-cut_in_off)/cut_in_diff; + frespa = 1-rsw*rsw*(3.0-2.0*rsw); + } + + r2inv = 1.0/rsq; + register double rn = r2inv*r2inv*r2inv; + if (respa_flag) respa_lj = ni == 0 ? // correct for respa + frespa*rn*(rn*lj1i[jtype]-lj2i[jtype]) : + frespa*rn*(rn*lj1i[jtype]-lj2i[jtype])*special_lj[ni]; + if (order6) { // long-range form + if (!ndisptablebits || rsq <= tabinnerdispsq) { + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*lj4i[jtype]; + if (ni == 0) { + forcelj = + (rn*=rn)*lj1i[jtype]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq-respa_lj; + if (eflag) evdwl = rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2; + } + else { // correct for special + register double f = special_lj[ni], t = rn*(1.0-f); + forcelj = f*(rn *= rn)*lj1i[jtype]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[jtype]-respa_lj; + if (eflag) + evdwl = f*rn*lj3i[jtype]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[jtype]; + } + } + else { // table real space + register union_int_float_t disp_t; + disp_t.f = rsq; + register const int disp_k = (disp_t.i & ndispmask)>>ndispshiftbits; + register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k]; + if (ni == 0) { + forcelj = (rn*=rn)*lj1i[jtype]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[jtype]-respa_lj; + if (eflag) evdwl = rn*lj3i[jtype]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[jtype]; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + forcelj = f*(rn *= rn)*lj1i[jtype]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[jtype]+t*lj2i[jtype]-respa_lj; + if (eflag) evdwl = f*rn*lj3i[jtype]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[jtype]+t*lj4i[jtype]; + } + } + } + else { // cut form + if (ni == 0) { + forcelj = rn*(rn*lj1i[jtype]-lj2i[jtype])-respa_lj; + if (eflag) evdwl = rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]; + } + else { // correct for special + register double f = special_lj[ni]; + forcelj = f*rn*(rn*lj1i[jtype]-lj2i[jtype])-respa_lj; + if (eflag) + evdwl = f*(rn*(rn*lj3i[jtype]-lj4i[jtype])-offseti[jtype]); + } + } + + forcelj *= r2inv; + f[i][0] += delx*forcelj; + f[i][1] += dely*forcelj; + f[i][2] += delz*forcelj; + f[j][0] -= delx*forcelj; + f[j][1] -= dely*forcelj; + f[j][2] -= delz*forcelj; + + if (evflag) { + fvirial = forcelj + respa_lj*r2inv; + ev_tally(i,j,nlocal,newton_pair, + evdwl,0.0,fvirial,delx,dely,delz); + } + } + + + // adjust rsq and delxyz for off-site O charge(s) + // ADDITIONAL REQEUST REQUIRED HERE!!!!! + + if (rsq < cut_coulsqplus) { + if (itype == typeO || jtype == typeO) { + if (jtype == typeO) { + if (hneigh[j][0] < 0) { + hneigh[j][0] = jH1 = atom->map(atom->tag[j] + 1); + hneigh[j][1] = jH2 = atom->map(atom->tag[j] + 2); + hneigh[j][2] = 1; + if (jH1 == -1 || jH2 == -1) + error->one(FLERR,"TIP4P hydrogen is missing"); + if (atom->type[jH1] != typeH || atom->type[jH2] != typeH) + error->one(FLERR,"TIP4P hydrogen has incorrect atom type"); + compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + } else { + jH1 = hneigh[j][0]; + jH2 = hneigh[j][1]; + if (hneigh[j][2] == 0) { + hneigh[j][2] = 1; + compute_newsite(x[j],x[jH1],x[jH2],newsite[j]); + } + } + x2 = newsite[j]; + } else x2 = x[j]; + delx = x1[0] - x2[0]; + dely = x1[1] - x2[1]; + delz = x1[2] - x2[2]; + rsq = delx*delx + dely*dely + delz*delz; + } + + // test current rsq against cutoff and compute Coulombic force + if ((rsq < cut_coulsq) && order1) { + + frespa = 1.0; // check whether and how to compute respa corrections + respa_flag = rsq < cut_in_on_sq ? 1 : 0; + if (respa_flag && (rsq > cut_in_off_sq)) { + register double rsw = (sqrt(rsq)-cut_in_off)/cut_in_diff; + frespa = 1-rsw*rsw*(3.0-2.0*rsw); + } + + r2inv = 1.0 / rsq; + if (!ncoultablebits || rsq <= tabinnersq) { // series real space + register double r = sqrt(rsq), s = qri*q[j]; + if (respa_flag) // correct for respa + respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni]; + register double x = g_ewald*r, t = 1.0/(1.0+EWALD_P*x); + if (ni == 0) { + s *= g_ewald*exp(-x*x); + forcecoul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-respa_coul; + if (eflag) ecoul = t; + } + else { // correct for special + r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x); + forcecoul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r-respa_coul; + if (eflag) ecoul = t-r; + } + } // table real space + else { + if (respa_flag) { + register double r = sqrt(rsq), s = qri*q[j]; + respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni]; + } + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; + register double f = (t.f-rtable[k])*drtable[k], qiqj = qtmp*q[j]; + if (ni == 0) { + forcecoul = qiqj*(ftable[k]+f*dftable[k]); + if (eflag) ecoul = qiqj*(etable[k]+f*detable[k]); + } + else { // correct for special + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + forcecoul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (eflag) { + t.f = (1.0-special_coul[ni])*(ptable[k]+f*dptable[k]); + ecoul = qiqj*(etable[k]+f*detable[k]-t.f); + } + } + } + + cforce = forcecoul * r2inv; + fvirial = (forcecoul + respa_coul) * r2inv; + double fvcf = fvirial/cforce; + + // if i,j are not O atoms, force is applied directly + // if i or j are O atoms, force is on fictitious atom & partitioned + // force partitioning due to Feenstra, J Comp Chem, 20, 786 (1999) + // f_f = fictitious force, fO = f_f (1 - 2 alpha), fH = alpha f_f + // preserves total force and torque on water molecule + // virial = sum(r x F) where each water's atoms are near xi and xj + // vlist stores 2,4,6 atoms whose forces contribute to virial + + n = 0; + key = 0; + + if (itype != typeO) { + f[i][0] += delx * cforce; + f[i][1] += dely * cforce; + f[i][2] += delz * cforce; + + if (vflag) { + v[0] = x[i][0] * delx * fvirial; + v[1] = x[i][1] * dely * fvirial; + v[2] = x[i][2] * delz * fvirial; + v[3] = x[i][0] * dely * fvirial; + v[4] = x[i][0] * delz * fvirial; + v[5] = x[i][1] * delz * fvirial; + } + vlist[n++] = i; + + } else { + key += 1; + fd[0] = delx*cforce; + fd[1] = dely*cforce; + fd[2] = delz*cforce; + + fO[0] = fd[0]*(1 - alpha); + fO[1] = fd[1]*(1 - alpha); + fO[2] = fd[2]*(1 - alpha); + + fH[0] = 0.5 * alpha * fd[0]; + fH[1] = 0.5 * alpha * fd[1]; + fH[2] = 0.5 * alpha * fd[2]; + + f[i][0] += fO[0]; + f[i][1] += fO[1]; + f[i][2] += fO[2]; + + f[iH1][0] += fH[0]; + f[iH1][1] += fH[1]; + f[iH1][2] += fH[2]; + + f[iH2][0] += fH[0]; + f[iH2][1] += fH[1]; + f[iH2][2] += fH[2]; + + if (vflag) { + domain->closest_image(x[i],x[iH1],xH1); + domain->closest_image(x[i],x[iH2],xH2); + + v[0] = (x[i][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0])*fvcf; + v[1] = (x[i][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1])*fvcf; + v[2] = (x[i][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2])*fvcf; + v[3] = (x[i][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1])*fvcf; + v[4] = (x[i][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2])*fvcf; + v[5] = (x[i][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2])*fvcf; + } + vlist[n++] = i; + vlist[n++] = iH1; + vlist[n++] = iH2; + } + + if (jtype != typeO) { + f[j][0] -= delx * cforce; + f[j][1] -= dely * cforce; + f[j][2] -= delz * cforce; + + if (vflag) { + v[0] -= x[j][0] * delx * fvirial; + v[1] -= x[j][1] * dely * fvirial; + v[2] -= x[j][2] * delz * fvirial; + v[3] -= x[j][0] * dely * fvirial; + v[4] -= x[j][0] * delz * fvirial; + v[5] -= x[j][1] * delz * fvirial; + } + vlist[n++] = j; + + } else { + key += 2; + + fd[0] = -delx*cforce; + fd[1] = -dely*cforce; + fd[2] = -delz*cforce; + + fO[0] = fd[0]*(1 - alpha); + fO[1] = fd[1]*(1 - alpha); + fO[2] = fd[2]*(1 - alpha); + + fH[0] = 0.5 * alpha * fd[0]; + fH[1] = 0.5 * alpha * fd[1]; + fH[2] = 0.5 * alpha * fd[2]; + + f[j][0] += fO[0]; + f[j][1] += fO[1]; + f[j][2] += fO[2]; + + f[jH1][0] += fH[0]; + f[jH1][1] += fH[1]; + f[jH1][2] += fH[2]; + + f[jH2][0] += fH[0]; + f[jH2][1] += fH[1]; + f[jH2][2] += fH[2]; + + if (vflag) { + domain->closest_image(x[j],x[jH1],xH1); + domain->closest_image(x[j],x[jH2],xH2); + + v[0] += (x[j][0]*fO[0] + xH1[0]*fH[0] + xH2[0]*fH[0])*fvcf; + v[1] += (x[j][1]*fO[1] + xH1[1]*fH[1] + xH2[1]*fH[1])*fvcf; + v[2] += (x[j][2]*fO[2] + xH1[2]*fH[2] + xH2[2]*fH[2])*fvcf; + v[3] += (x[j][0]*fO[1] + xH1[0]*fH[1] + xH2[0]*fH[1])*fvcf; + v[4] += (x[j][0]*fO[2] + xH1[0]*fH[2] + xH2[0]*fH[2])*fvcf; + v[5] += (x[j][1]*fO[2] + xH1[1]*fH[2] + xH2[1]*fH[2])*fvcf; + } + vlist[n++] = j; + vlist[n++] = jH1; + vlist[n++] = jH2; + } + + if (evflag) ev_tally_tip4p(key,vlist,v,ecoul,alpha); + } + } + } + } +} + /* ---------------------------------------------------------------------- global settings ------------------------------------------------------------------------- */ diff --git a/src/KSPACE/pair_lj_long_tip4p_long.h b/src/KSPACE/pair_lj_long_tip4p_long.h index 541ab5c9ad..d863fb8851 100755 --- a/src/KSPACE/pair_lj_long_tip4p_long.h +++ b/src/KSPACE/pair_lj_long_tip4p_long.h @@ -29,6 +29,9 @@ class PairLJLongTIP4PLong : public PairLJLongCoulLong { PairLJLongTIP4PLong(class LAMMPS *); ~PairLJLongTIP4PLong(); virtual void compute(int, int); + virtual void compute_inner(); + virtual void compute_middle(); + virtual void compute_outer(int, int); void settings(int, char **); void init_style(); double init_one(int, int); diff --git a/src/KSPACE/pppm_disp.cpp b/src/KSPACE/pppm_disp.cpp index 62a22bf7a7..95dfb76a9a 100755 --- a/src/KSPACE/pppm_disp.cpp +++ b/src/KSPACE/pppm_disp.cpp @@ -168,6 +168,8 @@ PPPMDisp::PPPMDisp(LAMMPS *lmp, int narg, char **arg) : KSpace(lmp, narg, arg) cg_peratom = NULL; cg_6 = NULL; cg_peratom_6 = NULL; + + memset(function, 0, EWALD_FUNCS*sizeof(int)); } /* ---------------------------------------------------------------------- diff --git a/src/OPT/pair_lj_long_coul_long_opt.cpp b/src/OPT/pair_lj_long_coul_long_opt.cpp index 174821680e..538abb051e 100644 --- a/src/OPT/pair_lj_long_coul_long_opt.cpp +++ b/src/OPT/pair_lj_long_coul_long_opt.cpp @@ -726,7 +726,7 @@ void PairLJLongCoulLongOpt::eval_outer() double cut_in_off_sq = cut_in_off*cut_in_off; double cut_in_on_sq = cut_in_on*cut_in_on; - ineighn = (ineigh = list->ilist)+list->inum; + ineighn = (ineigh = listouter->ilist)+listouter->inum; for (; ineighfirstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; for (; jneighmaxpage) list->add_pages(nthreads); + if (npage >= list->maxpage) list->add_pages(nthreads); } neighptr = &(list->pages[npage][npnt]); n = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (pgsize - npnt_inner < oneatom) { npnt_inner = 0; npage_inner += nthreads; - if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + if (npage_inner >= listinner->maxpage) listinner->add_pages(nthreads); } neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); n_inner = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (respamiddle) { if (pgsize - npnt_middle < oneatom) { npnt_middle = 0; npage_middle += nthreads; - if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + if (npage_middle >= listmiddle->maxpage) listmiddle->add_pages(nthreads); } neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); n_middle = 0; } + } itype = type[i]; xtmp = x[i][0]; @@ -281,37 +277,33 @@ void Neighbor::respa_nsq_newton_omp(NeighList *list) #if defined(_OPENMP) #pragma omp critical #endif + { if (pgsize - npnt < oneatom) { npnt = 0; npage += nthreads; - if (npage == list->maxpage) list->add_pages(nthreads); + if (npage >= list->maxpage) list->add_pages(nthreads); } neighptr = &(list->pages[npage][npnt]); n = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (pgsize - npnt_inner < oneatom) { npnt_inner = 0; npage_inner += nthreads; - if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + if (npage_inner >= listinner->maxpage) listinner->add_pages(nthreads); } neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); n_inner = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (respamiddle) { if (pgsize - npnt_middle < oneatom) { npnt_middle = 0; npage_middle += nthreads; - if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + if (npage_middle >= listmiddle->maxpage) listmiddle->add_pages(nthreads); } neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); n_middle = 0; } + } itag = tag[i]; itype = type[i]; @@ -484,37 +476,33 @@ void Neighbor::respa_bin_no_newton_omp(NeighList *list) #if defined(_OPENMP) #pragma omp critical #endif + { if (pgsize - npnt < oneatom) { npnt = 0; npage += nthreads; - if (npage == list->maxpage) list->add_pages(nthreads); + if (npage >= list->maxpage) list->add_pages(nthreads); } neighptr = &(list->pages[npage][npnt]); n = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (pgsize - npnt_inner < oneatom) { npnt_inner = 0; npage_inner += nthreads; - if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + if (npage_inner >= listinner->maxpage) listinner->add_pages(nthreads); } neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); n_inner = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (respamiddle) { if (pgsize - npnt_middle < oneatom) { npnt_middle = 0; npage_middle += nthreads; - if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + if (npage_middle >= listmiddle->maxpage) listmiddle->add_pages(nthreads); } neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); n_middle = 0; } + } itype = type[i]; xtmp = x[i][0]; @@ -677,37 +665,33 @@ void Neighbor::respa_bin_newton_omp(NeighList *list) #if defined(_OPENMP) #pragma omp critical #endif + { if (pgsize - npnt < oneatom) { npnt = 0; npage += nthreads; - if (npage == list->maxpage) list->add_pages(nthreads); + if (npage >= list->maxpage) list->add_pages(nthreads); } neighptr = &(list->pages[npage][npnt]); n = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (pgsize - npnt_inner < oneatom) { npnt_inner = 0; npage_inner += nthreads; - if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + if (npage_inner >= listinner->maxpage) listinner->add_pages(nthreads); } neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); n_inner = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (respamiddle) { if (pgsize - npnt_middle < oneatom) { npnt_middle = 0; npage_middle += nthreads; - if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + if (npage_middle >= listmiddle->maxpage) listmiddle->add_pages(nthreads); } neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); n_middle = 0; } + } itype = type[i]; xtmp = x[i][0]; @@ -911,37 +895,33 @@ void Neighbor::respa_bin_newton_tri_omp(NeighList *list) #if defined(_OPENMP) #pragma omp critical #endif + { if (pgsize - npnt < oneatom) { npnt = 0; npage += nthreads; - if (npage == list->maxpage) list->add_pages(nthreads); + if (npage >= list->maxpage) list->add_pages(nthreads); } neighptr = &(list->pages[npage][npnt]); n = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (pgsize - npnt_inner < oneatom) { npnt_inner = 0; npage_inner += nthreads; - if (npage_inner == listinner->maxpage) listinner->add_pages(nthreads); + if (npage_inner >= listinner->maxpage) listinner->add_pages(nthreads); } neighptr_inner = &(listinner->pages[npage_inner][npnt_inner]); n_inner = 0; -#if defined(_OPENMP) -#pragma omp critical -#endif if (respamiddle) { if (pgsize - npnt_middle < oneatom) { npnt_middle = 0; npage_middle += nthreads; - if (npage_middle == listmiddle->maxpage) listmiddle->add_pages(nthreads); + if (npage_middle >= listmiddle->maxpage) listmiddle->add_pages(nthreads); } neighptr_middle = &(listmiddle->pages[npage_middle][npnt_middle]); n_middle = 0; } + } itype = type[i]; xtmp = x[i][0]; diff --git a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp index ba31833d7a..ce5b7ac3ef 100644 --- a/src/USER-OMP/pair_buck_long_coul_long_omp.cpp +++ b/src/USER-OMP/pair_buck_long_coul_long_omp.cpp @@ -317,7 +317,7 @@ void PairBuckLongCoulLongOMP::compute_inner() const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = list->inum; + const int inum = listinner->inum; #if defined(_OPENMP) #pragma omp parallel default(none) #endif @@ -339,7 +339,7 @@ void PairBuckLongCoulLongOMP::compute_middle() const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = list->inum; + const int inum = listmiddle->inum; #if defined(_OPENMP) #pragma omp parallel default(none) @@ -367,7 +367,7 @@ void PairBuckLongCoulLongOMP::compute_outer(int eflag, int vflag) const int nall = atom->nlocal + atom->nghost; const int nthreads = comm->nthreads; - const int inum = list->inum; + const int inum = listouter->inum; #if defined(_OPENMP) #pragma omp parallel default(none) shared(eflag,vflag) @@ -803,7 +803,7 @@ void PairBuckLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const t const double *x0 = x[0]; double *f0 = f[0], *fi = 0; - int *ilist = list->ilist; + int *ilist = listinner->ilist; const int newton_pair = force->newton_pair; @@ -827,7 +827,7 @@ void PairBuckLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const t memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_bucksqi = cut_bucksq[typei = type[i]]; buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei]; - jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; for (; jneighilist; + int *ilist = listmiddle->ilist; const int newton_pair = force->newton_pair; @@ -924,7 +924,7 @@ void PairBuckLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); cut_bucksqi = cut_bucksq[typei = type[i]]; buck1i = buck1[typei]; buck2i = buck2[typei]; rhoinvi = rhoinv[typei]; - jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; for (; jneighilist; + int *ilist = listouter->ilist; int i, j, ii; int *jneigh, *jneighn, typei, typej, ni, respa_flag; @@ -1027,7 +1027,7 @@ void PairBuckLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const t buckai = buck_a[typei]; buckci = buck_c[typei]; rhoinvi = rhoinv[typei]; cutsqi = cutsq[typei]; cut_bucksqi = cut_bucksq[typei]; memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); - jneighn = (jneigh = list->firstneigh[i])+list->numneigh[i]; + jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; for (; jneighnlocal + atom->nghost; const int nthreads = comm->nthreads; @@ -64,18 +66,243 @@ void PairLJLongCoulLongOMP::compute(int eflag, int vflag) ThrData *thr = fix->get_thr(tid); ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); - if (evflag) { - if (eflag) { - if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr); - else eval<1,1,0>(ifrom, ito, thr); + if (order6) { + if (order1) { + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,0,1,1>(ifrom, ito, thr); + else eval<1,1,0,0,0,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,0,1,1>(ifrom, ito, thr); + else eval<1,0,0,0,0,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,0,1,1>(ifrom, ito, thr); + else eval<0,0,0,0,0,1,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,0,1,1>(ifrom, ito, thr); + else eval<1,1,0,1,0,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,0,1,1>(ifrom, ito, thr); + else eval<1,0,0,1,0,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,0,1,1>(ifrom, ito, thr); + else eval<0,0,0,1,0,1,1>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,1,1,1>(ifrom, ito, thr); + else eval<1,1,0,0,1,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,1,1,1>(ifrom, ito, thr); + else eval<1,0,0,0,1,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,1,1,1>(ifrom, ito, thr); + else eval<0,0,0,0,1,1,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,1,1,1>(ifrom, ito, thr); + else eval<1,1,0,1,1,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,1,1,1>(ifrom, ito, thr); + else eval<1,0,0,1,1,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,1,1,1>(ifrom, ito, thr); + else eval<0,0,0,1,1,1,1>(ifrom, ito, thr); + } + } + } } else { - if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr); - else eval<1,0,0>(ifrom, ito, thr); + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,0,0,1>(ifrom, ito, thr); + else eval<1,1,0,0,0,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,0,0,1>(ifrom, ito, thr); + else eval<1,0,0,0,0,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,0,0,1>(ifrom, ito, thr); + else eval<0,0,0,0,0,0,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,0,0,1>(ifrom, ito, thr); + else eval<1,1,0,1,0,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,0,0,1>(ifrom, ito, thr); + else eval<1,0,0,1,0,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,0,0,1>(ifrom, ito, thr); + else eval<0,0,0,1,0,0,1>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,1,0,1>(ifrom, ito, thr); + else eval<1,1,0,0,1,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,1,0,1>(ifrom, ito, thr); + else eval<1,0,0,0,1,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,1,0,1>(ifrom, ito, thr); + else eval<0,0,0,0,1,0,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,1,0,1>(ifrom, ito, thr); + else eval<1,1,0,1,1,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,1,0,1>(ifrom, ito, thr); + else eval<1,0,0,1,1,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,1,0,1>(ifrom, ito, thr); + else eval<0,0,0,1,1,0,1>(ifrom, ito, thr); + } + } + } } } else { - if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr); - else eval<0,0,0>(ifrom, ito, thr); - } + if (order1) { + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,0,1,0>(ifrom, ito, thr); + else eval<1,1,0,0,0,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,0,1,0>(ifrom, ito, thr); + else eval<1,0,0,0,0,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,0,1,0>(ifrom, ito, thr); + else eval<0,0,0,0,0,1,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,0,1,0>(ifrom, ito, thr); + else eval<1,1,0,1,0,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,0,1,0>(ifrom, ito, thr); + else eval<1,0,0,1,0,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,0,1,0>(ifrom, ito, thr); + else eval<0,0,0,1,0,1,0>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,1,1,0>(ifrom, ito, thr); + else eval<1,1,0,0,1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,1,1,0>(ifrom, ito, thr); + else eval<1,0,0,0,1,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,1,1,0>(ifrom, ito, thr); + else eval<0,0,0,0,1,1,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,1,1,0>(ifrom, ito, thr); + else eval<1,1,0,1,1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,1,1,0>(ifrom, ito, thr); + else eval<1,0,0,1,1,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,1,1,0>(ifrom, ito, thr); + else eval<0,0,0,1,1,1,0>(ifrom, ito, thr); + } + } + } + } else { + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,0,0,0>(ifrom, ito, thr); + else eval<1,1,0,0,0,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,0,0,0>(ifrom, ito, thr); + else eval<1,0,0,0,0,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,0,0,0>(ifrom, ito, thr); + else eval<0,0,0,0,0,0,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,0,0,0>(ifrom, ito, thr); + else eval<1,1,0,1,0,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,0,0,0>(ifrom, ito, thr); + else eval<1,0,0,1,0,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,0,0,0>(ifrom, ito, thr); + else eval<0,0,0,1,0,0,0>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,0,1,0,0>(ifrom, ito, thr); + else eval<1,1,0,0,1,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,0,1,0,0>(ifrom, ito, thr); + else eval<1,0,0,0,1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,0,1,0,0>(ifrom, ito, thr); + else eval<0,0,0,0,1,0,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval<1,1,1,1,1,0,0>(ifrom, ito, thr); + else eval<1,1,0,1,1,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval<1,0,1,1,1,0,0>(ifrom, ito, thr); + else eval<1,0,0,1,1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval<0,0,1,1,1,0,0>(ifrom, ito, thr); + else eval<0,0,0,1,1,0,0>(ifrom, ito, thr); + } + } + } + } + } reduce_thr(this, eflag, vflag, thr); } // end of omp parallel region @@ -83,7 +310,319 @@ void PairLJLongCoulLongOMP::compute(int eflag, int vflag) /* ---------------------------------------------------------------------- */ -template +void PairLJLongCoulLongOMP::compute_inner() +{ + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = listinner->inum; +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(0, 0, nall, 0, 0, thr); + eval_inner(ifrom, ito, thr); + + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +void PairLJLongCoulLongOMP::compute_middle() +{ + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = listmiddle->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(0, 0, nall, 0, 0, thr); + eval_middle(ifrom, ito, thr); + + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +void PairLJLongCoulLongOMP::compute_outer(int eflag, int vflag) +{ + if (eflag || vflag) { + ev_setup(eflag,vflag); + } else evflag = vflag_fdotr = 0; + const int order1 = ewald_order&(1<<1); + const int order6 = ewald_order&(1<<6); + + const int nall = atom->nlocal + atom->nghost; + const int nthreads = comm->nthreads; + const int inum = listouter->inum; + +#if defined(_OPENMP) +#pragma omp parallel default(none) shared(eflag,vflag) +#endif + { + int ifrom, ito, tid; + + loop_setup_thr(ifrom, ito, tid, inum, nthreads); + ThrData *thr = fix->get_thr(tid); + ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr); + + if (order6) { + if (order1) { + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,0,1,1>(ifrom, ito, thr); + else eval_outer<1,1,0,0,0,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,0,1,1>(ifrom, ito, thr); + else eval_outer<1,0,0,0,0,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,0,1,1>(ifrom, ito, thr); + else eval_outer<0,0,0,0,0,1,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,0,1,1>(ifrom, ito, thr); + else eval_outer<1,1,0,1,0,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,0,1,1>(ifrom, ito, thr); + else eval_outer<1,0,0,1,0,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,0,1,1>(ifrom, ito, thr); + else eval_outer<0,0,0,1,0,1,1>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,1,1,1>(ifrom, ito, thr); + else eval_outer<1,1,0,0,1,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,1,1,1>(ifrom, ito, thr); + else eval_outer<1,0,0,0,1,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,1,1,1>(ifrom, ito, thr); + else eval_outer<0,0,0,0,1,1,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,1,1,1>(ifrom, ito, thr); + else eval_outer<1,1,0,1,1,1,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,1,1,1>(ifrom, ito, thr); + else eval_outer<1,0,0,1,1,1,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,1,1,1>(ifrom, ito, thr); + else eval_outer<0,0,0,1,1,1,1>(ifrom, ito, thr); + } + } + } + } else { + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,0,0,1>(ifrom, ito, thr); + else eval_outer<1,1,0,0,0,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,0,0,1>(ifrom, ito, thr); + else eval_outer<1,0,0,0,0,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,0,0,1>(ifrom, ito, thr); + else eval_outer<0,0,0,0,0,0,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,0,0,1>(ifrom, ito, thr); + else eval_outer<1,1,0,1,0,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,0,0,1>(ifrom, ito, thr); + else eval_outer<1,0,0,1,0,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,0,0,1>(ifrom, ito, thr); + else eval_outer<0,0,0,1,0,0,1>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,1,0,1>(ifrom, ito, thr); + else eval_outer<1,1,0,0,1,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,1,0,1>(ifrom, ito, thr); + else eval_outer<1,0,0,0,1,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,1,0,1>(ifrom, ito, thr); + else eval_outer<0,0,0,0,1,0,1>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,1,0,1>(ifrom, ito, thr); + else eval_outer<1,1,0,1,1,0,1>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,1,0,1>(ifrom, ito, thr); + else eval_outer<1,0,0,1,1,0,1>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,1,0,1>(ifrom, ito, thr); + else eval_outer<0,0,0,1,1,0,1>(ifrom, ito, thr); + } + } + } + } + } else { + if (order1) { + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,0,1,0>(ifrom, ito, thr); + else eval_outer<1,1,0,0,0,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,0,1,0>(ifrom, ito, thr); + else eval_outer<1,0,0,0,0,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,0,1,0>(ifrom, ito, thr); + else eval_outer<0,0,0,0,0,1,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,0,1,0>(ifrom, ito, thr); + else eval_outer<1,1,0,1,0,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,0,1,0>(ifrom, ito, thr); + else eval_outer<1,0,0,1,0,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,0,1,0>(ifrom, ito, thr); + else eval_outer<0,0,0,1,0,1,0>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,1,1,0>(ifrom, ito, thr); + else eval_outer<1,1,0,0,1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,1,1,0>(ifrom, ito, thr); + else eval_outer<1,0,0,0,1,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,1,1,0>(ifrom, ito, thr); + else eval_outer<0,0,0,0,1,1,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,1,1,0>(ifrom, ito, thr); + else eval_outer<1,1,0,1,1,1,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,1,1,0>(ifrom, ito, thr); + else eval_outer<1,0,0,1,1,1,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,1,1,0>(ifrom, ito, thr); + else eval_outer<0,0,0,1,1,1,0>(ifrom, ito, thr); + } + } + } + } else { + if (!ndisptablebits) { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,0,0,0>(ifrom, ito, thr); + else eval_outer<1,1,0,0,0,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,0,0,0>(ifrom, ito, thr); + else eval_outer<1,0,0,0,0,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,0,0,0>(ifrom, ito, thr); + else eval_outer<0,0,0,0,0,0,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,0,0,0>(ifrom, ito, thr); + else eval_outer<1,1,0,1,0,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,0,0,0>(ifrom, ito, thr); + else eval_outer<1,0,0,1,0,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,0,0,0>(ifrom, ito, thr); + else eval_outer<0,0,0,1,0,0,0>(ifrom, ito, thr); + } + } + } else { + if (!ncoultablebits) { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,0,1,0,0>(ifrom, ito, thr); + else eval_outer<1,1,0,0,1,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,0,1,0,0>(ifrom, ito, thr); + else eval_outer<1,0,0,0,1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,0,1,0,0>(ifrom, ito, thr); + else eval_outer<0,0,0,0,1,0,0>(ifrom, ito, thr); + } + } else { + if (evflag) { + if (eflag) { + if (force->newton_pair) eval_outer<1,1,1,1,1,0,0>(ifrom, ito, thr); + else eval_outer<1,1,0,1,1,0,0>(ifrom, ito, thr); + } else { + if (force->newton_pair) eval_outer<1,0,1,1,1,0,0>(ifrom, ito, thr); + else eval_outer<1,0,0,1,1,0,0>(ifrom, ito, thr); + } + } else { + if (force->newton_pair) eval_outer<0,0,1,1,1,0,0>(ifrom, ito, thr); + else eval_outer<0,0,0,1,1,0,0>(ifrom, ito, thr); + } + } + } + } + } + + reduce_thr(this, eflag, vflag, thr); + } // end of omp parallel region +} + +/* ---------------------------------------------------------------------- */ + +template < const int EVFLAG, const int EFLAG, + const int NEWTON_PAIR, const int CTABLE, const int LJTABLE, const int ORDER1, const int ORDER6 > void PairLJLongCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) { double evdwl,ecoul,fpair; @@ -105,16 +644,16 @@ void PairLJLongCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) // loop over neighbors of my atoms - int i, ii, j, order1 = ewald_order&(1<<1), order6 = ewald_order&(1<<6); + int i, ii, j; int *jneigh, *jneighn, typei, typej, ni; double qi, qri, *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; double rsq, r2inv, force_coul, force_lj; - double g2 = g_ewald*g_ewald, g6 = g2*g2*g2, g8 = g6*g2; + double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2; vector xi, d; for (ii = iifrom; ii < iito; ++ii) { // loop over my atoms i = ilist[ii]; fi = f0+3*i; - if (order1) qri = (qi = q[i])*qqrd2e; // initialize constants + if (ORDER1) qri = (qi = q[i])*qqrd2e; // initialize constants offseti = offset[typei = type[i]]; lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; @@ -134,8 +673,8 @@ void PairLJLongCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) if ((rsq = vec_dot(d, d)) >= cutsqi[typej = type[j]]) continue; r2inv = 1.0/rsq; - if (order1 && (rsq < cut_coulsq)) { // coulombic - if (!ncoultablebits || rsq <= tabinnersq) { // series real space + if (ORDER1 && (rsq < cut_coulsq)) { // coulombic + if (!CTABLE || rsq <= tabinnersq) { // series real space register double r = sqrt(rsq), x = g_ewald*r; register double s = qri*q[j], t = 1.0/(1.0+EWALD_P*x); if (ni == 0) { @@ -168,22 +707,40 @@ void PairLJLongCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) else force_coul = ecoul = 0.0; if (rsq < cut_ljsqi[typej]) { // lj - if (order6) { // long-range lj - register double rn = r2inv*r2inv*r2inv; - register double x2 = g2*rsq, a2 = 1.0/x2; - x2 = a2*exp(-x2)*lj4i[typej]; - if (ni == 0) { - force_lj = - (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; - if (EFLAG) - evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2; - } - else { // special case - register double f = special_lj[ni], t = rn*(1.0-f); - force_lj = f*(rn *= rn)*lj1i[typej]- - g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]; - if (EFLAG) - evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej]; + if (ORDER6) { // long-range lj + if(!LJTABLE || rsq <= tabinnerdispsq) { //series real space + register double rn = r2inv*r2inv*r2inv; + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*lj4i[typej]; + if (ni == 0) { + force_lj = + (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq; + if (EFLAG) + evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + force_lj = f*(rn *= rn)*lj1i[typej]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]; + if (EFLAG) + evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej]; + } + } + else { // table real space + register union_int_float_t disp_t; + disp_t.f = rsq; + register const int disp_k = (disp_t.i & ndispmask)>>ndispshiftbits; + register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k]; + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) { + force_lj = (rn*=rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej]; + if (EFLAG) evdwl = rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej]; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + force_lj = f*(rn *= rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej]+t*lj2i[typej]; + if (EFLAG) evdwl = f*rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej]+t*lj4i[typej]; + } } } else { // cut lj @@ -224,6 +781,390 @@ void PairLJLongCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr) /* ---------------------------------------------------------------------- */ +void PairLJLongCoulLongOMP::eval_inner(int iifrom, int iito, ThrData * const thr) +{ + double rsq, r2inv, force_coul = 0.0, force_lj, fpair; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + + const double *x0 = x[0]; + double *f0 = f[0], *fi = 0; + + int *ilist = listinner->ilist; + + const int newton_pair = force->newton_pair; + + const double cut_out_on = cut_respa[0]; + const double cut_out_off = cut_respa[1]; + + + const double cut_out_diff = cut_out_off - cut_out_on; + const double cut_out_on_sq = cut_out_on*cut_out_on; + const double cut_out_off_sq = cut_out_off*cut_out_off; + + int *jneigh, *jneighn, typei, typej, ni; + const int order1 = (ewald_order|(ewald_off^-1))&(1<<1); + int i, j, ii; + double qri, *cut_ljsqi, *lj1i, *lj2i; + vector xi, d; + + for (ii = iifrom; ii < iito; ++ii) { // loop over my atoms + i = ilist[ii]; fi = f0+3*i; + memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); + cut_ljsqi = cut_ljsq[typei = type[i]]; + lj1i = lj1[typei]; lj2i = lj2[typei]; + jneighn = (jneigh = listinner->firstneigh[i])+listinner->numneigh[i]; + for (; jneigh= cut_out_off_sq) continue; + r2inv = 1.0/rsq; + + if (order1 && (rsq < cut_coulsq)) { // coulombic + qri = qqrd2e*q[i]; + force_coul = ni == 0 ? + qri*q[j]*sqrt(r2inv) : qri*q[j]*sqrt(r2inv)*special_coul[ni]; + } + + if (rsq < cut_ljsqi[typej = type[j]]) { // lennard-jones + register double rn = r2inv*r2inv*r2inv; + force_lj = ni == 0 ? + rn*(rn*lj1i[typej]-lj2i[typej]) : + rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni]; + } + else force_lj = 0.0; + + fpair = (force_coul + force_lj) * r2inv; + + if (rsq > cut_out_on_sq) { // switching + register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + fpair *= 1.0 + rsw*rsw*(2.0*rsw-3.0); + } + + if (newton_pair || j < nlocal) { // force update + register double *fj = f0+(j+(j<<1)), f; + fi[0] += f = d[0]*fpair; fj[0] -= f; + fi[1] += f = d[1]*fpair; fj[1] -= f; + fi[2] += f = d[2]*fpair; fj[2] -= f; + } + else { + fi[0] += d[0]*fpair; + fi[1] += d[1]*fpair; + fi[2] += d[2]*fpair; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +void PairLJLongCoulLongOMP::eval_middle(int iifrom, int iito, ThrData * const thr) +{ + double rsq, r2inv, force_coul = 0.0, force_lj, fpair; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + + const double *x0 = x[0]; + double *f0 = f[0], *fi = 0; + + int *ilist = listmiddle->ilist; + + const int newton_pair = force->newton_pair; + + const double cut_in_off = cut_respa[0]; + const double cut_in_on = cut_respa[1]; + const double cut_out_on = cut_respa[2]; + const double cut_out_off = cut_respa[3]; + + const double cut_in_diff = cut_in_on - cut_in_off; + const double cut_out_diff = cut_out_off - cut_out_on; + const double cut_in_off_sq = cut_in_off*cut_in_off; + const double cut_in_on_sq = cut_in_on*cut_in_on; + const double cut_out_on_sq = cut_out_on*cut_out_on; + const double cut_out_off_sq = cut_out_off*cut_out_off; + + int *jneigh, *jneighn, typei, typej, ni; + const int order1 = (ewald_order|(ewald_off^-1))&(1<<1); + int i, j, ii; + double qri, *cut_ljsqi, *lj1i, *lj2i; + vector xi, d; + + + for (ii = iifrom; ii < iito; ++ii) { // loop over my atoms + i = ilist[ii]; fi = f0+3*i; + if (order1) qri = qqrd2e*q[i]; + memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); + cut_ljsqi = cut_ljsq[typei = type[i]]; + lj1i = lj1[typei]; lj2i = lj2[typei]; + jneighn = (jneigh = listmiddle->firstneigh[i])+listmiddle->numneigh[i]; + + for (; jneigh= cut_out_off_sq) continue; + if (rsq <= cut_in_off_sq) continue; + r2inv = 1.0/rsq; + + if (order1 && (rsq < cut_coulsq)) // coulombic + force_coul = ni == 0 ? + qri*q[j]*sqrt(r2inv) : qri*q[j]*sqrt(r2inv)*special_coul[ni]; + + if (rsq < cut_ljsqi[typej = type[j]]) { // lennard-jones + register double rn = r2inv*r2inv*r2inv; + force_lj = ni == 0 ? + rn*(rn*lj1i[typej]-lj2i[typej]) : + rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni]; + } + else force_lj = 0.0; + + fpair = (force_coul + force_lj) * r2inv; + + if (rsq < cut_in_on_sq) { // switching + register double rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff; + fpair *= rsw*rsw*(3.0 - 2.0*rsw); + } + if (rsq > cut_out_on_sq) { + register double rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff; + fpair *= 1.0 + rsw*rsw*(2.0*rsw-3.0); + } + + if (newton_pair || j < nlocal) { // force update + register double *fj = f0+(j+(j<<1)), f; + fi[0] += f = d[0]*fpair; fj[0] -= f; + fi[1] += f = d[1]*fpair; fj[1] -= f; + fi[2] += f = d[2]*fpair; fj[2] -= f; + } + else { + fi[0] += d[0]*fpair; + fi[1] += d[1]*fpair; + fi[2] += d[2]*fpair; + } + } + } +} + +/* ---------------------------------------------------------------------- */ + +template < const int EVFLAG, const int EFLAG, + const int NEWTON_PAIR, const int CTABLE, const int LJTABLE, const int ORDER1, const int ORDER6 > +void PairLJLongCoulLongOMP::eval_outer(int iiform, int iito, ThrData * const thr) +{ + double evdwl,ecoul,fvirial,fpair; + evdwl = ecoul = 0.0; + + const double * const * const x = atom->x; + double * const * const f = thr->get_f(); + const double * const q = atom->q; + const int * const type = atom->type; + const int nlocal = atom->nlocal; + const double * const special_coul = force->special_coul; + const double * const special_lj = force->special_lj; + const double qqrd2e = force->qqrd2e; + + const double *x0 = x[0]; + double *f0 = f[0], *fi = f0; + + int *ilist = listouter->ilist; + + int i, j, ii; + int *jneigh, *jneighn, typei, typej, ni, respa_flag; + double qi = 0.0, qri = 0.0; + double *cutsqi, *cut_ljsqi, *lj1i, *lj2i, *lj3i, *lj4i, *offseti; + double rsq, r2inv, force_coul, force_lj; + double g2 = g_ewald_6*g_ewald_6, g6 = g2*g2*g2, g8 = g6*g2; + double respa_lj = 0.0, respa_coul = 0.0, frespa = 0.0; + vector xi, d; + + const double cut_in_off = cut_respa[2]; + const double cut_in_on = cut_respa[3]; + + const double cut_in_diff = cut_in_on - cut_in_off; + const double cut_in_off_sq = cut_in_off*cut_in_off; + const double cut_in_on_sq = cut_in_on*cut_in_on; + + //ineighn = (ineigh = list->ilist)+list->inum; + + for (ii = iiform; ii < iito; ++ii) { // loop over my atoms + i = ilist[ii]; fi = f0+3*i; + if (ORDER1) qri = (qi = q[i])*qqrd2e; // initialize constants + offseti = offset[typei = type[i]]; + lj1i = lj1[typei]; lj2i = lj2[typei]; lj3i = lj3[typei]; lj4i = lj4[typei]; + cutsqi = cutsq[typei]; cut_ljsqi = cut_ljsq[typei]; + memcpy(xi, x0+(i+(i<<1)), sizeof(vector)); + jneighn = (jneigh = listouter->firstneigh[i])+listouter->numneigh[i]; + + for (; jneigh= cutsqi[typej = type[j]]) continue; + r2inv = 1.0/rsq; + + frespa = 1.0; // check whether and how to compute respa corrections + respa_coul = 0; + respa_lj = 0; + respa_flag = rsq < cut_in_on_sq ? 1 : 0; + if (respa_flag && (rsq > cut_in_off_sq)) { + register double rsw = (sqrt(rsq)-cut_in_off)/cut_in_diff; + frespa = 1-rsw*rsw*(3.0-2.0*rsw); + } + + if (ORDER1 && (rsq < cut_coulsq)) { // coulombic + if (!CTABLE || rsq <= tabinnersq) { // series real space + register double r = sqrt(rsq), s = qri*q[j]; + if (respa_flag) // correct for respa + respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni]; + register double x = g_ewald*r, t = 1.0/(1.0+EWALD_P*x); + if (ni == 0) { + s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-respa_coul; + if (EFLAG) ecoul = t; + } + else { // correct for special + r = s*(1.0-special_coul[ni])/r; s *= g_ewald*exp(-x*x); + force_coul = (t *= ((((t*A5+A4)*t+A3)*t+A2)*t+A1)*s/x)+EWALD_F*s-r-respa_coul; + if (EFLAG) ecoul = t-r; + } + } // table real space + else { + if (respa_flag) { + register double r = sqrt(rsq), s = qri*q[j]; + respa_coul = ni == 0 ? frespa*s/r : frespa*s/r*special_coul[ni]; + } + register union_int_float_t t; + t.f = rsq; + register const int k = (t.i & ncoulmask) >> ncoulshiftbits; + register double f = (rsq-rtable[k])*drtable[k], qiqj = qi*q[j]; + if (ni == 0) { + force_coul = qiqj*(ftable[k]+f*dftable[k]); + if (EFLAG) ecoul = qiqj*(etable[k]+f*detable[k]); + } + else { // correct for special + t.f = (1.0-special_coul[ni])*(ctable[k]+f*dctable[k]); + force_coul = qiqj*(ftable[k]+f*dftable[k]-t.f); + if (EFLAG) { + t.f = (1.0-special_coul[ni])*(ptable[k]+f*dptable[k]); + ecoul = qiqj*(etable[k]+f*detable[k]-t.f); + } + } + } + } + + else force_coul = respa_coul = ecoul = 0.0; + + if (rsq < cut_ljsqi[typej]) { // lennard-jones + register double rn = r2inv*r2inv*r2inv; + if (respa_flag) respa_lj = ni == 0 ? // correct for respa + frespa*rn*(rn*lj1i[typej]-lj2i[typej]) : + frespa*rn*(rn*lj1i[typej]-lj2i[typej])*special_lj[ni]; + if (ORDER6) { // long-range form + if (!LJTABLE || rsq <= tabinnerdispsq) { + register double x2 = g2*rsq, a2 = 1.0/x2; + x2 = a2*exp(-x2)*lj4i[typej]; + if (ni == 0) { + force_lj = + (rn*=rn)*lj1i[typej]-g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq-respa_lj; + if (EFLAG) evdwl = rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2; + } + else { // correct for special + register double f = special_lj[ni], t = rn*(1.0-f); + force_lj = f*(rn *= rn)*lj1i[typej]- + g8*(((6.0*a2+6.0)*a2+3.0)*a2+1.0)*x2*rsq+t*lj2i[typej]-respa_lj; + if (EFLAG) + evdwl = f*rn*lj3i[typej]-g6*((a2+1.0)*a2+0.5)*x2+t*lj4i[typej]; + } + } + else { // table real space + register union_int_float_t disp_t; + disp_t.f = rsq; + register const int disp_k = (disp_t.i & ndispmask)>>ndispshiftbits; + register double f_disp = (rsq-rdisptable[disp_k])*drdisptable[disp_k]; + register double rn = r2inv*r2inv*r2inv; + if (ni == 0) { + force_lj = (rn*=rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej]-respa_lj; + if (EFLAG) evdwl = rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej]; + } + else { // special case + register double f = special_lj[ni], t = rn*(1.0-f); + force_lj = f*(rn *= rn)*lj1i[typej]-(fdisptable[disp_k]+f_disp*dfdisptable[disp_k])*lj4i[typej]+t*lj2i[typej]-respa_lj; + if (EFLAG) evdwl = f*rn*lj3i[typej]-(edisptable[disp_k]+f_disp*dedisptable[disp_k])*lj4i[typej]+t*lj4i[typej]; + } + } + } + else { // cut form + if (ni == 0) { + force_lj = rn*(rn*lj1i[typej]-lj2i[typej])-respa_lj; + if (EFLAG) evdwl = rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]; + } + else { // correct for special + register double f = special_lj[ni]; + force_lj = f*rn*(rn*lj1i[typej]-lj2i[typej])-respa_lj; + if (EFLAG) + evdwl = f*(rn*(rn*lj3i[typej]-lj4i[typej])-offseti[typej]); + } + } + } + else force_lj = respa_lj = evdwl = 0.0; + + fpair = (force_coul+force_lj)*r2inv; + + if (NEWTON_PAIR || j < nlocal) { + register double *fj = f0+(j+(j<<1)), f; + fi[0] += f = d[0]*fpair; fj[0] -= f; + fi[1] += f = d[1]*fpair; fj[1] -= f; + fi[2] += f = d[2]*fpair; fj[2] -= f; + } + else { + fi[0] += d[0]*fpair; + fi[1] += d[1]*fpair; + fi[2] += d[2]*fpair; + } + + if (EVFLAG) { + fvirial = (force_coul + force_lj + respa_coul + respa_lj)*r2inv; + ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR, + evdwl,ecoul,fvirial,d[0],d[1],d[2],thr); + } + } + } +} + +/* ---------------------------------------------------------------------- */ + double PairLJLongCoulLongOMP::memory_usage() { double bytes = memory_usage_thr(); diff --git a/src/USER-OMP/pair_lj_long_coul_long_omp.h b/src/USER-OMP/pair_lj_long_coul_long_omp.h index 39114e4d6f..f10cbfd510 100644 --- a/src/USER-OMP/pair_lj_long_coul_long_omp.h +++ b/src/USER-OMP/pair_lj_long_coul_long_omp.h @@ -35,11 +35,28 @@ class PairLJLongCoulLongOMP : public PairLJLongCoulLong, public ThrOMP { PairLJLongCoulLongOMP(class LAMMPS *); virtual void compute(int, int); + virtual void compute_inner(); + virtual void compute_middle(); + virtual void compute_outer(int, int); virtual double memory_usage(); private: - template - void eval(int ifrom, int ito, ThrData * const thr); + template + void eval(int, int, ThrData * const); + + template + void eval_outer(int, int, ThrData * const); + + + void eval_inner(int, int, ThrData *const); + void eval_middle(int, int, ThrData *const); + + + }; }