diff --git a/src/USER-OMP/pair_adp_omp.cpp b/src/USER-OMP/pair_adp_omp.cpp
index ec63dead89..56139c8980 100644
--- a/src/USER-OMP/pair_adp_omp.cpp
+++ b/src/USER-OMP/pair_adp_omp.cpp
@@ -71,7 +71,6 @@ void PairADPOMP::compute(int eflag, int vflag)
 
     loop_setup_thr(ifrom, ito, tid, inum, nthreads);
     ThrData *thr = fix->get_thr(tid);
-    thr->timer(Timer::START);
     ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
 
     if (force->newton_pair)
@@ -92,7 +91,6 @@ void PairADPOMP::compute(int eflag, int vflag)
       else eval<0,0,0>(ifrom, ito, thr);
     }
 
-    thr->timer(Timer::PAIR);
     reduce_thr(this, eflag, vflag, thr);
   } // end of omp parallel region
 }
diff --git a/src/USER-OMP/pair_coul_dsf_omp.cpp b/src/USER-OMP/pair_coul_dsf_omp.cpp
new file mode 100644
index 0000000000..b91edb8e8b
--- /dev/null
+++ b/src/USER-OMP/pair_coul_dsf_omp.cpp
@@ -0,0 +1,187 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_coul_dsf_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+#include "math_const.h"
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairCoulDSFOMP::PairCoulDSFOMP(LAMMPS *lmp) :
+  PairCoulDSF(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   per-call setup, then inside the OpenMP parallel region each thread
+   picks its [ifrom,ito) range, dispatches to the eval<> instantiation
+   matching evflag/eflag/newton_pair, and calls reduce_thr()
+------------------------------------------------------------------------- */
+
+void PairCoulDSFOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   pairwise kernel for neighbor list entries ilist[iifrom..iito-1]
+   EVFLAG: call ev_tally_thr(), EFLAG: evaluate energies,
+   NEWTON_PAIR: apply reaction force to j even when j >= nlocal
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairCoulDSFOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
+  double r,rsq,r2inv,forcecoul,factor_coul;
+  double prefactor,erfcc,erfcd,t;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  ecoul = 0.0;
+
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];  // per-thread force array
+  const double * _noalias const q = atom->q;
+  const int nlocal = atom->nlocal;
+  const double * _noalias const special_coul = force->special_coul;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;  // force on i is accumulated locally, stored after the j loop
+
+    if (EFLAG) {
+      // per-atom self term: passed in the ecoul slot with zero force
+      double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
+      ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
+    }
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_coul = special_coul[sbmask(j)];  // must be read before j is masked
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      if (rsq < cut_coulsq) {
+        r2inv = 1.0/rsq;
+
+        r = sqrt(rsq);
+        prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
+        erfcd = exp(-alpha*alpha*rsq);
+        t = 1.0 / (1.0 + EWALD_P*alpha*r);
+        erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;  // polynomial erfc(alpha*r)
+        forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
+                                 r*f_shift) * r;
+        fpair = forcecoul * r2inv;
+        if (EFLAG) ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+                                 0.0,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory_usage_thr() plus the memory usage reported by the base class
+------------------------------------------------------------------------- */
+
+double PairCoulDSFOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairCoulDSF::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_coul_dsf_omp.h b/src/USER-OMP/pair_coul_dsf_omp.h
new file mode 100644
index 0000000000..7897e2b29a
--- /dev/null
+++ b/src/USER-OMP/pair_coul_dsf_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(coul/dsf/omp,PairCoulDSFOMP)
+
+#else
+
+#ifndef LMP_PAIR_COUL_DSF_OMP_H
+#define LMP_PAIR_COUL_DSF_OMP_H
+
+#include "pair_coul_dsf.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairCoulDSFOMP : public PairCoulDSF, public ThrOMP {
+
+ public:
+  PairCoulDSFOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp
new file mode 100644
index 0000000000..18467e2ed2
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.cpp
@@ -0,0 +1,211 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_lj_cut_coul_dsf_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+#include "math_const.h"
+using namespace LAMMPS_NS;
+using namespace MathConst;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairLJCutCoulDSFOMP::PairLJCutCoulDSFOMP(LAMMPS *lmp) :
+  PairLJCutCoulDSF(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   per-call setup, then inside the OpenMP parallel region each thread
+   picks its [ifrom,ito) range, dispatches to the eval<> instantiation
+   matching evflag/eflag/newton_pair, and calls reduce_thr()
+------------------------------------------------------------------------- */
+
+void PairLJCutCoulDSFOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   pairwise kernel for neighbor list entries ilist[iifrom..iito-1]
+   EVFLAG: call ev_tally_thr(), EFLAG: evaluate energies,
+   NEWTON_PAIR: apply reaction force to j even when j >= nlocal
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairLJCutCoulDSFOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,ii,jj,jnum,itype,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
+  double prefactor,erfcc,erfcd,t;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];  // per-thread force array
+  const double * _noalias const q = atom->q;
+  const int * _noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * _noalias const special_coul = force->special_coul;
+  const double * _noalias const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    qtmp = q[i];
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    itype = type[i];
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;  // force on i is accumulated locally, stored after the j loop
+
+    if (EFLAG) {  // energy-only term (force and displacement args are 0), as in coul/dsf/omp
+      double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
+      ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
+    }
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];      // both factors must be read
+      factor_coul = special_coul[sbmask(j)];  // before j is masked
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsq[itype][jtype]) {
+        r2inv = 1.0/rsq;
+
+        if (rsq < cut_ljsq[itype][jtype]) {
+          r6inv = r2inv*r2inv*r2inv;
+          forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
+          forcelj *= factor_lj;
+        } else forcelj = 0.0;
+
+        if (rsq < cut_coulsq) {
+          r = sqrt(rsq);
+          prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
+          erfcd = exp(-alpha*alpha*r*r);
+          t = 1.0 / (1.0 + EWALD_P*alpha*r);
+          erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;  // polynomial erfc(alpha*r)
+          forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
+                                   r*f_shift) * r;
+        } else forcecoul = 0.0;
+        fpair = (forcecoul + forcelj) * r2inv;
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EFLAG) {
+          // r6inv, prefactor, erfcc and r are only valid inside the same cutoffs as above
+          if (rsq < cut_ljsq[itype][jtype]) {
+            evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
+              offset[itype][jtype];
+            evdwl *= factor_lj;
+          } else evdwl = 0.0;
+
+          if (rsq < cut_coulsq) {
+            ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
+          } else ecoul = 0.0;
+        }
+
+        if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+                                 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory_usage_thr() plus the memory usage reported by the base class
+------------------------------------------------------------------------- */
+
+double PairLJCutCoulDSFOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairLJCutCoulDSF::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_lj_cut_coul_dsf_omp.h b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.h
new file mode 100644
index 0000000000..bf1d74b017
--- /dev/null
+++ b/src/USER-OMP/pair_lj_cut_coul_dsf_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(lj/cut/coul/dsf/omp,PairLJCutCoulDSFOMP)
+
+#else
+
+#ifndef LMP_PAIR_LJ_CUT_COUL_DSF_OMP_H
+#define LMP_PAIR_LJ_CUT_COUL_DSF_OMP_H
+
+#include "pair_lj_cut_coul_dsf.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairLJCutCoulDSFOMP : public PairLJCutCoulDSF, public ThrOMP {
+
+ public:
+  PairLJCutCoulDSFOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp
new file mode 100644
index 0000000000..b3fafc1693
--- /dev/null
+++ b/src/USER-OMP/pair_nm_cut_coul_cut_omp.cpp
@@ -0,0 +1,208 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_nm_cut_coul_cut_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+PairNMCutCoulCutOMP::PairNMCutCoulCutOMP(LAMMPS *lmp) :
+  PairNMCutCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   per-call setup, then inside the OpenMP parallel region each thread
+   picks its [ifrom,ito) range, dispatches to the eval<> instantiation
+   matching evflag/eflag/newton_pair, and calls reduce_thr()
+------------------------------------------------------------------------- */
+
+void PairNMCutCoulCutOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   pairwise kernel for neighbor list entries ilist[iifrom..iito-1]
+   EVFLAG: call ev_tally_thr(), EFLAG: evaluate energies,
+   NEWTON_PAIR: apply reaction force to j even when j >= nlocal
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairNMCutCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int j,ii,jj,jnum,jtype;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double r,rsq,r2inv,rminv,rninv,forcecoul,forcenm,factor_coul,factor_lj;
+  int *ilist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];  // per-thread force array
+  const double * _noalias const q = atom->q;
+  const int * _noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * _noalias const special_coul = force->special_coul;
+  const double * _noalias const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    const int i = ilist[ii];
+    const int itype = type[i];
+    const int    * _noalias const jlist = firstneigh[i];
+    const double * _noalias const cutsqi = cutsq[itype];  // rows of the per-type tables for itype
+    const double * _noalias const cut_coulsqi = cut_coulsq[itype];
+    const double * _noalias const cut_ljsqi = cut_ljsq[itype];
+    const double * _noalias const offseti = offset[itype];
+    const double * _noalias const mmi = mm[itype];
+    const double * _noalias const nni = nn[itype];
+    const double * _noalias const nmi = nm[itype];
+    const double * _noalias const e0nmi = e0nm[itype];
+    const double * _noalias const r0mi = r0m[itype];
+    const double * _noalias const r0ni = r0n[itype];
+
+    qtmp = q[i];
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;  // force on i is accumulated locally, stored after the j loop
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];      // both factors must be read
+      factor_coul = special_coul[sbmask(j)];  // before j is masked
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsqi[jtype]) {
+        r2inv = 1.0/rsq;
+
+        if (rsq < cut_coulsqi[jtype]) {
+          const double rinv = sqrt(r2inv);
+          forcecoul = qqrd2e * qtmp*q[j]*rinv;
+          forcecoul *= factor_coul;
+          if (EFLAG) ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv;
+        } else {
+          forcecoul = 0.0;
+          if (EFLAG) ecoul = 0.0;
+        }
+
+        if (rsq < cut_ljsqi[jtype]) {
+          r = sqrt(rsq);
+          rminv = pow(r2inv,mmi[jtype]/2.0);  // r^(-m)
+          rninv = pow(r2inv,nni[jtype]/2.0);  // r^(-n)
+          forcenm = e0nmi[jtype]*nmi[jtype] *
+            (r0ni[jtype]/pow(r,nni[jtype]) -
+             r0mi[jtype]/pow(r,mmi[jtype]));
+          forcenm *= factor_lj;
+          if (EFLAG)
+            evdwl = (e0nmi[jtype]*(mmi[jtype] *
+                                   r0ni[jtype]*rninv -
+                                   nni[jtype] *
+                                   r0mi[jtype]*rminv) -
+                     offseti[jtype]) * factor_lj;
+        } else {
+          forcenm = 0.0;
+          if (EFLAG) evdwl = 0.0;
+        }
+
+        fpair = (forcecoul + forcenm) * r2inv;
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+                                 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory_usage_thr() plus the memory usage reported by the base class
+------------------------------------------------------------------------- */
+
+double PairNMCutCoulCutOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairNMCutCoulCut::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_nm_cut_coul_cut_omp.h b/src/USER-OMP/pair_nm_cut_coul_cut_omp.h
new file mode 100644
index 0000000000..1892c80816
--- /dev/null
+++ b/src/USER-OMP/pair_nm_cut_coul_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(nm/cut/coul/cut/omp,PairNMCutCoulCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_NM_CUT_COUL_CUT_OMP_H
+#define LMP_PAIR_NM_CUT_COUL_CUT_OMP_H
+
+#include "pair_nm_cut_coul_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+class PairNMCutCoulCutOMP : public PairNMCutCoulCut, public ThrOMP {
+
+ public:
+  PairNMCutCoulCutOMP(class LAMMPS *);
+
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp
new file mode 100644
index 0000000000..39b2936a6e
--- /dev/null
+++ b/src/USER-OMP/pair_nm_cut_coul_long_omp.cpp
@@ -0,0 +1,244 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_nm_cut_coul_long_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+#define EWALD_F   1.12837917
+#define EWALD_P   0.3275911
+#define A1        0.254829592
+#define A2       -0.284496736
+#define A3        1.421413741
+#define A4       -1.453152027
+#define A5        1.061405429
+
+/* ---------------------------------------------------------------------- */
+
+PairNMCutCoulLongOMP::PairNMCutCoulLongOMP(LAMMPS *lmp) :
+  PairNMCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   per-call setup, then inside the OpenMP parallel region each thread
+   picks its [ifrom,ito) range, dispatches to the eval<> instantiation
+   matching evflag/eflag/newton_pair, and calls reduce_thr()
+------------------------------------------------------------------------- */
+
+void PairNMCutCoulLongOMP::compute(int eflag, int vflag)
+{
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   pairwise kernel for neighbor list entries ilist[iifrom..iito-1]
+   EVFLAG: call ev_tally_thr(), EFLAG: evaluate energies,
+   NEWTON_PAIR: apply reaction force to j even when j >= nlocal
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairNMCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int j,ii,jj,jnum,jtype,itable;
+  double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
+  double fraction,table;
+  double r,rsq,r2inv,factor_coul,factor_lj;
+  double forcecoul,forcenm,rminv,rninv;
+  double grij,expm2,prefactor,t,erfc;
+  int *ilist,*numneigh,**firstneigh;
+
+  evdwl = ecoul = 0.0;
+
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];  // per-thread force array
+  const double * _noalias const q = atom->q;
+  const int * _noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+  const double * _noalias const special_coul = force->special_coul;
+  const double * _noalias const special_lj = force->special_lj;
+  const double qqrd2e = force->qqrd2e;
+  double fxtmp,fytmp,fztmp;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  // loop over neighbors of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    const int i = ilist[ii];
+    const int itype = type[i];
+    const int    * _noalias const jlist = firstneigh[i];
+    const double * _noalias const cutsqi = cutsq[itype];  // rows of the per-type tables for itype
+    const double * _noalias const cut_ljsqi = cut_ljsq[itype];
+    const double * _noalias const offseti = offset[itype];
+    const double * _noalias const mmi = mm[itype];
+    const double * _noalias const nni = nn[itype];
+    const double * _noalias const nmi = nm[itype];
+    const double * _noalias const e0nmi = e0nm[itype];
+    const double * _noalias const r0mi = r0m[itype];
+    const double * _noalias const r0ni = r0n[itype];
+
+    qtmp = q[i];
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    jnum = numneigh[i];
+    fxtmp=fytmp=fztmp=0.0;  // force on i is accumulated locally, stored after the j loop
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];      // both factors must be read
+      factor_coul = special_coul[sbmask(j)];  // before j is masked
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsqi[jtype]) {
+        r2inv = 1.0/rsq;
+
+        if (rsq < cut_coulsq) {
+          if (!ncoultablebits || rsq <= tabinnersq) {  // analytic branch
+            r = sqrt(rsq);
+            grij = g_ewald * r;
+            expm2 = exp(-grij*grij);
+            t = 1.0 / (1.0 + EWALD_P*grij);
+            erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;  // polynomial erfc(grij)
+            prefactor = qqrd2e * qtmp*q[j]/r;
+            forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
+            if (EFLAG) ecoul = prefactor*erfc;
+            if (factor_coul < 1.0) {
+              forcecoul -= (1.0-factor_coul)*prefactor;
+              if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
+            }
+          } else {  // tabulated branch: linear interpolation in rsq
+            union_int_float_t rsq_lookup;
+            rsq_lookup.f = rsq;
+            itable = rsq_lookup.i & ncoulmask;
+            itable >>= ncoulshiftbits;
+            fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
+            table = ftable[itable] + fraction*dftable[itable];
+            forcecoul = qtmp*q[j] * table;
+            if (EFLAG)
+              ecoul = qtmp*q[j] * (etable[itable] + fraction*detable[itable]);
+            if (factor_coul < 1.0) {
+              table = ctable[itable] + fraction*dctable[itable];
+              prefactor = qtmp*q[j] * table;
+              forcecoul -= (1.0-factor_coul)*prefactor;
+              if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
+            }
+          }
+        } else {
+          forcecoul = 0.0;
+          if (EFLAG) ecoul = 0.0;
+        }
+
+        if (rsq < cut_ljsqi[jtype]) {
+          r = sqrt(rsq);
+          rminv = pow(r2inv,mmi[jtype]/2.0);  // r^(-m)
+          rninv = pow(r2inv,nni[jtype]/2.0);  // r^(-n)
+          forcenm = e0nmi[jtype]*nmi[jtype] *
+            (r0ni[jtype]/pow(r,nni[jtype]) -
+             r0mi[jtype]/pow(r,mmi[jtype]));
+          forcenm *= factor_lj;
+          if (EFLAG)
+            evdwl = (e0nmi[jtype]*(mmi[jtype] *
+                                   r0ni[jtype]*rninv -
+                                   nni[jtype] *
+                                   r0mi[jtype]*rminv) -
+                     offseti[jtype]) * factor_lj;
+        } else {
+          forcenm = 0.0;
+          if (EFLAG) evdwl = 0.0;
+        }
+
+        fpair = (forcecoul + forcenm) * r2inv;
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
+                                 evdwl,ecoul,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   memory_usage_thr() plus the memory usage reported by the base class
+------------------------------------------------------------------------- */
+
+double PairNMCutCoulLongOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairNMCutCoulLong::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_nm_cut_coul_long_omp.h b/src/USER-OMP/pair_nm_cut_coul_long_omp.h
new file mode 100644
index 0000000000..fe6317ce91
--- /dev/null
+++ b/src/USER-OMP/pair_nm_cut_coul_long_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */ + +/* ---------------------------------------------------------------------- + Contributing author: Axel Kohlmeyer (Temple U) +------------------------------------------------------------------------- */ + +#ifdef PAIR_CLASS + +PairStyle(nm/cut/coul/long/omp,PairNMCutCoulLongOMP) + +#else + +#ifndef LMP_PAIR_NM_CUT_COUL_LONG_OMP_H +#define LMP_PAIR_NM_CUT_COUL_LONG_OMP_H + +#include "pair_nm_cut_coul_long.h" +#include "thr_omp.h" + +namespace LAMMPS_NS { + +class PairNMCutCoulLongOMP : public PairNMCutCoulLong, public ThrOMP { + + public: + PairNMCutCoulLongOMP(class LAMMPS *); + + virtual void compute(int, int); + virtual double memory_usage(); + + private: + template + void eval(int ifrom, int ito, ThrData * const thr); +}; + +} + +#endif +#endif diff --git a/src/USER-OMP/pair_nm_cut_omp.cpp b/src/USER-OMP/pair_nm_cut_omp.cpp new file mode 100644 index 0000000000..27f1d46931 --- /dev/null +++ b/src/USER-OMP/pair_nm_cut_omp.cpp @@ -0,0 +1,169 @@ +/* ---------------------------------------------------------------------- + LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator + http://lammps.sandia.gov, Sandia National Laboratories + Steve Plimpton, sjplimp@sandia.gov + + This software is distributed under the GNU General Public License. + + See the README file in the top-level LAMMPS directory. 
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_nm_cut_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ----------------------------------------------------------------------
+   construct the base pair style and the ThrOMP helper (tagged THR_PAIR),
+   set the Suffix::OMP bit in suffix_flag and clear respa_enable
+------------------------------------------------------------------------- */
+
+PairNMCutOMP::PairNMCutOMP(LAMMPS *lmp) :
+  PairNMCut(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   threaded force computation
+   eflag, vflag = energy / virial request flags, forwarded to ev_setup(),
+                  ev_setup_thr() and reduce_thr()
+   inside the parallel region each thread obtains its range [ifrom,ito)
+   and thread id from loop_setup_thr(), fetches its ThrData, runs the
+   eval<>() instantiation selected by evflag, eflag and
+   force->newton_pair, and finally calls reduce_thr()
+------------------------------------------------------------------------- */
+
+void PairNMCutOMP::compute(int eflag, int vflag)
+{
+  // without any energy/virial request just clear the tally flags
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // NOTE(review): nall, nthreads and inum are used inside the region but
+  // appear in no data-sharing clause; with default(none) newer OpenMP
+  // compilers may reject this for const locals -- confirm against the
+  // supported toolchains
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // template arguments are <EVFLAG,EFLAG,NEWTON_PAIR>
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   pair kernel for the list entries ii = iifrom .. iito-1
+   forces are accumulated into the array returned by thr->get_f()
+   EVFLAG      != 0 : call ev_tally_thr() for every pair inside the cutoff
+   EFLAG       != 0 : also compute the pair energy evdwl
+   NEWTON_PAIR != 0 : update f[j] for every j, otherwise only if j < nlocal
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairNMCutOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int * _noalias const type = atom->type;
+  const double * _noalias const special_lj = force->special_lj;
+  const int * _noalias const ilist = list->ilist;
+  const int * _noalias const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  double xtmp,ytmp,ztmp,delx,dely,delz,fxtmp,fytmp,fztmp;
+  double r,rsq,r2inv,forcenm,factor_lj,evdwl,fpair;
+
+  const int nlocal = atom->nlocal;
+  int j,jj,jnum,jtype;
+
+  evdwl = 0.0;
+
+  // loop over neighbors of my atoms
+
+  for (int ii = iifrom; ii < iito; ++ii) {
+    const int i = ilist[ii];
+    const int itype = type[i];
+    const int * _noalias const jlist = firstneigh[i];
+
+    // rows of the per-type-pair coefficient tables for type(i)
+    const double * _noalias const cutsqi = cutsq[itype];
+    const double * _noalias const offseti = offset[itype];
+    const double * _noalias const mmi = mm[itype];
+    const double * _noalias const nni = nn[itype];
+    const double * _noalias const nmi = nm[itype];
+    const double * _noalias const e0nmi = e0nm[itype];
+    const double * _noalias const r0mi = r0m[itype];
+    const double * _noalias const r0ni = r0n[itype];
+
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    jnum = numneigh[i];
+
+    // force on atom i is summed locally and stored once after the j loop
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      // the special-bond factor is looked up from the raw list entry
+      // before the entry is masked down to the atom index
+      factor_lj = special_lj[sbmask(j)];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cutsqi[jtype]) {
+        r2inv = 1.0/rsq;
+        r = sqrt(rsq);
+
+        forcenm = e0nmi[jtype]*nmi[jtype] *
+          (r0ni[jtype]/pow(r,nni[jtype]) -
+           r0mi[jtype]/pow(r,mmi[jtype]));
+        fpair = factor_lj*forcenm*r2inv;
+
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EFLAG) {
+          // r2inv^(mm/2) and r2inv^(nn/2) enter only the energy, so the
+          // two pow() calls are made only when the energy is requested
+          const double rminv = pow(r2inv,mmi[jtype]*0.5);
+          const double rninv = pow(r2inv,nni[jtype]*0.5);
+
+          // NOTE(review): fpair is scaled by factor_lj but evdwl is not;
+          // confirm against PairNMCut::compute() whether that is intended
+          evdwl = e0nmi[jtype] *
+            (mmi[jtype]*r0ni[jtype]*rninv -
+             nni[jtype]*r0mi[jtype]*rminv) - offseti[jtype];
+        }
+
+        if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+                                 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/* ----------------------------------------------------------------------
+   return memory_usage_thr() plus the base class PairNMCut::memory_usage()
+------------------------------------------------------------------------- */
+
+double PairNMCutOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairNMCut::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_nm_cut_omp.h b/src/USER-OMP/pair_nm_cut_omp.h
new file mode 100644
index 0000000000..161804c783
--- /dev/null
+++ b/src/USER-OMP/pair_nm_cut_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(nm/cut/omp,PairNMCutOMP)
+
+#else
+
+#ifndef LMP_PAIR_NM_CUT_OMP_H
+#define LMP_PAIR_NM_CUT_OMP_H
+
+#include "pair_nm_cut.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Pair style registered above as nm/cut/omp: derives from both PairNMCut
+// and ThrOMP and overrides compute() and memory_usage().
+
+class PairNMCutOMP : public PairNMCut, public ThrOMP {
+
+ public:
+  PairNMCutOMP(class LAMMPS *);
+
+  // arguments are the energy and virial request flags (eflag, vflag)
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  // per-thread kernel working on list entries ifrom..ito-1; compute()
+  // instantiates it as eval<EVFLAG,EFLAG,NEWTON_PAIR> with 0/1 values
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_tersoff_mod_omp.cpp b/src/USER-OMP/pair_tersoff_mod_omp.cpp
new file mode 100644
index 0000000000..b66e08aedc
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_mod_omp.cpp
@@ -0,0 +1,250 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_tersoff_mod_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+
+/* ----------------------------------------------------------------------
+   construct the base pair style and the ThrOMP helper (tagged THR_PAIR),
+   set the Suffix::OMP bit in suffix_flag and clear respa_enable
+------------------------------------------------------------------------- */
+
+PairTersoffMODOMP::PairTersoffMODOMP(LAMMPS *lmp) :
+  PairTersoffMOD(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   threaded force computation
+   eflag, vflag = energy / virial request flags, forwarded to ev_setup(),
+                  ev_setup_thr() and reduce_thr()
+   inside the parallel region each thread obtains its range [ifrom,ito)
+   and thread id from loop_setup_thr(), fetches its ThrData, runs the
+   eval<>() instantiation selected by evflag, eflag and vflag_atom, and
+   finally calls reduce_thr()
+------------------------------------------------------------------------- */
+
+void PairTersoffMODOMP::compute(int eflag, int vflag)
+{
+  // without any energy/virial request just clear the tally flags
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = vflag_atom = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // NOTE(review): nall, nthreads and inum are used inside the region but
+  // appear in no data-sharing clause; with default(none) newer OpenMP
+  // compilers may reject this for const locals -- confirm against the
+  // supported toolchains
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // template arguments are <EVFLAG,EFLAG,VFLAG_ATOM>
+    if (evflag) {
+      if (eflag) {
+        if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else eval<0,0,0>(ifrom, ito, thr);
+
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   two-body and three-body kernel for the list entries
+   ii = iifrom .. iito-1
+   forces are accumulated into the array returned by thr->get_f()
+   EVFLAG     != 0 : call ev_tally_thr() for the pairwise terms
+   EFLAG           : passed on to repulsive() and force_zeta()
+   VFLAG_ATOM != 0 : call v_tally3_thr() for every i-j-k triplet
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+void PairTersoffMODOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  int i,j,k,ii,jj,kk,jnum;
+  int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
+  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
+  double rsq,rsq1,rsq2;
+  double delr1[3],delr2[3],fi[3],fj[3],fk[3];
+  double zeta_ij,prefactor;
+  int *ilist,*jlist,*numneigh,**firstneigh;
+
+  evdwl = 0.0;
+
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int * _noalias const tag = atom->tag;
+  const int * _noalias const type = atom->type;
+  const int nlocal = atom->nlocal;
+
+  ilist = list->ilist;
+  numneigh = list->numneigh;
+  firstneigh = list->firstneigh;
+
+  double fxtmp,fytmp,fztmp;
+
+  // loop over full neighbor list of my atoms
+
+  for (ii = iifrom; ii < iito; ++ii) {
+
+    i = ilist[ii];
+    itag = tag[i];
+    itype = map[type[i]];
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+
+    // force on atom i is summed locally and stored once per atom
+    fxtmp = fytmp = fztmp = 0.0;
+
+    // two-body interactions, skip half of them
+
+    jlist = firstneigh[i];
+    jnum = numneigh[i];
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtag = tag[j];
+
+      // for distinct tags the parity of itag+jtag decides which of the
+      // two list entries (i,j) / (j,i) is kept, so each pair is handled
+      // from exactly one side; equal tags fall back to comparing the
+      // coordinates in z, y, x order
+      if (itag > jtag) {
+        if ((itag+jtag) % 2 == 0) continue;
+      } else if (itag < jtag) {
+        if ((itag+jtag) % 2 == 1) continue;
+      } else {
+        if (x[j].z < ztmp) continue;
+        if (x[j].z == ztmp && x[j].y < ytmp) continue;
+        if (x[j].z == ztmp && x[j].y == ytmp && x[j].x < xtmp) continue;
+      }
+
+      jtype = map[type[j]];
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+
+      iparam_ij = elem2param[itype][jtype][jtype];
+      if (rsq > params[iparam_ij].cutsq) continue;
+
+      // inherited helper; fpair and evdwl are passed for output
+      repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl);
+
+      fxtmp += delx*fpair;
+      fytmp += dely*fpair;
+      fztmp += delz*fpair;
+      f[j].x -= delx*fpair;
+      f[j].y -= dely*fpair;
+      f[j].z -= delz*fpair;
+
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
+                               evdwl,0.0,fpair,delx,dely,delz,thr);
+    }
+
+    // three-body interactions
+    // skip immediately if I-J is not within cutoff
+    double fjxtmp,fjytmp,fjztmp;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+      jtype = map[type[j]];
+      iparam_ij = elem2param[itype][jtype][jtype];
+
+      delr1[0] = x[j].x - xtmp;
+      delr1[1] = x[j].y - ytmp;
+      delr1[2] = x[j].z - ztmp;
+      rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
+      if (rsq1 > params[iparam_ij].cutsq) continue;
+
+      // accumulate bondorder zeta for each i-j interaction via loop over k
+
+      fjxtmp = fjytmp = fjztmp = 0.0;
+      zeta_ij = 0.0;
+
+      for (kk = 0; kk < jnum; kk++) {
+        if (jj == kk) continue;
+        k = jlist[kk];
+        k &= NEIGHMASK;
+        ktype = map[type[k]];
+        iparam_ijk = elem2param[itype][jtype][ktype];
+
+        delr2[0] = x[k].x - xtmp;
+        delr2[1] = x[k].y - ytmp;
+        delr2[2] = x[k].z - ztmp;
+        rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+        if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+        zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
+      }
+
+      // pairwise force due to zeta
+
+      force_zeta(&params[iparam_ij],rsq1,zeta_ij,fpair,prefactor,EFLAG,evdwl);
+
+      fxtmp += delr1[0]*fpair;
+      fytmp += delr1[1]*fpair;
+      fztmp += delr1[2]*fpair;
+      fjxtmp -= delr1[0]*fpair;
+      fjytmp -= delr1[1]*fpair;
+      fjztmp -= delr1[2]*fpair;
+
+      // delr1 points from i to j, the opposite of the two-body del
+      // vector above, hence the negated arguments
+      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
+                               -fpair,-delr1[0],-delr1[1],-delr1[2],thr);
+
+      // attractive term via loop over k
+
+      for (kk = 0; kk < jnum; kk++) {
+        if (jj == kk) continue;
+        k = jlist[kk];
+        k &= NEIGHMASK;
+        ktype = map[type[k]];
+        iparam_ijk = elem2param[itype][jtype][ktype];
+
+        delr2[0] = x[k].x - xtmp;
+        delr2[1] = x[k].y - ytmp;
+        delr2[2] = x[k].z - ztmp;
+        rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
+        if (rsq2 > params[iparam_ijk].cutsq) continue;
+
+        // inherited helper; fi, fj and fk are passed for output
+        attractive(&params[iparam_ijk],prefactor,
+                   rsq1,rsq2,delr1,delr2,fi,fj,fk);
+
+        fxtmp += fi[0];
+        fytmp += fi[1];
+        fztmp += fi[2];
+        fjxtmp += fj[0];
+        fjytmp += fj[1];
+        fjztmp += fj[2];
+        f[k].x += fk[0];
+        f[k].y += fk[1];
+        f[k].z += fk[2];
+
+        if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr);
+      }
+      f[j].x += fjxtmp;
+      f[j].y += fjytmp;
+      f[j].z += fjztmp;
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/*
---------------------------------------------------------------------- */
+
+// return memory_usage_thr() plus PairTersoffMOD::memory_usage()
+
+double PairTersoffMODOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairTersoffMOD::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_tersoff_mod_omp.h b/src/USER-OMP/pair_tersoff_mod_omp.h
new file mode 100644
index 0000000000..ed01094491
--- /dev/null
+++ b/src/USER-OMP/pair_tersoff_mod_omp.h
@@ -0,0 +1,43 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(tersoff/mod/omp,PairTersoffMODOMP)
+
+#else
+
+#ifndef LMP_PAIR_TERSOFF_MOD_OMP_H
+#define LMP_PAIR_TERSOFF_MOD_OMP_H
+
+#include "pair_tersoff_mod.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Pair style registered above as tersoff/mod/omp: derives from both
+// PairTersoffMOD and ThrOMP and overrides compute() and memory_usage().
+
+class PairTersoffMODOMP : public PairTersoffMOD, public ThrOMP {
+
+ public:
+  PairTersoffMODOMP(class LAMMPS *);
+
+  // arguments are the energy and virial request flags (eflag, vflag)
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  // per-thread kernel working on list entries ifrom..ito-1; compute()
+  // instantiates it as eval<EVFLAG,EFLAG,VFLAG_ATOM> with 0/1 values
+  template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/pair_zbl_omp.cpp b/src/USER-OMP/pair_zbl_omp.cpp
new file mode 100644
index 0000000000..454934f679
--- /dev/null
+++ b/src/USER-OMP/pair_zbl_omp.cpp
@@ -0,0 +1,170 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   This software is distributed
under the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "pair_zbl_omp.h"
+#include "atom.h"
+#include "comm.h"
+#include "force.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+
+#include "suffix.h"
+using namespace LAMMPS_NS;
+using namespace PairZBLConstants;
+
+/* ----------------------------------------------------------------------
+   construct the base pair style and the ThrOMP helper (tagged THR_PAIR),
+   set the Suffix::OMP bit in suffix_flag and clear respa_enable
+------------------------------------------------------------------------- */
+
+PairZBLOMP::PairZBLOMP(LAMMPS *lmp) :
+  PairZBL(lmp), ThrOMP(lmp, THR_PAIR)
+{
+  suffix_flag |= Suffix::OMP;
+  respa_enable = 0;
+}
+
+/* ----------------------------------------------------------------------
+   threaded force computation
+   eflag, vflag = energy / virial request flags, forwarded to ev_setup(),
+                  ev_setup_thr() and reduce_thr()
+   inside the parallel region each thread obtains its range [ifrom,ito)
+   and thread id from loop_setup_thr(), fetches its ThrData, runs the
+   eval<>() instantiation selected by evflag, eflag and
+   force->newton_pair, and finally calls reduce_thr()
+------------------------------------------------------------------------- */
+
+void PairZBLOMP::compute(int eflag, int vflag)
+{
+  // without any energy/virial request just clear the tally flags
+  if (eflag || vflag) {
+    ev_setup(eflag,vflag);
+  } else evflag = vflag_fdotr = 0;
+
+  const int nall = atom->nlocal + atom->nghost;
+  const int nthreads = comm->nthreads;
+  const int inum = list->inum;
+
+  // NOTE(review): nall, nthreads and inum are used inside the region but
+  // appear in no data-sharing clause; with default(none) newer OpenMP
+  // compilers may reject this for const locals -- confirm against the
+  // supported toolchains
+#if defined(_OPENMP)
+#pragma omp parallel default(none) shared(eflag,vflag)
+#endif
+  {
+    int ifrom, ito, tid;
+
+    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
+    ThrData *thr = fix->get_thr(tid);
+    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
+
+    // template arguments are <EVFLAG,EFLAG,NEWTON_PAIR>
+    if (evflag) {
+      if (eflag) {
+        if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
+        else eval<1,1,0>(ifrom, ito, thr);
+      } else {
+        if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
+        else eval<1,0,0>(ifrom, ito, thr);
+      }
+    } else {
+      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
+      else eval<0,0,0>(ifrom, ito, thr);
+    }
+    reduce_thr(this, eflag, vflag, thr);
+  } // end of omp parallel region
+}
+
+/* ----------------------------------------------------------------------
+   pair kernel for the list entries ii = iifrom .. iito-1
+   forces are accumulated into the array returned by thr->get_f()
+   EVFLAG      != 0 : call ev_tally_thr() for every pair inside the cutoff
+   EFLAG       != 0 : also compute the pair energy evdwl
+   NEWTON_PAIR != 0 : update f[j] for every j, otherwise only if j < nlocal
+------------------------------------------------------------------------- */
+
+template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+void PairZBLOMP::eval(int iifrom, int iito, ThrData * const thr)
+{
+  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
+  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
+  const int * _noalias const type = atom->type;
+  const int * _noalias const ilist = list->ilist;
+  const int * _noalias const numneigh = list->numneigh;
+  const int * const * const firstneigh = list->firstneigh;
+
+  double xtmp,ytmp,ztmp,delx,dely,delz,fxtmp,fytmp,fztmp;
+  double rsq,t,fswitch,eswitch,evdwl,fpair;
+
+  const int nlocal = atom->nlocal;
+  int j,jj,jnum,jtype;
+
+  evdwl = 0.0;
+
+  // loop over neighbors of my atoms
+
+  for (int ii = iifrom; ii < iito; ++ii) {
+    const int i = ilist[ii];
+    const int itype = type[i];
+    const int * _noalias const jlist = firstneigh[i];
+
+    // rows of the switching-function coefficient tables for type(i)
+    const double * _noalias const sw1i = sw1[itype];
+    const double * _noalias const sw2i = sw2[itype];
+    const double * _noalias const sw3i = sw3[itype];
+    const double * _noalias const sw4i = sw4[itype];
+    const double * _noalias const sw5i = sw5[itype];
+
+    xtmp = x[i].x;
+    ytmp = x[i].y;
+    ztmp = x[i].z;
+    jnum = numneigh[i];
+
+    // force on atom i is summed locally and stored once after the j loop
+    fxtmp=fytmp=fztmp=0.0;
+
+    for (jj = 0; jj < jnum; jj++) {
+      j = jlist[jj];
+      j &= NEIGHMASK;
+
+      delx = xtmp - x[j].x;
+      dely = ytmp - x[j].y;
+      delz = ztmp - x[j].z;
+      rsq = delx*delx + dely*dely + delz*delz;
+      jtype = type[j];
+
+      if (rsq < cut_globalsq) {
+        const double r = sqrt(rsq);
+        fpair = dzbldr(r, itype, jtype);
+
+        // beyond cut_inner add the switching term; t is set here and
+        // reused by the energy branch below under the same condition
+        if (r > cut_inner) {
+          t = r - cut_inner;
+          fswitch = t*t *
+            (sw1i[jtype] + sw2i[jtype]*t);
+          fpair += fswitch;
+        }
+
+        // turn the accumulated value into the factor that multiplies
+        // the del vector
+        fpair *= -1.0/r;
+        fxtmp += delx*fpair;
+        fytmp += dely*fpair;
+        fztmp += delz*fpair;
+
+        if (NEWTON_PAIR || j < nlocal) {
+          f[j].x -= delx*fpair;
+          f[j].y -= dely*fpair;
+          f[j].z -= delz*fpair;
+        }
+
+        if (EFLAG) {
+          evdwl = e_zbl(r, itype, jtype);
+          evdwl += sw5i[jtype];
+          if (r > cut_inner) {
+            eswitch = t*t*t *
+              (sw3i[jtype] + sw4i[jtype]*t);
+            evdwl += eswitch;
+          }
+        }
+
+        if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
+                                 evdwl,0.0,fpair,delx,dely,delz,thr);
+      }
+    }
+    f[i].x += fxtmp;
+    f[i].y += fytmp;
+    f[i].z += fztmp;
+  }
+}
+
+/*
---------------------------------------------------------------------- */
+
+// return memory_usage_thr() plus PairZBL::memory_usage()
+
+double PairZBLOMP::memory_usage()
+{
+  double bytes = memory_usage_thr();
+  bytes += PairZBL::memory_usage();
+
+  return bytes;
+}
diff --git a/src/USER-OMP/pair_zbl_omp.h b/src/USER-OMP/pair_zbl_omp.h
new file mode 100644
index 0000000000..a75d9dba53
--- /dev/null
+++ b/src/USER-OMP/pair_zbl_omp.h
@@ -0,0 +1,48 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation. Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software. This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Axel Kohlmeyer (Temple U)
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(zbl/omp,PairZBLOMP)
+
+#else
+
+#ifndef LMP_PAIR_ZBL_OMP_H
+#define LMP_PAIR_ZBL_OMP_H
+
+#include "pair_zbl.h"
+#include "thr_omp.h"
+
+namespace LAMMPS_NS {
+
+// Pair style registered above as zbl/omp: derives from both PairZBL and
+// ThrOMP and overrides compute() and memory_usage().
+
+class PairZBLOMP : public PairZBL, public ThrOMP {
+
+ public:
+  PairZBLOMP(class LAMMPS *);
+
+  // the two int arguments are unnamed here; presumably (eflag, vflag)
+  // as in the other /omp pair styles -- confirm in pair_zbl_omp.cpp
+  virtual void compute(int, int);
+  virtual double memory_usage();
+
+ private:
+  // per-thread kernel working on list entries ifrom..ito
+  // NOTE(review): the template parameter list was missing from this
+  // declaration; it is restored to match the other /omp pair styles --
+  // confirm against the definition in pair_zbl_omp.cpp
+  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
+  void eval(int ifrom, int ito, ThrData * const thr);
+};
+
+}
+
+#endif
+#endif
diff --git a/src/USER-OMP/thr_data.cpp b/src/USER-OMP/thr_data.cpp index 7f116fcd71..598fb85289 100644 --- a/src/USER-OMP/thr_data.cpp +++ b/src/USER-OMP/thr_data.cpp @@ -25,6 +25,7 @@ using namespace LAMMPS_NS; +/* ---------------------------------------------------------------------- */ ThrData::ThrData(int tid) :
_f(0),_torque(0),_erforce(0),_de(0),_drho(0),_mu(0),_lambda(0),_rhoB(0), @@ -261,7 +262,7 @@ double ThrData::memory_usage() void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid) { #if defined(_OPENMP) - // NOOP in non-threaded execution. + // NOOP in single-threaded execution. if (nthreads == 1) return; #pragma omp barrier { @@ -270,15 +271,72 @@ void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, const int ifrom = tid*idelta; const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta); - // this if protects against having more threads than atoms +#if defined(USER_OMP_NO_UNROLL) if (ifrom < nvals) { - for (int m = ifrom; m < ito; ++m) { + int m = 0; + + for (m = ifrom; m < ito; ++m) { for (int n = 1; n < nthreads; ++n) { dall[m] += dall[n*nvals + m]; dall[n*nvals + m] = 0.0; } } } +#else + // this if protects against having more threads than atoms + if (ifrom < nvals) { + int m = 0; + + // for architectures that have L1 D-cache line sizes of 64 bytes + // (8 doubles) wide, explictly unroll this loop to compute 8 + // contiguous values in the array at a time + // -- modify this code based on the size of the cache line + double t0, t1, t2, t3, t4, t5, t6, t7; + for (m = ifrom; m < (ito-7); m+=8) { + t0 = dall[m ]; + t1 = dall[m+1]; + t2 = dall[m+2]; + t3 = dall[m+3]; + t4 = dall[m+4]; + t5 = dall[m+5]; + t6 = dall[m+6]; + t7 = dall[m+7]; + for (int n = 1; n < nthreads; ++n) { + t0 += dall[n*nvals + m ]; + t1 += dall[n*nvals + m+1]; + t2 += dall[n*nvals + m+2]; + t3 += dall[n*nvals + m+3]; + t4 += dall[n*nvals + m+4]; + t5 += dall[n*nvals + m+5]; + t6 += dall[n*nvals + m+6]; + t7 += dall[n*nvals + m+7]; + dall[n*nvals + m ] = 0.0; + dall[n*nvals + m+1] = 0.0; + dall[n*nvals + m+2] = 0.0; + dall[n*nvals + m+3] = 0.0; + dall[n*nvals + m+4] = 0.0; + dall[n*nvals + m+5] = 0.0; + dall[n*nvals + m+6] = 0.0; + dall[n*nvals + m+7] = 0.0; + } + dall[m ] = t0; + dall[m+1] = t1; + dall[m+2] = t2; + 
dall[m+3] = t3; + dall[m+4] = t4; + dall[m+5] = t5; + dall[m+6] = t6; + dall[m+7] = t7; + } + // do the last < 8 values + for (; m < ito; m++) { + for (int n = 1; n < nthreads; ++n) { + dall[m] += dall[n*nvals + m]; + dall[n*nvals + m] = 0.0; + } + } + } +#endif } #else // NOOP in non-threaded execution.