git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@10907 f3b2605a-c512-4ea7-a41b-209d697bcdaa

This commit is contained in:
sjplimp
2013-10-30 15:42:14 +00:00
parent 15b7374ff0
commit 0fa3867f34
16 changed files with 1791 additions and 5 deletions

View File

@ -71,7 +71,6 @@ void PairADPOMP::compute(int eflag, int vflag)
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
thr->timer(Timer::START);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (force->newton_pair)
@ -92,7 +91,6 @@ void PairADPOMP::compute(int eflag, int vflag)
else eval<0,0,0>(ifrom, ito, thr);
}
thr->timer(Timer::PAIR);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}

View File

@ -0,0 +1,177 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_coul_dsf_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
// EWALD_P and A1..A5 parameterize the polynomial erfc() approximation in eval():
//   erfc(x) ~ t*(A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp(-x*x),  t = 1/(1+EWALD_P*x)
// (the values match the Abramowitz & Stegun 7.1.26 coefficients)
// EWALD_F is 2/sqrt(pi) to the digits given; the code shown in this file
// does not reference it
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
// construct the PairCoulDSF base and the ThrOMP helper (THR_PAIR), then set the style flags
PairCoulDSFOMP::PairCoulDSFOMP(LAMMPS *lmp)
  : PairCoulDSF(lmp), ThrOMP(lmp, THR_PAIR)
{
  // clear respa_enable for this variant
  respa_enable = 0;

  // add the OMP bit to whatever suffix flags the base class set
  suffix_flag = suffix_flag | Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   threaded driver: every thread runs eval() on its own slice of the
   neighbor list and then hands its ThrData to reduce_thr()
------------------------------------------------------------------------- */
void PairCoulDSFOMP::compute(int eflag, int vflag)
{
  // nothing requested: clear both flags, otherwise let ev_setup() configure them
  if (!eflag && !vflag) evflag = vflag_fdotr = 0;
  else ev_setup(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    // ifrom/ito/tid are filled in by loop_setup_thr() for the calling thread
    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // select the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation for the run-time flags
    if (!evflag) {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    } else if (eflag) {
      if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
      else eval<1,1,0>(ifrom, ito, thr);
    } else {
      if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
      else eval<1,0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairCoulDSFOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,ecoul,fpair;
double r,rsq,r2inv,forcecoul,factor_coul;
double prefactor,erfcc,erfcd,t;
int *ilist,*jlist,*numneigh,**firstneigh;
ecoul = 0.0;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
const double * _noalias const q = atom->q;
const int nlocal = atom->nlocal;
const double * _noalias const special_coul = force->special_coul;
const double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
jlist = firstneigh[i];
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
if (EFLAG) {
double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
}
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j].x;
dely = ytmp - x[j].y;
delz = ztmp - x[j].z;
rsq = delx*delx + dely*dely + delz*delz;
if (rsq < cut_coulsq) {
r2inv = 1.0/rsq;
r = sqrt(rsq);
prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
erfcd = exp(-alpha*alpha*rsq);
t = 1.0 / (1.0 + EWALD_P*alpha*r);
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
r*f_shift) * r;
fpair = forcecoul * r2inv;
if (EFLAG) ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
}
if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
0.0,ecoul,fpair,delx,dely,delz,thr);
}
}
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
}
}
/* ---------------------------------------------------------------------- */
// total reported memory: memory_usage_thr() plus the PairCoulDSF base-class figure
double PairCoulDSFOMP::memory_usage()
{
  const double thr_bytes = memory_usage_thr();
  const double base_bytes = PairCoulDSF::memory_usage();
  return thr_bytes + base_bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(coul/dsf/omp,PairCoulDSFOMP)
#else
#ifndef LMP_PAIR_COUL_DSF_OMP_H
#define LMP_PAIR_COUL_DSF_OMP_H
#include "pair_coul_dsf.h"
#include "thr_omp.h"
namespace LAMMPS_NS {

// threaded variant of PairCoulDSF: combines the base pair style with ThrOMP
class PairCoulDSFOMP : public PairCoulDSF, public ThrOMP {

 public:
  PairCoulDSFOMP(class LAMMPS *lmp);

  virtual void compute(int eflag, int vflag);
  virtual double memory_usage();

 private:
  // pair kernel for neighbor-list entries [ifrom,ito); the template
  // arguments are compile-time copies of the energy/virial/newton flags
  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
  void eval(int ifrom, int ito, ThrData * const thr);
};

}
#endif
#endif

View File

@ -0,0 +1,201 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_lj_cut_coul_dsf_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
#include "math_const.h"
using namespace LAMMPS_NS;
using namespace MathConst;
// EWALD_P and A1..A5 parameterize the polynomial erfc() approximation in eval():
//   erfc(x) ~ t*(A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp(-x*x),  t = 1/(1+EWALD_P*x)
// (the values match the Abramowitz & Stegun 7.1.26 coefficients)
// EWALD_F is 2/sqrt(pi) to the digits given; the code shown in this file
// does not reference it
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
// construct the PairLJCutCoulDSF base and the ThrOMP helper (THR_PAIR), then set the style flags
PairLJCutCoulDSFOMP::PairLJCutCoulDSFOMP(LAMMPS *lmp)
  : PairLJCutCoulDSF(lmp), ThrOMP(lmp, THR_PAIR)
{
  // clear respa_enable for this variant
  respa_enable = 0;

  // add the OMP bit to whatever suffix flags the base class set
  suffix_flag = suffix_flag | Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   threaded driver: every thread runs eval() on its own slice of the
   neighbor list and then hands its ThrData to reduce_thr()
------------------------------------------------------------------------- */
void PairLJCutCoulDSFOMP::compute(int eflag, int vflag)
{
  // nothing requested: clear both flags, otherwise let ev_setup() configure them
  if (!eflag && !vflag) evflag = vflag_fdotr = 0;
  else ev_setup(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    // ifrom/ito/tid are filled in by loop_setup_thr() for the calling thread
    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // select the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation for the run-time flags
    if (!evflag) {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    } else if (eflag) {
      if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
      else eval<1,1,0>(ifrom, ito, thr);
    } else {
      if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
      else eval<1,0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairLJCutCoulDSFOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,itype,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double r,rsq,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
double prefactor,erfcc,erfcd,t;
int *ilist,*jlist,*numneigh,**firstneigh;
evdwl = ecoul = 0.0;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
const double * _noalias const q = atom->q;
const int * _noalias const type = atom->type;
const int nlocal = atom->nlocal;
const double * _noalias const special_coul = force->special_coul;
const double * _noalias const special_lj = force->special_lj;
const double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
qtmp = q[i];
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
if (EVFLAG) {
double e_self = -(e_shift/2.0 + alpha/MY_PIS) * qtmp*qtmp*qqrd2e;
ev_tally_thr(this,i,i,nlocal,0,0.0,e_self,0.0,0.0,0.0,0.0,thr);
}
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j].x;
dely = ytmp - x[j].y;
delz = ztmp - x[j].z;
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
r2inv = 1.0/rsq;
if (rsq < cut_ljsq[itype][jtype]) {
r6inv = r2inv*r2inv*r2inv;
forcelj = r6inv * (lj1[itype][jtype]*r6inv - lj2[itype][jtype]);
forcelj *= factor_lj;
} else forcelj = 0.0;
if (rsq < cut_coulsq) {
r = sqrt(rsq);
prefactor = factor_coul * qqrd2e*qtmp*q[j]/r;
erfcd = exp(-alpha*alpha*r*r);
t = 1.0 / (1.0 + EWALD_P*alpha*r);
erfcc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * erfcd;
forcecoul = prefactor * (erfcc/r + 2.0*alpha/MY_PIS * erfcd +
r*f_shift) * r;
} else forcecoul = 0.0;
fpair = (forcecoul + forcelj) * r2inv;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
}
if (EFLAG) {
if (rsq < cut_ljsq[itype][jtype]) {
evdwl = r6inv*(lj3[itype][jtype]*r6inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl *= factor_lj;
} else evdwl = 0.0;
if (rsq < cut_coulsq) {
ecoul = prefactor * (erfcc - r*e_shift - rsq*f_shift);
} else ecoul = 0.0;
}
if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz,thr);
}
}
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
}
}
/* ---------------------------------------------------------------------- */
// total reported memory: memory_usage_thr() plus the PairLJCutCoulDSF base-class figure
double PairLJCutCoulDSFOMP::memory_usage()
{
  const double thr_bytes = memory_usage_thr();
  const double base_bytes = PairLJCutCoulDSF::memory_usage();
  return thr_bytes + base_bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(lj/cut/coul/dsf/omp,PairLJCutCoulDSFOMP)
#else
#ifndef LMP_PAIR_LJ_CUT_COUL_DSF_OMP_H
#define LMP_PAIR_LJ_CUT_COUL_DSF_OMP_H
#include "pair_lj_cut_coul_dsf.h"
#include "thr_omp.h"
namespace LAMMPS_NS {

// threaded variant of PairLJCutCoulDSF: combines the base pair style with ThrOMP
class PairLJCutCoulDSFOMP : public PairLJCutCoulDSF, public ThrOMP {

 public:
  PairLJCutCoulDSFOMP(class LAMMPS *lmp);

  virtual void compute(int eflag, int vflag);
  virtual double memory_usage();

 private:
  // pair kernel for neighbor-list entries [ifrom,ito); the template
  // arguments are compile-time copies of the energy/virial/newton flags
  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
  void eval(int ifrom, int ito, ThrData * const thr);
};

}
#endif
#endif

View File

@ -0,0 +1,198 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_nm_cut_coul_cut_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// construct the PairNMCutCoulCut base and the ThrOMP helper (THR_PAIR), then set the style flags
PairNMCutCoulCutOMP::PairNMCutCoulCutOMP(LAMMPS *lmp)
  : PairNMCutCoulCut(lmp), ThrOMP(lmp, THR_PAIR)
{
  // clear respa_enable for this variant
  respa_enable = 0;

  // add the OMP bit to whatever suffix flags the base class set
  suffix_flag = suffix_flag | Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   threaded driver: every thread runs eval() on its own slice of the
   neighbor list and then hands its ThrData to reduce_thr()
------------------------------------------------------------------------- */
void PairNMCutCoulCutOMP::compute(int eflag, int vflag)
{
  // nothing requested: clear both flags, otherwise let ev_setup() configure them
  if (!eflag && !vflag) evflag = vflag_fdotr = 0;
  else ev_setup(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    // ifrom/ito/tid are filled in by loop_setup_thr() for the calling thread
    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // select the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation for the run-time flags
    if (!evflag) {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    } else if (eflag) {
      if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
      else eval<1,1,0>(ifrom, ito, thr);
    } else {
      if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
      else eval<1,0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairNMCutCoulCutOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int j,ii,jj,jnum,jtype;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double r,rsq,r2inv,rminv,rninv,forcecoul,forcenm,factor_coul,factor_lj;
int *ilist,*numneigh,**firstneigh;
evdwl = ecoul = 0.0;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
const double * _noalias const q = atom->q;
const int * _noalias const type = atom->type;
const int nlocal = atom->nlocal;
const double * _noalias const special_coul = force->special_coul;
const double * _noalias const special_lj = force->special_lj;
const double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
const int i = ilist[ii];
const int itype = type[i];
const int * _noalias const jlist = firstneigh[i];
const double * _noalias const cutsqi = cutsq[itype];
const double * _noalias const cut_coulsqi = cut_coulsq[itype];
const double * _noalias const cut_ljsqi = cut_ljsq[itype];
const double * _noalias const offseti = offset[itype];
const double * _noalias const mmi = mm[itype];
const double * _noalias const nni = nn[itype];
const double * _noalias const nmi = nm[itype];
const double * _noalias const e0nmi = e0nm[itype];
const double * _noalias const r0mi = r0m[itype];
const double * _noalias const r0ni = r0n[itype];
qtmp = q[i];
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j].x;
dely = ytmp - x[j].y;
delz = ztmp - x[j].z;
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsqi[jtype]) {
r2inv = 1.0/rsq;
if (rsq < cut_coulsqi[jtype]) {
const double rinv = sqrt(r2inv);
forcecoul = qqrd2e * qtmp*q[j]*rinv;
forcecoul *= factor_coul;
if (EFLAG) ecoul = factor_coul * qqrd2e * qtmp*q[j]*rinv;
} else {
forcecoul = 0.0;
if (EFLAG) ecoul = 0.0;
}
if (rsq < cut_ljsqi[jtype]) {
r = sqrt(rsq);
rminv = pow(r2inv,mmi[jtype]/2.0);
rninv = pow(r2inv,nni[jtype]/2.0);
forcenm = e0nmi[jtype]*nmi[jtype] *
(r0ni[jtype]/pow(r,nni[jtype]) -
r0mi[jtype]/pow(r,mmi[jtype]));
forcenm *= factor_lj;
if (EFLAG)
evdwl = (e0nmi[jtype]*(mmi[jtype] *
r0ni[jtype]*rninv -
nni[jtype] *
r0mi[jtype]*rminv) -
offseti[jtype]) * factor_lj;
} else {
forcenm = 0.0;
if (EFLAG) evdwl = 0.0;
}
fpair = (forcecoul + forcenm) * r2inv;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
}
if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz,thr);
}
}
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
}
}
/* ---------------------------------------------------------------------- */
// total reported memory: memory_usage_thr() plus the PairNMCutCoulCut base-class figure
double PairNMCutCoulCutOMP::memory_usage()
{
  const double thr_bytes = memory_usage_thr();
  const double base_bytes = PairNMCutCoulCut::memory_usage();
  return thr_bytes + base_bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(nm/cut/coul/cut/omp,PairNMCutCoulCutOMP)
#else
#ifndef LMP_PAIR_NM_CUT_COUL_CUT_OMP_H
#define LMP_PAIR_NM_CUT_COUL_CUT_OMP_H
#include "pair_nm_cut_coul_cut.h"
#include "thr_omp.h"
namespace LAMMPS_NS {

// threaded variant of PairNMCutCoulCut: combines the base pair style with ThrOMP
class PairNMCutCoulCutOMP : public PairNMCutCoulCut, public ThrOMP {

 public:
  PairNMCutCoulCutOMP(class LAMMPS *lmp);

  virtual void compute(int eflag, int vflag);
  virtual double memory_usage();

 private:
  // pair kernel for neighbor-list entries [ifrom,ito); the template
  // arguments are compile-time copies of the energy/virial/newton flags
  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
  void eval(int ifrom, int ito, ThrData * const thr);
};

}
#endif
#endif

View File

@ -0,0 +1,234 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_nm_cut_coul_long_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
using namespace LAMMPS_NS;
// EWALD_P and A1..A5 parameterize the polynomial erfc() approximation in eval():
//   erfc(x) ~ t*(A1+t*(A2+t*(A3+t*(A4+t*A5)))) * exp(-x*x),  t = 1/(1+EWALD_P*x)
// (the values match the Abramowitz & Stegun 7.1.26 coefficients)
// EWALD_F is 2/sqrt(pi) to the digits given; eval() uses it in the
// EWALD_F*grij*expm2 term of the Coulomb force
#define EWALD_F 1.12837917
#define EWALD_P 0.3275911
#define A1 0.254829592
#define A2 -0.284496736
#define A3 1.421413741
#define A4 -1.453152027
#define A5 1.061405429
/* ---------------------------------------------------------------------- */
// construct the PairNMCutCoulLong base and the ThrOMP helper (THR_PAIR), then set the style flags
PairNMCutCoulLongOMP::PairNMCutCoulLongOMP(LAMMPS *lmp)
  : PairNMCutCoulLong(lmp), ThrOMP(lmp, THR_PAIR)
{
  // clear respa_enable for this variant
  respa_enable = 0;

  // add the OMP bit to whatever suffix flags the base class set
  suffix_flag = suffix_flag | Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   threaded driver: every thread runs eval() on its own slice of the
   neighbor list and then hands its ThrData to reduce_thr()
------------------------------------------------------------------------- */
void PairNMCutCoulLongOMP::compute(int eflag, int vflag)
{
  // nothing requested: clear both flags, otherwise let ev_setup() configure them
  if (!eflag && !vflag) evflag = vflag_fdotr = 0;
  else ev_setup(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    // ifrom/ito/tid are filled in by loop_setup_thr() for the calling thread
    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // select the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation for the run-time flags
    if (!evflag) {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    } else if (eflag) {
      if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
      else eval<1,1,0>(ifrom, ito, thr);
    } else {
      if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
      else eval<1,0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
/* ---------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairNMCutCoulLongOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,ii,jj,jnum,jtype,itable;
double qtmp,xtmp,ytmp,ztmp,delx,dely,delz,evdwl,ecoul,fpair;
double fraction,table;
double r,rsq,rinv,r2inv,factor_coul,factor_lj;
double forcecoul,forcenm,rminv,rninv;
double grij,expm2,prefactor,t,erfc;
int *ilist,*jlist,*numneigh,**firstneigh;
evdwl = ecoul = 0.0;
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
const double * _noalias const q = atom->q;
const int * _noalias const type = atom->type;
const int nlocal = atom->nlocal;
const double * _noalias const special_coul = force->special_coul;
const double * _noalias const special_lj = force->special_lj;
const double qqrd2e = force->qqrd2e;
double fxtmp,fytmp,fztmp;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// loop over neighbors of my atoms
for (ii = iifrom; ii < iito; ++ii) {
const int i = ilist[ii];
const int itype = type[i];
const int * _noalias const jlist = firstneigh[i];
const double * _noalias const cutsqi = cutsq[itype];
const double * _noalias const cut_ljsqi = cut_ljsq[itype];
const double * _noalias const offseti = offset[itype];
const double * _noalias const mmi = mm[itype];
const double * _noalias const nni = nn[itype];
const double * _noalias const nmi = nm[itype];
const double * _noalias const e0nmi = e0nm[itype];
const double * _noalias const r0mi = r0m[itype];
const double * _noalias const r0ni = r0n[itype];
qtmp = q[i];
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
factor_coul = special_coul[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j].x;
dely = ytmp - x[j].y;
delz = ztmp - x[j].z;
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsqi[jtype]) {
r2inv = 1.0/rsq;
if (rsq < cut_coulsq) {
if (!ncoultablebits || rsq <= tabinnersq) {
r = sqrt(rsq);
grij = g_ewald * r;
expm2 = exp(-grij*grij);
t = 1.0 / (1.0 + EWALD_P*grij);
erfc = t * (A1+t*(A2+t*(A3+t*(A4+t*A5)))) * expm2;
prefactor = qqrd2e * qtmp*q[j]/r;
forcecoul = prefactor * (erfc + EWALD_F*grij*expm2);
if (EFLAG) ecoul = prefactor*erfc;
if (factor_coul < 1.0) {
forcecoul -= (1.0-factor_coul)*prefactor;
if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
}
} else {
union_int_float_t rsq_lookup;
rsq_lookup.f = rsq;
itable = rsq_lookup.i & ncoulmask;
itable >>= ncoulshiftbits;
fraction = (rsq_lookup.f - rtable[itable]) * drtable[itable];
table = ftable[itable] + fraction*dftable[itable];
forcecoul = qtmp*q[j] * table;
if (EFLAG)
ecoul = qtmp*q[j] * (etable[itable] + fraction*detable[itable]);
if (factor_coul < 1.0) {
table = ctable[itable] + fraction*dctable[itable];
prefactor = qtmp*q[j] * table;
forcecoul -= (1.0-factor_coul)*prefactor;
if (EFLAG) ecoul -= (1.0-factor_coul)*prefactor;
}
}
} else {
forcecoul = 0.0;
if (EFLAG) ecoul = 0.0;
}
if (rsq < cut_ljsqi[jtype]) {
r = sqrt(rsq);
rminv = pow(r2inv,mmi[jtype]/2.0);
rninv = pow(r2inv,nni[jtype]/2.0);
forcenm = e0nmi[jtype]*nmi[jtype] *
(r0ni[jtype]/pow(r,nni[jtype]) -
r0mi[jtype]/pow(r,mmi[jtype]));
forcenm *= factor_lj;
if (EFLAG)
evdwl = (e0nmi[jtype]*(mmi[jtype] *
r0ni[jtype]*rninv -
nni[jtype] *
r0mi[jtype]*rminv) -
offseti[jtype]) * factor_lj;
} else {
forcenm = 0.0;
if (EFLAG) evdwl = 0.0;
}
fpair = (forcecoul + forcenm) * r2inv;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
}
if (EVFLAG) ev_tally_thr(this, i,j,nlocal,NEWTON_PAIR,
evdwl,ecoul,fpair,delx,dely,delz,thr);
}
}
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
}
}
/* ---------------------------------------------------------------------- */
// total reported memory: memory_usage_thr() plus the PairNMCutCoulLong base-class figure
double PairNMCutCoulLongOMP::memory_usage()
{
  const double thr_bytes = memory_usage_thr();
  const double base_bytes = PairNMCutCoulLong::memory_usage();
  return thr_bytes + base_bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(nm/cut/coul/long/omp,PairNMCutCoulLongOMP)
#else
#ifndef LMP_PAIR_NM_CUT_COUL_LONG_OMP_H
#define LMP_PAIR_NM_CUT_COUL_LONG_OMP_H
#include "pair_nm_cut_coul_long.h"
#include "thr_omp.h"
namespace LAMMPS_NS {

// threaded variant of PairNMCutCoulLong: combines the base pair style with ThrOMP
class PairNMCutCoulLongOMP : public PairNMCutCoulLong, public ThrOMP {

 public:
  PairNMCutCoulLongOMP(class LAMMPS *lmp);

  virtual void compute(int eflag, int vflag);
  virtual double memory_usage();

 private:
  // pair kernel for neighbor-list entries [ifrom,ito); the template
  // arguments are compile-time copies of the energy/virial/newton flags
  template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
  void eval(int ifrom, int ito, ThrData * const thr);
};

}
#endif
#endif

View File

@ -0,0 +1,169 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_nm_cut_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
// construct the PairNMCut base and the ThrOMP helper (THR_PAIR), then set the style flags
PairNMCutOMP::PairNMCutOMP(LAMMPS *lmp)
  : PairNMCut(lmp), ThrOMP(lmp, THR_PAIR)
{
  // clear respa_enable for this variant
  respa_enable = 0;

  // add the OMP bit to whatever suffix flags the base class set
  suffix_flag = suffix_flag | Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   threaded driver: every thread runs eval() on its own slice of the
   neighbor list and then hands its ThrData to reduce_thr()
------------------------------------------------------------------------- */
void PairNMCutOMP::compute(int eflag, int vflag)
{
  // nothing requested: clear both flags, otherwise let ev_setup() configure them
  if (!eflag && !vflag) evflag = vflag_fdotr = 0;
  else ev_setup(eflag,vflag);

  const int nall = atom->nlocal + atom->nghost;
  const int nthreads = comm->nthreads;
  const int inum = list->inum;

#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
  {
    int ifrom, ito, tid;

    // ifrom/ito/tid are filled in by loop_setup_thr() for the calling thread
    loop_setup_thr(ifrom, ito, tid, inum, nthreads);
    ThrData *thr = fix->get_thr(tid);
    ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);

    // select the eval<EVFLAG,EFLAG,NEWTON_PAIR> instantiation for the run-time flags
    if (!evflag) {
      if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
      else eval<0,0,0>(ifrom, ito, thr);
    } else if (eflag) {
      if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
      else eval<1,1,0>(ifrom, ito, thr);
    } else {
      if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
      else eval<1,0,0>(ifrom, ito, thr);
    }

    reduce_thr(this, eflag, vflag, thr);
  } // end of omp parallel region
}
/* ----------------------------------------------------------------------
   N-M pair kernel for neighbor-list entries [iifrom,iito)
   forces are accumulated into the array returned by thr->get_f()
   EVFLAG      = call ev_tally_thr() for each pair inside the cutoff
   EFLAG       = compute evdwl
   NEWTON_PAIR = apply the reaction force to j regardless of its index
------------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairNMCutOMP::eval(int iifrom, int iito, ThrData * const thr)
{
  const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
  dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
  const int * _noalias const type = atom->type;
  const double * _noalias const special_lj = force->special_lj;
  const int * _noalias const ilist = list->ilist;
  const int * _noalias const numneigh = list->numneigh;
  const int * const * const firstneigh = list->firstneigh;
  const int nlocal = atom->nlocal;

  // only written when EFLAG is set; otherwise 0.0 is what gets tallied
  double evdwl = 0.0;

  // loop over neighbors of my atoms

  for (int ii = iifrom; ii < iito; ++ii) {
    const int i = ilist[ii];
    const int itype = type[i];
    const int * _noalias const jlist = firstneigh[i];

    // rows of the per-type-pair coefficient tables for type(i)
    const double * _noalias const cutsqi = cutsq[itype];
    const double * _noalias const offseti = offset[itype];
    const double * _noalias const mmi = mm[itype];
    const double * _noalias const nni = nn[itype];
    const double * _noalias const nmi = nm[itype];
    const double * _noalias const e0nmi = e0nm[itype];
    const double * _noalias const r0mi = r0m[itype];
    const double * _noalias const r0ni = r0n[itype];

    const double xtmp = x[i].x;
    const double ytmp = x[i].y;
    const double ztmp = x[i].z;
    const int jnum = numneigh[i];

    // force on atom i is summed locally and stored once after the j loop
    double fxtmp = 0.0;
    double fytmp = 0.0;
    double fztmp = 0.0;

    for (int jj = 0; jj < jnum; ++jj) {
      // the raw list entry is passed to sbmask() before NEIGHMASK reduces it to an index
      const int jfull = jlist[jj];
      const double factor_lj = special_lj[sbmask(jfull)];
      const int j = jfull & NEIGHMASK;

      const double delx = xtmp - x[j].x;
      const double dely = ytmp - x[j].y;
      const double delz = ztmp - x[j].z;
      const double rsq = delx*delx + dely*dely + delz*delz;
      const int jtype = type[j];

      // negated form of "rsq < cutsq" so the comparison itself is unchanged
      if (!(rsq < cutsqi[jtype])) continue;

      const double r2inv = 1.0/rsq;
      const double r = sqrt(rsq);

      const double forcenm = e0nmi[jtype]*nmi[jtype] *
        (r0ni[jtype]/pow(r,nni[jtype]) -
         r0mi[jtype]/pow(r,mmi[jtype]));
      const double fpair = factor_lj*forcenm*r2inv;

      fxtmp += delx*fpair;
      fytmp += dely*fpair;
      fztmp += delz*fpair;
      if (NEWTON_PAIR || j < nlocal) {
        f[j].x -= delx*fpair;
        f[j].y -= dely*fpair;
        f[j].z -= delz*fpair;
      }

      if (EFLAG) {
        // these two powers feed only the energy, so they are evaluated
        // here rather than for every pair
        const double rminv = pow(r2inv,mmi[jtype]*0.5);
        const double rninv = pow(r2inv,nni[jtype]*0.5);
        evdwl = e0nmi[jtype] *
          (mmi[jtype]*r0ni[jtype]*rninv -
           nni[jtype]*r0mi[jtype]*rminv) - offseti[jtype];
        // scale the energy by the same special-bond factor as the force
        evdwl *= factor_lj;
      }

      if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
                               evdwl,0.0,fpair,delx,dely,delz,thr);
    }

    f[i].x += fxtmp;
    f[i].y += fytmp;
    f[i].z += fztmp;
  }
}
/* ---------------------------------------------------------------------- */
// total reported memory: memory_usage_thr() plus the PairNMCut base-class figure
double PairNMCutOMP::memory_usage()
{
  const double thr_bytes = memory_usage_thr();
  const double base_bytes = PairNMCut::memory_usage();
  return thr_bytes + base_bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(nm/cut/omp,PairNMCutOMP)
#else
#ifndef LMP_PAIR_NM_CUT_OMP_H
#define LMP_PAIR_NM_CUT_OMP_H
#include "pair_nm_cut.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
// Pair style class registered above under the name nm/cut/omp.
// It inherits the potential from PairNMCut and additionally derives
// from ThrOMP (declared in thr_omp.h).
class PairNMCutOMP : public PairNMCut, public ThrOMP {
public:
// constructed from the owning LAMMPS instance pointer
PairNMCutOMP(class LAMMPS *);
// force computation entry point; takes two int flags
// NOTE(review): presumably (eflag, vflag) as in the sibling OMP styles -- confirm
virtual void compute(int, int);
// returns memory_usage_thr() plus PairNMCut::memory_usage()
virtual double memory_usage();
private:
// Kernel working on the index range [ifrom, ito) with the data in thr.
// NEWTON_PAIR gates the force update on atom j, EFLAG gates the energy
// evaluation and EVFLAG gates the call to ev_tally_thr().
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,250 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_tersoff_mod_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   construct both bases, then flag this style as an OMP variant without
   rRESPA support
------------------------------------------------------------------------- */
PairTersoffMODOMP::PairTersoffMODOMP(LAMMPS *lmp)
  : PairTersoffMOD(lmp),
    ThrOMP(lmp, THR_PAIR)
{
  // the two settings are independent of each other
  respa_enable = 0;
  suffix_flag |= Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
void PairTersoffMODOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = vflag_atom = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (evflag) {
if (eflag) {
if (vflag_atom) eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (vflag_atom) eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else eval<0,0,0>(ifrom, ito, thr);
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
/* ----------------------------------------------------------------------
   Force kernel for the list entries ilist[iifrom] ... ilist[iito-1].
   Forces are accumulated into the array returned by thr->get_f().
   EVFLAG     : when set, pair contributions are passed to ev_tally_thr()
   EFLAG      : forwarded to repulsive() and force_zeta() as energy flag
   VFLAG_ATOM : when set, v_tally3_thr() is called for every i-j-k triplet
------------------------------------------------------------------------- */
template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
void PairTersoffMODOMP::eval(int iifrom, int iito, ThrData * const thr)
{
int i,j,k,ii,jj,kk,jnum;
int itag,jtag,itype,jtype,ktype,iparam_ij,iparam_ijk;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,rsq1,rsq2;
double delr1[3],delr2[3],fi[3],fj[3],fk[3];
double zeta_ij,prefactor;
int *ilist,*jlist,*numneigh,**firstneigh;
evdwl = 0.0;
// access positions and forces as arrays of dbl3_t (.x/.y/.z members)
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
const int * _noalias const tag = atom->tag;
const int * _noalias const type = atom->type;
const int nlocal = atom->nlocal;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
// force on atom i is summed locally and stored once per i at the end
double fxtmp,fytmp,fztmp;
// loop over full neighbor list of my atoms
for (ii = iifrom; ii < iito; ++ii) {
i = ilist[ii];
itag = tag[i];
// atom types are translated through map[] before indexing elem2param
itype = map[type[i]];
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
fxtmp = fytmp = fztmp = 0.0;
// two-body interactions, skip half of them
jlist = firstneigh[i];
jnum = numneigh[i];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
jtag = tag[j];
// the parity of itag+jtag decides which of the two orderings (i,j)/(j,i)
// handles the pair; for equal tags the coordinates (z, then y, then x)
// break the tie
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (x[j].z < ztmp) continue;
if (x[j].z == ztmp && x[j].y < ytmp) continue;
if (x[j].z == ztmp && x[j].y == ytmp && x[j].x < xtmp) continue;
}
jtype = map[type[j]];
delx = xtmp - x[j].x;
dely = ytmp - x[j].y;
delz = ztmp - x[j].z;
rsq = delx*delx + dely*dely + delz*delz;
iparam_ij = elem2param[itype][jtype][jtype];
if (rsq > params[iparam_ij].cutsq) continue;
repulsive(&params[iparam_ij],rsq,fpair,EFLAG,evdwl);
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
// the reaction force on j is always applied (no j < nlocal test)
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,
evdwl,0.0,fpair,delx,dely,delz,thr);
}
// three-body interactions
// skip immediately if I-J is not within cutoff
// force on atom j is summed locally and stored once per j below
double fjxtmp,fjytmp,fjztmp;
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
jtype = map[type[j]];
iparam_ij = elem2param[itype][jtype][jtype];
// note: delr1/delr2 point from i to the neighbor, opposite in sign
// to delx/dely/delz used in the two-body loop above
delr1[0] = x[j].x - xtmp;
delr1[1] = x[j].y - ytmp;
delr1[2] = x[j].z - ztmp;
rsq1 = delr1[0]*delr1[0] + delr1[1]*delr1[1] + delr1[2]*delr1[2];
if (rsq1 > params[iparam_ij].cutsq) continue;
// accumulate bondorder zeta for each i-j interaction via loop over k
fjxtmp = fjytmp = fjztmp = 0.0;
zeta_ij = 0.0;
for (kk = 0; kk < jnum; kk++) {
if (jj == kk) continue;
k = jlist[kk];
k &= NEIGHMASK;
ktype = map[type[k]];
iparam_ijk = elem2param[itype][jtype][ktype];
delr2[0] = x[k].x - xtmp;
delr2[1] = x[k].y - ytmp;
delr2[2] = x[k].z - ztmp;
rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
if (rsq2 > params[iparam_ijk].cutsq) continue;
zeta_ij += zeta(&params[iparam_ijk],rsq1,rsq2,delr1,delr2);
}
// pairwise force due to zeta
force_zeta(&params[iparam_ij],rsq1,zeta_ij,fpair,prefactor,EFLAG,evdwl);
fxtmp += delr1[0]*fpair;
fytmp += delr1[1]*fpair;
fztmp += delr1[2]*fpair;
fjxtmp -= delr1[0]*fpair;
fjytmp -= delr1[1]*fpair;
fjztmp -= delr1[2]*fpair;
// fpair and delr1 are both negated in the tally call
if (EVFLAG) ev_tally_thr(this,i,j,nlocal,/* newton_pair */ 1,evdwl,0.0,
-fpair,-delr1[0],-delr1[1],-delr1[2],thr);
// attractive term via loop over k
for (kk = 0; kk < jnum; kk++) {
if (jj == kk) continue;
k = jlist[kk];
k &= NEIGHMASK;
ktype = map[type[k]];
iparam_ijk = elem2param[itype][jtype][ktype];
delr2[0] = x[k].x - xtmp;
delr2[1] = x[k].y - ytmp;
delr2[2] = x[k].z - ztmp;
rsq2 = delr2[0]*delr2[0] + delr2[1]*delr2[1] + delr2[2]*delr2[2];
if (rsq2 > params[iparam_ijk].cutsq) continue;
// attractive() fills fi, fj and fk, the force contributions for
// atoms i, j and k of this triplet
attractive(&params[iparam_ijk],prefactor,
rsq1,rsq2,delr1,delr2,fi,fj,fk);
fxtmp += fi[0];
fytmp += fi[1];
fztmp += fi[2];
fjxtmp += fj[0];
fjytmp += fj[1];
fjztmp += fj[2];
f[k].x += fk[0];
f[k].y += fk[1];
f[k].z += fk[2];
if (VFLAG_ATOM) v_tally3_thr(i,j,k,fj,fk,delr1,delr2,thr);
}
// store the force collected for atom j during this i-j pass
f[j].x += fjxtmp;
f[j].y += fjytmp;
f[j].z += fjztmp;
}
// store the force collected for atom i over both loops
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
}
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   report memory: threading bookkeeping plus the PairTersoffMOD base style
------------------------------------------------------------------------- */
double PairTersoffMODOMP::memory_usage()
{
  // query in the same order as before: thread data first, base class second
  const double thr_bytes = memory_usage_thr();
  const double base_bytes = PairTersoffMOD::memory_usage();
  return thr_bytes + base_bytes;
}

View File

@ -0,0 +1,43 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(tersoff/mod/omp,PairTersoffMODOMP)
#else
#ifndef LMP_PAIR_TERSOFF_MOD_OMP_H
#define LMP_PAIR_TERSOFF_MOD_OMP_H
#include "pair_tersoff_mod.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
// Pair style class registered above under the name tersoff/mod/omp.
// It inherits the potential from PairTersoffMOD and additionally derives
// from ThrOMP (declared in thr_omp.h).
class PairTersoffMODOMP : public PairTersoffMOD, public ThrOMP {
public:
// constructed from the owning LAMMPS instance pointer
PairTersoffMODOMP(class LAMMPS *);
// force computation entry point; takes two int flags
// NOTE(review): presumably (eflag, vflag) -- confirm against the .cpp file
virtual void compute(int, int);
// memory usage in bytes, returned as a double
virtual double memory_usage();
private:
// Kernel working on the index range [ifrom, ito) with the data in thr.
// The three template flags are compile-time switches.
// NOTE(review): the flag names suggest energy/virial tallying control --
// confirm against the definition in the .cpp file
template <int EVFLAG, int EFLAG, int VFLAG_ATOM>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -0,0 +1,170 @@
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
This software is distributed under the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#include "math.h"
#include "pair_zbl_omp.h"
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "suffix.h"
using namespace LAMMPS_NS;
using namespace PairZBLConstants;
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   construct both bases, then flag this style as an OMP variant without
   rRESPA support
------------------------------------------------------------------------- */
PairZBLOMP::PairZBLOMP(LAMMPS *lmp)
  : PairZBL(lmp),
    ThrOMP(lmp, THR_PAIR)
{
  // the two settings are independent of each other
  respa_enable = 0;
  suffix_flag |= Suffix::OMP;
}
/* ---------------------------------------------------------------------- */
void PairZBLOMP::compute(int eflag, int vflag)
{
if (eflag || vflag) {
ev_setup(eflag,vflag);
} else evflag = vflag_fdotr = 0;
const int nall = atom->nlocal + atom->nghost;
const int nthreads = comm->nthreads;
const int inum = list->inum;
#if defined(_OPENMP)
#pragma omp parallel default(none) shared(eflag,vflag)
#endif
{
int ifrom, ito, tid;
loop_setup_thr(ifrom, ito, tid, inum, nthreads);
ThrData *thr = fix->get_thr(tid);
ev_setup_thr(eflag, vflag, nall, eatom, vatom, thr);
if (evflag) {
if (eflag) {
if (force->newton_pair) eval<1,1,1>(ifrom, ito, thr);
else eval<1,1,0>(ifrom, ito, thr);
} else {
if (force->newton_pair) eval<1,0,1>(ifrom, ito, thr);
else eval<1,0,0>(ifrom, ito, thr);
}
} else {
if (force->newton_pair) eval<0,0,1>(ifrom, ito, thr);
else eval<0,0,0>(ifrom, ito, thr);
}
reduce_thr(this, eflag, vflag, thr);
} // end of omp parallel region
}
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void PairZBLOMP::eval(int iifrom, int iito, ThrData * const thr)
{
const dbl3_t * _noalias const x = (dbl3_t *) atom->x[0];
dbl3_t * _noalias const f = (dbl3_t *) thr->get_f()[0];
const int * _noalias const type = atom->type;
const int * _noalias const ilist = list->ilist;
const int * _noalias const numneigh = list->numneigh;
const int * const * const firstneigh = list->firstneigh;
double xtmp,ytmp,ztmp,delx,dely,delz,fxtmp,fytmp,fztmp;
double rsq,t,fswitch,eswitch,evdwl,fpair;
const int nlocal = atom->nlocal;
int j,jj,jnum,jtype;
evdwl = 0.0;
// loop over neighbors of my atoms
for (int ii = iifrom; ii < iito; ++ii) {
const int i = ilist[ii];
const int itype = type[i];
const int * _noalias const jlist = firstneigh[i];
const double * _noalias const sw1i = sw1[itype];
const double * _noalias const sw2i = sw2[itype];
const double * _noalias const sw3i = sw3[itype];
const double * _noalias const sw4i = sw4[itype];
const double * _noalias const sw5i = sw5[itype];
xtmp = x[i].x;
ytmp = x[i].y;
ztmp = x[i].z;
jnum = numneigh[i];
fxtmp=fytmp=fztmp=0.0;
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
delx = xtmp - x[j].x;
dely = ytmp - x[j].y;
delz = ztmp - x[j].z;
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cut_globalsq) {
const double r = sqrt(rsq);
fpair = dzbldr(r, itype, jtype);
if (r > cut_inner) {
t = r - cut_inner;
fswitch = t*t *
(sw1i[jtype] + sw2i[jtype]*t);
fpair += fswitch;
}
fpair *= -1.0/r;
fxtmp += delx*fpair;
fytmp += dely*fpair;
fztmp += delz*fpair;
if (NEWTON_PAIR || j < nlocal) {
f[j].x -= delx*fpair;
f[j].y -= dely*fpair;
f[j].z -= delz*fpair;
}
if (EFLAG) {
evdwl = e_zbl(r, itype, jtype);
evdwl += sw5i[jtype];
if (r > cut_inner) {
eswitch = t*t*t *
(sw3i[jtype] + sw4i[jtype]*t);
evdwl += eswitch;
}
}
if (EVFLAG) ev_tally_thr(this,i,j,nlocal,NEWTON_PAIR,
evdwl,0.0,fpair,delx,dely,delz,thr);
}
}
f[i].x += fxtmp;
f[i].y += fytmp;
f[i].z += fztmp;
}
}
/* ---------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
   report memory: threading bookkeeping plus the PairZBL base style
------------------------------------------------------------------------- */
double PairZBLOMP::memory_usage()
{
  // query in the same order as before: thread data first, base class second
  const double thr_bytes = memory_usage_thr();
  const double base_bytes = PairZBL::memory_usage();
  return thr_bytes + base_bytes;
}

View File

@ -0,0 +1,48 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Axel Kohlmeyer (Temple U)
------------------------------------------------------------------------- */
#ifdef PAIR_CLASS
PairStyle(zbl/omp,PairZBLOMP)
#else
#ifndef LMP_PAIR_ZBL_OMP_H
#define LMP_PAIR_ZBL_OMP_H
#include "pair_zbl.h"
#include "thr_omp.h"
namespace LAMMPS_NS {
// Pair style class registered above under the name zbl/omp.
// It inherits the potential from PairZBL and additionally derives
// from ThrOMP (declared in thr_omp.h).
class PairZBLOMP : public PairZBL, public ThrOMP {
public:
// constructed from the owning LAMMPS instance pointer
PairZBLOMP(class LAMMPS *);
// force computation entry point; takes two int flags
// NOTE(review): presumably (eflag, vflag) -- confirm against the .cpp file
virtual void compute(int, int);
// memory usage in bytes, returned as a double
virtual double memory_usage();
private:
// Kernel working on the index range [ifrom, ito) with the data in thr.
// The three template flags are compile-time switches.
// NOTE(review): the flag names suggest energy/virial tallying and the
// newton_pair setting -- confirm against the definition in the .cpp file
template <int EVFLAG, int EFLAG, int NEWTON_PAIR>
void eval(int ifrom, int ito, ThrData * const thr);
};
}
#endif
#endif

View File

@ -25,6 +25,7 @@
using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
ThrData::ThrData(int tid)
: _f(0),_torque(0),_erforce(0),_de(0),_drho(0),_mu(0),_lambda(0),_rhoB(0),
@ -261,7 +262,7 @@ double ThrData::memory_usage()
void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim, int tid)
{
#if defined(_OPENMP)
// NOOP in non-threaded execution.
// NOOP in single-threaded execution.
if (nthreads == 1) return;
#pragma omp barrier
{
@ -270,15 +271,72 @@ void LAMMPS_NS::data_reduce_thr(double *dall, int nall, int nthreads, int ndim,
const int ifrom = tid*idelta;
const int ito = ((ifrom + idelta) > nvals) ? nvals : (ifrom + idelta);
// this if protects against having more threads than atoms
#if defined(USER_OMP_NO_UNROLL)
if (ifrom < nvals) {
for (int m = ifrom; m < ito; ++m) {
int m = 0;
for (m = ifrom; m < ito; ++m) {
for (int n = 1; n < nthreads; ++n) {
dall[m] += dall[n*nvals + m];
dall[n*nvals + m] = 0.0;
}
}
}
#else
// this if protects against having more threads than atoms
if (ifrom < nvals) {
int m = 0;
// for architectures that have L1 D-cache line sizes of 64 bytes
// (8 doubles) wide, explicitly unroll this loop to compute 8
// contiguous values in the array at a time
// -- modify this code based on the size of the cache line
double t0, t1, t2, t3, t4, t5, t6, t7;
for (m = ifrom; m < (ito-7); m+=8) {
t0 = dall[m ];
t1 = dall[m+1];
t2 = dall[m+2];
t3 = dall[m+3];
t4 = dall[m+4];
t5 = dall[m+5];
t6 = dall[m+6];
t7 = dall[m+7];
for (int n = 1; n < nthreads; ++n) {
t0 += dall[n*nvals + m ];
t1 += dall[n*nvals + m+1];
t2 += dall[n*nvals + m+2];
t3 += dall[n*nvals + m+3];
t4 += dall[n*nvals + m+4];
t5 += dall[n*nvals + m+5];
t6 += dall[n*nvals + m+6];
t7 += dall[n*nvals + m+7];
dall[n*nvals + m ] = 0.0;
dall[n*nvals + m+1] = 0.0;
dall[n*nvals + m+2] = 0.0;
dall[n*nvals + m+3] = 0.0;
dall[n*nvals + m+4] = 0.0;
dall[n*nvals + m+5] = 0.0;
dall[n*nvals + m+6] = 0.0;
dall[n*nvals + m+7] = 0.0;
}
dall[m ] = t0;
dall[m+1] = t1;
dall[m+2] = t2;
dall[m+3] = t3;
dall[m+4] = t4;
dall[m+5] = t5;
dall[m+6] = t6;
dall[m+7] = t7;
}
// do the last < 8 values
for (; m < ito; m++) {
for (int n = 1; n < nthreads; ++n) {
dall[m] += dall[n*nvals + m];
dall[n*nvals + m] = 0.0;
}
}
}
#endif
}
#else
// NOOP in non-threaded execution.