git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@14596 f3b2605a-c512-4ea7-a41b-209d697bcdaa
This commit is contained in:
@ -31,7 +31,8 @@
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define SMALL (flt_t)0.001
|
||||
#define SMALL2 (flt_t)0.000001
|
||||
#define INVSMALL (flt_t)1000.0
|
||||
typedef struct { int a,b,c,t; } int4_t;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -162,7 +163,7 @@ void AngleCharmmIntel::eval(const int vflag,
|
||||
const flt_t delz1 = x[i1].z - x[i2].z;
|
||||
|
||||
const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
|
||||
const flt_t r1 = sqrt(rsq1);
|
||||
flt_t ir12 = (flt_t)1.0/sqrt(rsq1);
|
||||
|
||||
// 2nd bond
|
||||
|
||||
@ -171,7 +172,7 @@ void AngleCharmmIntel::eval(const int vflag,
|
||||
const flt_t delz2 = x[i3].z - x[i2].z;
|
||||
|
||||
const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
const flt_t r2 = sqrt(rsq2);
|
||||
ir12 *= (flt_t)1.0/sqrt(rsq2);
|
||||
|
||||
// Urey-Bradley bond
|
||||
|
||||
@ -180,15 +181,15 @@ void AngleCharmmIntel::eval(const int vflag,
|
||||
const flt_t delzUB = x[i3].z - x[i1].z;
|
||||
|
||||
const flt_t rsqUB = delxUB*delxUB + delyUB*delyUB + delzUB*delzUB;
|
||||
const flt_t rUB = sqrt(rsqUB);
|
||||
const flt_t irUB = (flt_t)1.0/sqrt(rsqUB);
|
||||
|
||||
// Urey-Bradley force & energy
|
||||
|
||||
const flt_t dr = rUB - fc.fc[type].r_ub;
|
||||
const flt_t dr = (flt_t)1.0/irUB - fc.fc[type].r_ub;
|
||||
const flt_t rk = fc.fc[type].k_ub * dr;
|
||||
|
||||
flt_t forceUB;
|
||||
if (rUB > (flt_t)0.0) forceUB = (flt_t)-2.0*rk/rUB;
|
||||
if (rsqUB > (flt_t)0.0) forceUB = (flt_t)-2.0*rk*irUB;
|
||||
else forceUB = 0.0;
|
||||
|
||||
flt_t eangle;
|
||||
@ -197,14 +198,14 @@ void AngleCharmmIntel::eval(const int vflag,
|
||||
// angle (cos and sin)
|
||||
|
||||
flt_t c = delx1*delx2 + dely1*dely2 + delz1*delz2;
|
||||
c /= r1*r2;
|
||||
c *= ir12;
|
||||
|
||||
if (c > (flt_t)1.0) c = (flt_t)1.0;
|
||||
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
|
||||
|
||||
flt_t s = sqrt((flt_t)1.0 - c*c);
|
||||
if (s < SMALL) s = SMALL;
|
||||
s = (flt_t)1.0/s;
|
||||
const flt_t sd = (flt_t)1.0 - c * c;
|
||||
flt_t s = (flt_t)1.0 / sqrt(sd);
|
||||
if (sd < SMALL2) s = INVSMALL;
|
||||
|
||||
// harmonic force & energy
|
||||
|
||||
@ -215,7 +216,7 @@ void AngleCharmmIntel::eval(const int vflag,
|
||||
|
||||
const flt_t a = (flt_t)-2.0 * tk * s;
|
||||
const flt_t a11 = a*c / rsq1;
|
||||
const flt_t a12 = -a / (r1*r2);
|
||||
const flt_t a12 = -a * ir12;
|
||||
const flt_t a22 = a*c / rsq2;
|
||||
|
||||
const flt_t f1x = a11*delx1 + a12*delx2 - delxUB*forceUB;
|
||||
@ -303,7 +304,7 @@ template <class flt_t, class acc_t>
|
||||
void AngleCharmmIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int bp1 = atom->ndihedraltypes + 1;
|
||||
const int bp1 = atom->nangletypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
for (int i = 0; i < bp1; i++) {
|
||||
|
||||
312
src/USER-INTEL/angle_harmonic_intel.cpp
Normal file
312
src/USER-INTEL/angle_harmonic_intel.cpp
Normal file
@ -0,0 +1,312 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include "angle_harmonic_intel.h"
|
||||
#include "atom.h"
|
||||
#include "neighbor.h"
|
||||
#include "domain.h"
|
||||
#include "comm.h"
|
||||
#include "force.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "suffix.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define SMALL2 (flt_t)0.000001
|
||||
#define INVSMALL (flt_t)1000.0
|
||||
typedef struct { int a,b,c,t; } int4_t;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AngleHarmonicIntel::AngleHarmonicIntel(LAMMPS *lmp) : AngleHarmonic(lmp)
|
||||
{
|
||||
suffix_flag |= Suffix::INTEL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
AngleHarmonicIntel::~AngleHarmonicIntel()
|
||||
{
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleHarmonicIntel::compute(int eflag, int vflag)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_use_base) {
|
||||
AngleHarmonic::compute(eflag, vflag);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||
force_const_single);
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||
force_const_double);
|
||||
else
|
||||
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||
force_const_single);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void AngleHarmonicIntel::compute(int eflag, int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = 0;
|
||||
|
||||
if (evflag) {
|
||||
if (eflag) {
|
||||
if (force->newton_bond)
|
||||
eval<1,1,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,1,0>(vflag, buffers, fc);
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<1,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<0,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<0,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void AngleHarmonicIntel::eval(const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
|
||||
{
|
||||
const int inum = neighbor->nanglelist;
|
||||
if (inum == 0) return;
|
||||
|
||||
ATOM_T * _noalias const x = buffers->get_x(0);
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
int f_stride;
|
||||
if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
|
||||
else f_stride = buffers->get_stride(nlocal);
|
||||
|
||||
int tc;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
|
||||
const int nthreads = tc;
|
||||
|
||||
acc_t oeangle, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
oeangle = (acc_t)0.0;
|
||||
if (vflag) {
|
||||
ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(f_start,f_stride,fc) \
|
||||
reduction(+:oeangle,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
#endif
|
||||
{
|
||||
int nfrom, nto, tid;
|
||||
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
|
||||
|
||||
FORCE_T * _noalias const f = f_start + (tid * f_stride);
|
||||
if (fix->need_zero(tid))
|
||||
memset(f, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
const int4_t * _noalias const anglelist =
|
||||
(int4_t *) neighbor->anglelist[0];
|
||||
|
||||
for (int n = nfrom; n < nto; n++) {
|
||||
const int i1 = anglelist[n].a;
|
||||
const int i2 = anglelist[n].b;
|
||||
const int i3 = anglelist[n].c;
|
||||
const int type = anglelist[n].t;
|
||||
|
||||
// 1st bond
|
||||
|
||||
const flt_t delx1 = x[i1].x - x[i2].x;
|
||||
const flt_t dely1 = x[i1].y - x[i2].y;
|
||||
const flt_t delz1 = x[i1].z - x[i2].z;
|
||||
|
||||
const flt_t rsq1 = delx1*delx1 + dely1*dely1 + delz1*delz1;
|
||||
const flt_t r1 = (flt_t)1.0/sqrt(rsq1);
|
||||
|
||||
// 2nd bond
|
||||
|
||||
const flt_t delx2 = x[i3].x - x[i2].x;
|
||||
const flt_t dely2 = x[i3].y - x[i2].y;
|
||||
const flt_t delz2 = x[i3].z - x[i2].z;
|
||||
|
||||
const flt_t rsq2 = delx2*delx2 + dely2*dely2 + delz2*delz2;
|
||||
const flt_t r2 = (flt_t)1.0/sqrt(rsq2);
|
||||
|
||||
// angle (cos and sin)
|
||||
|
||||
flt_t c = delx1*delx2 + dely1*dely2 + delz1*delz2;
|
||||
const flt_t r1r2 = r1 * r2;
|
||||
c *= r1r2;
|
||||
|
||||
if (c > (flt_t)1.0) c = (flt_t)1.0;
|
||||
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
|
||||
|
||||
const flt_t sd = (flt_t)1.0 - c * c;
|
||||
flt_t s = (flt_t)1.0/sqrt(sd);
|
||||
if (sd < SMALL2) s = INVSMALL;
|
||||
|
||||
// harmonic force & energy
|
||||
|
||||
const flt_t dtheta = acos(c) - fc.fc[type].theta0;
|
||||
const flt_t tk = fc.fc[type].k * dtheta;
|
||||
|
||||
flt_t eangle;
|
||||
if (EFLAG) eangle = tk*dtheta;
|
||||
|
||||
const flt_t a = (flt_t)-2.0 * tk * s;
|
||||
const flt_t ac = a*c;
|
||||
const flt_t a11 = ac / rsq1;
|
||||
const flt_t a12 = -a * (r1r2);
|
||||
const flt_t a22 = ac / rsq2;
|
||||
|
||||
const flt_t f1x = a11*delx1 + a12*delx2;
|
||||
const flt_t f1y = a11*dely1 + a12*dely2;
|
||||
const flt_t f1z = a11*delz1 + a12*delz2;
|
||||
|
||||
const flt_t f3x = a22*delx2 + a12*delx1;
|
||||
const flt_t f3y = a22*dely2 + a12*dely1;
|
||||
const flt_t f3z = a22*delz2 + a12*delz1;
|
||||
|
||||
// apply force to each of 3 atoms
|
||||
|
||||
if (NEWTON_BOND || i1 < nlocal) {
|
||||
f[i1].x += f1x;
|
||||
f[i1].y += f1y;
|
||||
f[i1].z += f1z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i2 < nlocal) {
|
||||
f[i2].x -= f1x + f3x;
|
||||
f[i2].y -= f1y + f3y;
|
||||
f[i2].z -= f1z + f3z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i3 < nlocal) {
|
||||
f[i3].x += f3x;
|
||||
f[i3].y += f3y;
|
||||
f[i3].z += f3z;
|
||||
}
|
||||
|
||||
if (EVFLAG) {
|
||||
IP_PRE_ev_tally_angle(EFLAG, eatom, vflag, eangle, i1, i2, i3,f1x,
|
||||
f1y, f1z, f3x, f3y, f3z, delx1, dely1, delz1,
|
||||
delx2, dely2, delz2, oeangle, f, NEWTON_BOND,
|
||||
nlocal, ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
}
|
||||
} // for n
|
||||
} // omp parallel
|
||||
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
energy += oeangle;
|
||||
if (vflag) {
|
||||
virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
|
||||
virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
|
||||
}
|
||||
}
|
||||
|
||||
fix->set_reduce_flag();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AngleHarmonicIntel::init_style()
|
||||
{
|
||||
AngleHarmonic::init_style();
|
||||
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
"The 'package intel' command is required for /intel styles");
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_use_base = 0;
|
||||
if (fix->offload_balance() != 0.0) {
|
||||
_use_base = 1;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
fix->bond_init_check();
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void AngleHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int bp1 = atom->nangletypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
for (int i = 0; i < bp1; i++) {
|
||||
fc.fc[i].k = k[i];
|
||||
fc.fc[i].theta0 = theta0[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void AngleHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nangletypes,
|
||||
Memory *memory) {
|
||||
if (nangletypes != _nangletypes) {
|
||||
if (_nangletypes > 0)
|
||||
_memory->destroy(fc);
|
||||
|
||||
if (nangletypes > 0)
|
||||
_memory->create(fc,nangletypes,"anglecharmmintel.fc");
|
||||
}
|
||||
_nangletypes = nangletypes;
|
||||
_memory = memory;
|
||||
}
|
||||
84
src/USER-INTEL/angle_harmonic_intel.h
Normal file
84
src/USER-INTEL/angle_harmonic_intel.h
Normal file
@ -0,0 +1,84 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef ANGLE_CLASS
|
||||
|
||||
AngleStyle(harmonic/intel,AngleHarmonicIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_ANGLE_HARMONIC_INTEL_H
|
||||
#define LMP_ANGLE_HARMONIC_INTEL_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "angle_harmonic.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class AngleHarmonicIntel : public AngleHarmonic {
|
||||
public:
|
||||
AngleHarmonicIntel(class LAMMPS *);
|
||||
virtual ~AngleHarmonicIntel();
|
||||
virtual void compute(int, int);
|
||||
virtual void init_style();
|
||||
|
||||
protected:
|
||||
FixIntel *fix;
|
||||
|
||||
template <class flt_t> class ForceConst;
|
||||
template <class flt_t, class acc_t>
|
||||
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <class flt_t, class acc_t>
|
||||
void pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t, acc_t> *buffers);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int _use_base;
|
||||
#endif
|
||||
|
||||
template <class flt_t>
|
||||
class ForceConst {
|
||||
public:
|
||||
typedef struct { flt_t k, theta0; } fc_packed1;
|
||||
|
||||
fc_packed1 *fc;
|
||||
ForceConst() : _nangletypes(0) {}
|
||||
~ForceConst() { set_ntypes(0, NULL); }
|
||||
|
||||
void set_ntypes(const int nangletypes, Memory *memory);
|
||||
|
||||
private:
|
||||
int _nangletypes;
|
||||
Memory *_memory;
|
||||
};
|
||||
ForceConst<float> force_const_single;
|
||||
ForceConst<double> force_const_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
*/
|
||||
411
src/USER-INTEL/dihedral_harmonic_intel.cpp
Normal file
411
src/USER-INTEL/dihedral_harmonic_intel.cpp
Normal file
@ -0,0 +1,411 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include "math.h"
|
||||
#include "dihedral_harmonic_intel.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "neighbor.h"
|
||||
#include "domain.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "update.h"
|
||||
#include "error.h"
|
||||
|
||||
#include "suffix.h"
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define PTOLERANCE (flt_t)1.05
|
||||
#define MTOLERANCE (flt_t)-1.05
|
||||
typedef struct { int a,b,c,d,t; } int5_t;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
DihedralHarmonicIntel::DihedralHarmonicIntel(class LAMMPS *lmp)
|
||||
: DihedralHarmonic(lmp)
|
||||
{
|
||||
suffix_flag |= Suffix::INTEL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void DihedralHarmonicIntel::compute(int eflag, int vflag)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_use_base) {
|
||||
DihedralHarmonic::compute(eflag, vflag);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||
force_const_single);
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||
force_const_double);
|
||||
else
|
||||
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||
force_const_single);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void DihedralHarmonicIntel::compute(int eflag, int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
{
|
||||
if (eflag || vflag) {
|
||||
ev_setup(eflag,vflag);
|
||||
} else evflag = 0;
|
||||
|
||||
if (evflag) {
|
||||
if (eflag) {
|
||||
if (force->newton_bond)
|
||||
eval<1,1,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,1,0>(vflag, buffers, fc);
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<1,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<0,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<0,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
}
|
||||
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void DihedralHarmonicIntel::eval(const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
|
||||
{
|
||||
const int inum = neighbor->ndihedrallist;
|
||||
if (inum == 0) return;
|
||||
|
||||
ATOM_T * _noalias const x = buffers->get_x(0);
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
int f_stride;
|
||||
if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
|
||||
else f_stride = buffers->get_stride(nlocal);
|
||||
|
||||
int tc;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
|
||||
const int nthreads = tc;
|
||||
|
||||
acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
oedihedral = (acc_t)0.0;
|
||||
if (vflag) {
|
||||
ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(f_start,f_stride,fc) \
|
||||
reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
#endif
|
||||
{
|
||||
int nfrom, nto, tid;
|
||||
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
|
||||
|
||||
FORCE_T * _noalias const f = f_start + (tid * f_stride);
|
||||
if (fix->need_zero(tid))
|
||||
memset(f, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
const int5_t * _noalias const dihedrallist =
|
||||
(int5_t *) neighbor->dihedrallist[0];
|
||||
|
||||
acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5;
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
sedihedral = (acc_t)0.0;
|
||||
if (vflag) {
|
||||
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
for (int n = nfrom; n < nto; n++) {
|
||||
const int i1 = dihedrallist[n].a;
|
||||
const int i2 = dihedrallist[n].b;
|
||||
const int i3 = dihedrallist[n].c;
|
||||
const int i4 = dihedrallist[n].d;
|
||||
const int type = dihedrallist[n].t;
|
||||
|
||||
// 1st bond
|
||||
|
||||
const flt_t vb1x = x[i1].x - x[i2].x;
|
||||
const flt_t vb1y = x[i1].y - x[i2].y;
|
||||
const flt_t vb1z = x[i1].z - x[i2].z;
|
||||
|
||||
// 2nd bond
|
||||
|
||||
const flt_t vb2xm = x[i2].x - x[i3].x;
|
||||
const flt_t vb2ym = x[i2].y - x[i3].y;
|
||||
const flt_t vb2zm = x[i2].z - x[i3].z;
|
||||
|
||||
// 3rd bond
|
||||
|
||||
const flt_t vb3x = x[i4].x - x[i3].x;
|
||||
const flt_t vb3y = x[i4].y - x[i3].y;
|
||||
const flt_t vb3z = x[i4].z - x[i3].z;
|
||||
|
||||
// c,s calculation
|
||||
|
||||
const flt_t ax = vb1y*vb2zm - vb1z*vb2ym;
|
||||
const flt_t ay = vb1z*vb2xm - vb1x*vb2zm;
|
||||
const flt_t az = vb1x*vb2ym - vb1y*vb2xm;
|
||||
const flt_t bx = vb3y*vb2zm - vb3z*vb2ym;
|
||||
const flt_t by = vb3z*vb2xm - vb3x*vb2zm;
|
||||
const flt_t bz = vb3x*vb2ym - vb3y*vb2xm;
|
||||
|
||||
const flt_t rasq = ax*ax + ay*ay + az*az;
|
||||
const flt_t rbsq = bx*bx + by*by + bz*bz;
|
||||
const flt_t rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
|
||||
const flt_t rg = sqrt(rgsq);
|
||||
|
||||
flt_t rginv, ra2inv, rb2inv;
|
||||
rginv = ra2inv = rb2inv = (flt_t)0.0;
|
||||
if (rg > 0) rginv = (flt_t)1.0/rg;
|
||||
if (rasq > 0) ra2inv = (flt_t)1.0/rasq;
|
||||
if (rbsq > 0) rb2inv = (flt_t)1.0/rbsq;
|
||||
const flt_t rabinv = sqrt(ra2inv*rb2inv);
|
||||
|
||||
flt_t c = (ax*bx + ay*by + az*bz)*rabinv;
|
||||
const flt_t s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
|
||||
|
||||
// error check
|
||||
if (c > PTOLERANCE || c < MTOLERANCE) {
|
||||
int me = comm->me;
|
||||
|
||||
if (screen) {
|
||||
char str[128];
|
||||
sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
|
||||
TAGINT_FORMAT " " TAGINT_FORMAT " "
|
||||
TAGINT_FORMAT " " TAGINT_FORMAT,
|
||||
me,tid,update->ntimestep,
|
||||
atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
|
||||
error->warning(FLERR,str,0);
|
||||
fprintf(screen," 1st atom: %d %g %g %g\n",
|
||||
me,x[i1].x,x[i1].y,x[i1].z);
|
||||
fprintf(screen," 2nd atom: %d %g %g %g\n",
|
||||
me,x[i2].x,x[i2].y,x[i2].z);
|
||||
fprintf(screen," 3rd atom: %d %g %g %g\n",
|
||||
me,x[i3].x,x[i3].y,x[i3].z);
|
||||
fprintf(screen," 4th atom: %d %g %g %g\n",
|
||||
me,x[i4].x,x[i4].y,x[i4].z);
|
||||
}
|
||||
}
|
||||
|
||||
if (c > (flt_t)1.0) c = (flt_t)1.0;
|
||||
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
|
||||
|
||||
const flt_t tcos_shift = fc.bp[type].cos_shift;
|
||||
const flt_t tsin_shift = fc.bp[type].sin_shift;
|
||||
const flt_t tk = fc.bp[type].k;
|
||||
const int m = fc.bp[type].multiplicity;
|
||||
|
||||
flt_t p = (flt_t)1.0;
|
||||
flt_t ddf1, df1;
|
||||
ddf1 = df1 = (flt_t)0.0;
|
||||
|
||||
for (int i = 0; i < m; i++) {
|
||||
ddf1 = p*c - df1*s;
|
||||
df1 = p*s + df1*c;
|
||||
p = ddf1;
|
||||
}
|
||||
|
||||
p = p*tcos_shift + df1*tsin_shift;
|
||||
df1 = df1*tcos_shift - ddf1*tsin_shift;
|
||||
df1 *= -m;
|
||||
p += (flt_t)1.0;
|
||||
|
||||
if (m == 0) {
|
||||
p = (flt_t)1.0 + tcos_shift;
|
||||
df1 = (flt_t)0.0;
|
||||
}
|
||||
|
||||
const flt_t fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
|
||||
const flt_t hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
|
||||
const flt_t fga = fg*ra2inv*rginv;
|
||||
const flt_t hgb = hg*rb2inv*rginv;
|
||||
const flt_t gaa = -ra2inv*rg;
|
||||
const flt_t gbb = rb2inv*rg;
|
||||
|
||||
const flt_t dtfx = gaa*ax;
|
||||
const flt_t dtfy = gaa*ay;
|
||||
const flt_t dtfz = gaa*az;
|
||||
const flt_t dtgx = fga*ax - hgb*bx;
|
||||
const flt_t dtgy = fga*ay - hgb*by;
|
||||
const flt_t dtgz = fga*az - hgb*bz;
|
||||
const flt_t dthx = gbb*bx;
|
||||
const flt_t dthy = gbb*by;
|
||||
const flt_t dthz = gbb*bz;
|
||||
|
||||
const flt_t df = -tk * df1;
|
||||
|
||||
const flt_t sx2 = df*dtgx;
|
||||
const flt_t sy2 = df*dtgy;
|
||||
const flt_t sz2 = df*dtgz;
|
||||
|
||||
flt_t f1x = df*dtfx;
|
||||
flt_t f1y = df*dtfy;
|
||||
flt_t f1z = df*dtfz;
|
||||
|
||||
const flt_t f2x = sx2 - f1x;
|
||||
const flt_t f2y = sy2 - f1y;
|
||||
const flt_t f2z = sz2 - f1z;
|
||||
|
||||
flt_t f4x = df*dthx;
|
||||
flt_t f4y = df*dthy;
|
||||
flt_t f4z = df*dthz;
|
||||
|
||||
const flt_t f3x = -sx2 - f4x;
|
||||
const flt_t f3y = -sy2 - f4y;
|
||||
const flt_t f3z = -sz2 - f4z;
|
||||
|
||||
if (EVFLAG) {
|
||||
flt_t deng;
|
||||
if (EFLAG) deng = tk * p;
|
||||
IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, deng, i1, i2, i3, i4, f1x,
|
||||
f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x,
|
||||
vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y,
|
||||
vb3z, sedihedral, f, NEWTON_BOND, nlocal,
|
||||
sv0, sv1, sv2, sv3, sv4, sv5);
|
||||
}
|
||||
|
||||
{
|
||||
if (NEWTON_BOND || i1 < nlocal) {
|
||||
f[i1].x += f1x;
|
||||
f[i1].y += f1y;
|
||||
f[i1].z += f1z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i2 < nlocal) {
|
||||
f[i2].x += f2x;
|
||||
f[i2].y += f2y;
|
||||
f[i2].z += f2z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i3 < nlocal) {
|
||||
f[i3].x += f3x;
|
||||
f[i3].y += f3y;
|
||||
f[i3].z += f3z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i4 < nlocal) {
|
||||
f[i4].x += f4x;
|
||||
f[i4].y += f4y;
|
||||
f[i4].z += f4z;
|
||||
}
|
||||
}
|
||||
} // for n
|
||||
if (EVFLAG) {
|
||||
if (EFLAG) oedihedral += sedihedral;
|
||||
if (vflag) {
|
||||
ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5;
|
||||
}
|
||||
}
|
||||
} // omp parallel
|
||||
|
||||
if (EVFLAG) {
|
||||
if (EFLAG) energy += oedihedral;
|
||||
if (vflag) {
|
||||
virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
|
||||
virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
|
||||
}
|
||||
}
|
||||
|
||||
fix->set_reduce_flag();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void DihedralHarmonicIntel::init_style()
|
||||
{
|
||||
DihedralHarmonic::init_style();
|
||||
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
"The 'package intel' command is required for /intel styles");
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_use_base = 0;
|
||||
if (fix->offload_balance() != 0.0) {
|
||||
_use_base = 1;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
fix->bond_init_check();
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void DihedralHarmonicIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int bp1 = atom->ndihedraltypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
for (int i = 0; i < bp1; i++) {
|
||||
fc.bp[i].multiplicity = multiplicity[i];
|
||||
fc.bp[i].cos_shift = cos_shift[i];
|
||||
fc.bp[i].sin_shift = sin_shift[i];
|
||||
fc.bp[i].k = k[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void DihedralHarmonicIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes,
|
||||
Memory *memory) {
|
||||
if (nbondtypes != _nbondtypes) {
|
||||
if (_nbondtypes > 0)
|
||||
_memory->destroy(bp);
|
||||
|
||||
if (nbondtypes > 0)
|
||||
_memory->create(bp,nbondtypes,"dihedralcharmmintel.bp");
|
||||
}
|
||||
_nbondtypes = nbondtypes;
|
||||
_memory = memory;
|
||||
}
|
||||
81
src/USER-INTEL/dihedral_harmonic_intel.h
Normal file
81
src/USER-INTEL/dihedral_harmonic_intel.h
Normal file
@ -0,0 +1,81 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef DIHEDRAL_CLASS
|
||||
|
||||
DihedralStyle(harmonic/intel,DihedralHarmonicIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_DIHEDRAL_HARMONIC_INTEL_H
|
||||
#define LMP_DIHEDRAL_HARMONIC_INTEL_H
|
||||
|
||||
#include "dihedral_harmonic.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class DihedralHarmonicIntel : public DihedralHarmonic {
|
||||
|
||||
public:
|
||||
DihedralHarmonicIntel(class LAMMPS *lmp);
|
||||
virtual void compute(int, int);
|
||||
void init_style();
|
||||
|
||||
private:
|
||||
FixIntel *fix;
|
||||
|
||||
template <class flt_t> class ForceConst;
|
||||
template <class flt_t, class acc_t>
|
||||
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <class flt_t, class acc_t>
|
||||
void pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t, acc_t> *buffers);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int _use_base;
|
||||
#endif
|
||||
|
||||
template <class flt_t>
|
||||
class ForceConst {
|
||||
public:
|
||||
typedef struct { flt_t cos_shift, sin_shift, k;
|
||||
int multiplicity; } fc_packed1;
|
||||
|
||||
fc_packed1 *bp;
|
||||
|
||||
ForceConst() : _nbondtypes(0) {}
|
||||
~ForceConst() { set_ntypes(0, NULL); }
|
||||
|
||||
void set_ntypes(const int nbondtypes, Memory *memory);
|
||||
|
||||
private:
|
||||
int _nbondtypes;
|
||||
Memory *_memory;
|
||||
};
|
||||
ForceConst<float> force_const_single;
|
||||
ForceConst<double> force_const_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
438
src/USER-INTEL/dihedral_opls_intel.cpp
Normal file
438
src/USER-INTEL/dihedral_opls_intel.cpp
Normal file
@ -0,0 +1,438 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include "mpi.h"
|
||||
#include "math.h"
|
||||
#include "dihedral_opls_intel.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "memory.h"
|
||||
#include "neighbor.h"
|
||||
#include "domain.h"
|
||||
#include "force.h"
|
||||
#include "pair.h"
|
||||
#include "update.h"
|
||||
#include "error.h"
|
||||
|
||||
#include "suffix.h"
|
||||
using namespace LAMMPS_NS;
|
||||
|
||||
#define PTOLERANCE (flt_t)1.05
|
||||
#define MTOLERANCE (flt_t)-1.05
|
||||
#define SMALL2 (flt_t)0.000001
|
||||
#define INVSMALL (flt_t)1000.0
|
||||
#define SMALLER2 (flt_t)0.0000000001
|
||||
#define INVSMALLER (flt_t)100000.0
|
||||
typedef struct { int a,b,c,d,t; } int5_t;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
DihedralOPLSIntel::DihedralOPLSIntel(class LAMMPS *lmp)
|
||||
: DihedralOPLS(lmp)
|
||||
{
|
||||
suffix_flag |= Suffix::INTEL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void DihedralOPLSIntel::compute(int eflag, int vflag)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_use_base) {
|
||||
DihedralOPLS::compute(eflag, vflag);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||
force_const_single);
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||
force_const_double);
|
||||
else
|
||||
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||
force_const_single);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void DihedralOPLSIntel::compute(int eflag, int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
{
|
||||
if (eflag || vflag) {
|
||||
ev_setup(eflag,vflag);
|
||||
} else evflag = 0;
|
||||
|
||||
if (evflag) {
|
||||
if (eflag) {
|
||||
if (force->newton_bond)
|
||||
eval<1,1,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,1,0>(vflag, buffers, fc);
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<1,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<0,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<0,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
}
|
||||
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void DihedralOPLSIntel::eval(const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
|
||||
{
|
||||
const int inum = neighbor->ndihedrallist;
|
||||
if (inum == 0) return;
|
||||
|
||||
ATOM_T * _noalias const x = buffers->get_x(0);
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
int f_stride;
|
||||
if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
|
||||
else f_stride = buffers->get_stride(nlocal);
|
||||
|
||||
int tc;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
|
||||
const int nthreads = tc;
|
||||
|
||||
acc_t oedihedral, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
oedihedral = (acc_t)0.0;
|
||||
if (vflag) {
|
||||
ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(f_start,f_stride,fc) \
|
||||
reduction(+:oedihedral,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
#endif
|
||||
{
|
||||
int nfrom, nto, tid;
|
||||
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
|
||||
|
||||
FORCE_T * _noalias const f = f_start + (tid * f_stride);
|
||||
if (fix->need_zero(tid))
|
||||
memset(f, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
const int5_t * _noalias const dihedrallist =
|
||||
(int5_t *) neighbor->dihedrallist[0];
|
||||
|
||||
acc_t sedihedral, sv0, sv1, sv2, sv3, sv4, sv5;
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
sedihedral = (acc_t)0.0;
|
||||
if (vflag) {
|
||||
sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
for (int n = nfrom; n < nto; n++) {
|
||||
const int i1 = dihedrallist[n].a;
|
||||
const int i2 = dihedrallist[n].b;
|
||||
const int i3 = dihedrallist[n].c;
|
||||
const int i4 = dihedrallist[n].d;
|
||||
const int type = dihedrallist[n].t;
|
||||
|
||||
// 1st bond
|
||||
|
||||
const flt_t vb1x = x[i1].x - x[i2].x;
|
||||
const flt_t vb1y = x[i1].y - x[i2].y;
|
||||
const flt_t vb1z = x[i1].z - x[i2].z;
|
||||
|
||||
// 2nd bond
|
||||
|
||||
const flt_t vb2xm = x[i2].x - x[i3].x;
|
||||
const flt_t vb2ym = x[i2].y - x[i3].y;
|
||||
const flt_t vb2zm = x[i2].z - x[i3].z;
|
||||
|
||||
// 3rd bond
|
||||
|
||||
const flt_t vb3x = x[i4].x - x[i3].x;
|
||||
const flt_t vb3y = x[i4].y - x[i3].y;
|
||||
const flt_t vb3z = x[i4].z - x[i3].z;
|
||||
|
||||
// 1-4
|
||||
|
||||
const flt_t delx = x[i1].x - x[i4].x;
|
||||
const flt_t dely = x[i1].y - x[i4].y;
|
||||
const flt_t delz = x[i1].z - x[i4].z;
|
||||
|
||||
|
||||
// c0 calculation
|
||||
// 1st and 2nd angle
|
||||
|
||||
const flt_t b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
|
||||
const flt_t rb1 = (flt_t)1.0 / sqrt(b1mag2);
|
||||
const flt_t sb1 = (flt_t)1.0 / b1mag2;
|
||||
|
||||
const flt_t b2mag2 = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
|
||||
const flt_t rb2 = (flt_t)1.0 / sqrt(b2mag2);
|
||||
const flt_t sb2 = (flt_t)1.0 / b2mag2;
|
||||
|
||||
const flt_t b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
|
||||
const flt_t rb3 = (flt_t)1.0 / sqrt(b3mag2);
|
||||
const flt_t sb3 = (flt_t)1.0 / b3mag2;
|
||||
|
||||
const flt_t c0 = (vb1x*vb3x + vb1y*vb3y + vb1z*vb3z) * rb1*rb3;
|
||||
|
||||
flt_t ctmp = -vb1x*vb2xm - vb1y*vb2ym - vb1z*vb2zm;
|
||||
const flt_t r12c1 = rb1 * rb2;
|
||||
const flt_t c1mag = ctmp * r12c1;
|
||||
|
||||
ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
|
||||
const flt_t r12c2 = rb2 * rb3;
|
||||
const flt_t c2mag = ctmp * r12c2;
|
||||
|
||||
// cos and sin of 2 angles and final c
|
||||
|
||||
flt_t sin2 = MAX((flt_t)1.0 - c1mag*c1mag,(flt_t)0.0);
|
||||
flt_t sc1 = (flt_t)1.0/sqrt(sin2);
|
||||
if (sin2 < SMALL2) sc1 = INVSMALL;
|
||||
|
||||
sin2 = MAX((flt_t)1.0 - c2mag*c2mag,(flt_t)0.0);
|
||||
flt_t sc2 = (flt_t)1.0/sqrt(sin2);
|
||||
if (sin2 < SMALL2) sc2 = INVSMALL;
|
||||
|
||||
const flt_t s1 = sc1 * sc1;
|
||||
const flt_t s2 = sc2 * sc2;
|
||||
flt_t s12 = sc1 * sc2;
|
||||
flt_t c = (c0 + c1mag*c2mag) * s12;
|
||||
|
||||
const flt_t cx = vb1z*vb2ym - vb1y*vb2zm;
|
||||
const flt_t cy = vb1x*vb2zm - vb1z*vb2xm;
|
||||
const flt_t cz = vb1y*vb2xm - vb1x*vb2ym;
|
||||
const flt_t cmag = (flt_t)1.0/sqrt(cx*cx + cy*cy + cz*cz);
|
||||
const flt_t dx = (cx*vb3x + cy*vb3y + cz*vb3z)*cmag*rb3;
|
||||
|
||||
// error check
|
||||
if (c > PTOLERANCE || c < MTOLERANCE) {
|
||||
int me = comm->me;
|
||||
|
||||
if (screen) {
|
||||
char str[128];
|
||||
sprintf(str,"Dihedral problem: %d/%d " BIGINT_FORMAT " "
|
||||
TAGINT_FORMAT " " TAGINT_FORMAT " "
|
||||
TAGINT_FORMAT " " TAGINT_FORMAT,
|
||||
me,tid,update->ntimestep,
|
||||
atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
|
||||
error->warning(FLERR,str,0);
|
||||
fprintf(screen," 1st atom: %d %g %g %g\n",
|
||||
me,x[i1].x,x[i1].y,x[i1].z);
|
||||
fprintf(screen," 2nd atom: %d %g %g %g\n",
|
||||
me,x[i2].x,x[i2].y,x[i2].z);
|
||||
fprintf(screen," 3rd atom: %d %g %g %g\n",
|
||||
me,x[i3].x,x[i3].y,x[i3].z);
|
||||
fprintf(screen," 4th atom: %d %g %g %g\n",
|
||||
me,x[i4].x,x[i4].y,x[i4].z);
|
||||
}
|
||||
}
|
||||
|
||||
if (c > (flt_t)1.0) c = (flt_t)1.0;
|
||||
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
|
||||
|
||||
// force & energy
|
||||
// p = sum (i=1,4) k_i * (1 + (-1)**(i+1)*cos(i*phi) )
|
||||
// pd = dp/dc
|
||||
|
||||
const flt_t cossq = c * c;
|
||||
const flt_t sinsq = (flt_t)1.0 - cossq;
|
||||
flt_t siinv = (flt_t)1.0/sqrt(sinsq);
|
||||
if (sinsq < SMALLER2 ) siinv = INVSMALLER;
|
||||
if (dx < (flt_t)0.0) siinv = -siinv;
|
||||
|
||||
const flt_t cos_2phi = cossq - sinsq;
|
||||
const flt_t sin_2phim = (flt_t)2.0 * c;
|
||||
const flt_t cos_3phi = (flt_t)2.0 * c * cos_2phi - c;
|
||||
const flt_t sin_3phim = (flt_t)2.0 * cos_2phi + (flt_t)1.0;
|
||||
const flt_t cos_4phi = (flt_t)2.0 * cos_2phi * cos_2phi - (flt_t)1.0;
|
||||
const flt_t sin_4phim = (flt_t)2.0 * cos_2phi * sin_2phim;
|
||||
|
||||
flt_t p, pd;
|
||||
p = fc.bp[type].k1*((flt_t)1.0 + c) +
|
||||
fc.bp[type].k2*((flt_t)1.0 - cos_2phi) +
|
||||
fc.bp[type].k3*((flt_t)1.0 + cos_3phi) +
|
||||
fc.bp[type].k4*((flt_t)1.0 - cos_4phi) ;
|
||||
pd = fc.bp[type].k1 -
|
||||
(flt_t)2.0 * fc.bp[type].k2 * sin_2phim +
|
||||
(flt_t)3.0 * fc.bp[type].k3 * sin_3phim -
|
||||
(flt_t)4.0 * fc.bp[type].k4 * sin_4phim;
|
||||
|
||||
flt_t edihed;
|
||||
if (EFLAG) edihed = p;
|
||||
|
||||
const flt_t a = pd;
|
||||
c = c * a;
|
||||
s12 = s12 * a;
|
||||
const flt_t a11 = c*sb1*s1;
|
||||
const flt_t a22 = -sb2 * ((flt_t)2.0*c0*s12 - c*(s1+s2));
|
||||
const flt_t a33 = c*sb3*s2;
|
||||
const flt_t a12 = -r12c1 * (c1mag*c*s1 + c2mag*s12);
|
||||
const flt_t a13 = -rb1*rb3*s12;
|
||||
const flt_t a23 = r12c2 * (c2mag*c*s2 + c1mag*s12);
|
||||
|
||||
const flt_t sx2 = a12*vb1x - a22*vb2xm + a23*vb3x;
|
||||
const flt_t sy2 = a12*vb1y - a22*vb2ym + a23*vb3y;
|
||||
const flt_t sz2 = a12*vb1z - a22*vb2zm + a23*vb3z;
|
||||
|
||||
const flt_t f1x = a11*vb1x - a12*vb2xm + a13*vb3x;
|
||||
const flt_t f1y = a11*vb1y - a12*vb2ym + a13*vb3y;
|
||||
const flt_t f1z = a11*vb1z - a12*vb2zm + a13*vb3z;
|
||||
|
||||
const flt_t f2x = -sx2 - f1x;
|
||||
const flt_t f2y = -sy2 - f1y;
|
||||
const flt_t f2z = -sz2 - f1z;
|
||||
|
||||
const flt_t f4x = a13*vb1x - a23*vb2xm + a33*vb3x;
|
||||
const flt_t f4y = a13*vb1y - a23*vb2ym + a33*vb3y;
|
||||
const flt_t f4z = a13*vb1z - a23*vb2zm + a33*vb3z;
|
||||
|
||||
const flt_t f3x = sx2 - f4x;
|
||||
const flt_t f3y = sy2 - f4y;
|
||||
const flt_t f3z = sz2 - f4z;
|
||||
|
||||
if (EVFLAG) {
|
||||
IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, edihed, i1, i2, i3, i4, f1x,
|
||||
f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z, vb1x,
|
||||
vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x, vb3y,
|
||||
vb3z, sedihedral, f, NEWTON_BOND, nlocal,
|
||||
sv0, sv1, sv2, sv3, sv4, sv5);
|
||||
}
|
||||
|
||||
{
|
||||
if (NEWTON_BOND || i1 < nlocal) {
|
||||
f[i1].x += f1x;
|
||||
f[i1].y += f1y;
|
||||
f[i1].z += f1z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i2 < nlocal) {
|
||||
f[i2].x += f2x;
|
||||
f[i2].y += f2y;
|
||||
f[i2].z += f2z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i3 < nlocal) {
|
||||
f[i3].x += f3x;
|
||||
f[i3].y += f3y;
|
||||
f[i3].z += f3z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i4 < nlocal) {
|
||||
f[i4].x += f4x;
|
||||
f[i4].y += f4y;
|
||||
f[i4].z += f4z;
|
||||
}
|
||||
}
|
||||
} // for n
|
||||
if (EVFLAG) {
|
||||
if (EFLAG) oedihedral += sedihedral;
|
||||
if (vflag) {
|
||||
ov0 += sv0; ov1 += sv1; ov2 += sv2; ov3 += sv3; ov4 += sv4; ov5 += sv5;
|
||||
}
|
||||
}
|
||||
} // omp parallel
|
||||
|
||||
if (EVFLAG) {
|
||||
if (EFLAG) energy += oedihedral;
|
||||
if (vflag) {
|
||||
virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
|
||||
virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
|
||||
}
|
||||
}
|
||||
|
||||
fix->set_reduce_flag();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void DihedralOPLSIntel::init_style()
|
||||
{
|
||||
DihedralOPLS::init_style();
|
||||
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
"The 'package intel' command is required for /intel styles");
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_use_base = 0;
|
||||
if (fix->offload_balance() != 0.0) {
|
||||
_use_base = 1;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
fix->bond_init_check();
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void DihedralOPLSIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int bp1 = atom->ndihedraltypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
for (int i = 0; i < bp1; i++) {
|
||||
fc.bp[i].k1 = k1[i];
|
||||
fc.bp[i].k2 = k2[i];
|
||||
fc.bp[i].k3 = k3[i];
|
||||
fc.bp[i].k4 = k4[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void DihedralOPLSIntel::ForceConst<flt_t>::set_ntypes(const int nbondtypes,
|
||||
Memory *memory) {
|
||||
if (nbondtypes != _nbondtypes) {
|
||||
if (_nbondtypes > 0)
|
||||
_memory->destroy(bp);
|
||||
|
||||
if (nbondtypes > 0)
|
||||
_memory->create(bp,nbondtypes,"dihedralcharmmintel.bp");
|
||||
}
|
||||
_nbondtypes = nbondtypes;
|
||||
_memory = memory;
|
||||
}
|
||||
80
src/USER-INTEL/dihedral_opls_intel.h
Normal file
80
src/USER-INTEL/dihedral_opls_intel.h
Normal file
@ -0,0 +1,80 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef DIHEDRAL_CLASS
|
||||
|
||||
DihedralStyle(opls/intel,DihedralOPLSIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_DIHEDRAL_OPLS_INTEL_H
|
||||
#define LMP_DIHEDRAL_OPLS_INTEL_H
|
||||
|
||||
#include "dihedral_opls.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class DihedralOPLSIntel : public DihedralOPLS {
|
||||
|
||||
public:
|
||||
DihedralOPLSIntel(class LAMMPS *lmp);
|
||||
virtual void compute(int, int);
|
||||
void init_style();
|
||||
|
||||
private:
|
||||
FixIntel *fix;
|
||||
|
||||
template <class flt_t> class ForceConst;
|
||||
template <class flt_t, class acc_t>
|
||||
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <class flt_t, class acc_t>
|
||||
void pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t, acc_t> *buffers);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int _use_base;
|
||||
#endif
|
||||
|
||||
template <class flt_t>
|
||||
class ForceConst {
|
||||
public:
|
||||
typedef struct { flt_t k1, k2, k3, k4; } fc_packed1;
|
||||
|
||||
fc_packed1 *bp;
|
||||
|
||||
ForceConst() : _nbondtypes(0) {}
|
||||
~ForceConst() { set_ntypes(0, NULL); }
|
||||
|
||||
void set_ntypes(const int nbondtypes, Memory *memory);
|
||||
|
||||
private:
|
||||
int _nbondtypes;
|
||||
Memory *_memory;
|
||||
};
|
||||
ForceConst<float> force_const_single;
|
||||
ForceConst<double> force_const_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
50
src/USER-INTEL/fix_nvt_intel.cpp
Normal file
50
src/USER-INTEL/fix_nvt_intel.cpp
Normal file
@ -0,0 +1,50 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <string.h>
|
||||
#include "fix_nvt_intel.h"
|
||||
#include "group.h"
|
||||
#include "modify.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FixNVTIntel::FixNVTIntel(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixNHIntel(lmp, narg, arg)
|
||||
{
|
||||
if (!tstat_flag)
|
||||
error->all(FLERR,"Temperature control must be used with fix nvt");
|
||||
if (pstat_flag)
|
||||
error->all(FLERR,"Pressure control can not be used with fix nvt");
|
||||
|
||||
// create a new compute temp style
|
||||
// id = fix-ID + temp
|
||||
|
||||
int n = strlen(id) + 6;
|
||||
id_temp = new char[n];
|
||||
strcpy(id_temp,id);
|
||||
strcat(id_temp,"_temp");
|
||||
|
||||
char **newarg = new char*[3];
|
||||
newarg[0] = id_temp;
|
||||
newarg[1] = group->names[igroup];
|
||||
newarg[2] = (char *) "temp";
|
||||
|
||||
modify->add_compute(3,newarg);
|
||||
delete [] newarg;
|
||||
tflag = 1;
|
||||
}
|
||||
|
||||
52
src/USER-INTEL/fix_nvt_intel.h
Normal file
52
src/USER-INTEL/fix_nvt_intel.h
Normal file
@ -0,0 +1,52 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(nvt/intel,FixNVTIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_NVT_INTEL_H
|
||||
#define LMP_FIX_NVT_INTEL_H
|
||||
|
||||
#include "fix_nh_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixNVTIntel : public FixNHIntel {
|
||||
public:
|
||||
FixNVTIntel(class LAMMPS *, int, char **);
|
||||
~FixNVTIntel() {}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Temperature control must be used with fix nvt
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Pressure control can not be used with fix nvt
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
*/
|
||||
126
src/USER-INTEL/fix_nvt_sllod_intel.cpp
Normal file
126
src/USER-INTEL/fix_nvt_sllod_intel.cpp
Normal file
@ -0,0 +1,126 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "fix_nvt_sllod_intel.h"
|
||||
#include "math_extra.h"
|
||||
#include "atom.h"
|
||||
#include "domain.h"
|
||||
#include "group.h"
|
||||
#include "modify.h"
|
||||
#include "fix.h"
|
||||
#include "fix_deform.h"
|
||||
#include "compute.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace FixConst;
|
||||
|
||||
enum{NO_REMAP,X_REMAP,V_REMAP}; // same as fix_deform.cpp
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
FixNVTSllodIntel::FixNVTSllodIntel(LAMMPS *lmp, int narg, char **arg) :
|
||||
FixNHIntel(lmp, narg, arg)
|
||||
{
|
||||
if (!tstat_flag)
|
||||
error->all(FLERR,"Temperature control must be used with fix nvt/sllod");
|
||||
if (pstat_flag)
|
||||
error->all(FLERR,"Pressure control can not be used with fix nvt/sllod");
|
||||
|
||||
// default values
|
||||
|
||||
if (mtchain_default_flag) mtchain = 1;
|
||||
|
||||
|
||||
// create a new compute temp style
|
||||
// id = fix-ID + temp
|
||||
|
||||
int n = strlen(id) + 6;
|
||||
id_temp = new char[n];
|
||||
strcpy(id_temp,id);
|
||||
strcat(id_temp,"_temp");
|
||||
|
||||
char **newarg = new char*[3];
|
||||
newarg[0] = id_temp;
|
||||
newarg[1] = group->names[igroup];
|
||||
newarg[2] = (char *) "temp/deform";
|
||||
|
||||
modify->add_compute(3,newarg);
|
||||
delete [] newarg;
|
||||
tflag = 1;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void FixNVTSllodIntel::init()
|
||||
{
|
||||
FixNHIntel::init();
|
||||
|
||||
if (!temperature->tempbias)
|
||||
error->all(FLERR,"Temperature for fix nvt/sllod does not have a bias");
|
||||
|
||||
nondeformbias = 0;
|
||||
if (strcmp(temperature->style,"temp/deform") != 0) nondeformbias = 1;
|
||||
|
||||
// check fix deform remap settings
|
||||
|
||||
int i;
|
||||
for (i = 0; i < modify->nfix; i++)
|
||||
if (strcmp(modify->fix[i]->style,"deform") == 0) {
|
||||
if (((FixDeform *) modify->fix[i])->remapflag != V_REMAP)
|
||||
error->all(FLERR,"Using fix nvt/sllod with inconsistent fix deform "
|
||||
"remap option");
|
||||
break;
|
||||
}
|
||||
if (i == modify->nfix)
|
||||
error->all(FLERR,"Using fix nvt/sllod with no fix deform defined");
|
||||
}
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
perform half-step scaling of velocities
|
||||
-----------------------------------------------------------------------*/
|
||||
|
||||
void FixNVTSllodIntel::nh_v_temp()
|
||||
{
|
||||
// remove and restore bias = streaming velocity = Hrate*lamda + Hratelo
|
||||
// thermostat thermal velocity only
|
||||
// vdelu = SLLOD correction = Hrate*Hinv*vthermal
|
||||
// for non temp/deform BIAS:
|
||||
// calculate temperature since some computes require temp
|
||||
// computed on current nlocal atoms to remove bias
|
||||
|
||||
if (nondeformbias) temperature->compute_scalar();
|
||||
|
||||
double **v = atom->v;
|
||||
int *mask = atom->mask;
|
||||
int nlocal = atom->nlocal;
|
||||
if (igroup == atom->firstgroup) nlocal = atom->nfirst;
|
||||
|
||||
double h_two[6],vdelu[3];
|
||||
MathExtra::multiply_shape_shape(domain->h_rate,domain->h_inv,h_two);
|
||||
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & groupbit) {
|
||||
vdelu[0] = h_two[0]*v[i][0] + h_two[5]*v[i][1] + h_two[4]*v[i][2];
|
||||
vdelu[1] = h_two[1]*v[i][1] + h_two[3]*v[i][2];
|
||||
vdelu[2] = h_two[2]*v[i][2];
|
||||
temperature->remove_bias(i,v[i]);
|
||||
v[i][0] = v[i][0]*factor_eta - dthalf*vdelu[0];
|
||||
v[i][1] = v[i][1]*factor_eta - dthalf*vdelu[1];
|
||||
v[i][2] = v[i][2]*factor_eta - dthalf*vdelu[2];
|
||||
temperature->restore_bias(i,v[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
72
src/USER-INTEL/fix_nvt_sllod_intel.h
Normal file
72
src/USER-INTEL/fix_nvt_sllod_intel.h
Normal file
@ -0,0 +1,72 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef FIX_CLASS
|
||||
|
||||
FixStyle(nvt/sllod/intel,FixNVTSllodIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_FIX_NVTSLLOD_INTEL_H
|
||||
#define LMP_FIX_NVTSLLOD_INTEL_H
|
||||
|
||||
#include "fix_nh_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class FixNVTSllodIntel : public FixNHIntel {
|
||||
public:
|
||||
FixNVTSllodIntel(class LAMMPS *, int, char **);
|
||||
~FixNVTSllodIntel() {}
|
||||
void init();
|
||||
|
||||
private:
|
||||
int nondeformbias;
|
||||
|
||||
void nh_v_temp();
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
E: Temperature control must be used with fix nvt/sllod
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Pressure control can not be used with fix nvt/sllod
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
E: Temperature for fix nvt/sllod does not have a bias
|
||||
|
||||
The specified compute must compute temperature with a bias.
|
||||
|
||||
E: Using fix nvt/sllod with inconsistent fix deform remap option
|
||||
|
||||
Fix nvt/sllod requires that deforming atoms have a velocity profile
|
||||
provided by "remap v" as a fix deform option.
|
||||
|
||||
E: Using fix nvt/sllod with no fix deform defined
|
||||
|
||||
Self-explanatory.
|
||||
|
||||
*/
|
||||
|
||||
425
src/USER-INTEL/improper_cvff_intel.cpp
Normal file
425
src/USER-INTEL/improper_cvff_intel.cpp
Normal file
@ -0,0 +1,425 @@
|
||||
/* ----------------------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#include <mpi.h>
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include "improper_cvff_intel.h"
|
||||
#include "atom.h"
|
||||
#include "comm.h"
|
||||
#include "neighbor.h"
|
||||
#include "domain.h"
|
||||
#include "force.h"
|
||||
#include "update.h"
|
||||
#include "math_const.h"
|
||||
#include "memory.h"
|
||||
#include "modify.h"
|
||||
#include "suffix.h"
|
||||
#include "error.h"
|
||||
|
||||
using namespace LAMMPS_NS;
|
||||
using namespace MathConst;
|
||||
|
||||
#define PTOLERANCE (flt_t)1.05
|
||||
#define MTOLERANCE (flt_t)-1.05
|
||||
#define SMALL2 (flt_t)0.000001
|
||||
#define INVSMALL (flt_t)1000.0
|
||||
typedef struct { int a,b,c,d,t; } int5_t;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
ImproperCvffIntel::ImproperCvffIntel(LAMMPS *lmp) :
|
||||
ImproperCvff(lmp)
|
||||
{
|
||||
suffix_flag |= Suffix::INTEL;
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
ImproperCvffIntel::~ImproperCvffIntel()
|
||||
{
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ImproperCvffIntel::compute(int eflag, int vflag)
|
||||
{
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (_use_base) {
|
||||
ImproperCvff::compute(eflag, vflag);
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
compute<float,double>(eflag, vflag, fix->get_mixed_buffers(),
|
||||
force_const_single);
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
compute<double,double>(eflag, vflag, fix->get_double_buffers(),
|
||||
force_const_double);
|
||||
else
|
||||
compute<float,float>(eflag, vflag, fix->get_single_buffers(),
|
||||
force_const_single);
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void ImproperCvffIntel::compute(int eflag, int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
{
|
||||
if (eflag || vflag) ev_setup(eflag,vflag);
|
||||
else evflag = 0;
|
||||
|
||||
if (evflag) {
|
||||
if (eflag) {
|
||||
if (force->newton_bond)
|
||||
eval<1,1,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,1,0>(vflag, buffers, fc);
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<1,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<1,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
} else {
|
||||
if (force->newton_bond)
|
||||
eval<0,0,1>(vflag, buffers, fc);
|
||||
else
|
||||
eval<0,0,0>(vflag, buffers, fc);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void ImproperCvffIntel::eval(const int vflag,
|
||||
IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc)
|
||||
{
|
||||
const int inum = neighbor->nimproperlist;
|
||||
if (inum == 0) return;
|
||||
|
||||
ATOM_T * _noalias const x = buffers->get_x(0);
|
||||
const int nlocal = atom->nlocal;
|
||||
const int nall = nlocal + atom->nghost;
|
||||
|
||||
int f_stride;
|
||||
if (NEWTON_BOND) f_stride = buffers->get_stride(nall);
|
||||
else f_stride = buffers->get_stride(nlocal);
|
||||
|
||||
int tc;
|
||||
FORCE_T * _noalias f_start;
|
||||
acc_t * _noalias ev_global;
|
||||
IP_PRE_get_buffers(0, buffers, fix, tc, f_start, ev_global);
|
||||
const int nthreads = tc;
|
||||
|
||||
acc_t oeimproper, ov0, ov1, ov2, ov3, ov4, ov5;
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
oeimproper = (acc_t)0.0;
|
||||
if (vflag) {
|
||||
ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0.0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(_OPENMP)
|
||||
#pragma omp parallel default(none) \
|
||||
shared(f_start,f_stride,fc) \
|
||||
reduction(+:oeimproper,ov0,ov1,ov2,ov3,ov4,ov5)
|
||||
#endif
|
||||
{
|
||||
int nfrom, nto, tid;
|
||||
IP_PRE_omp_range_id(nfrom, nto, tid, inum, nthreads);
|
||||
|
||||
FORCE_T * _noalias const f = f_start + (tid * f_stride);
|
||||
if (fix->need_zero(tid))
|
||||
memset(f, 0, f_stride * sizeof(FORCE_T));
|
||||
|
||||
const int5_t * _noalias const improperlist =
|
||||
(int5_t *) neighbor->improperlist[0];
|
||||
|
||||
for (int n = nfrom; n < nto; n++) {
|
||||
const int i1 = improperlist[n].a;
|
||||
const int i2 = improperlist[n].b;
|
||||
const int i3 = improperlist[n].c;
|
||||
const int i4 = improperlist[n].d;
|
||||
const int type = improperlist[n].t;
|
||||
|
||||
// geometry of 4-body
|
||||
|
||||
const flt_t vb1x = x[i1].x - x[i2].x;
|
||||
const flt_t vb1y = x[i1].y - x[i2].y;
|
||||
const flt_t vb1z = x[i1].z - x[i2].z;
|
||||
|
||||
const flt_t vb2xm = x[i2].x - x[i3].x;
|
||||
const flt_t vb2ym = x[i2].y - x[i3].y;
|
||||
const flt_t vb2zm = x[i2].z - x[i3].z;
|
||||
|
||||
const flt_t vb3x = x[i4].x - x[i3].x;
|
||||
const flt_t vb3y = x[i4].y - x[i3].y;
|
||||
const flt_t vb3z = x[i4].z - x[i3].z;
|
||||
|
||||
// 1st and 2nd angle
|
||||
|
||||
const flt_t b1mag2 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
|
||||
const flt_t rb1 = (flt_t)1.0 / sqrt(b1mag2);
|
||||
const flt_t sb1 = (flt_t)1.0 / b1mag2;
|
||||
|
||||
const flt_t b2mag2 = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
|
||||
const flt_t rb2 = (flt_t)1.0 / sqrt(b2mag2);
|
||||
const flt_t sb2 = (flt_t)1.0 / b2mag2;
|
||||
|
||||
const flt_t b3mag2 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
|
||||
const flt_t rb3 = (flt_t)1.0 / sqrt(b3mag2);
|
||||
const flt_t sb3 = (flt_t)1.0 / b3mag2;
|
||||
|
||||
const flt_t c0 = (vb1x * vb3x + vb1y * vb3y + vb1z * vb3z) * rb1 * rb3;
|
||||
|
||||
flt_t ctmp = -vb1x*vb2xm - vb1y*vb2ym - vb1z*vb2zm;
|
||||
const flt_t r12c1 = rb1 * rb2;
|
||||
const flt_t c1mag = ctmp * r12c1;
|
||||
|
||||
ctmp = vb2xm*vb3x + vb2ym*vb3y + vb2zm*vb3z;
|
||||
const flt_t r12c2 = rb2 * rb3;
|
||||
const flt_t c2mag = ctmp * r12c2;
|
||||
|
||||
// cos and sin of 2 angles and final c
|
||||
|
||||
const flt_t sd1 = (flt_t)1.0 - c1mag * c1mag;
|
||||
flt_t sc1 = (flt_t)1.0/sqrt(sd1);
|
||||
if (sd1 < SMALL2) sc1 = INVSMALL;
|
||||
|
||||
const flt_t sd2 = (flt_t)1.0 - c2mag * c2mag;
|
||||
flt_t sc2 = (flt_t)1.0/sqrt(sd2);
|
||||
if (sc2 < SMALL2) sc2 = INVSMALL;
|
||||
|
||||
const flt_t s1 = sc1 * sc1;
|
||||
const flt_t s2 = sc2 * sc2;
|
||||
flt_t s12 = sc1 * sc2;
|
||||
flt_t c = (c0 + c1mag*c2mag) * s12;
|
||||
|
||||
// error check
|
||||
|
||||
if (c > PTOLERANCE || c < MTOLERANCE) {
|
||||
int me;
|
||||
MPI_Comm_rank(world,&me);
|
||||
if (screen) {
|
||||
char str[128];
|
||||
sprintf(str,"Improper problem: %d " BIGINT_FORMAT " "
|
||||
TAGINT_FORMAT " " TAGINT_FORMAT " "
|
||||
TAGINT_FORMAT " " TAGINT_FORMAT,
|
||||
me,update->ntimestep,
|
||||
atom->tag[i1],atom->tag[i2],atom->tag[i3],atom->tag[i4]);
|
||||
error->warning(FLERR,str,0);
|
||||
fprintf(screen," 1st atom: %d %g %g %g\n",
|
||||
me,x[i1].x,x[i1].y,x[i1].z);
|
||||
fprintf(screen," 2nd atom: %d %g %g %g\n",
|
||||
me,x[i2].x,x[i2].y,x[i2].z);
|
||||
fprintf(screen," 3rd atom: %d %g %g %g\n",
|
||||
me,x[i3].x,x[i3].y,x[i3].z);
|
||||
fprintf(screen," 4th atom: %d %g %g %g\n",
|
||||
me,x[i4].x,x[i4].y,x[i4].z);
|
||||
}
|
||||
}
|
||||
|
||||
if (c > (flt_t)1.0) c = (flt_t)1.0;
|
||||
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
|
||||
|
||||
// force & energy
|
||||
// p = 1 + cos(n*phi) for d = 1
|
||||
// p = 1 - cos(n*phi) for d = -1
|
||||
// pd = dp/dc / 2
|
||||
|
||||
const int m = fc.fc[type].multiplicity;
|
||||
|
||||
flt_t p, pd;
|
||||
if (m == 2) {
|
||||
p = (flt_t)2.0*c*c;
|
||||
pd = (flt_t)2.0*c;
|
||||
} else if (m == 3) {
|
||||
const flt_t rc2 = c*c;
|
||||
p = ((flt_t)4.0*rc2-(flt_t)3.0)*c + (flt_t)1.0;
|
||||
pd = (flt_t)6.0*rc2 - (flt_t)1.5;
|
||||
} else if (m == 4) {
|
||||
const flt_t rc2 = c*c;
|
||||
p = (flt_t)8.0*(rc2-1)*rc2 + (flt_t)2.0;
|
||||
pd = ((flt_t)16.0*rc2-(flt_t)8.0)*c;
|
||||
} else if (m == 6) {
|
||||
const flt_t rc2 = c*c;
|
||||
p = (((flt_t)32.0*rc2-(flt_t)48.0)*rc2 + (flt_t)18.0)*rc2;
|
||||
pd = ((flt_t)96.0*(rc2-(flt_t)1.0)*rc2 + (flt_t)18.0)*c;
|
||||
} else if (m == 1) {
|
||||
p = c + (flt_t)1.0;
|
||||
pd = (flt_t)0.5;
|
||||
} else if (m == 5) {
|
||||
const flt_t rc2 = c*c;
|
||||
p = (((flt_t)16.0*rc2-(flt_t)20.0)*rc2 + (flt_t)5.0)*c + (flt_t)1.0;
|
||||
pd = ((flt_t)40.0*rc2-(flt_t)30.0)*rc2 + (flt_t)2.5;
|
||||
} else if (m == 0) {
|
||||
p = (flt_t)2.0;
|
||||
pd = (flt_t)0.0;
|
||||
}
|
||||
|
||||
if (fc.fc[type].sign == -1) {
|
||||
p = (flt_t)2.0 - p;
|
||||
pd = -pd;
|
||||
}
|
||||
|
||||
flt_t eimproper;
|
||||
if (EFLAG) eimproper = fc.fc[type].k * p;
|
||||
|
||||
const flt_t a = (flt_t)2.0 * fc.fc[type].k * pd;
|
||||
c = c * a;
|
||||
s12 = s12 * a;
|
||||
const flt_t a11 = c*sb1*s1;
|
||||
const flt_t a22 = -sb2*((flt_t)2.0*c0*s12 - c*(s1+s2));
|
||||
const flt_t a33 = c*sb3*s2;
|
||||
const flt_t a12 = -r12c1*(c1mag*c*s1 + c2mag*s12);
|
||||
const flt_t a13 = -rb1*rb3*s12;
|
||||
const flt_t a23 = r12c2*(c2mag*c*s2 + c1mag*s12);
|
||||
|
||||
const flt_t sx2 = a12*vb1x - a22*vb2xm + a23*vb3x;
|
||||
const flt_t sy2 = a12*vb1y - a22*vb2ym + a23*vb3y;
|
||||
const flt_t sz2 = a12*vb1z - a22*vb2zm + a23*vb3z;
|
||||
|
||||
const flt_t f1x = a11*vb1x - a12*vb2xm + a13*vb3x;
|
||||
const flt_t f1y = a11*vb1y - a12*vb2ym + a13*vb3y;
|
||||
const flt_t f1z = a11*vb1z - a12*vb2zm + a13*vb3z;
|
||||
|
||||
const flt_t f2x = -sx2 - f1x;
|
||||
const flt_t f2y = -sy2 - f1y;
|
||||
const flt_t f2z = -sz2 - f1z;
|
||||
|
||||
const flt_t f4x = a13*vb1x - a23*vb2xm + a33*vb3x;
|
||||
const flt_t f4y = a13*vb1y - a23*vb2ym + a33*vb3y;
|
||||
const flt_t f4z = a13*vb1z - a23*vb2zm + a33*vb3z;
|
||||
|
||||
const flt_t f3x = sx2 - f4x;
|
||||
const flt_t f3y = sy2 - f4y;
|
||||
const flt_t f3z = sz2 - f4z;
|
||||
|
||||
// apply force to each of 4 atoms
|
||||
|
||||
if (NEWTON_BOND || i1 < nlocal) {
|
||||
f[i1].x += f1x;
|
||||
f[i1].y += f1y;
|
||||
f[i1].z += f1z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i2 < nlocal) {
|
||||
f[i2].x += f2x;
|
||||
f[i2].y += f2y;
|
||||
f[i2].z += f2z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i3 < nlocal) {
|
||||
f[i3].x += f3x;
|
||||
f[i3].y += f3y;
|
||||
f[i3].z += f3z;
|
||||
}
|
||||
|
||||
if (NEWTON_BOND || i4 < nlocal) {
|
||||
f[i4].x += f4x;
|
||||
f[i4].y += f4y;
|
||||
f[i4].z += f4z;
|
||||
}
|
||||
|
||||
if (EVFLAG) {
|
||||
IP_PRE_ev_tally_dihed(EFLAG, eatom, vflag, eimproper, i1, i2, i3, i4,
|
||||
f1x, f1y, f1z, f3x, f3y, f3z, f4x, f4y, f4z,
|
||||
vb1x, vb1y, vb1z, -vb2xm, -vb2ym, -vb2zm, vb3x,
|
||||
vb3y, vb3z, oeimproper, f, NEWTON_BOND, nlocal,
|
||||
ov0, ov1, ov2, ov3, ov4, ov5);
|
||||
}
|
||||
} // for n
|
||||
} // omp parallel
|
||||
if (EVFLAG) {
|
||||
if (EFLAG)
|
||||
energy += oeimproper;
|
||||
if (vflag) {
|
||||
virial[0] += ov0; virial[1] += ov1; virial[2] += ov2;
|
||||
virial[3] += ov3; virial[4] += ov4; virial[5] += ov5;
|
||||
}
|
||||
}
|
||||
|
||||
fix->set_reduce_flag();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void ImproperCvffIntel::init()
|
||||
{
|
||||
ImproperCvff::init();
|
||||
|
||||
int ifix = modify->find_fix("package_intel");
|
||||
if (ifix < 0)
|
||||
error->all(FLERR,
|
||||
"The 'package intel' command is required for /intel styles");
|
||||
fix = static_cast<FixIntel *>(modify->fix[ifix]);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
_use_base = 0;
|
||||
if (fix->offload_balance() != 0.0) {
|
||||
_use_base = 1;
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
fix->bond_init_check();
|
||||
|
||||
if (fix->precision() == FixIntel::PREC_MODE_MIXED)
|
||||
pack_force_const(force_const_single, fix->get_mixed_buffers());
|
||||
else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
|
||||
pack_force_const(force_const_double, fix->get_double_buffers());
|
||||
else
|
||||
pack_force_const(force_const_single, fix->get_single_buffers());
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t, class acc_t>
|
||||
void ImproperCvffIntel::pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t,acc_t> *buffers)
|
||||
{
|
||||
const int bp1 = atom->nimpropertypes + 1;
|
||||
fc.set_ntypes(bp1,memory);
|
||||
|
||||
for (int i = 0; i < bp1; i++) {
|
||||
fc.fc[i].k = k[i];
|
||||
fc.fc[i].sign = sign[i];
|
||||
fc.fc[i].multiplicity = multiplicity[i];
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
template <class flt_t>
|
||||
void ImproperCvffIntel::ForceConst<flt_t>::set_ntypes(const int nimproper,
|
||||
Memory *memory) {
|
||||
if (nimproper != _nimpropertypes) {
|
||||
if (_nimpropertypes > 0)
|
||||
_memory->destroy(fc);
|
||||
|
||||
if (nimproper > 0)
|
||||
_memory->create(fc,nimproper,"improperharmonicintel.fc");
|
||||
}
|
||||
_nimpropertypes = nimproper;
|
||||
_memory = memory;
|
||||
}
|
||||
90
src/USER-INTEL/improper_cvff_intel.h
Normal file
90
src/USER-INTEL/improper_cvff_intel.h
Normal file
@ -0,0 +1,90 @@
|
||||
/* -*- c++ -*- ----------------------------------------------------------
|
||||
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
http://lammps.sandia.gov, Sandia National Laboratories
|
||||
Steve Plimpton, sjplimp@sandia.gov
|
||||
|
||||
Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
certain rights in this software. This software is distributed under
|
||||
the GNU General Public License.
|
||||
|
||||
See the README file in the top-level LAMMPS directory.
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
/* ----------------------------------------------------------------------
|
||||
Contributing author: W. Michael Brown (Intel)
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
#ifdef IMPROPER_CLASS
|
||||
|
||||
ImproperStyle(cvff/intel,ImproperCvffIntel)
|
||||
|
||||
#else
|
||||
|
||||
#ifndef LMP_IMPROPER_CVFF_INTEL_H
|
||||
#define LMP_IMPROPER_CVFF_INTEL_H
|
||||
|
||||
#include <stdio.h>
|
||||
#include "improper_cvff.h"
|
||||
#include "fix_intel.h"
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
class ImproperCvffIntel : public ImproperCvff {
|
||||
public:
|
||||
ImproperCvffIntel(class LAMMPS *);
|
||||
virtual ~ImproperCvffIntel();
|
||||
virtual void compute(int, int);
|
||||
virtual void init();
|
||||
|
||||
protected:
|
||||
FixIntel *fix;
|
||||
|
||||
template <class flt_t> class ForceConst;
|
||||
template <class flt_t, class acc_t>
|
||||
void compute(int eflag, int vflag, IntelBuffers<flt_t,acc_t> *buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <int EVFLAG, int EFLAG, int NEWTON_BOND, class flt_t, class acc_t>
|
||||
void eval(const int vflag, IntelBuffers<flt_t,acc_t> * buffers,
|
||||
const ForceConst<flt_t> &fc);
|
||||
template <class flt_t, class acc_t>
|
||||
void pack_force_const(ForceConst<flt_t> &fc,
|
||||
IntelBuffers<flt_t, acc_t> *buffers);
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int _use_base;
|
||||
#endif
|
||||
|
||||
template <class flt_t>
|
||||
class ForceConst {
|
||||
public:
|
||||
typedef struct { flt_t k; int sign, multiplicity; } fc_packed1;
|
||||
|
||||
fc_packed1 *fc;
|
||||
|
||||
ForceConst() : _nimpropertypes(0) {}
|
||||
~ForceConst() { set_ntypes(0, NULL); }
|
||||
|
||||
void set_ntypes(const int nimpropertypes, Memory *memory);
|
||||
|
||||
private:
|
||||
int _nimpropertypes;
|
||||
Memory *_memory;
|
||||
};
|
||||
ForceConst<float> force_const_single;
|
||||
ForceConst<double> force_const_double;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* ERROR/WARNING messages:
|
||||
|
||||
W: Improper problem: %d %ld %d %d %d %d
|
||||
|
||||
Conformation of the 4 listed improper atoms is extreme; you may want
|
||||
to check your simulation geometry.
|
||||
|
||||
*/
|
||||
@ -37,6 +37,8 @@ using namespace MathConst;
|
||||
#define PTOLERANCE (flt_t)1.05
|
||||
#define MTOLERANCE (flt_t)-1.05
|
||||
#define SMALL (flt_t)0.001
|
||||
#define SMALL2 (flt_t)0.000001
|
||||
#define INVSMALL (flt_t)1000.0
|
||||
typedef struct { int a,b,c,d,t; } int5_t;
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -175,13 +177,17 @@ void ImproperHarmonicIntel::eval(const int vflag,
|
||||
const flt_t vb3y = x[i4].y - x[i3].y;
|
||||
const flt_t vb3z = x[i4].z - x[i3].z;
|
||||
|
||||
const flt_t ss1 = (flt_t)1.0 / (vb1x*vb1x + vb1y*vb1y + vb1z*vb1z);
|
||||
const flt_t ss2 = (flt_t)1.0 / (vb2x*vb2x + vb2y*vb2y + vb2z*vb2z);
|
||||
const flt_t ss3 = (flt_t)1.0 / (vb3x*vb3x + vb3y*vb3y + vb3z*vb3z);
|
||||
flt_t ss1 = vb1x*vb1x + vb1y*vb1y + vb1z*vb1z;
|
||||
flt_t ss2 = vb2x*vb2x + vb2y*vb2y + vb2z*vb2z;
|
||||
flt_t ss3 = vb3x*vb3x + vb3y*vb3y + vb3z*vb3z;
|
||||
|
||||
const flt_t r1 = sqrt(ss1);
|
||||
const flt_t r2 = sqrt(ss2);
|
||||
const flt_t r3 = sqrt(ss3);
|
||||
const flt_t r1 = (flt_t)1.0 / sqrt(ss1);
|
||||
const flt_t r2 = (flt_t)1.0 / sqrt(ss2);
|
||||
const flt_t r3 = (flt_t)1.0 / sqrt(ss3);
|
||||
|
||||
ss1 = (flt_t)1.0 / ss1;
|
||||
ss2 = (flt_t)1.0 / ss2;
|
||||
ss3 = (flt_t)1.0 / ss3;
|
||||
|
||||
// sin and cos of angle
|
||||
|
||||
@ -191,13 +197,13 @@ void ImproperHarmonicIntel::eval(const int vflag,
|
||||
|
||||
flt_t s1 = 1.0 - c1*c1;
|
||||
if (s1 < SMALL) s1 = SMALL;
|
||||
s1 = (flt_t)1.0 / s1;
|
||||
|
||||
flt_t s2 = (flt_t)1.0 - c2*c2;
|
||||
if (s2 < SMALL) s2 = SMALL;
|
||||
s2 = (flt_t)1.0 / s2;
|
||||
|
||||
flt_t s12 = sqrt(s1*s2);
|
||||
flt_t s12 = (flt_t)1.0 / sqrt(s1*s2);
|
||||
s1 = (flt_t)1.0 / s1;
|
||||
s2 = (flt_t)1.0 / s2;
|
||||
flt_t c = (c1*c2 + c0) * s12;
|
||||
|
||||
// error check
|
||||
@ -227,8 +233,9 @@ void ImproperHarmonicIntel::eval(const int vflag,
|
||||
if (c > (flt_t)1.0) c = (flt_t)1.0;
|
||||
if (c < (flt_t)-1.0) c = (flt_t)-1.0;
|
||||
|
||||
flt_t s = sqrt((flt_t)1.0 - c*c);
|
||||
if (s < SMALL) s = SMALL;
|
||||
const flt_t sd = (flt_t)1.0 - c * c;
|
||||
flt_t s = (flt_t)1.0 / sqrt(sd);
|
||||
if (sd < SMALL2) s = INVSMALL;
|
||||
|
||||
// force & energy
|
||||
|
||||
@ -239,7 +246,7 @@ void ImproperHarmonicIntel::eval(const int vflag,
|
||||
flt_t eimproper;
|
||||
if (EFLAG) eimproper = a*domega;
|
||||
|
||||
a = -a * (flt_t)2.0/s;
|
||||
a = -a * (flt_t)2.0 * s;
|
||||
c = c * a;
|
||||
s12 = s12 * a;
|
||||
const flt_t a11 = c*ss1*s1;
|
||||
|
||||
@ -1454,28 +1454,24 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
|
||||
#endif
|
||||
|
||||
const int num = aend-astart;
|
||||
const int num = aend - astart;
|
||||
int tid, ifrom, ito;
|
||||
IP_PRE_omp_range_id(ifrom,ito,tid,num,nthreads);
|
||||
IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
|
||||
ifrom += astart;
|
||||
ito += astart;
|
||||
|
||||
int which;
|
||||
|
||||
const int list_size = (ito + tid + 1) * maxnbors;
|
||||
int ct = (ifrom + tid) * maxnbors;
|
||||
const int list_size = (ito + tid * 2 + 2) * maxnbors;
|
||||
int ct = (ifrom + tid * 2) * maxnbors;
|
||||
int *neighptr = firstneigh + ct;
|
||||
const int obound = maxnbors * 3;
|
||||
|
||||
for (int i = ifrom; i < ito; i++) {
|
||||
int j, k, n, n2, itype, jtype, ibin;
|
||||
double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
|
||||
|
||||
n = 0;
|
||||
n2 = maxnbors;
|
||||
|
||||
xtmp = x[i].x;
|
||||
ytmp = x[i].y;
|
||||
ztmp = x[i].z;
|
||||
itype = x[i].w;
|
||||
const flt_t xtmp = x[i].x;
|
||||
const flt_t ytmp = x[i].y;
|
||||
const flt_t ztmp = x[i].z;
|
||||
const int itype = x[i].w;
|
||||
const int ioffset = ntypes * itype;
|
||||
|
||||
// loop over all atoms in bins in stencil
|
||||
@ -1484,10 +1480,11 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
// (equal zyx and j <= i)
|
||||
// latter excludes self-self interaction but allows superposed atoms
|
||||
|
||||
ibin = atombin[i];
|
||||
const int ibin = atombin[i];
|
||||
|
||||
for (k = 0; k < nstencil; k++) {
|
||||
for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
|
||||
int raw_count = maxnbors;
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
for (int j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
|
||||
if (offload_noghost) {
|
||||
if (j < nlocal) {
|
||||
if (i < offload_end) continue;
|
||||
@ -1503,62 +1500,93 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
}
|
||||
}
|
||||
|
||||
jtype = x[j].w;
|
||||
#ifndef _LMP_INTEL_OFFLOAD
|
||||
if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
if (exclude) {
|
||||
const int jtype = x[j].w;
|
||||
if (exclusion(i,j,itype,jtype,mask,molecule)) continue;
|
||||
}
|
||||
#endif
|
||||
|
||||
delx = xtmp - x[j].x;
|
||||
dely = ytmp - x[j].y;
|
||||
delz = ztmp - x[j].z;
|
||||
rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq <= cutneighsq[ioffset + jtype]) {
|
||||
if (j < nlocal) {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[n++] = -j - 1;
|
||||
else
|
||||
neighptr[n++] = j;
|
||||
} else
|
||||
neighptr[n++] = j;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < lmin) lmin = j;
|
||||
if (j > lmax) lmax = j;
|
||||
#endif
|
||||
} else {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[n2++] = -j - 1;
|
||||
else
|
||||
neighptr[n2++] = j;
|
||||
} else
|
||||
neighptr[n2++] = j;
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < gmin) gmin = j;
|
||||
if (j > gmax) gmax = j;
|
||||
#endif
|
||||
}
|
||||
neighptr[raw_count++] = j;
|
||||
}
|
||||
}
|
||||
if (raw_count > obound)
|
||||
*overflow = 1;
|
||||
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
int vlmin = lmin, vlmax = lmax, vgmin = gmin, vgmax = gmax;
|
||||
#pragma vector aligned
|
||||
#pragma simd reduction(max:vlmax,vgmax) reduction(min:vlmin, vgmin)
|
||||
#else
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
#endif
|
||||
for (int u = maxnbors; u < raw_count; u++) {
|
||||
int j = neighptr[u];
|
||||
const flt_t delx = xtmp - x[j].x;
|
||||
const flt_t dely = ytmp - x[j].y;
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const int jtype = x[j].w;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
if (rsq > cutneighsq[ioffset + jtype])
|
||||
neighptr[u] = e_nall;
|
||||
else {
|
||||
if (need_ic) {
|
||||
int no_special;
|
||||
ominimum_image_check(no_special, delx, dely, delz);
|
||||
if (no_special)
|
||||
neighptr[u] = -j - 1;
|
||||
}
|
||||
|
||||
#ifdef _LMP_INTEL_OFFLOAD
|
||||
if (j < nlocal) {
|
||||
if (j < vlmin) vlmin = j;
|
||||
if (j > vlmax) vlmax = j;
|
||||
} else {
|
||||
if (j < vgmin) vgmin = j;
|
||||
if (j > vgmax) vgmax = j;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}
|
||||
ilist[i] = i;
|
||||
|
||||
int n = 0, n2 = maxnbors;
|
||||
for (int u = maxnbors; u < raw_count; u++) {
|
||||
const int j = neighptr[u];
|
||||
int pj = j;
|
||||
if (pj < e_nall) {
|
||||
if (need_ic)
|
||||
if (pj < 0) pj = -pj - 1;
|
||||
|
||||
if (pj < nlocal)
|
||||
neighptr[n++] = j;
|
||||
else
|
||||
neighptr[n2++] = j;
|
||||
}
|
||||
}
|
||||
int ns = n;
|
||||
for (int u = maxnbors; u < n2; u++)
|
||||
neighptr[n++] = neighptr[u];
|
||||
|
||||
ilist[i] = i;
|
||||
cnumneigh[i] = ct;
|
||||
if (n > maxnbors) *overflow = 1;
|
||||
for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k];
|
||||
while( (n % pad_width) != 0 ) neighptr[n++] = e_nall;
|
||||
numneigh[i] = n;
|
||||
while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++;
|
||||
ct += n;
|
||||
neighptr += n;
|
||||
if (ct + n + maxnbors > list_size) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid) * maxnbors;
|
||||
}
|
||||
ns += n2 - maxnbors;
|
||||
while( (ns % pad_width) != 0 ) neighptr[ns++] = e_nall;
|
||||
numneigh[i] = ns;
|
||||
|
||||
ct += ns;
|
||||
const int alignb = (INTEL_DATA_ALIGN / sizeof(int));
|
||||
const int edge = (ct % alignb);
|
||||
if (edge) ct += alignb - edge;
|
||||
neighptr = firstneigh + ct;
|
||||
if (ct + obound > list_size) {
|
||||
if (i < ito - 1) {
|
||||
*overflow = 1;
|
||||
ct = (ifrom + tid * 2) * maxnbors;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (*overflow == 1)
|
||||
@ -1597,6 +1625,10 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
|
||||
for (int i = ifrom; i < ito; ++i) {
|
||||
int * _noalias jlist = firstneigh + cnumneigh[i];
|
||||
const int jnum = numneigh[i];
|
||||
#if defined(LMP_SIMD_COMPILER)
|
||||
#pragma vector aligned
|
||||
#pragma simd
|
||||
#endif
|
||||
for (int jj = 0; jj < jnum; jj++) {
|
||||
const int j = jlist[jj];
|
||||
if (need_ic && j < 0) {
|
||||
|
||||
@ -284,9 +284,8 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const flt_t delz = ztmp - x[j].z;
|
||||
const int jtype = x[j].w;
|
||||
const flt_t rsq = delx * delx + dely * dely + delz * delz;
|
||||
const flt_t r = sqrt(rsq);
|
||||
|
||||
const flt_t r2inv = (flt_t)1.0 / rsq;
|
||||
const flt_t r = (flt_t)1.0 / sqrt(r2inv);
|
||||
|
||||
#ifdef INTEL_VMASK
|
||||
if (rsq < c_forcei[jtype].cutsq) {
|
||||
@ -309,12 +308,12 @@ void PairBuckCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
|
||||
if (EFLAG) ecoul = prefactor * erfc;
|
||||
if (sbindex) {
|
||||
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||
prefactor;
|
||||
forcecoul -= adjust;
|
||||
if (EFLAG) ecoul -= adjust;
|
||||
}
|
||||
|
||||
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||
prefactor;
|
||||
forcecoul -= adjust;
|
||||
if (EFLAG) ecoul -= adjust;
|
||||
|
||||
#ifdef INTEL_ALLOW_TABLE
|
||||
} else {
|
||||
float rsq_lookup = rsq;
|
||||
|
||||
@ -303,7 +303,7 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const flt_t EWALD_F = 1.12837917;
|
||||
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
|
||||
|
||||
const flt_t r = sqrt(rsq);
|
||||
const flt_t r = (flt_t)1.0 / sqrt(r2inv);
|
||||
const flt_t grij = g_ewald * r;
|
||||
const flt_t expm2 = exp(-grij * grij);
|
||||
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
|
||||
@ -311,12 +311,12 @@ void PairLJCharmmCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
|
||||
if (EFLAG) ecoul = prefactor * erfc;
|
||||
if (sbindex) {
|
||||
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||
prefactor;
|
||||
forcecoul -= adjust;
|
||||
if (EFLAG) ecoul -= adjust;
|
||||
}
|
||||
|
||||
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||
prefactor;
|
||||
forcecoul -= adjust;
|
||||
if (EFLAG) ecoul -= adjust;
|
||||
|
||||
#ifdef INTEL_ALLOW_TABLE
|
||||
} else {
|
||||
float rsq_lookup = rsq;
|
||||
|
||||
@ -297,7 +297,7 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const flt_t EWALD_F = 1.12837917;
|
||||
const flt_t INV_EWALD_P = 1.0 / 0.3275911;
|
||||
|
||||
const flt_t r = sqrt(rsq);
|
||||
const flt_t r = (flt_t)1.0 / sqrt(r2inv);
|
||||
const flt_t grij = g_ewald * r;
|
||||
const flt_t expm2 = exp(-grij * grij);
|
||||
const flt_t t = INV_EWALD_P / (INV_EWALD_P + grij);
|
||||
@ -305,12 +305,12 @@ void PairLJCutCoulLongIntel::eval(const int offload, const int vflag,
|
||||
const flt_t prefactor = qqrd2e * qtmp * q[j] / r;
|
||||
forcecoul = prefactor * (erfc + EWALD_F * grij * expm2);
|
||||
if (EFLAG) ecoul = prefactor * erfc;
|
||||
if (sbindex) {
|
||||
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||
prefactor;
|
||||
forcecoul -= adjust;
|
||||
if (EFLAG) ecoul -= adjust;
|
||||
}
|
||||
|
||||
const flt_t adjust = ((flt_t)1.0 - special_coul[sbindex])*
|
||||
prefactor;
|
||||
forcecoul -= adjust;
|
||||
if (EFLAG) ecoul -= adjust;
|
||||
|
||||
#ifdef INTEL_ALLOW_TABLE
|
||||
} else {
|
||||
float rsq_lookup = rsq;
|
||||
|
||||
Reference in New Issue
Block a user