From 1a1b064e9d12199b10df51d3a00cb5ebadb1e62a Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 26 Feb 2015 21:13:18 +0000
Subject: [PATCH 01/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13166
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/compute_chunk_atom.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/compute_chunk_atom.cpp b/src/compute_chunk_atom.cpp
index 7197767b4a..e2bfc1cc27 100644
--- a/src/compute_chunk_atom.cpp
+++ b/src/compute_chunk_atom.cpp
@@ -1026,7 +1026,7 @@ void ComputeChunkAtom::idring(int n, char *cbuf)
void ComputeChunkAtom::check_molecules()
{
- int *molecule = atom->molecule;
+ tagint *molecule = atom->molecule;
int nlocal = atom->nlocal;
int flag = 0;
From 0c65dab76433b80d2b3717d95ab8b3b50db731c9 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Thu, 26 Feb 2015 21:13:29 +0000
Subject: [PATCH 02/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13167
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/atom_vec.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/atom_vec.cpp b/src/atom_vec.cpp
index 15388e3c5f..157e9c0969 100644
--- a/src/atom_vec.cpp
+++ b/src/atom_vec.cpp
@@ -349,7 +349,7 @@ void AtomVec::pack_improper(tagint **buf)
if (newton_bond) {
for (i = 0; i < nlocal; i++)
for (j = 0; j < num_improper[i]; j++) {
- buf[m][0] = improper_type[i][j];
+ buf[m][0] = MAX(improper_type[i][j],-improper_type[i][j]);
buf[m][1] = improper_atom1[i][j];
buf[m][2] = improper_atom2[i][j];
buf[m][3] = improper_atom3[i][j];
@@ -360,7 +360,7 @@ void AtomVec::pack_improper(tagint **buf)
for (i = 0; i < nlocal; i++)
for (j = 0; j < num_improper[i]; j++)
if (tag[i] == improper_atom2[i][j]) {
- buf[m][0] = improper_type[i][j];
+ buf[m][0] = MAX(improper_type[i][j],-improper_type[i][j]);
buf[m][1] = improper_atom1[i][j];
buf[m][2] = improper_atom2[i][j];
buf[m][3] = improper_atom3[i][j];
From 472b4fd62b9a64240b30191f7cd6f850e0469234 Mon Sep 17 00:00:00 2001
From: pscrozi
Date: Sat, 28 Feb 2015 14:17:32 +0000
Subject: [PATCH 03/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13168
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/MC/fix_atom_swap.cpp | 1 +
1 file changed, 1 insertion(+)
diff --git a/src/MC/fix_atom_swap.cpp b/src/MC/fix_atom_swap.cpp
index 1192dd4776..37264fe185 100644
--- a/src/MC/fix_atom_swap.cpp
+++ b/src/MC/fix_atom_swap.cpp
@@ -739,6 +739,7 @@ void FixAtomSwap::write_restart(FILE *fp)
int n = 0;
double list[4];
list[n++] = random_equal->state();
+ list[n++] = random_unequal->state();
list[n++] = next_reneighbor;
if (comm->me == 0) {
From ee00c0f654115629bb9de70254d73d86e888a9a9 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 15:24:51 +0000
Subject: [PATCH 04/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13169
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/Makefile | 2 +-
src/QEQ/fix_qeq_slater.cpp | 4 +--
src/QEQ/fix_qeq_slater.h | 2 +-
src/compute_chunk_atom.cpp | 4 +--
src/compute_chunk_atom.h | 2 +-
src/pair_coul_streitz.cpp | 34 ++++++++++-------------
src/velocity.cpp | 56 +++++++++++++++++++++++---------------
7 files changed, 55 insertions(+), 49 deletions(-)
diff --git a/src/Makefile b/src/Makefile
index 6fafae1fd8..82d1182897 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,7 +13,7 @@ OBJ = $(SRC:.cpp=.o)
# Package variables
-PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \
+PACKAGE = asphere body class2 colloid coreshell dipole fld gpu granular kim \
kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \
qeq reax replica rigid shock snap srd voronoi xtc
diff --git a/src/QEQ/fix_qeq_slater.cpp b/src/QEQ/fix_qeq_slater.cpp
index 95d0079fe0..36b3b2c65f 100644
--- a/src/QEQ/fix_qeq_slater.cpp
+++ b/src/QEQ/fix_qeq_slater.cpp
@@ -57,7 +57,7 @@ FixQEqSlater::FixQEqSlater(LAMMPS *lmp, int narg, char **arg) :
} else error->all(FLERR,"Illegal fix qeq/slater command");
}
- if (streitz_flag) extract();
+ if (streitz_flag) extract_streitz();
}
/* ---------------------------------------------------------------------- */
@@ -88,7 +88,7 @@ void FixQEqSlater::init()
/* ---------------------------------------------------------------------- */
-void FixQEqSlater::extract()
+void FixQEqSlater::extract_streitz()
{
int ntypes = atom->ntypes;
diff --git a/src/QEQ/fix_qeq_slater.h b/src/QEQ/fix_qeq_slater.h
index c0174ee7c1..5bb31cfde4 100644
--- a/src/QEQ/fix_qeq_slater.h
+++ b/src/QEQ/fix_qeq_slater.h
@@ -37,7 +37,7 @@ class FixQEqSlater : public FixQEq {
void compute_H();
double calculate_H(double, double, double, double, double &);
double calculate_H_wolf(double, double, double, double, double &);
- void extract();
+ void extract_streitz();
class PairCoulStreitz *streitz;
};
diff --git a/src/compute_chunk_atom.cpp b/src/compute_chunk_atom.cpp
index e2bfc1cc27..d83643b047 100644
--- a/src/compute_chunk_atom.cpp
+++ b/src/compute_chunk_atom.cpp
@@ -373,7 +373,7 @@ ComputeChunkAtom::ComputeChunkAtom(LAMMPS *lmp, int narg, char **arg) :
id_fix = NULL;
fixstore = NULL;
- if (compress) hash = new std::map();
+ if (compress) hash = new std::map();
else hash = NULL;
maxvar = 0;
@@ -1010,7 +1010,7 @@ void ComputeChunkAtom::compress_chunk_ids()
void ComputeChunkAtom::idring(int n, char *cbuf)
{
tagint *list = (tagint *) cbuf;
- std::map *hash = cptr->hash;
+ std::map *hash = cptr->hash;
for (int i = 0; i < n; i++) (*hash)[list[i]] = 0;
}
diff --git a/src/compute_chunk_atom.h b/src/compute_chunk_atom.h
index 8f69735d92..aa42a1f0c4 100644
--- a/src/compute_chunk_atom.h
+++ b/src/compute_chunk_atom.h
@@ -86,7 +86,7 @@ class ComputeChunkAtom : public Compute {
int molcheck; // one-time check if all molecule atoms in chunk
int *exclude; // 1 if atom is not assigned to any chunk
- std::map *hash; // store original chunks IDs before compression
+ std::map *hash; // store original chunks IDs before compression
// static variable for ring communication callback to access class data
// callback functions for ring communication
diff --git a/src/pair_coul_streitz.cpp b/src/pair_coul_streitz.cpp
index a715c1ebc8..f501be61cf 100644
--- a/src/pair_coul_streitz.cpp
+++ b/src/pair_coul_streitz.cpp
@@ -286,7 +286,7 @@ void PairCoulStreitz::read_file(char *file)
// strip comment, skip line if blank
- if (ptr = strchr(line,'#')) *ptr = '\0';
+ if ((ptr = strchr(line,'#'))) *ptr = '\0';
nwords = atom->count_words(line);
if (nwords == 0) continue;
@@ -305,7 +305,7 @@ void PairCoulStreitz::read_file(char *file)
if (eof) break;
MPI_Bcast(&n,1,MPI_INT,0,world);
MPI_Bcast(line,n,MPI_CHAR,0,world);
- if (ptr = strchr(line,'#')) *ptr = '\0';
+ if ((ptr = strchr(line,'#'))) *ptr = '\0';
nwords = atom->count_words(line);
}
@@ -316,7 +316,7 @@ void PairCoulStreitz::read_file(char *file)
nwords = 0;
words[nwords++] = strtok(line," \t\n\r\f");
- while (words[nwords++] = strtok(NULL," \t\n\r\f")) continue;
+ while ((words[nwords++] = strtok(NULL," \t\n\r\f"))) continue;
// ielement = 1st args
@@ -355,7 +355,7 @@ void PairCoulStreitz::read_file(char *file)
void PairCoulStreitz::setup()
{
- int i,j,k,m,n;
+ int i,m,n;
// set elem2param
@@ -391,29 +391,27 @@ void PairCoulStreitz::setup()
void PairCoulStreitz::compute(int eflag, int vflag)
{
int i, j, ii, jj, inum, jnum;
- int itag, jtag, itype, jtype, iparam_i,iparam_j;
+ int itype, jtype, iparam_i,iparam_j;
int *ilist, *jlist, *numneigh, **firstneigh;
- int *tag = atom->tag;
int *type = atom->type;
int nlocal = atom->nlocal;
int newton_pair = force->newton_pair;
- double xtmp, ytmp, ztmp, evdwl, ecoul, fpair;
+ double xtmp, ytmp, ztmp, ecoul, fpair;
double qi, qj, selfion, r, rsq, delr[3];
- double zei, zej, zi, zj, ci_jfi, dci_jfi, ci_fifj, dci_fifj;
- double prefactor, forcecoul, factor_coul;
+ double zei, zej, zj, ci_jfi, dci_jfi, ci_fifj, dci_fifj;
+ double forcecoul, factor_coul;
double **x = atom->x;
double **f = atom->f;
double *q = atom->q;
- double qqrd2e = force->qqrd2e;
double *special_coul = force->special_coul;
- evdwl = ecoul = 0.0;
+ ecoul = 0.0;
selfion = fpair = 0.0;
ci_jfi = ci_fifj = dci_jfi = dci_fifj = 0.0;
- prefactor = forcecoul = 0.0;
+ forcecoul = 0.0;
if (eflag || vflag) ev_setup(eflag,vflag);
else evflag = vflag_fdotr = vflag_atom = 0;
@@ -429,7 +427,6 @@ void PairCoulStreitz::compute(int eflag, int vflag)
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
- itag = tag[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
@@ -437,7 +434,6 @@ void PairCoulStreitz::compute(int eflag, int vflag)
iparam_i = elem2param[itype];
qi = q[i];
zei = params[iparam_i].zeta;
- zi = params[iparam_i].zcore;
// self energy: ionization + wolf sum
@@ -454,7 +450,6 @@ void PairCoulStreitz::compute(int eflag, int vflag)
j = jlist[jj];
j &= NEIGHMASK;
- jtag = tag[j];
jtype = map[type[j]];
iparam_j = elem2param[jtype];
qj = q[j];
@@ -505,7 +500,6 @@ void PairCoulStreitz::compute(int eflag, int vflag)
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
- itag = tag[i];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
@@ -513,7 +507,6 @@ void PairCoulStreitz::compute(int eflag, int vflag)
iparam_i = elem2param[itype];
qi = q[i];
zei = params[iparam_i].zeta;
- zi = params[iparam_i].zcore;
// self ionizition energy, only on i atom
@@ -529,7 +522,6 @@ void PairCoulStreitz::compute(int eflag, int vflag)
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
j &= NEIGHMASK;
- jtag = tag[j];
jtype = map[type[j]];
iparam_j = elem2param[jtype];
qj = q[j];
@@ -587,6 +579,8 @@ double PairCoulStreitz::self(Param *param, double qi)
if (kspacetype == 1) return 1.0*qi*(s1+qi*(0.50*s2 - qqrd2e*woself));
if (kspacetype == 2) return 1.0*qi*(s1+qi*(0.50*s2));
+
+ return 0.0;
}
/* ---------------------------------------------------------------------- */
@@ -673,8 +667,8 @@ void PairCoulStreitz::wolf_sum(double qi, double qj, double zj, double r,
double derfcr = exp(-a*a*r*r);
double erfcrc = erfc(a*rc);
- double etmp1, etmp2, etmp3, etmp4;
- double ftmp1, ftmp2, ftmp3, ftmp4;
+ double etmp1, etmp2, etmp3;
+ double ftmp1, ftmp2, ftmp3;
etmp = etmp1 = etmp2 = etmp3 = 0.0;
ftmp = ftmp1 = ftmp2 = ftmp3 = 0.0;
diff --git a/src/velocity.cpp b/src/velocity.cpp
index 0ea6b01673..ba1bc06bd0 100644
--- a/src/velocity.cpp
+++ b/src/velocity.cpp
@@ -98,6 +98,28 @@ void Velocity::command(int narg, char **arg)
else if (style == RAMP) options(narg-8,&arg[8]);
else if (style == ZERO) options(narg-3,&arg[3]);
+ // special cases where full init and border communication must be done first
+ // for CREATE/SET if compute temp/cs is used
+ // for ZERO if fix rigid/small is used
+ // b/c methods invoked in the compute/fix perform forward/reverse comm
+
+ int initcomm = 0;
+ if (style == ZERO && rfix >= 0 &&
+ strcmp(modify->fix[rfix]->style,"rigid/small") == 0) initcomm = 1;
+ if ((style == CREATE || style == SET) && temperature &&
+ strcmp(temperature->style,"temp/cs2") == 0) initcomm = 1;
+
+ if (initcomm) {
+ lmp->init();
+ if (domain->triclinic) domain->x2lamda(atom->nlocal);
+ domain->pbc();
+ domain->reset_box();
+ comm->setup();
+ comm->exchange();
+ comm->borders();
+ if (domain->triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+ }
+
// initialize velocities based on style
// create() invoked differently, so can be called externally
@@ -670,37 +692,27 @@ void Velocity::ramp(int narg, char **arg)
/* ----------------------------------------------------------------------
zero linear or angular momentum of a group
- if using rigid/small requires init of entire system since
- its methods perform forward/reverse comm,
- comm::init needs neighbor::init needs pair::init needs kspace::init, etc
- also requires setup_pre_neighbor call to setup bodies
------------------------------------------------------------------------- */
void Velocity::zero(int narg, char **arg)
{
if (strcmp(arg[0],"linear") == 0) {
if (rfix < 0) zero_momentum();
- else {
- if (strcmp(modify->fix[rfix]->style,"rigid/small") == 0) {
- lmp->init();
- modify->fix[rfix]->setup_pre_neighbor();
- modify->fix[rfix]->zero_momentum();
- } else if (strstr(modify->fix[rfix]->style,"rigid")) {
- modify->fix[rfix]->zero_momentum();
- } else error->all(FLERR,"Velocity rigid used with non-rigid fix-ID");
- }
+ else if (strcmp(modify->fix[rfix]->style,"rigid/small") == 0) {
+ modify->fix[rfix]->setup_pre_neighbor();
+ modify->fix[rfix]->zero_momentum();
+ } else if (strstr(modify->fix[rfix]->style,"rigid")) {
+ modify->fix[rfix]->zero_momentum();
+ } else error->all(FLERR,"Velocity rigid used with non-rigid fix-ID");
} else if (strcmp(arg[0],"angular") == 0) {
if (rfix < 0) zero_rotation();
- else {
- if (strcmp(modify->fix[rfix]->style,"rigid/small") == 0) {
- lmp->init();
- modify->fix[rfix]->setup_pre_neighbor();
- modify->fix[rfix]->zero_rotation();
- } else if (strstr(modify->fix[rfix]->style,"rigid")) {
- modify->fix[rfix]->zero_rotation();
- } else error->all(FLERR,"Velocity rigid used with non-rigid fix-ID");
- }
+ else if (strcmp(modify->fix[rfix]->style,"rigid/small") == 0) {
+ modify->fix[rfix]->setup_pre_neighbor();
+ modify->fix[rfix]->zero_rotation();
+ } else if (strstr(modify->fix[rfix]->style,"rigid")) {
+ modify->fix[rfix]->zero_rotation();
+ } else error->all(FLERR,"Velocity rigid used with non-rigid fix-ID");
} else error->all(FLERR,"Illegal velocity command");
}
From b33a0164ded0fb30a9afd4b9d3c0b34ea3f27564 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 16:33:18 +0000
Subject: [PATCH 05/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13170
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/Makefile | 2 +-
src/USER-MISC/fix_ttm_mod.cpp | 221 +++++++++++++++++++++-------------
2 files changed, 136 insertions(+), 87 deletions(-)
diff --git a/src/Makefile b/src/Makefile
index 82d1182897..6fafae1fd8 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -13,7 +13,7 @@ OBJ = $(SRC:.cpp=.o)
# Package variables
-PACKAGE = asphere body class2 colloid coreshell dipole fld gpu granular kim \
+PACKAGE = asphere body class2 colloid dipole fld gpu granular kim \
kokkos kspace manybody mc meam misc molecule mpiio opt peri poems \
qeq reax replica rigid shock snap srd voronoi xtc
diff --git a/src/USER-MISC/fix_ttm_mod.cpp b/src/USER-MISC/fix_ttm_mod.cpp
index dcbca2debc..d66e49d6a2 100644
--- a/src/USER-MISC/fix_ttm_mod.cpp
+++ b/src/USER-MISC/fix_ttm_mod.cpp
@@ -33,6 +33,7 @@
#include "random_mars.h"
#include "memory.h"
#include "error.h"
+#include "citeme.h"
#include "math_const.h"
using namespace LAMMPS_NS;
@@ -41,11 +42,34 @@ using namespace MathConst;
#define MAXLINE 1024
+static const char cite_fix_ttm_mod[] =
+ "fix ttm/mod command:\n\n"
+ "@article{Pisarev2014,\n"
+ "author = {Pisarev, V. V. and Starikov, S. V.},\n"
+ "title = {{Atomistic simulation of ion track formation in UO2.}},\n"
+ "journal = {J.~Phys.:~Condens.~Matter},\n"
+ "volume = {26},\n"
+ "number = {47},\n"
+ "pages = {475401},\n"
+ "year = {2014}\n"
+ "}\n\n"
+ "@article{Norman2013,\n"
+ "author = {Norman, G. E. and Starikov, S. V. and Stegailov, V. V. and Saitov, I. M. and Zhilyaev, P. A.},\n"
+ "title = {{Atomistic Modeling of Warm Dense Matter in the Two-Temperature State}},\n"
+ "journal = {Contrib.~Plasm.~Phys.},\n"
+ "number = {2},\n"
+ "volume = {53},\n"
+ "pages = {129--139},\n"
+ "year = {2013}\n"
+ "}\n\n";
+
/* ---------------------------------------------------------------------- */
FixTTMMod::FixTTMMod(LAMMPS *lmp, int narg, char **arg) :
Fix(lmp, narg, arg)
{
+ if (lmp->citeme) lmp->citeme->add(cite_fix_ttm_mod);
+
if (narg < 9) error->all(FLERR,"Illegal fix ttm/mod command");
vector_flag = 1;
size_vector = 2;
@@ -55,16 +79,16 @@ FixTTMMod::FixTTMMod(LAMMPS *lmp, int narg, char **arg) :
restart_peratom = 1;
restart_global = 1;
seed = atoi(arg[3]);
- nxnodes = atoi(arg[4]);
- nynodes = atoi(arg[5]);
- nznodes = atoi(arg[6]);
- fpr = fopen(arg[7],"r");
+ fpr_2 = fopen(arg[4],"r");
+ nxnodes = atoi(arg[5]);
+ nynodes = atoi(arg[6]);
+ nznodes = atoi(arg[7]);
+ fpr = fopen(arg[8],"r");
if (fpr == NULL) {
char str[128];
sprintf(str,"Cannot open file %s",arg[7]);
error->one(FLERR,str);
}
- fpr_2 = fopen(arg[8],"r");
if (fpr == NULL) {
char str[128];
sprintf(str,"Cannot open file %s",arg[8]);
@@ -233,6 +257,7 @@ FixTTMMod::FixTTMMod(LAMMPS *lmp, int narg, char **arg) :
memory->create(sum_mass_vsq_all,nxnodes,nynodes,nznodes,
"ttm/mod:sum_mass_vsq_all");
memory->create(T_electron_old,nxnodes,nynodes,nznodes,"ttm/mod:T_electron_old");
+ memory->create(T_electron_first,nxnodes,nynodes,nznodes,"ttm/mod:T_electron_first");
memory->create(T_electron,nxnodes,nynodes,nznodes,"ttm/mod:T_electron");
memory->create(net_energy_transfer,nxnodes,nynodes,nznodes,
"ttm/mod:net_energy_transfer");
@@ -585,89 +610,113 @@ void FixTTMMod::end_of_step()
// required this MD step to maintain a stable explicit solve
int num_inner_timesteps = 1;
double inner_dt = update->dt;
- double stability_criterion = 1.0 -
- 2.0*inner_dt/el_specific_heat *
- (el_thermal_conductivity*(1.0/dx/dx + 1.0/dy/dy + 1.0/dz/dz));
- if (stability_criterion < 0.0) {
- inner_dt = 0.25*el_specific_heat /
- (el_thermal_conductivity*(1.0/dx/dx + 1.0/dy/dy + 1.0/dz/dz));
- }
- num_inner_timesteps = static_cast(update->dt/inner_dt) + 1;
- inner_dt = update->dt/double(num_inner_timesteps);
- if (num_inner_timesteps > 1000000)
- error->warning(FLERR,"Too many inner timesteps in fix ttm",0);
- for (int ith_inner_timestep = 0; ith_inner_timestep < num_inner_timesteps;
- ith_inner_timestep++) {
- for (int ixnode = 0; ixnode < nxnodes; ixnode++)
+ double stability_criterion = 0.0;
+
+ for (int ixnode = 0; ixnode < nxnodes; ixnode++)
+ for (int iynode = 0; iynode < nynodes; iynode++)
+ for (int iznode = 0; iznode < nznodes; iznode++)
+ T_electron_first[ixnode][iynode][iznode] =
+ T_electron[ixnode][iynode][iznode];
+ do {
+ for (int ixnode = 0; ixnode < nxnodes; ixnode++)
for (int iynode = 0; iynode < nynodes; iynode++)
for (int iznode = 0; iznode < nznodes; iznode++)
- T_electron_old[ixnode][iynode][iznode] =
- T_electron[ixnode][iynode][iznode];
- // compute new electron T profile
- duration = duration + inner_dt;
- for (int ixnode = 0; ixnode < nxnodes; ixnode++)
- for (int iynode = 0; iynode < nynodes; iynode++)
- for (int iznode = 0; iznode < nznodes; iznode++) {
- int right_xnode = ixnode + 1;
- int right_ynode = iynode + 1;
- int right_znode = iznode + 1;
- if (right_xnode == nxnodes) right_xnode = 0;
- if (right_ynode == nynodes) right_ynode = 0;
- if (right_znode == nznodes) right_znode = 0;
- int left_xnode = ixnode - 1;
- int left_ynode = iynode - 1;
- int left_znode = iznode - 1;
- if (left_xnode == -1) left_xnode = nxnodes - 1;
- if (left_ynode == -1) left_ynode = nynodes - 1;
- if (left_znode == -1) left_znode = nznodes - 1;
- double skin_layer_d = double(skin_layer);
- double ixnode_d = double(ixnode);
- double surface_d = double(t_surface_l);
- mult_factor = 0.0;
- if (duration < width){
- if (ixnode >= t_surface_l) mult_factor = (intensity/(dx*skin_layer_d))*exp((-1.0)*(ixnode_d - surface_d)/skin_layer_d);
- }
- if (ixnode < t_surface_l) net_energy_transfer_all[ixnode][iynode][iznode] = 0.0;
- double cr_vac = 1;
- if (T_electron_old[ixnode][iynode][iznode] == 0) cr_vac = 0;
- double cr_v_l_x = 1;
- if (T_electron_old[left_xnode][iynode][iznode] == 0) cr_v_l_x = 0;
- double cr_v_r_x = 1;
- if (T_electron_old[right_xnode][iynode][iznode] == 0) cr_v_r_x = 0;
- double cr_v_l_y = 1;
- if (T_electron_old[ixnode][left_ynode][iznode] == 0) cr_v_l_y = 0;
- double cr_v_r_y = 1;
- if (T_electron_old[ixnode][right_ynode][iznode] == 0) cr_v_r_y = 0;
- double cr_v_l_z = 1;
- if (T_electron_old[ixnode][iynode][left_znode] == 0) cr_v_l_z = 0;
- double cr_v_r_z = 1;
- if (T_electron_old[ixnode][iynode][right_znode] == 0) cr_v_r_z = 0;
- if (cr_vac != 0) {
- T_electron[ixnode][iynode][iznode] =
- T_electron_old[ixnode][iynode][iznode] +
- inner_dt/el_properties(T_electron_old[ixnode][iynode][iznode]).el_heat_capacity *
- ((cr_v_r_x*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[right_xnode][iynode][iznode]/2.0).el_thermal_conductivity*
- (T_electron_old[right_xnode][iynode][iznode]-T_electron_old[ixnode][iynode][iznode])/dx -
- cr_v_l_x*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[left_xnode][iynode][iznode]/2.0).el_thermal_conductivity*
- (T_electron_old[ixnode][iynode][iznode]-T_electron_old[left_xnode][iynode][iznode])/dx)/dx +
- (cr_v_r_y*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][right_ynode][iznode]/2.0).el_thermal_conductivity*
- (T_electron_old[ixnode][right_ynode][iznode]-T_electron_old[ixnode][iynode][iznode])/dy -
- cr_v_l_y*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][left_ynode][iznode]/2.0).el_thermal_conductivity*
- (T_electron_old[ixnode][iynode][iznode]-T_electron_old[ixnode][left_ynode][iznode])/dy)/dy +
- (cr_v_r_z*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][iynode][right_znode]/2.0).el_thermal_conductivity*
- (T_electron_old[ixnode][iynode][right_znode]-T_electron_old[ixnode][iynode][iznode])/dz -
- cr_v_l_z*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][iynode][left_znode]/2.0).el_thermal_conductivity*
- (T_electron_old[ixnode][iynode][iznode]-T_electron_old[ixnode][iynode][left_znode])/dz)/dz);
- T_electron[ixnode][iynode][iznode]+=inner_dt/el_properties(T_electron[ixnode][iynode][iznode]).el_heat_capacity*
- (mult_factor -
- net_energy_transfer_all[ixnode][iynode][iznode]/del_vol);
- }
- else T_electron[ixnode][iynode][iznode] =
- T_electron_old[ixnode][iynode][iznode];
- if ((T_electron[ixnode][iynode][iznode] > 0.0) && (T_electron[ixnode][iynode][iznode] < electron_temperature_min))
- T_electron[ixnode][iynode][iznode] = T_electron[ixnode][iynode][iznode] + 0.5*(electron_temperature_min - T_electron[ixnode][iynode][iznode]);
- }
- }
+ T_electron[ixnode][iynode][iznode] =
+ T_electron_first[ixnode][iynode][iznode];
+
+ stability_criterion = 1.0 -
+ 2.0*inner_dt/el_specific_heat *
+ (el_thermal_conductivity*(1.0/dx/dx + 1.0/dy/dy + 1.0/dz/dz));
+ if (stability_criterion < 0.0) {
+ inner_dt = 0.25*el_specific_heat /
+ (el_thermal_conductivity*(1.0/dx/dx + 1.0/dy/dy + 1.0/dz/dz));
+ }
+ num_inner_timesteps = static_cast(update->dt/inner_dt) + 1;
+ inner_dt = update->dt/double(num_inner_timesteps);
+ if (num_inner_timesteps > 1000000)
+ error->warning(FLERR,"Too many inner timesteps in fix ttm",0);
+ for (int ith_inner_timestep = 0; ith_inner_timestep < num_inner_timesteps;
+ ith_inner_timestep++) {
+ for (int ixnode = 0; ixnode < nxnodes; ixnode++)
+ for (int iynode = 0; iynode < nynodes; iynode++)
+ for (int iznode = 0; iznode < nznodes; iznode++)
+ T_electron_old[ixnode][iynode][iznode] =
+ T_electron[ixnode][iynode][iznode];
+ // compute new electron T profile
+ duration = duration + inner_dt;
+ for (int ixnode = 0; ixnode < nxnodes; ixnode++)
+ for (int iynode = 0; iynode < nynodes; iynode++)
+ for (int iznode = 0; iznode < nznodes; iznode++) {
+ int right_xnode = ixnode + 1;
+ int right_ynode = iynode + 1;
+ int right_znode = iznode + 1;
+ if (right_xnode == nxnodes) right_xnode = 0;
+ if (right_ynode == nynodes) right_ynode = 0;
+ if (right_znode == nznodes) right_znode = 0;
+ int left_xnode = ixnode - 1;
+ int left_ynode = iynode - 1;
+ int left_znode = iznode - 1;
+ if (left_xnode == -1) left_xnode = nxnodes - 1;
+ if (left_ynode == -1) left_ynode = nynodes - 1;
+ if (left_znode == -1) left_znode = nznodes - 1;
+ double skin_layer_d = double(skin_layer);
+ double ixnode_d = double(ixnode);
+ double surface_d = double(t_surface_l);
+ mult_factor = 0.0;
+ if (duration < width){
+ if (ixnode >= t_surface_l) mult_factor = (intensity/(dx*skin_layer_d))*exp((-1.0)*(ixnode_d - surface_d)/skin_layer_d);
+ }
+ if (ixnode < t_surface_l) net_energy_transfer_all[ixnode][iynode][iznode] = 0.0;
+ double cr_vac = 1;
+ if (T_electron_old[ixnode][iynode][iznode] == 0) cr_vac = 0;
+ double cr_v_l_x = 1;
+ if (T_electron_old[left_xnode][iynode][iznode] == 0) cr_v_l_x = 0;
+ double cr_v_r_x = 1;
+ if (T_electron_old[right_xnode][iynode][iznode] == 0) cr_v_r_x = 0;
+ double cr_v_l_y = 1;
+ if (T_electron_old[ixnode][left_ynode][iznode] == 0) cr_v_l_y = 0;
+ double cr_v_r_y = 1;
+ if (T_electron_old[ixnode][right_ynode][iznode] == 0) cr_v_r_y = 0;
+ double cr_v_l_z = 1;
+ if (T_electron_old[ixnode][iynode][left_znode] == 0) cr_v_l_z = 0;
+ double cr_v_r_z = 1;
+ if (T_electron_old[ixnode][iynode][right_znode] == 0) cr_v_r_z = 0;
+ if (cr_vac != 0) {
+ T_electron[ixnode][iynode][iznode] =
+ T_electron_old[ixnode][iynode][iznode] +
+ inner_dt/el_properties(T_electron_old[ixnode][iynode][iznode]).el_heat_capacity *
+ ((cr_v_r_x*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[right_xnode][iynode][iznode]/2.0).el_thermal_conductivity*
+ (T_electron_old[right_xnode][iynode][iznode]-T_electron_old[ixnode][iynode][iznode])/dx -
+ cr_v_l_x*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[left_xnode][iynode][iznode]/2.0).el_thermal_conductivity*
+ (T_electron_old[ixnode][iynode][iznode]-T_electron_old[left_xnode][iynode][iznode])/dx)/dx +
+ (cr_v_r_y*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][right_ynode][iznode]/2.0).el_thermal_conductivity*
+ (T_electron_old[ixnode][right_ynode][iznode]-T_electron_old[ixnode][iynode][iznode])/dy -
+ cr_v_l_y*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][left_ynode][iznode]/2.0).el_thermal_conductivity*
+ (T_electron_old[ixnode][iynode][iznode]-T_electron_old[ixnode][left_ynode][iznode])/dy)/dy +
+ (cr_v_r_z*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][iynode][right_znode]/2.0).el_thermal_conductivity*
+ (T_electron_old[ixnode][iynode][right_znode]-T_electron_old[ixnode][iynode][iznode])/dz -
+ cr_v_l_z*el_properties(T_electron_old[ixnode][iynode][iznode]/2.0+T_electron_old[ixnode][iynode][left_znode]/2.0).el_thermal_conductivity*
+ (T_electron_old[ixnode][iynode][iznode]-T_electron_old[ixnode][iynode][left_znode])/dz)/dz);
+ T_electron[ixnode][iynode][iznode]+=inner_dt/el_properties(T_electron[ixnode][iynode][iznode]).el_heat_capacity*
+ (mult_factor -
+ net_energy_transfer_all[ixnode][iynode][iznode]/del_vol);
+ }
+ else T_electron[ixnode][iynode][iznode] =
+ T_electron_old[ixnode][iynode][iznode];
+ if ((T_electron[ixnode][iynode][iznode] > 0.0) && (T_electron[ixnode][iynode][iznode] < electron_temperature_min))
+ T_electron[ixnode][iynode][iznode] = T_electron[ixnode][iynode][iznode] + 0.5*(electron_temperature_min - T_electron[ixnode][iynode][iznode]);
+
+ if (el_properties(T_electron[ixnode][iynode][iznode]).el_thermal_conductivity > el_thermal_conductivity)
+ el_thermal_conductivity = el_properties(T_electron[ixnode][iynode][iznode]).el_thermal_conductivity;
+ if ((T_electron[ixnode][iynode][iznode] > 0.0) && (el_properties(T_electron[ixnode][iynode][iznode]).el_heat_capacity < el_specific_heat))
+ el_specific_heat = el_properties(T_electron[ixnode][iynode][iznode]).el_heat_capacity;
+ }
+ }
+ stability_criterion = 1.0 -
+ 2.0*inner_dt/el_specific_heat *
+ (el_thermal_conductivity*(1.0/dx/dx + 1.0/dy/dy + 1.0/dz/dz));
+
+ } while (stability_criterion < 0.0);
// output nodal temperatures for current timestep
if ((nfileevery) && !(update->ntimestep % nfileevery)) {
// compute atomic Ta for each grid point
From 68ef75c74e26c9f9e3809a7047c6c8d0f5101ec9 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 17:30:20 +0000
Subject: [PATCH 06/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13171
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Section_start.html | 9 +++++----
doc/Section_start.txt | 9 +++++----
2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/doc/Section_start.html b/doc/Section_start.html
index 06e22f8b92..98e33fc5f7 100644
--- a/doc/Section_start.html
+++ b/doc/Section_start.html
@@ -377,10 +377,11 @@ need to build the STUBS library for your platform before making LAMMPS
itself. Note that if you are building with src/MAKE/Makefile.serial,
e.g. by typing "make serial", then the STUBS library is built for you.
-To build the STUBS library from the src directory, type "make stubs",
-or from the src/STUBS dir, type "make". This should create a
-libmpi_stubs.a file suitable for linking to LAMMPS. If the build
-fails, you will need to edit the STUBS/Makefile for your platform.
+
To build the STUBS library from the src directory, type "make
+mpi-stubs", or from the src/STUBS dir, type "make". This should
+create a libmpi_stubs.a file suitable for linking to LAMMPS. If the
+build fails, you will need to edit the STUBS/Makefile for your
+platform.
The file STUBS/mpi.c provides a CPU timer function called MPI_Wtime()
that calls gettimeofday() . If your system doesn't support
diff --git a/doc/Section_start.txt b/doc/Section_start.txt
index 8f9071860d..544004e8de 100644
--- a/doc/Section_start.txt
+++ b/doc/Section_start.txt
@@ -371,10 +371,11 @@ need to build the STUBS library for your platform before making LAMMPS
itself. Note that if you are building with src/MAKE/Makefile.serial,
e.g. by typing "make serial", then the STUBS library is built for you.
-To build the STUBS library from the src directory, type "make stubs",
-or from the src/STUBS dir, type "make". This should create a
-libmpi_stubs.a file suitable for linking to LAMMPS. If the build
-fails, you will need to edit the STUBS/Makefile for your platform.
+To build the STUBS library from the src directory, type "make
+mpi-stubs", or from the src/STUBS dir, type "make". This should
+create a libmpi_stubs.a file suitable for linking to LAMMPS. If the
+build fails, you will need to edit the STUBS/Makefile for your
+platform.
The file STUBS/mpi.c provides a CPU timer function called MPI_Wtime()
that calls gettimeofday() . If your system doesn't support
From ed6ef330070f72272e11a29011325cb1fc99a760 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 17:30:27 +0000
Subject: [PATCH 07/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13172
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/Makefile | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/src/Makefile b/src/Makefile
index 6fafae1fd8..3c4be1c991 100755
--- a/src/Makefile
+++ b/src/Makefile
@@ -46,7 +46,7 @@ help:
@echo 'make -f Makefile.lib machine build LAMMPS as static library for machine'
@echo 'make -f Makefile.shlib machine build LAMMPS as shared library for machine'
@echo 'make -f Makefile.list machine build LAMMPS from explicit list of files'
- @echo 'make stubs build dummy MPI library in STUBS'
+ @echo 'make mpi-stubs build dummy MPI library in STUBS'
@echo 'make install-python install LAMMPS wrapper in Python'
@echo 'make tar create lmp_src.tar.gz of src dir and packages'
@echo ''
@@ -94,7 +94,7 @@ help:
.DEFAULT:
@if [ $@ = "serial" -a ! -f STUBS/libmpi_stubs.a ]; \
- then $(MAKE) stubs; fi
+ then $(MAKE) mpi-stubs; fi
@test -f MAKE/Makefile.$@ -o -f MAKE/OPTIONS/Makefile.$@ -o \
-f MAKE/MACHINES/Makefile.$@ -o -f MAKE/MINE/Makefile.$@
@if [ ! -d Obj_$@ ]; then mkdir Obj_$@; fi
@@ -144,7 +144,7 @@ makelist:
# Make MPI STUBS library
-stubs:
+mpi-stubs:
@cd STUBS; $(MAKE) clean; $(MAKE)
# install LAMMPS shared lib and Python wrapper for Python usage
From 1403acd5013f1877a160479ea3c28b2c855743a4 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 21:46:10 +0000
Subject: [PATCH 08/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13173
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/USER-INTEL/fix_intel.cpp | 18 +-
src/USER-INTEL/fix_intel.h | 12 +-
src/USER-INTEL/intel_buffers.cpp | 97 +++-
src/USER-INTEL/intel_buffers.h | 32 +-
src/USER-INTEL/intel_preprocess.h | 46 +-
src/USER-INTEL/neigh_half_bin_intel.cpp | 435 ++++++++++++++-
src/USER-INTEL/pair_sw_intel.cpp | 703 ++++++++++++++++++++++++
src/USER-INTEL/pair_sw_intel.h | 111 ++++
8 files changed, 1414 insertions(+), 40 deletions(-)
create mode 100755 src/USER-INTEL/pair_sw_intel.cpp
create mode 100755 src/USER-INTEL/pair_sw_intel.h
diff --git a/src/USER-INTEL/fix_intel.cpp b/src/USER-INTEL/fix_intel.cpp
index cd63199095..c0847a8bf1 100644
--- a/src/USER-INTEL/fix_intel.cpp
+++ b/src/USER-INTEL/fix_intel.cpp
@@ -177,7 +177,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
// set OpenMP threads
// nomp is user setting, default = 0
-#if defined(_OPENMP)
+ #if defined(_OPENMP)
if (nomp != 0) {
omp_set_num_threads(nomp);
comm->nthreads = nomp;
@@ -187,7 +187,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
nthreads = omp_get_num_threads();
comm->nthreads = nthreads;
}
-#endif
+ #endif
// set offload params
@@ -357,17 +357,23 @@ void FixIntel::pair_init_check()
#endif
}
+ int need_tag = 0;
+ if (atom->molecular) need_tag = 1;
+
// Clear buffers used for pair style
char kmode[80];
if (_precision_mode == PREC_MODE_SINGLE) {
strcpy(kmode, "single");
get_single_buffers()->free_all_nbor_buffers();
+ get_single_buffers()->need_tag(need_tag);
} else if (_precision_mode == PREC_MODE_MIXED) {
strcpy(kmode, "mixed");
get_mixed_buffers()->free_all_nbor_buffers();
+ get_mixed_buffers()->need_tag(need_tag);
} else {
strcpy(kmode, "double");
get_double_buffers()->free_all_nbor_buffers();
+ get_double_buffers()->need_tag(need_tag);
}
#ifdef _LMP_INTEL_OFFLOAD
@@ -730,16 +736,16 @@ int FixIntel::set_host_affinity(const int nomp)
int pragma_size = 1024;
buf1 = (float*) malloc(sizeof(float)*pragma_size);
- #pragma offload target (mic:0) \
+ #pragma offload target (mic:0) mandatory \
in(buf1:length(pragma_size) alloc_if(1) free_if(0)) \
signal(&sig1)
- {}
+ { buf1[0] = 0.0; }
#pragma offload_wait target(mic:0) wait(&sig1)
- #pragma offload target (mic:0) \
+ #pragma offload target (mic:0) mandatory \
out(buf1:length(pragma_size) alloc_if(0) free_if(1)) \
signal(&sig2)
- {}
+ { buf1[0] = 1.0; }
#pragma offload_wait target(mic:0) wait(&sig2)
free(buf1);
diff --git a/src/USER-INTEL/fix_intel.h b/src/USER-INTEL/fix_intel.h
index f7d985b704..cbb0051594 100644
--- a/src/USER-INTEL/fix_intel.h
+++ b/src/USER-INTEL/fix_intel.h
@@ -366,11 +366,12 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
lmp_ft * _noalias const tor = (lmp_ft *) lmp->atom->torque[0] +
out_offset;
if (eatom) {
+ double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[ii].x;
f[i].y += f_in[ii].y;
f[i].z += f_in[ii].z;
- force->pair->eatom[i] += f_in[ii].w;
+ lmp_eatom[i] += f_in[ii].w;
tor[i].x += f_in[ii+1].x;
tor[i].y += f_in[ii+1].y;
tor[i].z += f_in[ii+1].z;
@@ -389,11 +390,12 @@ void FixIntel::add_oresults(const ft * _noalias const f_in,
}
} else {
if (eatom) {
+ double * _noalias const lmp_eatom = force->pair->eatom + out_offset;
for (int i = ifrom; i < ito; i++) {
f[i].x += f_in[i].x;
f[i].y += f_in[i].y;
f[i].z += f_in[i].z;
- force->pair->eatom[i] += f_in[i].w;
+ lmp_eatom[i] += f_in[i].w;
}
} else {
for (int i = ifrom; i < ito; i++) {
@@ -478,7 +480,11 @@ void FixIntel::add_off_results(const ft * _noalias const f_in,
start_watch(TIME_OFFLOAD_WAIT);
#ifdef _LMP_INTEL_OFFLOAD
- #pragma offload_wait target(mic:_cop) wait(f_in)
+ if (neighbor->ago == 0) {
+ #pragma offload_wait target(mic:_cop) wait(atom->tag, f_in)
+ } else {
+ #pragma offload_wait target(mic:_cop) wait(f_in)
+ }
#endif
double wait_time = stop_watch(TIME_OFFLOAD_WAIT);
diff --git a/src/USER-INTEL/intel_buffers.cpp b/src/USER-INTEL/intel_buffers.cpp
index 2f9a39d2f2..e430a1ebc4 100644
--- a/src/USER-INTEL/intel_buffers.cpp
+++ b/src/USER-INTEL/intel_buffers.cpp
@@ -27,6 +27,7 @@ IntelBuffers::IntelBuffers(class LAMMPS *lmp_in) :
_list_alloc_atoms = 0;
_ntypes = 0;
_off_map_maxlocal = 0;
+ _ccachex = 0;
#ifdef _LMP_INTEL_OFFLOAD
_separate_buffers = 0;
_off_f = 0;
@@ -35,6 +36,7 @@ IntelBuffers::IntelBuffers(class LAMMPS *lmp_in) :
_off_map_maxhead = 0;
_off_list_alloc = false;
_off_threads = 0;
+ _off_ccache = 0;
#endif
}
@@ -45,6 +47,7 @@ IntelBuffers::~IntelBuffers()
{
free_buffers();
free_all_nbor_buffers();
+ free_ccache();
set_ntypes(0);
}
@@ -196,14 +199,16 @@ void IntelBuffers::_grow_nmax()
nspecial = lmp->atom->nspecial[0];
special_length = size * lmp->atom->maxspecial;
nspecial_length = size * 3;
- tag_length = size;
} else {
special = &_special_holder;
nspecial = &_nspecial_holder;
special_length = 1;
nspecial_length = 1;
- tag_length = 1;
}
+ if (_need_tag)
+ tag_length = size;
+ else
+ tag_length = 1;
int *tag = lmp->atom->tag;
int *bins = lmp->neighbor->bins;
#pragma offload_transfer target(mic:_cop) \
@@ -337,8 +342,8 @@ void IntelBuffers::free_nbor_list()
template
void IntelBuffers::_grow_nbor_list(NeighList *list,
- const int nlocal,
- const int offload_end)
+ const int nlocal,
+ const int offload_end)
{
free_nbor_list();
_list_alloc_atoms = 1.10 * nlocal;
@@ -353,8 +358,8 @@ void IntelBuffers::_grow_nbor_list(NeighList *list,
if (special_flag != NULL && list_alloc != NULL) {
#pragma offload_transfer target(mic:_cop) \
in(special_flag:length(4) alloc_if(1) free_if(0)) \
- in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
- nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))
+ in(stencil:length(list->maxstencil) alloc_if(1) free_if(0)) \
+ nocopy(list_alloc:length(list_alloc_size) alloc_if(1) free_if(0))
_off_map_stencil = stencil;
_off_list_alloc = true;
}
@@ -378,6 +383,86 @@ void IntelBuffers::_grow_stencil(NeighList *list)
/* ---------------------------------------------------------------------- */
+template
+void IntelBuffers::free_ccache()
+{
+ if (_ccachex) {
+ flt_t *ccachex = _ccachex;
+ flt_t *ccachey = _ccachey;
+ flt_t *ccachez = _ccachez;
+ flt_t *ccachew = _ccachew;
+ int *ccachei = _ccachei;
+ int *ccachej = _ccachej;
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (_off_ccache) {
+ #pragma offload_transfer target(mic:_cop) \
+ nocopy(ccachex,ccachey,ccachez,ccachew:alloc_if(0) free_if(1)) \
+ nocopy(ccachei,ccachej:alloc_if(0) free_if(1))
+ }
+ _off_ccache = 0;
+ #endif
+
+ lmp->memory->destroy(ccachex);
+ lmp->memory->destroy(ccachey);
+ lmp->memory->destroy(ccachez);
+ lmp->memory->destroy(ccachew);
+ lmp->memory->destroy(ccachei);
+ lmp->memory->destroy(ccachej);
+
+ _ccachex = 0;
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void IntelBuffers::grow_ccache(const int off_flag,
+ const int nthreads)
+{
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (_ccachex && off_flag && _off_ccache == 0)
+ free_ccache();
+ #endif
+ if (_ccachex)
+ return;
+
+ const int nsize = get_max_nbors();
+ int esize = MIN(sizeof(int), sizeof(flt_t));
+ IP_PRE_get_stride(_ccache_stride, nsize, esize, 0);
+ int nt = MAX(nthreads, _off_threads);
+ const int vsize = _ccache_stride * nt;
+
+ lmp->memory->create(_ccachex, vsize , "_ccachex");
+ lmp->memory->create(_ccachey, vsize, "_ccachey");
+ lmp->memory->create(_ccachez, vsize, "_ccachez");
+ lmp->memory->create(_ccachew, vsize, "_ccachew");
+ lmp->memory->create(_ccachei, vsize, "_ccachei");
+ lmp->memory->create(_ccachej, vsize, "_ccachej");
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (off_flag) {
+ flt_t *ccachex = _ccachex;
+ flt_t *ccachey = _ccachey;
+ flt_t *ccachez = _ccachez;
+ flt_t *ccachew = _ccachew;
+ int *ccachei = _ccachei;
+ int *ccachej = _ccachej;
+
+ if (ccachex != NULL && ccachey !=NULL && ccachez != NULL &&
+ ccachew != NULL && ccachei != NULL && ccachej !=NULL) {
+ #pragma offload_transfer target(mic:_cop) \
+ nocopy(ccachex,ccachey:length(vsize) alloc_if(1) free_if(0)) \
+ nocopy(ccachez,ccachew:length(vsize) alloc_if(1) free_if(0)) \
+ nocopy(ccachei,ccachej:length(vsize) alloc_if(1) free_if(0))
+ }
+ _off_ccache = 1;
+ }
+ #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
template
void IntelBuffers::set_ntypes(const int ntypes)
{
diff --git a/src/USER-INTEL/intel_buffers.h b/src/USER-INTEL/intel_buffers.h
index 19f10f9559..91dbb790ca 100644
--- a/src/USER-INTEL/intel_buffers.h
+++ b/src/USER-INTEL/intel_buffers.h
@@ -49,7 +49,14 @@ class IntelBuffers {
inline int get_stride(int nall) {
int stride;
IP_PRE_get_stride(stride, nall, sizeof(vec3_acc_t),
- lmp->atom->torque);
+ lmp->atom->torque);
+ return stride;
+ }
+
+ template
+ inline int get_scalar_stride(const int n) {
+ int stride;
+ IP_PRE_get_stride(stride, n, sizeof(stype), 0);
return stride;
}
@@ -103,6 +110,16 @@ class IntelBuffers {
#endif
}
+ void free_ccache();
+ void grow_ccache(const int off_flag, const int nthreads);
+ inline int ccache_stride() { return _ccache_stride; }
+ inline flt_t * get_ccachex() { return _ccachex; }
+ inline flt_t * get_ccachey() { return _ccachey; }
+ inline flt_t * get_ccachez() { return _ccachez; }
+ inline flt_t * get_ccachew() { return _ccachew; }
+ inline int * get_ccachei() { return _ccachei; }
+ inline int * get_ccachej() { return _ccachej; }
+
inline int get_max_nbors() {
int mn = lmp->neighbor->oneatom * sizeof(int) /
(INTEL_ONEATOM_FACTOR * INTEL_DATA_ALIGN);
@@ -232,6 +249,12 @@ class IntelBuffers {
used_ghost * sizeof(flt_t));
}
}
+
+ inline int need_tag() { return _need_tag; }
+ inline void need_tag(const int nt) { _need_tag = nt; }
+ #else
+ inline int need_tag() { return 0; }
+ inline void need_tag(const int nt) { }
#endif
double memory_usage(const int nthreads);
@@ -255,17 +278,22 @@ class IntelBuffers {
flt_t **_cutneighsq;
int _ntypes;
+ int _ccache_stride;
+ flt_t *_ccachex, *_ccachey, *_ccachez, *_ccachew;
+ int *_ccachei, *_ccachej;
+
#ifdef _LMP_INTEL_OFFLOAD
int _separate_buffers;
atom_t *_host_x;
flt_t *_host_q;
quat_t *_host_quat;
vec3_acc_t *_off_f;
- int _off_map_nmax, _off_map_maxhead, _cop;
+ int _off_map_nmax, _off_map_maxhead, _cop, _off_ccache;
int *_off_map_ilist;
int *_off_map_stencil, *_off_map_special, *_off_map_nspecial, *_off_map_tag;
int *_off_map_binhead, *_off_map_bins, *_off_map_numneigh;
bool _off_list_alloc;
+ int _need_tag;
#endif
int _buf_size, _buf_local_size;
diff --git a/src/USER-INTEL/intel_preprocess.h b/src/USER-INTEL/intel_preprocess.h
index 9dfb3a20e0..44534e1324 100644
--- a/src/USER-INTEL/intel_preprocess.h
+++ b/src/USER-INTEL/intel_preprocess.h
@@ -247,7 +247,7 @@ inline double MIC_Wtime() {
nall = nlocal + nghost; \
separate_flag--; \
int flength; \
- if (NEWTON_PAIR) flength = nall; \
+ if (newton) flength = nall; \
else flength = nlocal; \
IP_PRE_get_stride(f_stride, flength, sizeof(FORCE_T), \
separate_flag); \
@@ -302,16 +302,40 @@ inline double MIC_Wtime() {
#endif
-#define IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz) \
-{ \
- if (vflag == 1) { \
- sv0 += ev_pre * delx * delx * fpair; \
- sv1 += ev_pre * dely * dely * fpair; \
- sv2 += ev_pre * delz * delz * fpair; \
- sv3 += ev_pre * delx * dely * fpair; \
- sv4 += ev_pre * delx * delz * fpair; \
- sv5 += ev_pre * dely * delz * fpair; \
- } \
+#define IP_PRE_ev_tally_nbor(vflag, ev_pre, fpair, delx, dely, delz) \
+{ \
+ if (vflag == 1) { \
+ sv0 += ev_pre * delx * delx * fpair; \
+ sv1 += ev_pre * dely * dely * fpair; \
+ sv2 += ev_pre * delz * delz * fpair; \
+ sv3 += ev_pre * delx * dely * fpair; \
+ sv4 += ev_pre * delx * delz * fpair; \
+ sv5 += ev_pre * dely * delz * fpair; \
+ } \
+}
+
+#define IP_PRE_ev_tally_nbor3(vflag, fj, fk, delx, dely, delz, delr2) \
+{ \
+ if (vflag == 1) { \
+ sv0 += delx * fj[0] + delr2[0] * fk[0]; \
+ sv1 += dely * fj[1] + delr2[1] * fk[1]; \
+ sv2 += delz * fj[2] + delr2[2] * fk[2]; \
+ sv3 += delx * fj[1] + delr2[0] * fk[1]; \
+ sv4 += delx * fj[2] + delr2[0] * fk[2]; \
+ sv5 += dely * fj[2] + delr2[1] * fk[2]; \
+ } \
+}
+
+#define IP_PRE_ev_tally_nbor3v(vflag, fj0, fj1, fj2, delx, dely, delz) \
+{ \
+ if (vflag == 1) { \
+ sv0 += delx * fj0; \
+ sv1 += dely * fj1; \
+ sv2 += delz * fj2; \
+ sv3 += delx * fj1; \
+ sv4 += delx * fj2; \
+ sv5 += dely * fj2; \
+ } \
}
#define IP_PRE_ev_tally_atom(evflag, eflag, vflag, f, fwtmp) \
diff --git a/src/USER-INTEL/neigh_half_bin_intel.cpp b/src/USER-INTEL/neigh_half_bin_intel.cpp
index f4ca6b3a1e..6c3cfc1961 100644
--- a/src/USER-INTEL/neigh_half_bin_intel.cpp
+++ b/src/USER-INTEL/neigh_half_bin_intel.cpp
@@ -203,16 +203,15 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
- int tag_size, special_size;
+ int tag_size = 0, special_size;
+ if (buffers->need_tag()) tag_size = nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
- tag_size = nall;
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
- tag_size = 0;
special_size = 0;
}
const tagint * _noalias const special = s;
@@ -286,7 +285,7 @@ void Neighbor::hbnni(const int offload, NeighList *list, void *buffers_in,
in(separate_buffers, astart, aend, nlocal, molecular, ntypes) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
- signal(numneigh)
+ signal(tag)
#endif
{
#ifdef __MIC__
@@ -604,16 +603,15 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
- int tag_size, special_size;
+ int tag_size = 0, special_size;
+ if (buffers->need_tag()) tag_size = e_nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
- tag_size = e_nall;
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
- tag_size = 0;
special_size = 0;
}
const tagint * _noalias const special = s;
@@ -687,7 +685,7 @@ void Neighbor::hbni(const int offload, NeighList *list, void *buffers_in,
in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
- signal(numneigh)
+ signal(tag)
#endif
{
#ifdef __MIC__
@@ -1049,16 +1047,15 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
const int molecular = atom->molecular;
int *ns = NULL;
tagint *s = NULL;
- int tag_size, special_size;
+ int tag_size = 0, special_size;
+ if (buffers->need_tag()) tag_size = e_nall;
if (molecular) {
s = atom->special[0];
ns = atom->nspecial[0];
- tag_size = e_nall;
special_size = aend;
} else {
s = &buffers->_special_holder;
ns = &buffers->_nspecial_holder;
- tag_size = 0;
special_size = 0;
}
const tagint * _noalias const special = s;
@@ -1132,7 +1129,7 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
in(offload,separate_buffers, astart, aend, nlocal, molecular, ntypes) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
- signal(numneigh)
+ signal(tag)
#endif
{
#ifdef __MIC__
@@ -1344,3 +1341,417 @@ void Neighbor::hbnti(const int offload, NeighList *list, void *buffers_in,
#endif
}
}
+
+/* ----------------------------------------------------------------------
+ binned neighbor list construction for all neighbors
+ every neighbor pair appears in list of both atoms i and j
+------------------------------------------------------------------------- */
+
+void Neighbor::full_bin_intel(NeighList *list)
+{
+ const int nlocal = (includegroup) ? atom->nfirst : atom->nlocal;
+ list->inum = nlocal;
+ list->gnum = 0;
+
+ // Get fix for intel stuff
+ FixIntel *fix = static_cast(fix_intel);
+
+ const int off_end = fix->offload_end_neighbor();
+ int host_start = fix->host_start_neighbor();;
+ int offload_noghost = 0;
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (fix->full_host_list()) host_start = 0;
+ offload_noghost = fix->offload_noghost();
+ if (exclude)
+ error->all(FLERR, "Exclusion lists not yet supported for Intel offload");
+ #endif
+
+ if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
+ if (offload_noghost) {
+ fbi(1, list, fix->get_mixed_buffers(),
+ 0, off_end, fix);
+ fbi(0, list, fix->get_mixed_buffers(),
+ host_start, nlocal, fix, off_end);
+ } else {
+ fbi(1, list, fix->get_mixed_buffers(),
+ 0, off_end, fix);
+ fbi(0, list, fix->get_mixed_buffers(),
+ host_start, nlocal, fix);
+ }
+ } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
+ if (offload_noghost) {
+ fbi(1, list, fix->get_double_buffers(),
+ 0, off_end, fix);
+ fbi(0, list, fix->get_double_buffers(),
+ host_start, nlocal, fix, off_end);
+ } else {
+ fbi(1, list, fix->get_double_buffers(),
+ 0, off_end, fix);
+ fbi(0, list, fix->get_double_buffers(),
+ host_start, nlocal, fix);
+ }
+ } else {
+ if (offload_noghost) {
+ fbi(1, list, fix->get_single_buffers(), 0, off_end, fix);
+ fbi(0, list, fix->get_single_buffers(),
+ host_start, nlocal, fix, off_end);
+ } else {
+ fbi(1, list, fix->get_single_buffers(), 0, off_end, fix);
+ fbi(0, list, fix->get_single_buffers(),
+ host_start, nlocal, fix);
+ }
+ }
+}
+
+template
+void Neighbor::fbi(const int offload, NeighList *list, void *buffers_in,
+ const int astart, const int aend, void *fix_in,
+ const int offload_end) {
+ IntelBuffers *buffers = (IntelBuffers *)buffers_in;
+ FixIntel *fix = (FixIntel *)fix_in;
+ const int nall = atom->nlocal + atom->nghost;
+ int pad = 1;
+
+ if (offload) {
+ fix->start_watch(TIME_PACK);
+ buffers->grow(nall, atom->nlocal, comm->nthreads, aend);
+ buffers->grow_nbor(list, atom->nlocal, aend);
+
+ ATOM_T biga;
+ biga.x = INTEL_BIGP;
+ biga.y = INTEL_BIGP;
+ biga.z = INTEL_BIGP;
+ biga.w = 1;
+ buffers->get_x()[nall]=biga;
+
+ const int nthreads = comm->nthreads;
+ #if defined(_OPENMP)
+ #pragma omp parallel default(none) shared(buffers)
+ #endif
+ {
+ int ifrom, ito, tid;
+ IP_PRE_omp_range_id_align(ifrom, ito, tid, nall, nthreads,
+ sizeof(ATOM_T));
+ buffers->thr_pack(ifrom, ito, 0);
+ }
+ fix->stop_watch(TIME_PACK);
+
+ fix->start_watch(TIME_HOST_NEIGHBOR);
+ bin_atoms(buffers->get_x(), buffers->get_atombin());
+ } else {
+ fix->start_watch(TIME_HOST_NEIGHBOR);
+ }
+ const int pad_width = pad;
+
+ if (aend-astart == 0) {
+ fix->stop_watch(TIME_HOST_NEIGHBOR);
+ return;
+ }
+
+ const ATOM_T * _noalias const x = buffers->get_x();
+ int * _noalias const firstneigh = buffers->firstneigh(list);
+ int nall_t = nall;
+ if (offload_noghost && offload) nall_t = atom->nlocal;
+ const int e_nall = nall_t;
+
+ const int molecular = atom->molecular;
+ int *ns = NULL;
+ tagint *s = NULL;
+ int tag_size = 0, special_size;
+ if (buffers->need_tag()) tag_size = e_nall;
+ if (molecular) {
+ s = atom->special[0];
+ ns = atom->nspecial[0];
+ special_size = aend;
+ } else {
+ s = &buffers->_special_holder;
+ ns = &buffers->_nspecial_holder;
+ special_size = 0;
+ }
+ const tagint * _noalias const special = s;
+ const int * _noalias const nspecial = ns;
+ const int maxspecial = atom->maxspecial;
+ const tagint * _noalias const tag = atom->tag;
+
+ int * _noalias const ilist = list->ilist;
+ int * _noalias numneigh = list->numneigh;
+ int * _noalias const cnumneigh = buffers->cnumneigh(list);
+ const int nstencil = list->nstencil;
+ const int * _noalias const stencil = list->stencil;
+ const flt_t * _noalias const cutneighsq = buffers->get_cutneighsq()[0];
+ const int ntypes = atom->ntypes + 1;
+ const int nlocal = atom->nlocal;
+
+ #ifndef _LMP_INTEL_OFFLOAD
+ int * const mask = atom->mask;
+ tagint * const molecule = atom->molecule;
+ #endif
+
+ int tnum;
+ int *overflow;
+ double *timer_compute;
+ if (offload) {
+ timer_compute = fix->off_watch_neighbor();
+ tnum = buffers->get_off_threads();
+ overflow = fix->get_off_overflow_flag();
+ fix->stop_watch(TIME_HOST_NEIGHBOR);
+ fix->start_watch(TIME_OFFLOAD_LATENCY);
+ } else {
+ tnum = comm->nthreads;
+ overflow = fix->get_overflow_flag();
+ }
+ const int nthreads = tnum;
+ const int maxnbors = buffers->get_max_nbors();
+ int * _noalias const atombin = buffers->get_atombin();
+
+ // Make sure dummy coordinates to eliminate loop remainder not within cutoff
+ {
+ const flt_t dx = (INTEL_BIGP - bboxhi[0]);
+ const flt_t dy = (INTEL_BIGP - bboxhi[1]);
+ const flt_t dz = (INTEL_BIGP - bboxhi[2]);
+ if (dx * dx + dy * dy + dz * dz < static_cast(cutneighmaxsq))
+ error->one(FLERR,
+ "Intel package expects no atoms within cutoff of {1e15,1e15,1e15}.");
+ }
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ const int * _noalias const binhead = this->binhead;
+ const int * _noalias const special_flag = this->special_flag;
+ const int * _noalias const bins = this->bins;
+ const int cop = fix->coprocessor_number();
+ const int separate_buffers = fix->separate_buffers();
+ #pragma offload target(mic:cop) if(offload) \
+ in(x:length(e_nall+1) alloc_if(0) free_if(0)) \
+ in(tag:length(tag_size) alloc_if(0) free_if(0)) \
+ in(special:length(special_size*maxspecial) alloc_if(0) free_if(0)) \
+ in(nspecial:length(special_size*3) alloc_if(0) free_if(0)) \
+ in(bins:length(nall) alloc_if(0) free_if(0)) \
+ in(binhead:length(mbins) alloc_if(0) free_if(0)) \
+ in(cutneighsq:length(0) alloc_if(0) free_if(0)) \
+ in(firstneigh:length(0) alloc_if(0) free_if(0)) \
+ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
+ out(numneigh:length(0) alloc_if(0) free_if(0)) \
+ in(ilist:length(0) alloc_if(0) free_if(0)) \
+ in(atombin:length(aend) alloc_if(0) free_if(0)) \
+ in(stencil:length(nstencil) alloc_if(0) free_if(0)) \
+ in(special_flag:length(0) alloc_if(0) free_if(0)) \
+ in(maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
+ in(offload_end,separate_buffers,astart, aend, nlocal, molecular, ntypes) \
+ out(overflow:length(5) alloc_if(0) free_if(0)) \
+ out(timer_compute:length(1) alloc_if(0) free_if(0)) \
+ signal(tag)
+ #endif
+ {
+ #ifdef __MIC__
+ *timer_compute = MIC_Wtime();
+ #endif
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ overflow[LMP_LOCAL_MIN] = astart;
+ overflow[LMP_LOCAL_MAX] = aend - 1;
+ overflow[LMP_GHOST_MIN] = e_nall;
+ overflow[LMP_GHOST_MAX] = -1;
+ #endif
+
+ #if defined(_OPENMP)
+ #pragma omp parallel default(none) shared(numneigh, overflow)
+ #endif
+ {
+ #ifdef _LMP_INTEL_OFFLOAD
+ int lmin = e_nall, lmax = -1, gmin = e_nall, gmax = -1;
+ #endif
+
+ const int num = aend - astart;
+ int tid, ifrom, ito;
+ IP_PRE_omp_range_id(ifrom, ito, tid, num, nthreads);
+ ifrom += astart;
+ ito += astart;
+
+ int which;
+
+ const int list_size = (ito + tid + 1) * maxnbors;
+ int ct = (ifrom + tid) * maxnbors;
+ int *neighptr = firstneigh + ct;
+ for (int i = ifrom; i < ito; i++) {
+ int j, k, n, n2, itype, jtype, ibin;
+ double xtmp, ytmp, ztmp, delx, dely, delz, rsq;
+
+ n = 0;
+ n2 = maxnbors;
+
+ xtmp = x[i].x;
+ ytmp = x[i].y;
+ ztmp = x[i].z;
+ itype = x[i].w;
+ const tagint itag = tag[i];
+ const int ioffset = ntypes * itype;
+
+ // loop over all atoms in surrounding bins in stencil including self
+ // skip i = j
+
+ ibin = atombin[i];
+
+ for (k = 0; k < nstencil; k++) {
+ for (j = binhead[ibin + stencil[k]]; j >= 0; j = bins[j]) {
+ if (i == j) continue;
+
+ if (offload_noghost) {
+ if (j < nlocal) {
+ if (i < offload_end) continue;
+ } else if (offload) continue;
+ }
+
+ jtype = x[j].w;
+ #ifndef _LMP_INTEL_OFFLOAD
+ if (exclude && exclusion(i,j,itype,jtype,mask,molecule)) continue;
+ #endif
+
+ delx = xtmp - x[j].x;
+ dely = ytmp - x[j].y;
+ delz = ztmp - x[j].z;
+ rsq = delx * delx + dely * dely + delz * delz;
+ if (rsq <= cutneighsq[ioffset + jtype]) {
+ const int jtag = tag[j];
+ int flist = 0;
+ if (itag > jtag) {
+ if ((itag+jtag) % 2 == 0) flist = 1;
+ } else if (itag < jtag) {
+ if ((itag+jtag) % 2 == 1) flist = 1;
+ } else {
+ if (x[j].z < ztmp) flist = 1;
+ else if (x[j].z == ztmp && x[j].y < ytmp) flist = 1;
+ else if (x[j].z == ztmp && x[j].y == ytmp && x[j].x < xtmp)
+ flist = 1;
+ }
+ if (flist)
+ neighptr[n2++] = j;
+ else
+ neighptr[n++] = j;
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (j < nlocal) {
+ if (j < lmin) lmin = j;
+ if (j > lmax) lmax = j;
+ } else {
+ if (j < gmin) gmin = j;
+ if (j > gmax) gmax = j;
+ }
+ #endif
+ }
+ }
+ }
+ ilist[i] = i;
+
+ cnumneigh[i] = ct;
+ if (n > maxnbors) *overflow = 1;
+ atombin[i] = n;
+ for (k = maxnbors; k < n2; k++) neighptr[n++] = neighptr[k];
+ numneigh[i] = n;
+ while((n % (INTEL_DATA_ALIGN / sizeof(int))) != 0) n++;
+ ct += n;
+ neighptr += n;
+ if (ct + n + maxnbors > list_size) {
+ *overflow = 1;
+ ct = (ifrom + tid) * maxnbors;
+ }
+ }
+
+ if (*overflow == 1)
+ for (int i = ifrom; i < ito; i++)
+ numneigh[i] = 0;
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (separate_buffers) {
+ #if defined(_OPENMP)
+ #pragma omp critical
+ #endif
+ {
+ if (lmin < overflow[LMP_LOCAL_MIN]) overflow[LMP_LOCAL_MIN] = lmin;
+ if (lmax > overflow[LMP_LOCAL_MAX]) overflow[LMP_LOCAL_MAX] = lmax;
+ if (gmin < overflow[LMP_GHOST_MIN]) overflow[LMP_GHOST_MIN] = gmin;
+ if (gmax > overflow[LMP_GHOST_MAX]) overflow[LMP_GHOST_MAX] = gmax;
+ }
+ #pragma omp barrier
+ }
+
+ int ghost_offset = 0, nall_offset = e_nall;
+ if (separate_buffers) {
+ int nghost = overflow[LMP_GHOST_MAX] + 1 - overflow[LMP_GHOST_MIN];
+ if (nghost < 0) nghost = 0;
+ if (offload) {
+ ghost_offset = overflow[LMP_GHOST_MIN] - overflow[LMP_LOCAL_MAX] - 1;
+ nall_offset = overflow[LMP_LOCAL_MAX] + 1 + nghost;
+ } else {
+ ghost_offset = overflow[LMP_GHOST_MIN] - nlocal;
+ nall_offset = nlocal + nghost;
+ }
+ }
+ #endif
+
+ if (molecular) {
+ for (int i = ifrom; i < ito; ++i) {
+ int * _noalias jlist = firstneigh + cnumneigh[i];
+ const int jnum = numneigh[i];
+ for (int jj = 0; jj < jnum; jj++) {
+ const int j = jlist[jj];
+ ofind_special(which, special, nspecial, i, tag[j],
+ special_flag);
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (j >= nlocal) {
+ if (j == e_nall)
+ jlist[jj] = nall_offset;
+ else if (which > 0)
+ jlist[jj] = (j-ghost_offset) ^ (which << SBBITS);
+ else jlist[jj]-=ghost_offset;
+ } else
+ #endif
+ if (which > 0) jlist[jj] = j ^ (which << SBBITS);
+ }
+ }
+ }
+ #ifdef _LMP_INTEL_OFFLOAD
+ else if (separate_buffers) {
+ for (int i = ifrom; i < ito; ++i) {
+ int * _noalias jlist = firstneigh + cnumneigh[i];
+ const int jnum = numneigh[i];
+ int jj = 0;
+ for (jj = 0; jj < jnum; jj++) {
+ if (jlist[jj] >= nlocal) {
+ if (jlist[jj] == e_nall) jlist[jj] = nall_offset;
+ else jlist[jj] -= ghost_offset;
+ }
+ }
+ }
+ }
+ #endif
+ } // end omp
+ #ifdef __MIC__
+ *timer_compute = MIC_Wtime() - *timer_compute;
+ #endif
+ } // end offload
+
+ if (offload) {
+ fix->stop_watch(TIME_OFFLOAD_LATENCY);
+ #ifdef _LMP_INTEL_OFFLOAD
+ for (int n = 0; n < aend; n++) {
+ ilist[n] = n;
+ numneigh[n] = 0;
+ }
+ #endif
+ } else {
+ for (int i = astart; i < aend; i++)
+ list->firstneigh[i] = firstneigh + cnumneigh[i];
+ fix->stop_watch(TIME_HOST_NEIGHBOR);
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (separate_buffers) {
+ fix->start_watch(TIME_PACK);
+ fix->set_neighbor_host_sizes();
+ buffers->pack_sep_from_single(fix->host_min_local(),
+ fix->host_used_local(),
+ fix->host_min_ghost(),
+ fix->host_used_ghost());
+ fix->stop_watch(TIME_PACK);
+ }
+ #endif
+ }
+}
diff --git a/src/USER-INTEL/pair_sw_intel.cpp b/src/USER-INTEL/pair_sw_intel.cpp
new file mode 100755
index 0000000000..ebd626b5f7
--- /dev/null
+++ b/src/USER-INTEL/pair_sw_intel.cpp
@@ -0,0 +1,703 @@
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author: W. Michael Brown (Intel)
+------------------------------------------------------------------------- */
+
+#include "math.h"
+#include "stdio.h"
+#include "stdlib.h"
+#include "string.h"
+#include "pair_sw_intel.h"
+#include "atom.h"
+#include "neighbor.h"
+#include "neigh_request.h"
+#include "force.h"
+#include "comm.h"
+#include "memory.h"
+#include "neighbor.h"
+#include "neigh_list.h"
+#include "memory.h"
+#include "error.h"
+#include "modify.h"
+#include "suffix.h"
+
+using namespace LAMMPS_NS;
+
+#define FC_PACKED0_T typename ForceConst::fc_packed0
+#define FC_PACKED1_T typename ForceConst::fc_packed1
+#define FC_PACKED2_T typename ForceConst::fc_packed2
+#define FC_PACKED3_T typename ForceConst::fc_packed3
+
+#define MAXLINE 1024
+#define DELTA 4
+
+/* ---------------------------------------------------------------------- */
+
+PairSWIntel::PairSWIntel(LAMMPS *lmp) : PairSW(lmp)
+{
+ suffix_flag |= Suffix::INTEL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+PairSWIntel::~PairSWIntel()
+{
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSWIntel::compute(int eflag, int vflag)
+{
+ if (fix->precision() == FixIntel::PREC_MODE_MIXED)
+ compute(eflag, vflag, fix->get_mixed_buffers(),
+ force_const_single);
+ else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+ compute(eflag, vflag, fix->get_double_buffers(),
+ force_const_double);
+ else
+ compute(eflag, vflag, fix->get_single_buffers(),
+ force_const_single);
+
+ fix->balance_stamp();
+ vflag_fdotr = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairSWIntel::compute(int eflag, int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc)
+{
+ if (eflag || vflag) {
+ ev_setup(eflag, vflag);
+ } else evflag = vflag_fdotr = 0;
+
+ const int inum = list->inum;
+ const int nthreads = comm->nthreads;
+ const int host_start = fix->host_start_pair();
+ const int offload_end = fix->offload_end_pair();
+ const int ago = neighbor->ago;
+
+ if (ago != 0 && fix->separate_buffers() == 0) {
+ fix->start_watch(TIME_PACK);
+
+ #if defined(_OPENMP)
+ #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
+ #endif
+ {
+ int ifrom, ito, tid;
+ IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal + atom->nghost,
+ nthreads, sizeof(ATOM_T));
+ buffers->thr_pack(ifrom, ito, ago);
+ }
+
+ fix->stop_watch(TIME_PACK);
+ }
+
+ if (_spq) {
+ if (evflag || vflag_fdotr) {
+ int ovflag = 0;
+ if (vflag_fdotr) ovflag = 2;
+ else if (vflag) ovflag = 1;
+ if (eflag) {
+ eval<1,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
+ eval<1,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
+ } else {
+ eval<1,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
+ eval<1,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
+ }
+ } else {
+ eval<1,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad);
+ eval<1,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad);
+ }
+ } else {
+ if (evflag || vflag_fdotr) {
+ int ovflag = 0;
+ if (vflag_fdotr) ovflag = 2;
+ else if (vflag) ovflag = 1;
+ if (eflag) {
+ eval<0,1,1>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
+ eval<0,1,1>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
+ } else {
+ eval<0,1,0>(1, ovflag, buffers, fc, 0, offload_end, _offload_pad);
+ eval<0,1,0>(0, ovflag, buffers, fc, host_start, inum, _host_pad);
+ }
+ } else {
+ eval<0,0,0>(1, 0, buffers, fc, 0, offload_end, _offload_pad);
+ eval<0,0,0>(0, 0, buffers, fc, host_start, inum, _host_pad);
+ }
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairSWIntel::eval(const int offload, const int vflag,
+ IntelBuffers *buffers,
+ const ForceConst &fc, const int astart,
+ const int aend, const int pad_width)
+{
+ const int inum = aend - astart;
+ if (inum == 0) return;
+ int nlocal, nall, minlocal;
+ fix->get_buffern(offload, nlocal, nall, minlocal);
+
+ const int ago = neighbor->ago;
+ IP_PRE_pack_separate_buffers(fix, buffers, ago, offload, nlocal, nall);
+
+ ATOM_T * _noalias const x = buffers->get_x(offload);
+ const int * _noalias const numneighhalf = buffers->get_atombin();
+ const int * _noalias const numneigh = list->numneigh;
+ const int * _noalias const cnumneigh = buffers->cnumneigh(list);
+ const int * _noalias const firstneigh = buffers->firstneigh(list);
+
+ const FC_PACKED0_T * _noalias const p2 = fc.p2[0];
+ const FC_PACKED1_T * _noalias const p2f = fc.p2f[0];
+ const FC_PACKED2_T * _noalias const p2e = fc.p2e[0];
+ const FC_PACKED3_T * _noalias const p3 = fc.p3[0][0];
+
+ flt_t * _noalias const ccachex = buffers->get_ccachex();
+ flt_t * _noalias const ccachey = buffers->get_ccachey();
+ flt_t * _noalias const ccachez = buffers->get_ccachez();
+ flt_t * _noalias const ccachew = buffers->get_ccachew();
+ int * _noalias const ccachei = buffers->get_ccachei();
+ int * _noalias const ccachej = buffers->get_ccachej();
+ const int ccache_stride = _ccache_stride;
+
+ const int ntypes = atom->ntypes + 1;
+ const int eatom = this->eflag_atom;
+
+ // Determine how much data to transfer
+ int x_size, q_size, f_stride, ev_size, separate_flag;
+ IP_PRE_get_transfern(ago, /* NEWTON_PAIR*/ 1, EVFLAG, EFLAG, vflag,
+ buffers, offload, fix, separate_flag,
+ x_size, q_size, ev_size, f_stride);
+
+ int tc;
+ FORCE_T * _noalias f_start;
+ acc_t * _noalias ev_global;
+ IP_PRE_get_buffers(offload, buffers, fix, tc, f_start, ev_global);
+ const int nthreads = tc;
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ double *timer_compute = fix->off_watch_pair();
+ int *overflow = fix->get_off_overflow_flag();
+
+ if (offload) fix->start_watch(TIME_OFFLOAD_LATENCY);
+ #pragma offload target(mic:_cop) if(offload) \
+ in(p2,p2f,p2e,p3:length(0) alloc_if(0) free_if(0)) \
+ in(firstneigh:length(0) alloc_if(0) free_if(0)) \
+ in(cnumneigh:length(0) alloc_if(0) free_if(0)) \
+ in(numneigh:length(0) alloc_if(0) free_if(0)) \
+ in(x:length(x_size) alloc_if(0) free_if(0)) \
+ in(numneighhalf:length(0) alloc_if(0) free_if(0)) \
+ in(overflow:length(0) alloc_if(0) free_if(0)) \
+ in(ccachex,ccachey,ccachez,ccachew:length(0) alloc_if(0) free_if(0)) \
+ in(ccachei,ccachej:length(0) alloc_if(0) free_if(0)) \
+ in(ccache_stride,nthreads,inum,nall,ntypes,vflag,eatom,offload) \
+ in(astart,nlocal,f_stride,minlocal,separate_flag,pad_width) \
+ out(f_start:length(f_stride) alloc_if(0) free_if(0)) \
+ out(ev_global:length(ev_size) alloc_if(0) free_if(0)) \
+ out(timer_compute:length(1) alloc_if(0) free_if(0)) \
+ signal(f_start)
+ #endif
+ {
+ #ifdef __MIC__
+ *timer_compute = MIC_Wtime();
+ #endif
+
+ IP_PRE_repack_for_offload(1, separate_flag, nlocal, nall,
+ f_stride, x, 0);
+
+ acc_t oevdwl, ov0, ov1, ov2, ov3, ov4, ov5;
+ if (EVFLAG) {
+ oevdwl = (acc_t)0;
+ if (vflag) ov0 = ov1 = ov2 = ov3 = ov4 = ov5 = (acc_t)0;
+ }
+
+ #if defined(_OPENMP)
+ #pragma omp parallel default(none) \
+ shared(f_start,f_stride,nlocal,nall,minlocal) \
+ reduction(+:oevdwl,ov0,ov1,ov2,ov3,ov4,ov5)
+ #endif
+ {
+ int iifrom, iito, tid;
+ IP_PRE_omp_range_id(iifrom, iito, tid, inum, nthreads);
+ iifrom += astart;
+ iito += astart;
+
+ FORCE_T * _noalias const f = f_start - minlocal + (tid * f_stride);
+ memset(f + minlocal, 0, f_stride * sizeof(FORCE_T));
+
+ const int toffs = tid * ccache_stride;
+ flt_t * _noalias const tdelx = ccachex + toffs;
+ flt_t * _noalias const tdely = ccachey + toffs;
+ flt_t * _noalias const tdelz = ccachez + toffs;
+ flt_t * _noalias const trsq = ccachew + toffs;
+ int * _noalias const tj = ccachei + toffs;
+ int * _noalias const tjtype = ccachej + toffs;
+
+ // loop over full neighbor list of my atoms
+
+ for (int i = iifrom; i < iito; ++i) {
+ const flt_t xtmp = x[i].x;
+ const flt_t ytmp = x[i].y;
+ const flt_t ztmp = x[i].z;
+ const int itype = x[i].w;
+
+ const int ptr_off = itype * ntypes;
+ const FC_PACKED0_T * _noalias const p2i = p2 + ptr_off;
+ const FC_PACKED1_T * _noalias const p2fi = p2f + ptr_off;
+ const FC_PACKED2_T * _noalias const p2ei = p2e + ptr_off;
+ const FC_PACKED3_T * _noalias const p3i = p3 + ptr_off*ntypes;
+
+ const int * _noalias const jlist = firstneigh + cnumneigh[i];
+ const int jnum = numneigh[i];
+ const int jnumhalf = numneighhalf[i];
+
+ acc_t fxtmp, fytmp, fztmp, fwtmp;
+ acc_t sevdwl, sv0, sv1, sv2, sv3, sv4, sv5;
+ fxtmp = fytmp = fztmp = (acc_t)0.0;
+ if (EVFLAG) {
+ if (EFLAG) fwtmp = sevdwl = (acc_t)0;
+ if (vflag==1) sv0 = sv1 = sv2 = sv3 = sv4 = sv5 = (acc_t)0;
+ }
+
+ int ejnum = 0, ejnumhalf = 0;
+ for (int jj = 0; jj < jnum; jj++) {
+ int j = jlist[jj];
+ j &= NEIGHMASK;
+ const flt_t delx = x[j].x - xtmp;
+ const flt_t dely = x[j].y - ytmp;
+ const flt_t delz = x[j].z - ztmp;
+ const int jtype = x[j].w;
+ const flt_t rsq1 = delx * delx + dely * dely + delz * delz;
+ if (rsq1 < p2i[jtype].cutsq) {
+ tdelx[ejnum] = delx;
+ tdely[ejnum] = dely;
+ tdelz[ejnum] = delz;
+ trsq[ejnum] = rsq1;
+ tj[ejnum] = j;
+ tjtype[ejnum] = jtype;
+ ejnum++;
+ if (jj < jnumhalf) ejnumhalf++;
+ }
+ }
+ int ejnum_pad = ejnum;
+ while ( (ejnum_pad % pad_width) != 0) {
+ tdelx[ejnum_pad] = (flt_t)0.0;
+ tdely[ejnum_pad] = (flt_t)0.0;
+ tdelz[ejnum_pad] = (flt_t)0.0;
+ trsq[ejnum_pad] = (flt_t)1.0;
+ tj[ejnum_pad] = nall;
+ tjtype[ejnum_pad] = 0;
+ ejnum_pad++;
+ }
+
+ #if defined(__INTEL_COMPILER)
+ #pragma vector aligned
+ #pragma simd reduction(+:fxtmp, fytmp, fztmp, fwtmp, sevdwl, \
+ sv0, sv1, sv2, sv3, sv4, sv5)
+ #endif
+ for (int jj = 0; jj < ejnum_pad; jj++) {
+ acc_t fjxtmp, fjytmp, fjztmp, fjtmp;
+ fjxtmp = fjytmp = fjztmp = (acc_t)0.0;
+ if (EFLAG) fjtmp = (acc_t)0.0;
+
+ const flt_t delx = tdelx[jj];
+ const flt_t dely = tdely[jj];
+ const flt_t delz = tdelz[jj];
+ const int jtype = tjtype[jj];
+ const flt_t rsq1 = trsq[jj];
+
+ const flt_t r1 = sqrt(rsq1);
+ const flt_t rinvsq1 = (flt_t)1.0 / rsq1;
+ const flt_t rainv1 = (flt_t)1.0 / (r1 - p2fi[jtype].cut);
+
+ // two-body interactions, skip half of them
+ flt_t rp, rq;
+ if (SPQ == 1) {
+ rp = r1 * r1;
+ rp *= rp;
+ rp = (flt_t)1.0 / rp;
+ rq = (flt_t)1.0;
+ } else {
+ rp = pow(r1, -p2fi[jtype].powerp);
+ rq = pow(r1, -p2fi[jtype].powerq);
+ }
+ const flt_t rainvsq = rainv1 * rainv1 * r1;
+ flt_t expsrainv = exp(p2fi[jtype].sigma * rainv1);
+ if (jj >= ejnumhalf) expsrainv = (flt_t)0.0;
+ const flt_t fpair = (p2fi[jtype].c1 * rp - p2fi[jtype].c2 * rq +
+ (p2fi[jtype].c3 * rp -p2fi[jtype].c4 * rq) *
+ rainvsq) * expsrainv * rinvsq1;
+
+ fxtmp -= delx * fpair;
+ fytmp -= dely * fpair;
+ fztmp -= delz * fpair;
+ fjxtmp += delx * fpair;
+ fjytmp += dely * fpair;
+ fjztmp += delz * fpair;
+
+ if (EVFLAG) {
+ if (EFLAG) {
+ flt_t evdwl;
+ evdwl = (p2ei[jtype].c5 * rp - p2ei[jtype].c6 * rq) *
+ expsrainv;
+ sevdwl += evdwl;
+ if (eatom) {
+ fwtmp += (acc_t)0.5 * evdwl;
+ fjtmp += (acc_t)0.5 * evdwl;
+ }
+ }
+ IP_PRE_ev_tally_nbor(vflag, (flt_t)1.0, fpair,
+ -delx, -dely, -delz);
+ }
+
+ /*---------------------------------------------*/
+
+ flt_t gsrainv1 = p2i[jtype].sigma_gamma * rainv1;
+ flt_t gsrainvsq1 = gsrainv1 * rainv1 / r1;
+ flt_t expgsrainv1 = exp(gsrainv1);
+ const int joffset = jtype * ntypes;
+
+ for (int kk = 0; kk < ejnum; kk++) {
+ flt_t delr2[3];
+ delr2[0] = tdelx[kk];
+ delr2[1] = tdely[kk];
+ delr2[2] = tdelz[kk];
+ const int ktype = tjtype[kk];
+ const flt_t rsq2 = trsq[kk];
+
+ const flt_t r2 = sqrt(rsq2);
+ const flt_t rinvsq2 = (flt_t)1.0 / rsq2;
+ const flt_t rainv2 = (flt_t)1.0 / (r2 - p2i[ktype].cut);
+ const flt_t gsrainv2 = p2i[ktype].sigma_gamma * rainv2;
+ const flt_t gsrainvsq2 = gsrainv2 * rainv2 / r2;
+ const flt_t expgsrainv2 = exp(gsrainv2);
+
+ const flt_t rinv12 = (flt_t)1.0 / (r1 * r2);
+ const flt_t cs = (delx * delr2[0] + dely * delr2[1] +
+ delz * delr2[2]) * rinv12;
+ const flt_t delcs = cs - p3i[joffset + ktype].costheta;
+ const flt_t delcssq = delcs*delcs;
+
+ flt_t kfactor;
+ if (jj == kk) kfactor = (flt_t)0.0;
+ else kfactor = (flt_t)1.0;
+
+ const flt_t facexp = expgsrainv1*expgsrainv2*kfactor;
+ const flt_t facrad = p3i[joffset + ktype].lambda_epsilon *
+ facexp * delcssq;
+ const flt_t frad1 = facrad*gsrainvsq1;
+ const flt_t frad2 = facrad*gsrainvsq2;
+ const flt_t facang = p3i[joffset + ktype].lambda_epsilon2 *
+ facexp * delcs;
+ const flt_t facang12 = rinv12*facang;
+ const flt_t csfacang = cs*facang;
+ const flt_t csfac1 = rinvsq1*csfacang;
+
+ const flt_t fjx = delx*(frad1+csfac1)-delr2[0]*facang12;
+ const flt_t fjy = dely*(frad1+csfac1)-delr2[1]*facang12;
+ const flt_t fjz = delz*(frad1+csfac1)-delr2[2]*facang12;
+
+ fxtmp -= fjx;
+ fytmp -= fjy;
+ fztmp -= fjz;
+ fjxtmp += fjx;
+ fjytmp += fjy;
+ fjztmp += fjz;
+
+ if (EVFLAG) {
+ if (EFLAG) {
+ const flt_t evdwl = facrad * (flt_t)0.5;
+ sevdwl += evdwl;
+ if (eatom) {
+ fwtmp += (acc_t)0.33333333 * evdwl;
+ fjtmp += (acc_t)0.33333333 * facrad;
+ }
+ }
+ IP_PRE_ev_tally_nbor3v(vflag, fjx, fjy, fjz,
+ delx, dely, delz);
+ }
+ } // for kk
+ const int j = tj[jj];
+ f[j].x += fjxtmp;
+ f[j].y += fjytmp;
+ f[j].z += fjztmp;
+ if (EFLAG)
+ if (eatom) f[j].w += fjtmp;
+ } // for jj
+
+ f[i].x += fxtmp;
+ f[i].y += fytmp;
+ f[i].z += fztmp;
+ IP_PRE_ev_tally_atom(EVFLAG, EFLAG, vflag, f, fwtmp);
+ } // for ii
+ #if defined(_OPENMP)
+ #pragma omp barrier
+ #endif
+ IP_PRE_fdotr_acc_force(1, EVFLAG, EFLAG, vflag, eatom, nall,
+ nlocal, minlocal, nthreads, f_start, f_stride,
+ x);
+ } // end omp
+ if (EVFLAG) {
+ if (EFLAG) {
+ ev_global[0] = oevdwl;
+ ev_global[1] = (acc_t)0.0;
+ }
+ if (vflag) {
+ ev_global[2] = ov0;
+ ev_global[3] = ov1;
+ ev_global[4] = ov2;
+ ev_global[5] = ov3;
+ ev_global[6] = ov4;
+ ev_global[7] = ov5;
+ }
+ }
+ #ifdef __MIC__
+ *timer_compute = MIC_Wtime() - *timer_compute;
+ #endif
+ } // end offload
+ if (offload)
+ fix->stop_watch(TIME_OFFLOAD_LATENCY);
+ else
+ fix->stop_watch(TIME_HOST_PAIR);
+
+ if (EVFLAG)
+ fix->add_result_array(f_start, ev_global, offload, eatom);
+ else
+ fix->add_result_array(f_start, 0, offload);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PairSWIntel::allocate()
+{
+ PairSW::allocate();
+}
+
+/* ----------------------------------------------------------------------
+ init specific to this pair style
+------------------------------------------------------------------------- */
+
+void PairSWIntel::init_style()
+{
+ PairSW::init_style();
+ neighbor->requests[neighbor->nrequest-1]->intel = 1;
+ map[0] = map[1];
+
+ int ifix = modify->find_fix("package_intel");
+ if (ifix < 0)
+ error->all(FLERR,
+ "The 'package intel' command is required for /intel styles");
+ fix = static_cast(modify->fix[ifix]);
+
+ fix->pair_init_check();
+ #ifdef _LMP_INTEL_OFFLOAD
+ _cop = fix->coprocessor_number();
+ #endif
+
+ if (fix->precision() == FixIntel::PREC_MODE_MIXED) {
+ pack_force_const(force_const_single, fix->get_mixed_buffers());
+ fix->get_mixed_buffers()->need_tag(1);
+ } else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE) {
+ pack_force_const(force_const_double, fix->get_double_buffers());
+ fix->get_double_buffers()->need_tag(1);
+ } else {
+ pack_force_const(force_const_single, fix->get_single_buffers());
+ fix->get_single_buffers()->need_tag(1);
+ }
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (fix->offload_noghost())
+ error->all(FLERR,"The 'ghost no' option cannot be used with sw/intel.");
+ #endif
+
+ #if defined(__INTEL_COMPILER)
+ if (__INTEL_COMPILER_BUILD_DATE < 20141023)
+ error->all(FLERR, "Intel compiler versions before "
+ "15 Update 1 not supported for sw/intel");
+ #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairSWIntel::pack_force_const(ForceConst &fc,
+ IntelBuffers *buffers)
+{
+ int off_ccache = 0;
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (_cop >= 0) off_ccache = 1;
+ #endif
+ buffers->grow_ccache(off_ccache, comm->nthreads);
+ _ccache_stride = buffers->ccache_stride();
+
+ int tp1 = atom->ntypes + 1;
+ fc.set_ntypes(tp1,memory,_cop);
+ buffers->set_ntypes(tp1);
+ flt_t **cutneighsq = buffers->get_cutneighsq();
+
+ // Repeat cutsq calculation because done after call to init_style
+ double cut, cutneigh;
+ for (int i = 1; i <= atom->ntypes; i++) {
+ for (int j = i; j <= atom->ntypes; j++) {
+ if (setflag[i][j] != 0 || (setflag[i][i] != 0 && setflag[j][j] != 0)) {
+ cut = init_one(i,j);
+ cutneigh = cut + neighbor->skin;
+ cutsq[i][j] = cutsq[j][i] = cut*cut;
+ cutneighsq[i][j] = cutneighsq[j][i] = cutneigh * cutneigh;
+ }
+ }
+ }
+
+ _spq = 1;
+ for (int ii = 0; ii < tp1; ii++) {
+ int i = map[ii];
+ for (int jj = 0; jj < tp1; jj++) {
+ int j = map[jj];
+ if (i < 0 || j < 0 || ii == 0 || jj == 0) {
+ fc.p2[ii][jj].cutsq = 0;
+ fc.p2[ii][jj].cut = 0;
+ fc.p2[ii][jj].sigma_gamma = 0;
+ fc.p2f[ii][jj].cut = 0;
+ fc.p2f[ii][jj].powerp = 0;
+ fc.p2f[ii][jj].powerq = 0;
+ fc.p2f[ii][jj].sigma = 0;
+ fc.p2f[ii][jj].c1 = 0;
+ fc.p2f[ii][jj].c2 = 0;
+ fc.p2f[ii][jj].c3 = 0;
+ fc.p2f[ii][jj].c4 = 0;
+ fc.p2e[ii][jj].c5 = 0;
+ fc.p2e[ii][jj].c6 = 0;
+ } else {
+ int ijparam = elem2param[i][j][j];
+ fc.p2[ii][jj].cutsq = params[ijparam].cutsq;
+ fc.p2[ii][jj].cut = params[ijparam].cut;
+ fc.p2[ii][jj].sigma_gamma = params[ijparam].sigma_gamma;
+ fc.p2f[ii][jj].cut = params[ijparam].cut;
+ fc.p2f[ii][jj].powerp = params[ijparam].powerp;
+ fc.p2f[ii][jj].powerq = params[ijparam].powerq;
+ fc.p2f[ii][jj].sigma = params[ijparam].sigma;
+ fc.p2f[ii][jj].c1 = params[ijparam].c1;
+ fc.p2f[ii][jj].c2 = params[ijparam].c2;
+ fc.p2f[ii][jj].c3 = params[ijparam].c3;
+ fc.p2f[ii][jj].c4 = params[ijparam].c4;
+ fc.p2e[ii][jj].c5 = params[ijparam].c5;
+ fc.p2e[ii][jj].c6 = params[ijparam].c6;
+
+ double cutcut = params[ijparam].cut * params[ijparam].cut;
+ if (params[ijparam].cutsq >= cutcut)
+ fc.p2[ii][jj].cutsq *= 0.98;
+
+ if (params[ijparam].powerp != 4.0 || params[ijparam].powerq != 0.0)
+ _spq = 0;
+ }
+
+ for (int kk = 0; kk < tp1; kk++) {
+ int k = map[kk];
+ if (i < 0 || j < 0 || k < 0 || ii == 0 || jj == 0 || kk == 0) {
+ fc.p3[ii][jj][kk].costheta = 0;
+ fc.p3[ii][jj][kk].lambda_epsilon = 0;
+ fc.p3[ii][jj][kk].lambda_epsilon2 = 0;
+ } else {
+ int ijkparam = elem2param[i][j][k];
+ fc.p3[ii][jj][kk].costheta = params[ijkparam].costheta;
+ fc.p3[ii][jj][kk].lambda_epsilon = params[ijkparam].lambda_epsilon;
+ fc.p3[ii][jj][kk].lambda_epsilon2 = params[ijkparam].lambda_epsilon2;
+ }
+ }
+ }
+ }
+
+ _host_pad = 1;
+ _offload_pad = 1;
+
+ if (INTEL_NBOR_PAD > 1)
+ _host_pad = INTEL_NBOR_PAD * sizeof(float) / sizeof(flt_t);
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (_cop < 0) return;
+ FC_PACKED0_T *op2 = fc.p2[0];
+ FC_PACKED1_T *op2f = fc.p2f[0];
+ FC_PACKED2_T *op2e = fc.p2e[0];
+ FC_PACKED3_T *op3 = fc.p3[0][0];
+ flt_t * ocutneighsq = cutneighsq[0];
+ int tp1sq = tp1 * tp1;
+ int tp1cu = tp1sq * tp1;
+ if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL &&
+ ocutneighsq != NULL) {
+ #pragma offload_transfer target(mic:_cop) \
+ in(op2,op2f,op2e: length(tp1sq) alloc_if(0) free_if(0)) \
+ in(op3: length(tp1cu) alloc_if(0) free_if(0)) \
+ in(ocutneighsq: length(tp1sq))
+ }
+ #endif
+}
+
+/* ---------------------------------------------------------------------- */
+
+template
+void PairSWIntel::ForceConst::set_ntypes(const int ntypes,
+ Memory *memory,
+ const int cop) {
+ if (ntypes != _ntypes) {
+ if (_ntypes > 0) {
+ fc_packed0 *op2 = p2[0];
+ fc_packed1 *op2f = p2f[0];
+ fc_packed2 *op2e = p2e[0];
+ fc_packed3 *op3 = p3[0][0];
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL &&
+ _cop >= 0) {
+ #pragma offload_transfer target(mic:_cop) \
+ nocopy(op2, op2f, op2e, op3: alloc_if(0) free_if(1))
+ }
+ #endif
+
+ _memory->destroy(op2);
+ _memory->destroy(op2f);
+ _memory->destroy(op2e);
+ _memory->destroy(op3);
+ }
+ if (ntypes > 0) {
+ _cop = cop;
+ memory->create(p2,ntypes,ntypes,"fc.p2");
+ memory->create(p2f,ntypes,ntypes,"fc.p2f");
+ memory->create(p2e,ntypes,ntypes,"fc.p2e");
+ memory->create(p3,ntypes,ntypes,ntypes,"fc.p3");
+
+ #ifdef _LMP_INTEL_OFFLOAD
+ fc_packed0 *op2 = p2[0];
+ fc_packed1 *op2f = p2f[0];
+ fc_packed2 *op2e = p2e[0];
+ fc_packed3 *op3 = p3[0][0];
+ int tp1sq = ntypes * ntypes;
+ int tp1cu = tp1sq * ntypes;
+ if (op2 != NULL && op2f != NULL && op2e != NULL && op3 != NULL &&
+ cop >= 0) {
+ #pragma offload_transfer target(mic:cop) \
+ nocopy(op2,op2f,op2e: length(tp1sq) alloc_if(1) free_if(0)) \
+ nocopy(op3: length(tp1cu) alloc_if(1) free_if(0))
+ }
+ #endif
+ }
+ }
+ _ntypes = ntypes;
+ _memory = memory;
+}
diff --git a/src/USER-INTEL/pair_sw_intel.h b/src/USER-INTEL/pair_sw_intel.h
new file mode 100755
index 0000000000..5bd54c5f49
--- /dev/null
+++ b/src/USER-INTEL/pair_sw_intel.h
@@ -0,0 +1,111 @@
+/* -*- c++ -*- ----------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef PAIR_CLASS
+
+PairStyle(sw/intel,PairSWIntel)
+
+#else
+
+#ifndef LMP_PAIR_SW_INTEL_H
+#define LMP_PAIR_SW_INTEL_H
+
+#include "pair_sw.h"
+#include "fix_intel.h"
+
+namespace LAMMPS_NS {
+
+class PairSWIntel : public PairSW {
+ public:
+ PairSWIntel(class LAMMPS *);
+ virtual ~PairSWIntel();
+ virtual void compute(int, int);
+ virtual void init_style();
+
+ protected:
+ FixIntel *fix;
+ int _cop;
+ template class ForceConst;
+
+ virtual void allocate();
+
+ template
+ void compute(int eflag, int vflag, IntelBuffers *buffers,
+ const ForceConst &fc);
+ template
+ void eval(const int offload, const int vflag,
+ IntelBuffers * buffers, const ForceConst &fc,
+ const int astart, const int aend, const int pad_width);
+
+ template
+ void pack_force_const(ForceConst &fc,
+ IntelBuffers *buffers);
+
+ int _ccache_stride, _host_pad, _offload_pad, _spq;
+
+ // ----------------------------------------------------------------------
+
+ template
+ class ForceConst {
+ public:
+ typedef struct {
+ flt_t cutsq, cut, sigma_gamma, pad;
+ } fc_packed0;
+ typedef struct {
+ flt_t powerp, powerq, cut, sigma, c1, c2, c3, c4;
+ } fc_packed1;
+ typedef struct {
+ flt_t c5, c6;
+ } fc_packed2;
+ typedef struct {
+ flt_t costheta, lambda_epsilon, lambda_epsilon2, pad;
+ } fc_packed3;
+
+ fc_packed0 **p2;
+ fc_packed1 **p2f;
+ fc_packed2 **p2e;
+ fc_packed3 ***p3;
+
+ ForceConst() : _ntypes(0) {}
+ ~ForceConst() { set_ntypes(0, NULL, _cop); }
+
+ void set_ntypes(const int ntypes, Memory *memory, const int cop);
+
+ private:
+ int _ntypes, _cop;
+ Memory *_memory;
+ };
+ ForceConst force_const_single;
+ ForceConst force_const_double;
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: The 'package intel' command is required for /intel styles
+
+Self-explanatory.
+
+E: The 'ghost no' option cannot be used with sw/intel.
+
+Self-explanatory.
+
+E: Intel compiler versions before 15 Update 1 not supported for sw/intel.
+
+Self-explanatory.
+
+*/
From 925991ea8520d8954085363ba65a8ec383ef114c Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 21:46:19 +0000
Subject: [PATCH 09/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13174
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/accelerator_intel.h | 7 ++++++-
src/neighbor.cpp | 6 ++++--
2 files changed, 10 insertions(+), 3 deletions(-)
diff --git a/src/accelerator_intel.h b/src/accelerator_intel.h
index 2f0179eb4b..ad856e41e3 100644
--- a/src/accelerator_intel.h
+++ b/src/accelerator_intel.h
@@ -34,7 +34,7 @@
template
void bin_atoms(void *, int *);
-template
+ template
void hbni(const int, NeighList *, void *, const int, const int, void *,
const int offload_end = 0);
template
@@ -42,10 +42,14 @@ template
template
void hbnti(const int, NeighList *, void *, const int, const int, void *,
const int offload_end = 0);
+ template
+ void fbi(const int, NeighList *, void *, const int, const int, void *,
+ const int offload_end = 0);
void half_bin_no_newton_intel(class NeighList *);
void half_bin_newton_intel(class NeighList *);
void half_bin_newton_tri_intel(class NeighList *);
+ void full_bin_intel(class NeighList *);
#endif /* !LMP_INSIDE_NEIGHBOR_H */
@@ -58,6 +62,7 @@ template
void half_bin_no_newton_intel(class NeighList *) {}
void half_bin_newton_intel(class NeighList *) {}
void half_bin_newton_tri_intel(class NeighList *) {}
+ void full_bin_intel(class NeighList *) {}
#endif
diff --git a/src/neighbor.cpp b/src/neighbor.cpp
index db4cbad693..8567d74297 100644
--- a/src/neighbor.cpp
+++ b/src/neighbor.cpp
@@ -1187,8 +1187,10 @@ void Neighbor::choose_build(int index, NeighRequest *rq)
"Neighbor include group not allowed with ghost neighbors");
else pb = &Neighbor::full_nsq_ghost_omp;
} else if (style == BIN) {
- if (rq->ghost == 0) pb = &Neighbor::full_bin_omp;
- else if (includegroup)
+ if (rq->ghost == 0) {
+ if (rq->intel) pb = &Neighbor::full_bin_intel;
+ else pb = &Neighbor::full_bin_omp;
+ } else if (includegroup)
error->all(FLERR,
"Neighbor include group not allowed with ghost neighbors");
else pb = &Neighbor::full_bin_ghost_omp;
From 74ae72b74485aef662a43fbf2b1ea1a97f0a589c Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 21:47:09 +0000
Subject: [PATCH 10/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13175
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/pair_sw.html | 7 +++++++
doc/pair_sw.txt | 6 ++++++
2 files changed, 13 insertions(+)
diff --git a/doc/pair_sw.html b/doc/pair_sw.html
index 040b4166ea..70611a5328 100644
--- a/doc/pair_sw.html
+++ b/doc/pair_sw.html
@@ -15,6 +15,8 @@
pair_style sw/gpu command
+pair_style sw/intel command
+
pair_style sw/omp command
Syntax:
@@ -164,6 +166,11 @@ by including their suffix, or you can use the suffix command in your input script.
+When using the USER-INTEL package with this style, there is an
+additional 5 to 10 percent performance improvement when the
+Stillinger-Weber parameters p and q are set to 4 and 0 respectively.
+These parameters are common for modeling silicon and water.
+
See Section_accelerate of the manual for
more instructions on how to use the accelerated styles effectively.
diff --git a/doc/pair_sw.txt b/doc/pair_sw.txt
index 40f86682d3..b2f9f8a5f2 100644
--- a/doc/pair_sw.txt
+++ b/doc/pair_sw.txt
@@ -9,6 +9,7 @@
pair_style sw command :h3
pair_style sw/cuda command :h3
pair_style sw/gpu command :h3
+pair_style sw/intel command :h3
pair_style sw/omp command :h3
[Syntax:]
@@ -158,6 +159,11 @@ by including their suffix, or you can use the "-suffix command-line
switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can
use the "suffix"_suffix.html command in your input script.
+When using the USER-INTEL package with this style, there is an
+additional 5 to 10 percent performance improvement when the
+Stillinger-Weber parameters p and q are set to 4 and 0 respectively.
+These parameters are common for modeling silicon and water.
+
See "Section_accelerate"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
From 4b2a1ce5e63be9d0f06a9c01d513ff3520e27d7e Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 21:47:29 +0000
Subject: [PATCH 11/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13176
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Section_commands.html | 2 +-
doc/Section_commands.txt | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/Section_commands.html b/doc/Section_commands.html
index c54f1e8e49..922cd274df 100644
--- a/doc/Section_commands.html
+++ b/doc/Section_commands.html
@@ -499,7 +499,7 @@ KOKKOS, o = USER-OMP, t = OPT.
| nm/cut (o) | nm/cut/coul/cut (o) | nm/cut/coul/long (o) | peri/eps |
| peri/lps (o) | peri/pmb (o) | peri/ves | reax |
| rebo (o) | resquared (go) | snap | soft (go) |
-| sw (cgo) | table (gko) | tersoff (co) | tersoff/mod (o) |
+| sw (cgio) | table (gko) | tersoff (co) | tersoff/mod (o) |
| tersoff/zbl (o) | tip4p/cut (o) | tip4p/long (o) | tri/lj (o) |
| yukawa (go) | yukawa/colloid (go) | zbl (o)
|
diff --git a/doc/Section_commands.txt b/doc/Section_commands.txt
index 3138cf9789..c2bfa0807e 100644
--- a/doc/Section_commands.txt
+++ b/doc/Section_commands.txt
@@ -813,7 +813,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"resquared (go)"_pair_resquared.html,
"snap"_pair_snap.html,
"soft (go)"_pair_soft.html,
-"sw (cgo)"_pair_sw.html,
+"sw (cgio)"_pair_sw.html,
"table (gko)"_pair_table.html,
"tersoff (co)"_pair_tersoff.html,
"tersoff/mod (o)"_pair_tersoff_mod.html,
From 68a85d9002eefafdbfeadbdd089fe80c852bf236 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 21:51:42 +0000
Subject: [PATCH 12/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13177
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/version.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/version.h b/src/version.h
index b648b07436..b5941868c6 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "24 Feb 2015"
+#define LAMMPS_VERSION "4 Mar 2015"
From 6960d9742515220bb8c30bd77449ca34a6ab9b1b Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 21:51:44 +0000
Subject: [PATCH 13/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13178
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Manual.html | 4 ++--
doc/Manual.txt | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/doc/Manual.html b/doc/Manual.html
index 088fb6683e..f91a89ad09 100644
--- a/doc/Manual.html
+++ b/doc/Manual.html
@@ -1,7 +1,7 @@
LAMMPS Users Manual
-
+
@@ -22,7 +22,7 @@
LAMMPS Documentation
-24 Feb 2015 version
+4 Mar 2015 version
Version info:
diff --git a/doc/Manual.txt b/doc/Manual.txt
index f5a86a8ba5..f62365d561 100644
--- a/doc/Manual.txt
+++ b/doc/Manual.txt
@@ -1,6 +1,6 @@
LAMMPS Users Manual
-
+
@@ -18,7 +18,7 @@
LAMMPS Documentation :c,h3
-24 Feb 2015 version :c,h4
+4 Mar 2015 version :c,h4
Version info: :h4
From 45feeb1dcbe1c5e7d258d860718f3cdbc059bbd6 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:03:59 +0000
Subject: [PATCH 14/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13180
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/RIGID/fix_shake.cpp | 37 +++++++++++++++++++++++++++++++++++++
src/RIGID/fix_shake.h | 5 ++++-
src/fix_respa.h | 1 -
3 files changed, 41 insertions(+), 2 deletions(-)
diff --git a/src/RIGID/fix_shake.cpp b/src/RIGID/fix_shake.cpp
index 0dd6c85d5f..bf41b17a5b 100644
--- a/src/RIGID/fix_shake.cpp
+++ b/src/RIGID/fix_shake.cpp
@@ -205,6 +205,7 @@ FixShake::FixShake(LAMMPS *lmp, int narg, char **arg) :
// SHAKE vs RATTLE
rattle = 0;
+ vflag_post_force = 0;
// identify all SHAKE clusters
@@ -432,12 +433,14 @@ void FixShake::setup(int vflag)
// half timestep constraint on pre-step, full timestep thereafter
if (strstr(update->integrate_style,"verlet")) {
+ respa = 0;
dtv = update->dt;
dtfsq = 0.5 * update->dt * update->dt * force->ftm2v;
FixShake::post_force(vflag);
if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v;
} else {
+ respa = 1;
dtv = step_respa[0];
dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v;
dtf_inner = dtf_innerhalf;
@@ -564,6 +567,10 @@ void FixShake::post_force(int vflag)
else if (shake_flag[m] == 4) shake4(m);
else shake3angle(m);
}
+
+ // store vflag for coordinate_constraints_end_of_step()
+
+ vflag_post_force = vflag;
}
/* ----------------------------------------------------------------------
@@ -608,6 +615,10 @@ void FixShake::post_force_respa(int vflag, int ilevel, int iloop)
else if (shake_flag[m] == 4) shake4(m);
else shake3angle(m);
}
+
+ // store vflag for coordinate_constraints_end_of_step()
+
+ vflag_post_force = vflag;
}
/* ----------------------------------------------------------------------
@@ -2665,3 +2676,29 @@ void *FixShake::extract(const char *str, int &dim)
}
return NULL;
}
+
+
+
+/* ----------------------------------------------------------------------
+ wrapper method for end_of_step fixes which modify the coordinates
+------------------------------------------------------------------------- */
+
+void FixShake::coordinate_constraints_end_of_step() {
+ if (!respa) {
+ dtfsq = 0.5 * update->dt * update->dt * force->ftm2v;
+ FixShake::post_force(vflag_post_force);
+ if (!rattle) dtfsq = update->dt * update->dt * force->ftm2v;
+ }
+ else {
+ dtf_innerhalf = 0.5 * step_respa[0] * force->ftm2v;
+ dtf_inner = dtf_innerhalf;
+ // apply correction to all rRESPA levels
+ for (int ilevel = 0; ilevel < nlevels_respa; ilevel++) {
+ ((Respa *) update->integrate)->copy_flevel_f(ilevel);
+ FixShake::post_force_respa(vflag_post_force,ilevel,loop_respa[ilevel]-1);
+ ((Respa *) update->integrate)->copy_f_flevel(ilevel);
+ }
+ if (!rattle) dtf_inner = step_respa[0] * force->ftm2v;
+ }
+}
+
diff --git a/src/RIGID/fix_shake.h b/src/RIGID/fix_shake.h
index 74e09d12d8..049971ca4c 100644
--- a/src/RIGID/fix_shake.h
+++ b/src/RIGID/fix_shake.h
@@ -46,19 +46,22 @@ class FixShake : public Fix {
virtual int unpack_exchange(int, double *);
virtual int pack_forward_comm(int, int *, double *, int, int *);
virtual void unpack_forward_comm(int, int, double *);
+ virtual void coordinate_constraints_end_of_step();
int dof(int);
virtual void reset_dt();
void *extract(const char *, int &);
protected:
+ int vflag_post_force; // store the vflag of last post_force call
+ int respa; // 0 = vel. Verlet, 1 = respa
int me,nprocs;
int rattle; // 0 = SHAKE, 1 = RATTLE
double tolerance; // SHAKE tolerance
int max_iter; // max # of SHAKE iterations
int output_every; // SHAKE stat output every so often
bigint next_output; // timestep for next output
-
+
// settings from input command
int *bond_flag,*angle_flag; // bond/angle types to constrain
int *type_flag; // constrain bonds to these types
diff --git a/src/fix_respa.h b/src/fix_respa.h
index d65587cdc9..7c7bd53a61 100644
--- a/src/fix_respa.h
+++ b/src/fix_respa.h
@@ -50,7 +50,6 @@ class FixRespa : public Fix {
#endif
#endif
-
/* ERROR/WARNING messages:
*/
From 8d52f22d670ac507dafa171d8a8604bf1698a4fb Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:04:46 +0000
Subject: [PATCH 15/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13181
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/RIGID/fix_rattle.cpp | 883 +++++++++++++++++++++++++++++++++++++++
src/RIGID/fix_rattle.h | 74 ++++
2 files changed, 957 insertions(+)
create mode 100644 src/RIGID/fix_rattle.cpp
create mode 100644 src/RIGID/fix_rattle.h
diff --git a/src/RIGID/fix_rattle.cpp b/src/RIGID/fix_rattle.cpp
new file mode 100644
index 0000000000..6a693c6bc3
--- /dev/null
+++ b/src/RIGID/fix_rattle.cpp
@@ -0,0 +1,883 @@
+/* ----------------------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+ Contributing author: Peter Wirnsberger (University of Cambridge)
+------------------------------------------------------------------------- */
+
+#include "mpi.h"
+#include "math.h"
+#include "stdlib.h"
+#include "string.h"
+#include "stdio.h"
+#include "fix_rattle.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "molecule.h"
+#include "update.h"
+#include "respa.h"
+#include "modify.h"
+#include "domain.h"
+#include "force.h"
+#include "bond.h"
+#include "angle.h"
+#include "comm.h"
+#include "group.h"
+#include "fix_respa.h"
+#include "math_const.h"
+#include "math_extra.h"
+#include "memory.h"
+#include "error.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+using namespace MathConst;
+using namespace MathExtra;
+
+// set RATTLE_DEBUG = 1 to check constraints at end of timestep
+
+#define RATTLE_DEBUG 0
+#define RATTLE_TEST_VEL true
+#define RATTLE_TEST_POS true
+
+enum{V,VP,XSHAKE,X};
+
+/* ---------------------------------------------------------------------- */
+
+FixRattle::FixRattle(LAMMPS *lmp, int narg, char **arg) :
+ FixShake(lmp, narg, arg)
+{
+ rattle = 1;
+
+ // define timestep for velocity integration
+
+ dtfv = 0.5 * update->dt * force->ftm2v;
+
+ // allocate memory for unconstrained velocity update
+
+ vp = NULL;
+ grow_arrays(atom->nmax);
+
+ // default communication mode
+ // necessary for compatibility with SHAKE
+ // see pack_forward and unpack_forward
+
+ comm_mode = XSHAKE;
+ vflag_post_force = 0;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixRattle::~FixRattle()
+{
+ memory->destroy(vp);
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixRattle::setmask()
+{
+ int mask = 0;
+ mask |= PRE_NEIGHBOR;
+ mask |= POST_FORCE;
+ mask |= POST_FORCE_RESPA;
+ mask |= FINAL_INTEGRATE;
+ mask |= FINAL_INTEGRATE_RESPA;
+ if (RATTLE_DEBUG) mask |= END_OF_STEP;
+ return mask;
+}
+
+/* ----------------------------------------------------------------------
+ initialize RATTLE and check that this is the last final_integrate fix
+------------------------------------------------------------------------- */
+
+void FixRattle::init() {
+
+ // initialise SHAKE first
+
+ FixShake::init();
+
+ // show a warning if any final-integrate fix comes after this one
+
+ int after = 0;
+ int flag = 0;
+ for (int i = 0; i < modify->nfix; i++) {
+ if (strcmp(id,modify->fix[i]->id) == 0) after = 1;
+ else if ((modify->fmask[i] & FINAL_INTEGRATE) && after) flag = 1;
+ }
+
+ if (flag && comm->me == 0)
+ error->warning(FLERR,
+ "Fix rattle should come after all other integration fixes");
+}
+
+/* ----------------------------------------------------------------------
+ This method carries out an unconstrained velocity update first and
+ then applies the velocity corrections directly (v and vp are modified).
+------------------------------------------------------------------------- */
+
+void FixRattle::post_force(int vflag)
+{
+ // remember vflag for the coordinate correction in this->final_integrate
+
+ vflag_post_force = vflag;
+
+ // unconstrained velocity update by half a timestep
+ // similar to FixShake::unconstrained_update()
+
+ update_v_half_nocons();
+
+ // communicate the unconstrained velocities
+
+ if (nprocs > 1) {
+ comm_mode = VP;
+ comm->forward_comm_fix(this);
+ }
+
+ // correct the velocity for each molecule accordingly
+
+ int m;
+ for (int i = 0; i < nlist; i++) {
+ m = list[i];
+ if (shake_flag[m] == 2) vrattle2(m);
+ else if (shake_flag[m] == 3) vrattle3(m);
+ else if (shake_flag[m] == 4) vrattle4(m);
+ else vrattle3angle(m);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::post_force_respa(int vflag, int ilevel, int iloop)
+{
+ // remember vflag for the coordinate correction in this->final_integrate
+
+ vflag_post_force = vflag;
+
+ // unconstrained velocity update by half a timestep
+ // similar to FixShake::unconstrained_update()
+
+ update_v_half_nocons_respa(ilevel);
+
+ // communicate the unconstrained velocities
+
+ if (nprocs > 1) {
+ comm_mode = VP;
+ comm->forward_comm_fix(this);
+ }
+
+ // correct the velocity for each molecule accordingly
+
+ int m;
+ for (int i = 0; i < nlist; i++) {
+ m = list[i];
+ if (shake_flag[m] == 2) vrattle2(m);
+ else if (shake_flag[m] == 3) vrattle3(m);
+ else if (shake_flag[m] == 4) vrattle4(m);
+ else vrattle3angle(m);
+ }
+}
+
+/* ----------------------------------------------------------------------
+ let SHAKE calculate the constraining forces for the coordinates
+------------------------------------------------------------------------- */
+
+void FixRattle::final_integrate()
+{
+ comm_mode = XSHAKE;
+ FixShake::post_force(vflag_post_force);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::final_integrate_respa(int ilevel, int iloop)
+{
+ comm_mode = XSHAKE;
+ FixShake::post_force_respa(vflag_post_force, ilevel, iloop);
+}
+
+/* ----------------------------------------------------------------------
+ correct velocities of molecule m with 2 constraints bonds and 1 angle
+------------------------------------------------------------------------- */
+
+void FixRattle::vrattle3angle(int m)
+{
+ tagint i0,i1,i2;
+ int nlist,list[3];
+ double c[3], l[3], a[3][3], r01[3], imass[3],
+ r02[3], r12[3], vp01[3], vp02[3], vp12[3];
+
+ // local atom IDs and constraint distances
+ i0 = atom->map(shake_atom[m][0]);
+ i1 = atom->map(shake_atom[m][1]);
+ i2 = atom->map(shake_atom[m][2]);
+
+ // r01,r02,r12 = distance vec between atoms
+ MathExtra::sub3(x[i1],x[i0],r01);
+ MathExtra::sub3(x[i2],x[i0],r02);
+ MathExtra::sub3(x[i2],x[i1],r12);
+
+ // take into account periodicity
+ domain->minimum_image(r01);
+ domain->minimum_image(r02);
+ domain->minimum_image(r12);
+
+ // v01,v02,v12 = velocity differences
+ MathExtra::sub3(vp[i1],vp[i0],vp01);
+ MathExtra::sub3(vp[i2],vp[i0],vp02);
+ MathExtra::sub3(vp[i2],vp[i1],vp12);
+
+ // matrix coeffs and rhs for lamda equations
+ if (rmass) {
+ imass[0] = 1.0/rmass[i0];
+ imass[1] = 1.0/rmass[i1];
+ imass[2] = 1.0/rmass[i2];
+ } else {
+ imass[0] = 1.0/mass[type[i0]];
+ imass[1] = 1.0/mass[type[i1]];
+ imass[2] = 1.0/mass[type[i2]];
+ }
+
+ // setup matrix
+ a[0][0] = (imass[1] + imass[0]) * MathExtra::dot3(r01,r01);
+ a[0][1] = (imass[0] ) * MathExtra::dot3(r01,r02);
+ a[0][2] = (-imass[1] ) * MathExtra::dot3(r01,r12);
+ a[1][0] = a[0][1];
+ a[1][1] = (imass[0] + imass[2]) * MathExtra::dot3(r02,r02);
+ a[1][2] = (imass[2] ) * MathExtra::dot3(r02,r12);
+ a[2][0] = a[0][2];
+ a[2][1] = a[1][2];
+ a[2][2] = (imass[2] + imass[1]) * MathExtra::dot3(r12,r12);
+
+ // sestup RHS
+ c[0] = -MathExtra::dot3(vp01,r01);
+ c[1] = -MathExtra::dot3(vp02,r02);
+ c[2] = -MathExtra::dot3(vp12,r12);
+
+ // calculate the inverse matrix exactly
+ solve3x3exactly(a,c,l);
+
+ // add corrections to the velocities if processor owns atom
+ if (i0 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i0][k] -= imass[0]* ( l[0] * r01[k] + l[1] * r02[k] );
+ }
+ if (i1 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i1][k] -= imass[1] * ( -l[0] * r01[k] + l[2] * r12[k] );
+ }
+ if (i2 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i2][k] -= imass[2] * ( -l[1] * r02[k] - l[2] * r12[k] );
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::vrattle2(int m)
+{
+ tagint i0, i1;
+ int nlist,list[2];
+ double imass[2], r01[3], vp01[3];
+
+ // local atom IDs and constraint distances
+ i0 = atom->map(shake_atom[m][0]);
+ i1 = atom->map(shake_atom[m][1]);
+
+ // r01 = distance vec between atoms, with PBC
+ MathExtra::sub3(x[i1],x[i0],r01);
+ domain->minimum_image(r01);
+
+ // v01 = distance vectors for velocities
+ MathExtra::sub3(vp[i1],vp[i0],vp01);
+
+ // matrix coeffs and rhs for lamda equations
+ if (rmass) {
+ imass[0] = 1.0/rmass[i0];
+ imass[1] = 1.0/rmass[i1];
+ } else {
+ imass[0] = 1.0/mass[type[i0]];
+ imass[1] = 1.0/mass[type[i1]];
+ }
+
+ // Lagrange multiplier: exact solution
+ double l01 = - MathExtra::dot3(r01,vp01) /
+ (MathExtra::dot3(r01,r01) * (imass[0] + imass[1]));
+
+ // add corrections to the velocities if the process owns this atom
+ if (i0 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i0][k] -= imass[0] * l01 * r01[k];
+ }
+ if (i1 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i1][k] -= imass[1] * (-l01) * r01[k];
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::vrattle3(int m)
+{
+ tagint i0,i1,i2;
+ int nlist,list[3];
+ double imass[3], r01[3], r02[3], vp01[3], vp02[3],
+ a[2][2],c[2],l[2];
+
+ // local atom IDs and constraint distances
+ i0 = atom->map(shake_atom[m][0]);
+ i1 = atom->map(shake_atom[m][1]);
+ i2 = atom->map(shake_atom[m][2]);
+
+ // r01,r02 = distance vec between atoms, with PBC
+ MathExtra::sub3(x[i1],x[i0],r01);
+ MathExtra::sub3(x[i2],x[i0],r02);
+
+ domain->minimum_image(r01);
+ domain->minimum_image(r02);
+
+ // vp01,vp02 = distance vectors between velocities
+ MathExtra::sub3(vp[i1],vp[i0],vp01);
+ MathExtra::sub3(vp[i2],vp[i0],vp02);
+
+ if (rmass) {
+ imass[0] = 1.0/rmass[i0];
+ imass[1] = 1.0/rmass[i1];
+ imass[2] = 1.0/rmass[i2];
+ } else {
+ imass[0] = 1.0/mass[type[i0]];
+ imass[1] = 1.0/mass[type[i1]];
+ imass[2] = 1.0/mass[type[i2]];
+ }
+
+ // setup matrix
+ a[0][0] = (imass[1] + imass[0]) * MathExtra::dot3(r01,r01);
+ a[0][1] = (imass[0] ) * MathExtra::dot3(r01,r02);
+ a[1][0] = a[0][1];
+ a[1][1] = (imass[0] + imass[2]) * MathExtra::dot3(r02,r02);
+
+ // setup RHS
+ c[0] = - MathExtra::dot3(vp01,r01);
+ c[1] = - MathExtra::dot3(vp02,r02);
+
+ // calculate the inverse 2x2 matrix exactly
+ solve2x2exactly(a,c,l);
+
+ // add corrections to the velocities if the process owns this atom
+ if (i0 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i0][k] -= imass[0] * ( l[0] * r01[k] + l[1] * r02[k] );
+ }
+ if (i1 < nlocal)
+ for (int k=0; k<3; k++) {
+ v[i1][k] -= imass[1] * ( -l[0] * r01[k] );
+ }
+ if (i2 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i2][k] -= imass[2] * ( -l[1] * r02[k] );
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::vrattle4(int m)
+{
+ tagint i0,i1,i2,i3;
+ int nlist,list[4];
+ double imass[4], c[3], l[3], a[3][3],
+ r01[3], r02[3], r03[3], vp01[3], vp02[3], vp03[3];
+
+ // local atom IDs and constraint distances
+ i0 = atom->map(shake_atom[m][0]);
+ i1 = atom->map(shake_atom[m][1]);
+ i2 = atom->map(shake_atom[m][2]);
+ i3 = atom->map(shake_atom[m][3]);
+
+ // r01,r02,r12 = distance vec between atoms, with PBC
+ MathExtra::sub3(x[i1],x[i0],r01);
+ MathExtra::sub3(x[i2],x[i0],r02);
+ MathExtra::sub3(x[i3],x[i0],r03);
+
+ domain->minimum_image(r01);
+ domain->minimum_image(r02);
+ domain->minimum_image(r03);
+
+ // vp01,vp02,vp03 = distance vectors between velocities
+ MathExtra::sub3(vp[i1],vp[i0],vp01);
+ MathExtra::sub3(vp[i2],vp[i0],vp02);
+ MathExtra::sub3(vp[i3],vp[i0],vp03);
+
+ // matrix coeffs and rhs for lamda equations
+ if (rmass) {
+ imass[0] = 1.0/rmass[i0];
+ imass[1] = 1.0/rmass[i1];
+ imass[2] = 1.0/rmass[i2];
+ imass[3] = 1.0/rmass[i3];
+ } else {
+ imass[0] = 1.0/mass[type[i0]];
+ imass[1] = 1.0/mass[type[i1]];
+ imass[2] = 1.0/mass[type[i2]];
+ imass[3] = 1.0/mass[type[i3]];
+ }
+
+ // setup matrix
+ a[0][0] = (imass[0] + imass[1]) * MathExtra::dot3(r01,r01);
+ a[0][1] = (imass[0] ) * MathExtra::dot3(r01,r02);
+ a[0][2] = (imass[0] ) * MathExtra::dot3(r01,r03);
+ a[1][0] = a[0][1];
+ a[1][1] = (imass[0] + imass[2]) * MathExtra::dot3(r02,r02);
+ a[1][2] = (imass[0] ) * MathExtra::dot3(r02,r03);
+ a[2][0] = a[0][2];
+ a[2][1] = a[1][2];
+ a[2][2] = (imass[0] + imass[3]) * MathExtra::dot3(r03,r03);
+
+ // setup RHS
+ c[0] = - MathExtra::dot3(vp01,r01);
+ c[1] = - MathExtra::dot3(vp02,r02);
+ c[2] = - MathExtra::dot3(vp03,r03);
+
+ // calculate the inverse 3x3 matrix exactly
+ solve3x3exactly(a,c,l);
+
+ // add corrections to the velocities if the process owns this atom
+ if (i0 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i0][k] -= imass[0] * ( l[0] * r01[k] + l[1] * r02[k] + l[2] * r03[k]);
+ }
+ if (i1 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i1][k] -= imass[1] * (-l[0] * r01[k]);
+ }
+ if (i2 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i2][k] -= imass[2] * ( -l[1] * r02[k]);
+ }
+ if (i3 < nlocal) {
+ for (int k=0; k<3; k++)
+ v[i3][k] -= imass[3] * ( -l[2] * r03[k]);
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::solve2x2exactly(const double a[][2],
+ const double c[], double l[])
+{
+ double determ, determinv;
+
+ // calculate the determinant of the matrix
+ determ = a[0][0] * a[1][1] - a[0][1] * a[1][0];
+
+ // check if matrix is actually invertible
+ if (determ == 0.0) error->one(FLERR,"RATTLE determinant = 0.0");
+ determinv = 1.0/determ;
+
+ // Calcualte the solution: (l01, l02)^T = A^(-1) * c
+ l[0] = determinv * ( a[1][1] * c[0] - a[0][1] * c[1]);
+ l[1] = determinv * (-a[1][0] * c[0] + a[0][0] * c[1]);
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::solve3x3exactly(const double a[][3],
+ const double c[], double l[])
+{
+ double ai[3][3];
+ double determ, determinv;
+
+ // calculate the determinant of the matrix
+ determ = a[0][0]*a[1][1]*a[2][2] + a[0][1]*a[1][2]*a[2][0] +
+ a[0][2]*a[1][0]*a[2][1] - a[0][0]*a[1][2]*a[2][1] -
+ a[0][1]*a[1][0]*a[2][2] - a[0][2]*a[1][1]*a[2][0];
+
+ // check if matrix is actually invertible
+ if (determ == 0.0) error->one(FLERR,"Rattle determinant = 0.0");
+
+ // calculate the inverse 3x3 matrix: A^(-1) = (ai_jk)
+ determinv = 1.0/determ;
+ ai[0][0] = determinv * (a[1][1]*a[2][2] - a[1][2]*a[2][1]);
+ ai[0][1] = -determinv * (a[0][1]*a[2][2] - a[0][2]*a[2][1]);
+ ai[0][2] = determinv * (a[0][1]*a[1][2] - a[0][2]*a[1][1]);
+ ai[1][0] = -determinv * (a[1][0]*a[2][2] - a[1][2]*a[2][0]);
+ ai[1][1] = determinv * (a[0][0]*a[2][2] - a[0][2]*a[2][0]);
+ ai[1][2] = -determinv * (a[0][0]*a[1][2] - a[0][2]*a[1][0]);
+ ai[2][0] = determinv * (a[1][0]*a[2][1] - a[1][1]*a[2][0]);
+ ai[2][1] = -determinv * (a[0][0]*a[2][1] - a[0][1]*a[2][0]);
+ ai[2][2] = determinv * (a[0][0]*a[1][1] - a[0][1]*a[1][0]);
+
+ // calculate the solution: (l01, l02, l12)^T = A^(-1) * c
+ for (int i=0; i<3; i++) {
+ l[i] = 0;
+ for (int j=0; j<3; j++)
+ l[i] += ai[i][j] * c[j];
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::reset_dt()
+{
+ FixShake::reset_dt();
+ dtfv = 0.5 * update->dt * force->ftm2v;
+}
+
+/* ----------------------------------------------------------------------
+ carry out an unconstrained velocity update (vp is modified)
+------------------------------------------------------------------------- */
+
+void FixRattle::update_v_half_nocons()
+{
+ double dtfvinvm;
+ if (rmass) {
+ for (int i = 0; i < nlocal; i++) {
+ if (shake_flag[i]) {
+ dtfvinvm = dtfv / rmass[i];
+ for (int k=0; k<3; k++)
+ vp[i][k] = v[i][k] + dtfvinvm * f[i][k];
+ }
+ else
+ vp[i][0] = vp[i][1] = vp[i][2] = 0;
+ }
+ }
+ else {
+ for (int i = 0; i < nlocal; i++) {
+ dtfvinvm = dtfv/mass[type[i]];
+ if (shake_flag[i]) {
+ for (int k=0; k<3; k++)
+ vp[i][k] = v[i][k] + dtfvinvm * f[i][k];
+ }
+ else
+ vp[i][0] = vp[i][1] = vp[i][2] = 0;
+ }
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::update_v_half_nocons_respa(int ilevel)
+{
+ // select timestep for current level
+ dtfv = 0.5 * step_respa[ilevel] * force->ftm2v;
+
+ // carry out unconstrained velocity update
+ update_v_half_nocons();
+}
+
+/* ----------------------------------------------------------------------
+ memory usage of local atom-based arrays
+------------------------------------------------------------------------- */
+
+double FixRattle::memory_usage()
+{
+ int nmax = atom->nmax;
+ double bytes = FixShake::memory_usage();
+ bytes += nmax*3 * sizeof(double);
+ return bytes;
+}
+
+/* ----------------------------------------------------------------------
+ allocate local atom-based arrays
+------------------------------------------------------------------------- */
+
+void FixRattle::grow_arrays(int nmax)
+{
+ FixShake::grow_arrays(nmax);
+ memory->destroy(vp);
+ memory->create(vp,nmax,3,"rattle:vp");
+}
+
+/* ---------------------------------------------------------------------- */
+
+int FixRattle::pack_forward_comm(int n, int *list, double *buf,
+ int pbc_flag, int *pbc)
+{
+ int i,j,m;
+ m = 0;
+
+ switch (comm_mode) {
+ case XSHAKE:
+ m = FixShake::pack_forward_comm(n, list, buf, pbc_flag, pbc);
+ break;
+ case VP:
+ for (i = 0; i < n; i++) {
+ j = list[i];
+ buf[m++] = vp[j][0];
+ buf[m++] = vp[j][1];
+ buf[m++] = vp[j][2];
+ }
+ break;
+
+ case V:
+ for (i = 0; i < n; i++) {
+ j = list[i];
+ buf[m++] = v[j][0];
+ buf[m++] = v[j][1];
+ buf[m++] = v[j][2];
+ }
+ break;
+
+ case X:
+ for (i = 0; i < n; i++) {
+ j = list[i];
+ buf[m++] = x[j][0];
+ buf[m++] = x[j][1];
+ buf[m++] = x[j][2];
+ }
+ break;
+ }
+ return m;
+}
+
+/* ---------------------------------------------------------------------- */
+
+void FixRattle::unpack_forward_comm(int n, int first, double *buf)
+{
+ int i, m, last;
+ m = 0;
+ last = first + n;
+
+ switch (comm_mode) {
+ case XSHAKE:
+ FixShake::unpack_forward_comm(n, first,buf);
+ break;
+
+ case VP:
+ for (i = first; i < last; i++) {
+ vp[i][0] = buf[m++];
+ vp[i][1] = buf[m++];
+ vp[i][2] = buf[m++];
+ }
+ break;
+
+ case V:
+ for (i = first; i < last; i++) {
+ v[i][0] = buf[m++];
+ v[i][1] = buf[m++];
+ v[i][2] = buf[m++];
+ }
+ break;
+
+ case X:
+ for (i = first; i < last; i++) {
+ x[i][0] = buf[m++];
+ x[i][1] = buf[m++];
+ x[i][2] = buf[m++];
+ }
+ break;
+ }
+}
+
+
+/* ----------------------------------------------------------------------
+ wrapper method for end_of_step fixes which modify the coordinates
+------------------------------------------------------------------------- */
+
+void FixRattle::coordinate_constraints_end_of_step() {
+ comm_mode = XSHAKE;
+ FixShake::coordinate_constraints_end_of_step();
+}
+
+
+/* ----------------------------------------------------------------------
+ DEBUGGING methods
+ The functions below allow you to check whether the
+ coordinate and velocity constraints are satisfied at the
+ end of the timestep
+ only enabled if RATTLE_DEBUG is set to 1 at top of file
+ checkX tests if shakeX and vrattleX worked as expected
+------------------------------------------------------------------------- */
+
+void FixRattle::end_of_step()
+{
+ // communicate velocities and coordinates (x and v)
+ if (nprocs > 1) {
+ comm_mode = V;
+ comm->forward_comm_fix(this);
+ comm_mode = X;
+ comm->forward_comm_fix(this);
+ }
+ if (!check_constraints(v, RATTLE_TEST_POS, RATTLE_TEST_VEL)) {
+ error->one(FLERR, "RATTLE failed!");
+ }
+}
+
+/* ---------------------------------------------------------------------- */
+
+bool FixRattle::check_constraints(double **v, bool checkr, bool checkv)
+{
+ int m;
+ bool ret = true;
+ int i=0;
+ while (i < nlist && ret) {
+ m = list[i];
+ if (shake_flag[m] == 2) ret = check2(v,m,checkr,checkv);
+ else if (shake_flag[m] == 3) ret = check3(v,m,checkr,checkv);
+ else if (shake_flag[m] == 4) ret = check4(v,m,checkr,checkv);
+ else ret = check3angle(v,m,checkr,checkv);
+ i++;
+ }
+ return ret;
+}
+
+/* ---------------------------------------------------------------------- */
+
+bool FixRattle::check2(double **v, int m, bool checkr, bool checkv)
+{
+ bool stat;
+ double r01[3],v01[3];
+ const double tol = tolerance;
+ double bond1 = bond_distance[shake_type[m][0]];
+
+ tagint i0 = atom->map(shake_atom[m][0]);
+ tagint i1 = atom->map(shake_atom[m][1]);
+
+ MathExtra::sub3(x[i1],x[i0],r01);
+ domain->minimum_image(r01);
+ MathExtra::sub3(v[i1],v[i0],v01);
+
+ stat = !(checkr && (fabs(sqrt(MathExtra::dot3(r01,r01)) - bond1) > tol));
+ if (!stat)
+ error->one(FLERR,"Coordinate constraints are not satisfied "
+ "up to desired tolerance");
+
+ stat = !(checkv && (fabs(MathExtra::dot3(r01,v01)) > tol));
+ if (!stat)
+ error->one(FLERR,"Velocity constraints are not satisfied "
+ "up to desired tolerance");
+ return stat;
+}
+
+/* ---------------------------------------------------------------------- */
+
+bool FixRattle::check3(double **v, int m, bool checkr, bool checkv)
+{
+ bool stat;
+ tagint i0,i1,i2;
+ double r01[3], r02[3], v01[3], v02[3];
+ const double tol = tolerance;
+ double bond1 = bond_distance[shake_type[m][0]];
+ double bond2 = bond_distance[shake_type[m][1]];
+
+ i0 = atom->map(shake_atom[m][0]);
+ i1 = atom->map(shake_atom[m][1]);
+ i2 = atom->map(shake_atom[m][2]);
+
+ MathExtra::sub3(x[i1],x[i0],r01);
+ MathExtra::sub3(x[i2],x[i0],r02);
+
+ domain->minimum_image(r01);
+ domain->minimum_image(r02);
+
+ MathExtra::sub3(v[i1],v[i0],v01);
+ MathExtra::sub3(v[i2],v[i0],v02);
+
+ stat = !(checkr && (fabs(sqrt(MathExtra::dot3(r01,r01)) - bond1) > tol ||
+ fabs(sqrt(MathExtra::dot3(r02,r02))-bond2) > tol));
+ if (!stat)
+ error->one(FLERR,"Coordinate constraints are not satisfied "
+ "up to desired tolerance");
+
+ stat = !(checkv && (fabs(MathExtra::dot3(r01,v01)) > tol ||
+ fabs(MathExtra::dot3(r02,v02)) > tol));
+ if (!stat)
+ error->one(FLERR,"Velocity constraints are not satisfied "
+ "up to desired tolerance!");
+ return stat;
+}
+
+/* ---------------------------------------------------------------------- */
+
+bool FixRattle::check4(double **v, int m, bool checkr, bool checkv)
+{
+ bool stat = true;
+ const double tol = tolerance;
+ double r01[3], r02[3], r03[3], v01[3], v02[3], v03[3];
+
+ int i0 = atom->map(shake_atom[m][0]);
+ int i1 = atom->map(shake_atom[m][1]);
+ int i2 = atom->map(shake_atom[m][2]);
+ int i3 = atom->map(shake_atom[m][3]);
+ double bond1 = bond_distance[shake_type[m][0]];
+ double bond2 = bond_distance[shake_type[m][1]];
+ double bond3 = bond_distance[shake_type[m][2]];
+
+ MathExtra::sub3(x[i1],x[i0],r01);
+ MathExtra::sub3(x[i2],x[i0],r02);
+ MathExtra::sub3(x[i3],x[i0],r03);
+
+ domain->minimum_image(r01);
+ domain->minimum_image(r02);
+ domain->minimum_image(r03);
+
+ MathExtra::sub3(v[i1],v[i0],v01);
+ MathExtra::sub3(v[i2],v[i0],v02);
+ MathExtra::sub3(v[i3],v[i0],v03);
+
+ stat = !(checkr && (fabs(sqrt(MathExtra::dot3(r01,r01)) - bond1) > tol ||
+ fabs(sqrt(MathExtra::dot3(r02,r02))-bond2) > tol ||
+ fabs(sqrt(MathExtra::dot3(r03,r03))-bond3) > tol));
+ if (!stat)
+ error->one(FLERR,"Coordinate constraints are not satisfied "
+ "up to desired tolerance!");
+
+ stat = !(checkv && (fabs(MathExtra::dot3(r01,v01)) > tol ||
+ fabs(MathExtra::dot3(r02,v02)) > tol ||
+ fabs(MathExtra::dot3(r03,v03)) > tol));
+ if (!stat)
+ error->one(FLERR,"Velocity constraints are not satisfied "
+ "up to desired tolerance!");
+ return stat;
+}
+
+/* ---------------------------------------------------------------------- */
+
+bool FixRattle::check3angle(double **v, int m, bool checkr, bool checkv)
+{
+ bool stat = true;
+ const double tol = tolerance;
+ double r01[3], r02[3], r12[3], v01[3], v02[3], v12[3];
+
+ int i0 = atom->map(shake_atom[m][0]);
+ int i1 = atom->map(shake_atom[m][1]);
+ int i2 = atom->map(shake_atom[m][2]);
+ double bond1 = bond_distance[shake_type[m][0]];
+ double bond2 = bond_distance[shake_type[m][1]];
+ double bond12 = angle_distance[shake_type[m][2]];
+
+ MathExtra::sub3(x[i1],x[i0],r01);
+ MathExtra::sub3(x[i2],x[i0],r02);
+ MathExtra::sub3(x[i2],x[i1],r12);
+
+ domain->minimum_image(r01);
+ domain->minimum_image(r02);
+ domain->minimum_image(r12);
+
+ MathExtra::sub3(v[i1],v[i0],v01);
+ MathExtra::sub3(v[i2],v[i0],v02);
+ MathExtra::sub3(v[i2],v[i1],v12);
+
+ stat = !(checkr && (fabs(sqrt(MathExtra::dot3(r01,r01)) - bond1) > tol ||
+ fabs(sqrt(MathExtra::dot3(r02,r02))-bond2) > tol ||
+ fabs(sqrt(MathExtra::dot3(r12,r12))-bond12) > tol));
+ if (!stat)
+ error->one(FLERR,"Coordinate constraints are not satisfied "
+ "up to desired tolerance!");
+
+ stat = !(checkv && (fabs(MathExtra::dot3(r01,v01)) > tol ||
+ fabs(MathExtra::dot3(r02,v02)) > tol ||
+ fabs(MathExtra::dot3(r12,v12)) > tol));
+ if (!stat)
+ error->one(FLERR,"Velocity constraints are not satisfied "
+ "up to desired tolerance!");
+ return stat;
+}
diff --git a/src/RIGID/fix_rattle.h b/src/RIGID/fix_rattle.h
new file mode 100644
index 0000000000..b06117c274
--- /dev/null
+++ b/src/RIGID/fix_rattle.h
@@ -0,0 +1,74 @@
+/* -*- c++ -*- ----------------------------------------------------------
+ LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+ http://lammps.sandia.gov, Sandia National Laboratories
+ Steve Plimpton, sjplimp@sandia.gov
+
+ Copyright (2003) Sandia Corporation. Under the terms of Contract
+ DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+ certain rights in this software. This software is distributed under
+ the GNU General Public License.
+
+ See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(rattle,FixRattle)
+
+#else
+
+#ifndef LMP_FIX_RATTLE_H
+#define LMP_FIX_RATTLE_H
+
+#include "fix.h"
+#include "fix_shake.h"
+
+namespace LAMMPS_NS {
+
+class FixRattle : public FixShake {
+ public:
+ double **vp; // array for unconstrained velocities
+ double dtfv; // timestep for velocity update
+ int comm_mode; // mode for communication pack/unpack
+
+ FixRattle(class LAMMPS *, int, char **);
+ ~FixRattle();
+ int setmask();
+ virtual void init();
+ virtual void post_force(int);
+ virtual void post_force_respa(int, int, int);
+ virtual void final_integrate();
+ virtual void final_integrate_respa(int,int);
+ virtual void coordinate_constraints_end_of_step();
+
+ virtual double memory_usage();
+ virtual void grow_arrays(int);
+ virtual int pack_forward_comm(int, int *, double *, int, int *);
+ virtual void unpack_forward_comm(int, int, double *);
+ virtual void reset_dt();
+
+ private:
+ void update_v_half_nocons();
+ void update_v_half_nocons_respa(int);
+
+ void vrattle2(int m);
+ void vrattle3(int m);
+ void vrattle4(int m);
+ void vrattle3angle(int m);
+ void solve3x3exactly(const double a[][3], const double c[], double l[]);
+ void solve2x2exactly(const double a[][2], const double c[], double l[]);
+
+ // debugging methosd
+
+ bool check3angle(double ** v, int m, bool checkr, bool checkv);
+ bool check2(double **v, int m, bool checkr, bool checkv);
+ bool check3(double **v, int m, bool checkr, bool checkv);
+ bool check4(double **v, int m, bool checkr, bool checkv);
+ bool check_constraints(double **v, bool checkr, bool checkv);
+ void end_of_step();
+};
+
+}
+
+#endif
+#endif
From c92b71f0ae80f4cf7bebeab8c532d6b49e6f446c Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:08:07 +0000
Subject: [PATCH 16/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13182
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/fix_shake.html | 55 ++++++++++++++++++++++++++++++++++++++++--
doc/fix_shake.txt | 60 ++++++++++++++++++++++++++++++++++++++++++++--
2 files changed, 111 insertions(+), 4 deletions(-)
diff --git a/doc/fix_shake.html b/doc/fix_shake.html
index c1f62b8ac7..10106b216c 100644
--- a/doc/fix_shake.html
+++ b/doc/fix_shake.html
@@ -15,11 +15,11 @@
Syntax:
-fix ID group-ID shake tol iter N constraint values ... keyword value ...
+fix ID group-ID shake/rattle tol iter N constraint values ... keyword value ...
- ID, group-ID are documented in fix command
-
- shake = style name of this fix command
+
- shake or rattle = style name of this fix command
- tol = accuracy tolerance of SHAKE solution
@@ -51,11 +51,32 @@
fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31
fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol
+
fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31
+fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol
+
Description:
Apply bond and angle constraints to specified bonds and angles in the
simulation. This typically enables a longer timestep.
+SHAKE vs. RATTLE:
+
+The SHAKE algorithm was invented for schemes such as Verlet, where only the coordinates
+are integrated and the velocities are approximated as finite differences to the trajectories (Ryckaert et al. (1977)).
+If the velocities are integrated explicitly, for example with velocity Verlet,
+a second set of constraining forces is required in order to eliminate velocity components along the bonds (Andersen (1983)).
+
+In order to formulate the individual constraints for SHAKE and RATTLE, let us focus on a single molecule whose
+bonds are to be constrained. Let image(Eqs/fix_rattle_ri.jpg) and image(Eqs/fix_rattle_vi.jpg) be the position and velocity of atom i at time n, for i=1,...,N, where N is the number of sites of our reference molecule. The distance vector between sites i and j is given by image(Eqs/fix_rattle_rij.jpg). The constraints can then be formulated as
+
+
+
+The SHAKE algorithm satisfies the first condition, i.e. the sites at time n+1 will have the desired separations image(Eqs/fix_rattle_dij.jpg) immediately after the coordinates are integrated.
+If we also enforce the second condition, the velocity components along the bonds will vanish.
+RATTLE satisfies both conditions and fix rattle is implemented in a way that it uses fix shake for dealing with the coordinate constraints. Therefore all the information about SHAKE below is also relevant for RATTLE.
+
+SHAKE:
+
Each timestep the specified bonds and angles are reset to their
equilibrium lengths and angular values via the well-known SHAKE
algorithm. This is done by applying an additional constraint force so
@@ -144,6 +165,24 @@ more instructions on how to use the accelerated styles effectively.
+RATTLE:
+
+The velocity constraints lead to a linear system of equations which
+can be solved analytically. The implementation of the algorithm in LAMMPS
+closely follows Andersen (1983).
+
+IMPORTANT NOTE: This command modifies forces and velocities and
+it has to come after all other integration fixes. If you define other
+fixes that modify velocities or forces after fix rattle operates,
+then this fix will not take them into account and the time integration
+will typically not satisfy the RATTLE constraints. You can check whether
+the constraints work correctly by setting the value of RATTLE_DEBUG in fix_rattle.cpp
+to 1 and recompiling LAMMPS.
+
+IMPORTANT NOTE: RATTLE does not support cuda at the moment.
+
+
+
Restart, fix_modify, output, run start/stop, minimize info:
No information about this fix is written to binary restart
@@ -173,4 +212,16 @@ of SHAKE parameters and monitoring the energy versus time.
Default: none
+
+
+
+
+(Ryckaert) J.-P. Ryckaert, G. Ciccotti and H. J. C. Berendsen,
+Journal of Computational Physics, 23, 327–341 (1977).
+
+
+
+(Andersen) H. Andersen,
+Journal of Computational Physics, 52, 24-34 (1983).
+
diff --git a/doc/fix_shake.txt b/doc/fix_shake.txt
index edcb10ea2a..c9e4d8a1f9 100644
--- a/doc/fix_shake.txt
+++ b/doc/fix_shake.txt
@@ -11,10 +11,10 @@ fix shake/cuda command :h3
[Syntax:]
-fix ID group-ID shake tol iter N constraint values ... keyword value ... :pre
+fix ID group-ID {shake/rattle} tol iter N constraint values ... keyword value ... :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
-shake = style name of this fix command :l
+shake or rattle = style name of this fix command :l
tol = accuracy tolerance of SHAKE solution :l
iter = max # of iterations in each SHAKE solution :l
N = print SHAKE statistics every this many timesteps (0 = never) :l
@@ -35,12 +35,34 @@ keyword = {mol} :l
fix 1 sub shake 0.0001 20 10 b 4 19 a 3 5 2
fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31
fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol :pre
+fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31
+fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol :pre
[Description:]
Apply bond and angle constraints to specified bonds and angles in the
simulation. This typically enables a longer timestep.
+
+[SHAKE vs. RATTLE:]
+
+The SHAKE algorithm was invented for schemes such as Verlet, where only the coordinates
+are integrated and the velocities are approximated as finite differences to the trajectories ("Ryckaert et al. (1977)"_#Ryckaert).
+If the velocities are integrated explicitly, for example with velocity Verlet,
+a second set of constraining forces is required in order to eliminate velocity components along the bonds ("Andersen (1983)"_#Andersen).
+
+In order to formulate the individual constraints for SHAKE and RATTLE, let us focus on a single molecule whose
+bonds are to be constrained. Let image(Eqs/fix_rattle_ri.jpg) and image(Eqs/fix_rattle_vi.jpg) be the position and velocity of atom {i} at time {n}, for {i}=1,...,{N}, where {N} is the number of sites of our reference molecule. The distance vector between sites {i} and {j} is given by image(Eqs/fix_rattle_rij.jpg). The constraints can then be formulated as
+
+:c,image(Eqs/fix_rattle_constraints.jpg)
+
+The SHAKE algorithm satisfies the first condition, i.e. the sites at time {n+1} will have the desired separations image(Eqs/fix_rattle_dij.jpg) immediately after the coordinates are integrated.
+If we also enforce the second condition, the velocity components along the bonds will vanish.
+RATTLE satisfies both conditions and fix rattle is implemented in a way that it uses fix shake for dealing with the coordinate constraints. Therefore all the information about SHAKE below is also relevant for RATTLE.
+
+[SHAKE:]
+
+
Each timestep the specified bonds and angles are reset to their
equilibrium lengths and angular values via the well-known SHAKE
algorithm. This is done by applying an additional constraint force so
@@ -92,6 +114,7 @@ satisfy the SHAKE constraints. The solution for this is to make sure
that fix shake is defined in your input script after any other fixes
which add or change forces (to atoms that fix shake operates on).
+
:line
The {mol} keyword should be used when other commands, such as "fix
@@ -127,6 +150,27 @@ use the "suffix"_suffix.html command in your input script.
See "Section_accelerate"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
+:line
+
+
+[RATTLE:]
+
+
+The velocity constraints lead to a linear system of equations which
+can be solved analytically. The implementation of the algorithm in LAMMPS
+closely follows "Andersen (1983)"_#Andersen.
+
+IMPORTANT NOTE: This command modifies forces and velocities and
+it has to come after all other integration fixes. If you define other
+fixes that modify velocities or forces after fix rattle operates,
+then this fix will not take them into account and the time integration
+will typically not satisfy the RATTLE constraints. You can check whether
+the constraints work correctly by setting the value of RATTLE_DEBUG in fix_rattle.cpp
+to 1 and recompiling LAMMPS.
+
+IMPORTANT NOTE: RATTLE does not support cuda at the moment.
+
+
:line
[Restart, fix_modify, output, run start/stop, minimize info:]
@@ -157,3 +201,15 @@ of SHAKE parameters and monitoring the energy versus time.
[Related commands:] none
[Default:] none
+
+:line
+
+:link(Ryckaert)
+[(Ryckaert)] J.-P. Ryckaert, G. Ciccotti and H. J. C. Berendsen,
+Journal of Computational Physics, 23, 327–341 (1977).
+
+:link(Andersen)
+[(Andersen)] H. Andersen,
+Journal of Computational Physics, 52, 24-34 (1983).
+
+
From 2cf4762097fbc1712ddfefaa6bd0de6cbf463946 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:16:46 +0000
Subject: [PATCH 17/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13183
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Eqs/fix_rattle_constraints.tex | 9 +++++++++
doc/Eqs/fix_rattle_dij.tex | 7 +++++++
doc/Eqs/fix_rattle_rij.tex | 8 ++++++++
3 files changed, 24 insertions(+)
create mode 100644 doc/Eqs/fix_rattle_constraints.tex
create mode 100644 doc/Eqs/fix_rattle_dij.tex
create mode 100644 doc/Eqs/fix_rattle_rij.tex
diff --git a/doc/Eqs/fix_rattle_constraints.tex b/doc/Eqs/fix_rattle_constraints.tex
new file mode 100644
index 0000000000..daff0ce775
--- /dev/null
+++ b/doc/Eqs/fix_rattle_constraints.tex
@@ -0,0 +1,9 @@
+\documentclass[12pt]{article}
+\usepackage{amsmath}
+
+\begin{document}
+\begin{align*}
+\mathbf r^{n+1}_{ij} \cdot \mathbf r^{n+1}_{ij} &= d^2_{ij} \quad \text{and} \\
+\mathbf v^{n+1}_{ij} \cdot \mathbf r^{n+1}_{ij} &= 0, \label{eq:velcon}
+\end{align*}
+\end{document}
diff --git a/doc/Eqs/fix_rattle_dij.tex b/doc/Eqs/fix_rattle_dij.tex
new file mode 100644
index 0000000000..7577b715cd
--- /dev/null
+++ b/doc/Eqs/fix_rattle_dij.tex
@@ -0,0 +1,7 @@
+\documentclass[12pt]{article}
+\begin{document}
+$$
+ d_{ij}
+$$
+\end{document}
+
diff --git a/doc/Eqs/fix_rattle_rij.tex b/doc/Eqs/fix_rattle_rij.tex
new file mode 100644
index 0000000000..fae0457c60
--- /dev/null
+++ b/doc/Eqs/fix_rattle_rij.tex
@@ -0,0 +1,8 @@
+\documentclass[12pt]{article}
+\usepackage{amsmath}
+\begin{document}
+$$
+ \mathbf r^{n+1}_{ij} = \mathbf r^n_j - \mathbf r^n_i
+$$
+\end{document}
+
From 6c15559e13bc2560c719b6dd8a835fb7eecc8aba Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:18:51 +0000
Subject: [PATCH 18/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13184
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Eqs/fix_rattle_constraints.jpg | Bin 0 -> 4556 bytes
doc/Eqs/fix_rattle_dij.tex | 7 -------
doc/Eqs/fix_rattle_rij.jpg | Bin 0 -> 1812 bytes
3 files changed, 7 deletions(-)
create mode 100644 doc/Eqs/fix_rattle_constraints.jpg
delete mode 100644 doc/Eqs/fix_rattle_dij.tex
create mode 100644 doc/Eqs/fix_rattle_rij.jpg
diff --git a/doc/Eqs/fix_rattle_constraints.jpg b/doc/Eqs/fix_rattle_constraints.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..2ba86095cd3c3682160dedebb745413be563243b
GIT binary patch
literal 4556
zcmcgwXHb({w|*(1qajr3B?zG@ND-tXEl3MZ3FT0wL!?Mmy7UqgiUkl7RFD=RAWb}U
z5Je0mfZ{O}(a=St%H`a9&%gWQ&V1i|YtO9Nd-k5S_F8LZKhN4Hl#{;zFx3~>nsGqb(V{Zn@^aZn@5nBi%UR4
zKu|vMi4U{13k;>
zt}Ym$V_>AGqi0}bW&(juPy9UvKulZ^31)6(9!UcgTMrZquaqGosgMtP$=>l6zXo>l
zLlhcQ)N(Qlu>5US5BNQXhrtn;|DT##a=NKNG+pPsy7yFwF`Y!1Ri%?d6ju#MNL+m^
z793X~7bv>TPrDKQz^&;Oldc`>t%`&Cc!@Z2Z6Bpr6v2=HgC%~+Wd=f%gu#IY=sHm%UO&?OnS-#?`m
zKqDroMPyua^{6K=RZc2Xh3{LiT$?eF$mZykJMg7Gn((+)mhySBLqDfNq~GTtsX
z*(l&fupoeiS-htf6jWN}6T63b)$(K5f4=Clym0+m9K|&9w^rT3rv)N>a
zglW$ehGO@-625%dKIztfE=Ey)l%o3rtOwf0#o7!V+01G6`p2K94|FESz#3ZUGq`94@
zK71rbXF+C!5XAP5YfRzL(4%WhzqOAv`3?uBo~Z<{8_(4r{Z*UY)#GL^A&y%4)x3tRUY)lY(QCVW$K(8*m{>W8
zYwdQlW#ad4$Gx5cvWlU}u(@LfWz8KPn9c+Boqmz7Hf}4>h8*v1&%7Bgq(r24t8ELj
zb)TQ8OABV5pjy%VAI^DyY8tBOW4!=d=A*|x!D*+JodD1WA8X^SA#~)cxui0QY}cSt
zU89)dn6!)TJ{quH=eFnNmVS93SXpf&T~YqfRnd{%-5LwrUR0-d?c+fU)RU_|p~EqK
zIN~hC9?!i4&v6t?8Y@)3{wjWkHE*5r%nfgcd`NiL`g)cng?&S_uJ$KyHBc50Fu<*&u(q;Blc9pi}RikGmuGqc*D@zl%i4J=-0)W0h;jOqkK`3loSp5
z>~a2>QRf&YK{FdMxd_KftE^=Gd6HC@9@8(Z=gVn3$o6>s+WqlZ$woBv4KDn)I-3&-Gs~&AZ+Q|Legg1RZKN?UeU5YY!f$)Cvqah2?v6szso;O8N{8~viKJfDj;YPb_R4{yU`xDYyK3__x2lXq3|?IJxlsa?c2j?)v2zTzRi+owf~*%-
zDzsCs4heT`Z4@z2F#bv&H<0oUef0^Ts|)J#QsEpMnwGC?s?EN*4u1GxR0+cOz(%_WqBA#X$YYnksz!Vq^caw}a@h(8ljKVR-Q_#pC3-K8qxb5Oths&pWu
zU#)3TcOFsAwR%6?fiSgom%}BjfFZ!0=jnI6;vZlP4kTOKgN|UGxD@TWgxPy1*5Ry3
zKErSJyySz1X00>j46dj02W`2ckf8B-JrS5Dx+#}3EiKS^rzukdCAV2D$9?!&*<*I_
zU%vYXI%CZDd$!dow5Dt)yKLCZ&F3@7rVeVy_XsDzO?W3FT!gzV+jgT_`yCWpI(;5AeXiN!-nzr3;r3;u=g=K`waQN_6p}-y
zh>~jC#O3Y*dBp=W1ek?QZq3SQeRRdP4P_2NKPM#D7!P%`PDh>4CLqpxwZ1$W*X35kSzGLw%
zdI9aTYp4p{vHF%}(g#@sg#9du&79daWzQiE2A{k+&4Bf|ofEq7C{YO|ob`0hS~B&8
z-_NE-1M%Fv%+)y3%>wt1*7Q%C?AjK%&ZR--CxTvKn3CKh`1B0SaTIC6GMgcTOCKcR
zfmk&HPh2oju|rbl^7|V`3RQbHm{is!}{=N^Wq3$W)*mLUmriYlkb+4>AbuMfA@-!*_Q0dgR(}YTvwJxCVWc0I2Yc@vr*-A+wy&wEx(1ABcRP4@
z$<$hXjbIpCK+(Exk|C9lQq|MeQJoZVFT$>H);IQ!(HK=+w^wSek_utnpooy1vLsDX
z%J;Pt)WLl$-Tl;5PQa2NA@c)@wkDyiyCVV+|KYn<J=?+TMn3ofC
z(3h5X3Hxp-vXTDFMF>Ma`$jZYz5+nR&G%Bi?DKF`jw*#QIg|NaR}P9{lI?yQbwC+&!-JexAkVR
z&YnzeE^+18A<{S%^kr4FYWjrgnU0y6^KJaJM}dlY3DnRU&)%HH;_maKL>R0#_jE=@^WUx^L
zUwo{g)UKrsbLh_bgb*5=DaQ|=-w?Umk~K3D95RHe{)|d{4rNBN${8FEiJP1N7mfWI
zAKB(0wA;5r$gnz#q9Zc*h4)%9U+rmGAWqzFt1OKYS|NXDjDNB*+7x6V7MGIqZxkRvN*dl{?`l`aP!c6UDX`DJgz2Ji8VrChOv?gZYX
z7E?+Wf?~gj4hi;uq{Z3kISrX{4%t0F1C!EEus5g0$s|mv!Qj;2*R}>x1uAdqwB!t1urwH!r;V7}4jG8jBXaP4prb*e}$q
zLNGYl6~FsWrboEjb7sI@)*j_-eIY}-lJve~d4j(;LY8L|rg-Sl4JBp7GbD2z!I?e$
z?)m6B(RS0O!6pr@dD6S7tw{zH>AO*u*@Tz_BzAt}bEGAFOCKgJ2PCT+9FWLI+P!)c
zd+&~=^A7__(TDVgf4)!kGDldw=BuV-J^_LeB3vkE_gIR5h}Y(9lpzFNP|TBP59lNjV$~Y|sVr$(oki2}0%}Of
z=L_3kf0jfODzIC+2T-n~4Dmy-vB}rzM_Ey7k-b4h&n-OHz$!~y-z=ZO1%;K4
z7@RarS_g88(G1Oo*G1Ox>I1qKHU2nPoT
z2M-Gi2@DYr5)u&)5fKv>92XN58Wj-{7$F!M9UUJZ9}^cMBqAOp93CGY|G)qX2mmYq
zvjG7S0RO}Q8~_0T0s{d70RR9100000000041_cKQ0|5g6!~il700IF70s#X900;*F
z000000RjU61O*WW5+Mf@F%%*}2o)AGQCD$+Bw>+~l+)Dz+5iXv0RR9$0PMO_l&^~D
z%*w2#l<9yzN9WB!%jR(yb&0aMOFI!Xixgi`GN-ugdaAC^l1TsoQ|U@ll%*+3Qk11B
zO4oOJj!P%2S6p~zA+y9qhHQ=7EJ$9_OuZl|?7G{Cr)KUL000BWV>{C>QI^g6XN1n*
zSj~{gUP7`-9k;a{p4;uBk=5NomXF(F2j%Ob<19xGx}l%*?A<#}f)w3F0(i_Gd;
zn@(Eri1AQSRiUf+yPb-j0#vViXVFCZ*zF
zUTcFI4d}ZUeyFNGPgPYO00O0Bu~FG&vY8c?o$@k9?xc85
zk*W=UW!*>RJYx^Z?PImgLvuV4$dZVoyqXm*j;@SEZlQ{@s{(hT0TqDfM@6&?i&aA8`9g)F}t-8u+aKI?CH0
zXC0-aWg$)I7A(Y)OEWPfvl2-xNhFd0s!_xff1eTOIX%O`}%USslC9EJ-|ayyv>MMJAEXt&X{%tFf=2>~;X?
z>tlb!Rxf$$?6{r!jjM?pN&x-cetv5$7JDrIvwoqH
z=FsEd#z}K=5mhDzl2l7jA3*nzMRvu0I)xhXw*A}YGlSLJZa*~8OKkXytNW;gV^+iJ
zBaYZ3Yqw`#c)wys)9u#0-f2owm5SyV%gomp$YN|V5##5)gu*T!-`)tV;l9qxvR7bv
z8Yg~rs;PPr>CNY*+(9_6?Gk1&urocj?0tz5o!Q^B2VlUK@!)G;nv==%8OtwGIJPPo
zplC9e)2mw;!v(a4)!pVTuEI}v#G^;AR`ND>FEphqSNhlKRqv@8jyrg~r7Tdw~A8rb=)
z52LxezD2>U^B2=cE8b5%#k&Mlk?kI5Hm@S606RhLvCtdez^jcZN>z6=#~@40k0im^
zS)NZNuUmO;rIbMOqB_K6RXXwAm#`hWvjBY5ULD8scKo9|%JZ#kG8tIyt)hYlF1FUt
z2P&P28)kWBbD^Qw4Yyv#*-BEBrF?$f4^4L0jr?w@JYxaLF8PhOF6I9Kt0KL##2j_K
zwS&9HuOp+}OSb@k#we9n$qKtCZPjk{r72!_P(yWVEv>|BEHfdFL}9SXMu
Date: Wed, 4 Mar 2015 22:27:27 +0000
Subject: [PATCH 19/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13185
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Section_commands.html | 12 ++--
doc/Section_commands.txt | 1 +
doc/fix_shake.html | 111 +++++++++++++++++++++----------------
doc/fix_shake.txt | 113 ++++++++++++++++++++------------------
4 files changed, 132 insertions(+), 105 deletions(-)
diff --git a/doc/Section_commands.html b/doc/Section_commands.html
index 922cd274df..a51ad60fda 100644
--- a/doc/Section_commands.html
+++ b/doc/Section_commands.html
@@ -407,12 +407,12 @@ g = GPU, i = USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
| npt (co) | npt/asphere (o) | npt/sphere (o) | nve (cko) | nve/asphere | nve/asphere/noforce | nve/body | nve/limit |
| nve/line | nve/noforce | nve/sphere (o) | nve/tri | nvt (co) | nvt/asphere (o) | nvt/sllod (o) | nvt/sphere (o) |
| oneway | orient/fcc | planeforce | poems | pour | press/berendsen | print | property/atom |
-| qeq/comb (o) | qeq/dynamic | qeq/point | qeq/shielded | qeq/slater | reax/bonds | recenter | restrain |
-| rigid (o) | rigid/nph (o) | rigid/npt (o) | rigid/nve (o) | rigid/nvt (o) | rigid/small (o) | rigid/small/nph | rigid/small/npt |
-| rigid/small/nve | rigid/small/nvt | setforce (c) | shake (c) | spring | spring/rg | spring/self | srd |
-| store/force | store/state | temp/berendsen (c) | temp/csvr | temp/rescale (c) | tfmc | thermal/conductivity | tmd |
-| ttm | tune/kspace | vector | viscosity | viscous (c) | wall/colloid | wall/gran | wall/harmonic |
-| wall/lj1043 | wall/lj126 | wall/lj93 | wall/piston | wall/reflect | wall/region | wall/srd
+ |
| qeq/comb (o) | qeq/dynamic | qeq/point | qeq/shielded | qeq/slater | rattle | reax/bonds | recenter |
+| restrain | rigid (o) | rigid/nph (o) | rigid/npt (o) | rigid/nve (o) | rigid/nvt (o) | rigid/small (o) | rigid/small/nph |
+| rigid/small/npt | rigid/small/nve | rigid/small/nvt | setforce (c) | shake (c) | spring | spring/rg | spring/self |
+| srd | store/force | store/state | temp/berendsen (c) | temp/csvr | temp/rescale (c) | tfmc | thermal/conductivity |
+| tmd | ttm | tune/kspace | vector | viscosity | viscous (c) | wall/colloid | wall/gran |
+| wall/harmonic | wall/lj1043 | wall/lj126 | wall/lj93 | wall/piston | wall/reflect | wall/region | wall/srd
|
These are additional fix styles in USER packages, which can be used if
diff --git a/doc/Section_commands.txt b/doc/Section_commands.txt
index c2bfa0807e..fb527f942f 100644
--- a/doc/Section_commands.txt
+++ b/doc/Section_commands.txt
@@ -535,6 +535,7 @@ g = GPU, i = USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
"qeq/point"_fix_qeq.html,
"qeq/shielded"_fix_qeq.html,
"qeq/slater"_fix_qeq.html,
+"rattle"_fix_shake.html,
"reax/bonds"_fix_reax_bonds.html,
"recenter"_fix_recenter.html,
"restrain"_fix_restrain.html,
diff --git a/doc/fix_shake.html b/doc/fix_shake.html
index 10106b216c..4ccd6ecc64 100644
--- a/doc/fix_shake.html
+++ b/doc/fix_shake.html
@@ -13,13 +13,15 @@
fix shake/cuda command
+fix rattle command
+
Syntax:
-fix ID group-ID shake/rattle tol iter N constraint values ... keyword value ...
+fix ID group-ID style tol iter N constraint values ... keyword value ...
- ID, group-ID are documented in fix command
-
- shake or rattle = style name of this fix command
+
- style = shake or rattle = style name of this fix command
- tol = accuracy tolerance of SHAKE solution
@@ -49,45 +51,60 @@
fix 1 sub shake 0.0001 20 10 b 4 19 a 3 5 2
fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31
-fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol
-
-fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31
+fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol
+fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31
fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol
Description:
Apply bond and angle constraints to specified bonds and angles in the
-simulation. This typically enables a longer timestep.
+simulation by either the SHAKE or RATTLE algorithms. This typically
+enables a longer timestep.
-SHAKE vs. RATTLE:
+
SHAKE vs RATTLE:
-The SHAKE algorithm was invented for schemes such as Verlet, where only the coordinates
-are integrated and the velocities are approximated as finite differences to the trajectories (Ryckaert et al. (1977)).
-If the velocities are integrated explicitly, for example with velocity Verlet,
-a second set of constraining forces is required in order to eliminate velocity components along the bonds (Andersen (1983)).
+
The SHAKE algorithm was invented for schemes such as standard Verlet
+timesteppnig, where only the coordinates are integrated and the
+velocities are approximated as finite differences to the trajectories
+(Ryckaert et al. (1977)). If the velocities are
+integrated explicitly, as with velocity Verlet which is what LAMMPS
+uses as an integration method, a second set of constraining forces is
+required in order to eliminate velocity components along the bonds
+(Andersen (1983)).
-In order to formulate the individual constraints for SHAKE and RATTLE, let us focus on a single molecule whose
-bonds are to be constrained. Let image(Eqs/fix_rattle_ri.jpg) and image(Eqs/fix_rattle_vi.jpg) be the position and velocity of atom i at time n, for i=1,...,N, where N is the number of sites of our reference molecule. The distance vector between sites i and j is given by image(Eqs/fix_rattle_rij.jpg). The constraints can then be formulated as
+
In order to formulate individual constraints for SHAKE and RATTLE,
+focus on a single molecule whose bonds are constrained. Let Ri and Vi
+be the position and velocity of atom i at time n, for
+i=1,...,N, where N is the number of sites of our reference
+molecule. The distance vector between sites i and j is given by
+
+
+
+The constraints can then be formulated as
-The SHAKE algorithm satisfies the first condition, i.e. the sites at time n+1 will have the desired separations image(Eqs/fix_rattle_dij.jpg) immediately after the coordinates are integrated.
-If we also enforce the second condition, the velocity components along the bonds will vanish.
-RATTLE satisfies both conditions and fix rattle is implemented in a way that it uses fix shake for dealing with the coordinate constraints. Therefore all the information about SHAKE below is also relevant for RATTLE.
+
The SHAKE algorithm satisfies the first condition, i.e. the sites at
+time n+1 will have the desired separations Dij immediately after the
+coordinates are integrated. If we also enforce the second condition,
+the velocity components along the bonds will vanish. RATTLE satisfies
+both conditions. As implemented in LAMMPS, fix rattle uses fix shake
+for satisfying the coordinate constraints. Therefore all the
+information below about SHAKE is also relevant for RATTLE.
SHAKE:
Each timestep the specified bonds and angles are reset to their
-equilibrium lengths and angular values via the well-known SHAKE
-algorithm. This is done by applying an additional constraint force so
-that the new positions preserve the desired atom separations. The
-equations for the additional force are solved via an iterative method
-that typically converges to an accurate solution in a few iterations.
-The desired tolerance (e.g. 1.0e-4 = 1 part in 10000) and maximum # of
-iterations are specified as arguments. Setting the N argument will
-print statistics to the screen and log file about regarding the
-lengths of bonds and angles that are being constrained. Small delta
-values mean SHAKE is doing a good job.
+equilibrium lengths and angular values via the SHAKE algorithm
+(Ryckaert et al. (1977)). This is done by applying an
+additional constraint force so that the new positions preserve the
+desired atom separations. The equations for the additional force are
+solved via an iterative method that typically converges to an accurate
+solution in a few iterations. The desired tolerance (e.g. 1.0e-4 = 1
+part in 10000) and maximum # of iterations are specified as arguments.
+Setting the N argument will print statistics to the screen and log
+file about regarding the lengths of bonds and angles that are being
+constrained. Small delta values mean SHAKE is doing a good job.
In LAMMPS, only small clusters of atoms can be constrained. This is
so the constraint calculation for a cluster can be performed by a
@@ -168,39 +185,39 @@ more instructions on how to use the accelerated styles effectively.
RATTLE:
The velocity constraints lead to a linear system of equations which
-can be solved analytically. The implementation of the algorithm in LAMMPS
-closely follows Andersen (1983).
+can be solved analytically. The implementation of the algorithm in
+LAMMPS closely follows Andersen (1983).
-IMPORTANT NOTE: This command modifies forces and velocities and
-it has to come after all other integration fixes. If you define other
-fixes that modify velocities or forces after fix rattle operates,
-then this fix will not take them into account and the time integration
-will typically not satisfy the RATTLE constraints. You can check whether
-the constraints work correctly by setting the value of RATTLE_DEBUG in fix_rattle.cpp
-to 1 and recompiling LAMMPS.
-
-IMPORTANT NOTE: RATTLE does not support cuda at the moment.
+
IMPORTANT NOTE: The fix rattle command modifies forces and velocities
+and thus should be defined after all other integration fixes in your
+input script. If you define other fixes that modify velocities or
+forces after fix rattle operates, then fix rattle will not take them
+into account and the overall time integration will typically not
+satisfy the RATTLE constraints. You can check whether the constraints
+work correctly by setting the value of RATTLE_DEBUG in
+src/fix_rattle.cpp to 1 and recompiling LAMMPS.
Restart, fix_modify, output, run start/stop, minimize info:
-No information about this fix is written to binary restart
+No information about these fixes is written to binary restart
files. None of the fix_modify options
-are relevant to this fix. No global or per-atom quantities are stored
-by this fix for access by various output
-commands. No parameter of this fix can
-be used with the start/stop keywords of the run command.
-This fix is not invoked during energy minimization.
+are relevant to these fixes. No global or per-atom quantities are
+stored by these fixes for access by various output
+commands. No parameter of these fixes
+can be used with the start/stop keywords of the run
+command. These fixes are not invoked during energy
+minimization.
Restrictions:
-This fix is part of the RIGID package. It is only enabled if LAMMPS
-was built with that package. See the Making
+These fixes are part of the RIGID package. They are only enabled if
+LAMMPS was built with that package. See the Making
LAMMPS section for more info.
-For computational efficiency, there can only be one shake fix defined
-in a simulation.
+
For computational efficiency, there can only be one shake or rattle
+fix defined in a simulation.
If you use a tolerance that is too large or a max-iteration count that
is too small, the constraints will not be enforced very strongly,
diff --git a/doc/fix_shake.txt b/doc/fix_shake.txt
index c9e4d8a1f9..75f76b7a80 100644
--- a/doc/fix_shake.txt
+++ b/doc/fix_shake.txt
@@ -8,13 +8,14 @@
fix shake command :h3
fix shake/cuda command :h3
+fix rattle command :h3
[Syntax:]
-fix ID group-ID {shake/rattle} tol iter N constraint values ... keyword value ... :pre
+fix ID group-ID style tol iter N constraint values ... keyword value ... :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
-shake or rattle = style name of this fix command :l
+style = shake or rattle = style name of this fix command :l
tol = accuracy tolerance of SHAKE solution :l
iter = max # of iterations in each SHAKE solution :l
N = print SHAKE statistics every this many timesteps (0 = never) :l
@@ -34,46 +35,60 @@ keyword = {mol} :l
fix 1 sub shake 0.0001 20 10 b 4 19 a 3 5 2
fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31
-fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol :pre
+fix 1 sub shake 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol
fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31
fix 1 sub rattle 0.0001 20 10 t 5 6 m 1.0 a 31 mol myMol :pre
[Description:]
Apply bond and angle constraints to specified bonds and angles in the
-simulation. This typically enables a longer timestep.
+simulation by either the SHAKE or RATTLE algorithms. This typically
+enables a longer timestep.
+[SHAKE vs RATTLE:]
-[SHAKE vs. RATTLE:]
+The SHAKE algorithm was invented for schemes such as standard Verlet
+timesteppnig, where only the coordinates are integrated and the
+velocities are approximated as finite differences to the trajectories
+("Ryckaert et al. (1977)"_#Ryckaert). If the velocities are
+integrated explicitly, as with velocity Verlet which is what LAMMPS
+uses as an integration method, a second set of constraining forces is
+required in order to eliminate velocity components along the bonds
+("Andersen (1983)"_#Andersen).
-The SHAKE algorithm was invented for schemes such as Verlet, where only the coordinates
-are integrated and the velocities are approximated as finite differences to the trajectories ("Ryckaert et al. (1977)"_#Ryckaert).
-If the velocities are integrated explicitly, for example with velocity Verlet,
-a second set of constraining forces is required in order to eliminate velocity components along the bonds ("Andersen (1983)"_#Andersen).
+In order to formulate individual constraints for SHAKE and RATTLE,
+focus on a single molecule whose bonds are constrained. Let Ri and Vi
+be the position and velocity of atom {i} at time {n}, for
+{i}=1,...,{N}, where {N} is the number of sites of our reference
+molecule. The distance vector between sites {i} and {j} is given by
-In order to formulate the individual constraints for SHAKE and RATTLE, let us focus on a single molecule whose
-bonds are to be constrained. Let image(Eqs/fix_rattle_ri.jpg) and image(Eqs/fix_rattle_vi.jpg) be the position and velocity of atom {i} at time {n}, for {i}=1,...,{N}, where {N} is the number of sites of our reference molecule. The distance vector between sites {i} and {j} is given by image(Eqs/fix_rattle_rij.jpg). The constraints can then be formulated as
+:c,image(Eqs/fix_rattle_rij.jpg)
+
+The constraints can then be formulated as
:c,image(Eqs/fix_rattle_constraints.jpg)
-The SHAKE algorithm satisfies the first condition, i.e. the sites at time {n+1} will have the desired separations image(Eqs/fix_rattle_dij.jpg) immediately after the coordinates are integrated.
-If we also enforce the second condition, the velocity components along the bonds will vanish.
-RATTLE satisfies both conditions and fix rattle is implemented in a way that it uses fix shake for dealing with the coordinate constraints. Therefore all the information about SHAKE below is also relevant for RATTLE.
+The SHAKE algorithm satisfies the first condition, i.e. the sites at
+time {n+1} will have the desired separations Dij immediately after the
+coordinates are integrated. If we also enforce the second condition,
+the velocity components along the bonds will vanish. RATTLE satisfies
+both conditions. As implemented in LAMMPS, fix rattle uses fix shake
+for satisfying the coordinate constraints. Therefore all the
+information below about SHAKE is also relevant for RATTLE.
[SHAKE:]
-
Each timestep the specified bonds and angles are reset to their
-equilibrium lengths and angular values via the well-known SHAKE
-algorithm. This is done by applying an additional constraint force so
-that the new positions preserve the desired atom separations. The
-equations for the additional force are solved via an iterative method
-that typically converges to an accurate solution in a few iterations.
-The desired tolerance (e.g. 1.0e-4 = 1 part in 10000) and maximum # of
-iterations are specified as arguments. Setting the N argument will
-print statistics to the screen and log file about regarding the
-lengths of bonds and angles that are being constrained. Small delta
-values mean SHAKE is doing a good job.
+equilibrium lengths and angular values via the SHAKE algorithm
+("Ryckaert et al. (1977)"_#Ryckaert). This is done by applying an
+additional constraint force so that the new positions preserve the
+desired atom separations. The equations for the additional force are
+solved via an iterative method that typically converges to an accurate
+solution in a few iterations. The desired tolerance (e.g. 1.0e-4 = 1
+part in 10000) and maximum # of iterations are specified as arguments.
+Setting the N argument will print statistics to the screen and log
+file about regarding the lengths of bonds and angles that are being
+constrained. Small delta values mean SHAKE is doing a good job.
In LAMMPS, only small clusters of atoms can be constrained. This is
so the constraint calculation for a cluster can be performed by a
@@ -114,7 +129,6 @@ satisfy the SHAKE constraints. The solution for this is to make sure
that fix shake is defined in your input script after any other fixes
which add or change forces (to atoms that fix shake operates on).
-
:line
The {mol} keyword should be used when other commands, such as "fix
@@ -152,45 +166,42 @@ more instructions on how to use the accelerated styles effectively.
:line
-
[RATTLE:]
-
The velocity constraints lead to a linear system of equations which
-can be solved analytically. The implementation of the algorithm in LAMMPS
-closely follows "Andersen (1983)"_#Andersen.
+can be solved analytically. The implementation of the algorithm in
+LAMMPS closely follows "Andersen (1983)"_#Andersen.
-IMPORTANT NOTE: This command modifies forces and velocities and
-it has to come after all other integration fixes. If you define other
-fixes that modify velocities or forces after fix rattle operates,
-then this fix will not take them into account and the time integration
-will typically not satisfy the RATTLE constraints. You can check whether
-the constraints work correctly by setting the value of RATTLE_DEBUG in fix_rattle.cpp
-to 1 and recompiling LAMMPS.
-
-IMPORTANT NOTE: RATTLE does not support cuda at the moment.
-
+IMPORTANT NOTE: The fix rattle command modifies forces and velocities
+and thus should be defined after all other integration fixes in your
+input script. If you define other fixes that modify velocities or
+forces after fix rattle operates, then fix rattle will not take them
+into account and the overall time integration will typically not
+satisfy the RATTLE constraints. You can check whether the constraints
+work correctly by setting the value of RATTLE_DEBUG in
+src/fix_rattle.cpp to 1 and recompiling LAMMPS.
:line
[Restart, fix_modify, output, run start/stop, minimize info:]
-No information about this fix is written to "binary restart
+No information about these fixes is written to "binary restart
files"_restart.html. None of the "fix_modify"_fix_modify.html options
-are relevant to this fix. No global or per-atom quantities are stored
-by this fix for access by various "output
-commands"_Section_howto.html#howto_15. No parameter of this fix can
-be used with the {start/stop} keywords of the "run"_run.html command.
-This fix is not invoked during "energy minimization"_minimize.html.
+are relevant to these fixes. No global or per-atom quantities are
+stored by these fixes for access by various "output
+commands"_Section_howto.html#howto_15. No parameter of these fixes
+can be used with the {start/stop} keywords of the "run"_run.html
+command. These fixes are not invoked during "energy
+minimization"_minimize.html.
[Restrictions:]
-This fix is part of the RIGID package. It is only enabled if LAMMPS
-was built with that package. See the "Making
+These fixes are part of the RIGID package. They are only enabled if
+LAMMPS was built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
-For computational efficiency, there can only be one shake fix defined
-in a simulation.
+For computational efficiency, there can only be one shake or rattle
+fix defined in a simulation.
If you use a tolerance that is too large or a max-iteration count that
is too small, the constraints will not be enforced very strongly,
@@ -211,5 +222,3 @@ Journal of Computational Physics, 23, 327–341 (1977).
:link(Andersen)
[(Andersen)] H. Andersen,
Journal of Computational Physics, 52, 24-34 (1983).
-
-
From 89a978c14d3925b281cc675b4f6ace1a5174fcfe Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:28:12 +0000
Subject: [PATCH 20/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13186
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/fix_shake.html | 5 +++--
doc/fix_shake.txt | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/doc/fix_shake.html b/doc/fix_shake.html
index 4ccd6ecc64..69e8a11bd8 100644
--- a/doc/fix_shake.html
+++ b/doc/fix_shake.html
@@ -89,8 +89,9 @@ time n+1 will have the desired separations Dij immediately after the
coordinates are integrated. If we also enforce the second condition,
the velocity components along the bonds will vanish. RATTLE satisfies
both conditions. As implemented in LAMMPS, fix rattle uses fix shake
-for satisfying the coordinate constraints. Therefore all the
-information below about SHAKE is also relevant for RATTLE.
+for satisfying the coordinate constraints. Therefore the settings and
+optional keywords are the same for both fixes, and all the information
+below about SHAKE is also relevant for RATTLE.
SHAKE:
diff --git a/doc/fix_shake.txt b/doc/fix_shake.txt
index 75f76b7a80..629caca9a3 100644
--- a/doc/fix_shake.txt
+++ b/doc/fix_shake.txt
@@ -73,8 +73,9 @@ time {n+1} will have the desired separations Dij immediately after the
coordinates are integrated. If we also enforce the second condition,
the velocity components along the bonds will vanish. RATTLE satisfies
both conditions. As implemented in LAMMPS, fix rattle uses fix shake
-for satisfying the coordinate constraints. Therefore all the
-information below about SHAKE is also relevant for RATTLE.
+for satisfying the coordinate constraints. Therefore the settings and
+optional keywords are the same for both fixes, and all the information
+below about SHAKE is also relevant for RATTLE.
[SHAKE:]
From 46e6fe5eb534b0050931f07631ee18b2bc343e17 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:31:04 +0000
Subject: [PATCH 21/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13187
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/version.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/version.h b/src/version.h
index b5941868c6..33e0a6a6d2 100644
--- a/src/version.h
+++ b/src/version.h
@@ -1 +1 @@
-#define LAMMPS_VERSION "4 Mar 2015"
+#define LAMMPS_VERSION "5 Mar 2015"
From b66b3f774a1bc8074022887e4a45c01c4d186ec6 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:31:06 +0000
Subject: [PATCH 22/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13188
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/Manual.html | 4 ++--
doc/Manual.txt | 4 ++--
2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/doc/Manual.html b/doc/Manual.html
index f91a89ad09..ae64da386a 100644
--- a/doc/Manual.html
+++ b/doc/Manual.html
@@ -1,7 +1,7 @@
LAMMPS Users Manual
-
+
@@ -22,7 +22,7 @@
LAMMPS Documentation
-4 Mar 2015 version
+5 Mar 2015 version
Version info:
diff --git a/doc/Manual.txt b/doc/Manual.txt
index f62365d561..2b5bb7d912 100644
--- a/doc/Manual.txt
+++ b/doc/Manual.txt
@@ -1,6 +1,6 @@
LAMMPS Users Manual
-
+
@@ -18,7 +18,7 @@
LAMMPS Documentation :c,h3
-4 Mar 2015 version :c,h4
+5 Mar 2015 version :c,h4
Version info: :h4
From 9171a85d360444d931e3c3f628addded8d7d73d1 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 22:58:12 +0000
Subject: [PATCH 23/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13190
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
src/RIGID/fix_rattle.h | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/RIGID/fix_rattle.h b/src/RIGID/fix_rattle.h
index b06117c274..84d04caeec 100644
--- a/src/RIGID/fix_rattle.h
+++ b/src/RIGID/fix_rattle.h
@@ -34,18 +34,18 @@ class FixRattle : public FixShake {
FixRattle(class LAMMPS *, int, char **);
~FixRattle();
int setmask();
- virtual void init();
- virtual void post_force(int);
- virtual void post_force_respa(int, int, int);
- virtual void final_integrate();
- virtual void final_integrate_respa(int,int);
- virtual void coordinate_constraints_end_of_step();
+ void init();
+ void post_force(int);
+ void post_force_respa(int, int, int);
+ void final_integrate();
+ void final_integrate_respa(int,int);
+ void coordinate_constraints_end_of_step();
- virtual double memory_usage();
- virtual void grow_arrays(int);
- virtual int pack_forward_comm(int, int *, double *, int, int *);
- virtual void unpack_forward_comm(int, int, double *);
- virtual void reset_dt();
+ double memory_usage();
+ void grow_arrays(int);
+ int pack_forward_comm(int, int *, double *, int, int *);
+ void unpack_forward_comm(int, int, double *);
+ void reset_dt();
private:
void update_v_half_nocons();
From 84c9a0675b4e6260d3ea36dd91f3e7d533b85411 Mon Sep 17 00:00:00 2001
From: sjplimp
Date: Wed, 4 Mar 2015 23:01:51 +0000
Subject: [PATCH 24/24] git-svn-id:
svn://svn.icms.temple.edu/lammps-ro/trunk@13191
f3b2605a-c512-4ea7-a41b-209d697bcdaa
---
doc/fix_shake.html | 2 +-
doc/fix_shake.txt | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/doc/fix_shake.html b/doc/fix_shake.html
index 69e8a11bd8..020ef7e870 100644
--- a/doc/fix_shake.html
+++ b/doc/fix_shake.html
@@ -187,7 +187,7 @@ more instructions on how to use the accelerated styles effectively.
The velocity constraints lead to a linear system of equations which
can be solved analytically. The implementation of the algorithm in
-LAMMPS closely follows Andersen (1983).
+LAMMPS closely follows (Andersen (1983)).
IMPORTANT NOTE: The fix rattle command modifies forces and velocities
and thus should be defined after all other integration fixes in your
diff --git a/doc/fix_shake.txt b/doc/fix_shake.txt
index 629caca9a3..32c98711b4 100644
--- a/doc/fix_shake.txt
+++ b/doc/fix_shake.txt
@@ -171,7 +171,7 @@ more instructions on how to use the accelerated styles effectively.
The velocity constraints lead to a linear system of equations which
can be solved analytically. The implementation of the algorithm in
-LAMMPS closely follows "Andersen (1983)"_#Andersen.
+LAMMPS closely follows ("Andersen (1983)"_#Andersen).
IMPORTANT NOTE: The fix rattle command modifies forces and velocities
and thus should be defined after all other integration fixes in your