Merge branch 'master' of https://www.github.com/lammps/lammps

2017-06-21 14:11:12 -04:00
parent 13643e185c 87c028ed02
commit f47aaa5f3c
825 changed files with 80656 additions and 22778 deletions
--- a/doc/src/Eqs/cnp_cutoff.jpg
+++ b/doc/src/Eqs/cnp_cutoff.jpg
--- a/doc/src/Eqs/cnp_cutoff.tex
+++ b/doc/src/Eqs/cnp_cutoff.tex
@ -0,0 +1,14 @@
+\documentclass[12pt,article]{article}
+
+\usepackage{indentfirst}
+\usepackage{amsmath}
+
+\begin{document}
+
+\begin{eqnarray*}
+  r_{c}^{fcc} & = & \frac{1}{2} \left(\frac{\sqrt{2}}{2} + 1\right) \mathrm{a} \simeq 0.8536 \:\mathrm{a} \\
+  r_{c}^{bcc} & = & \frac{1}{2}(\sqrt{2} + 1) \mathrm{a} \simeq 1.207 \:\mathrm{a} \\
+  r_{c}^{hcp} & = & \frac{1}{2}\left(1+\sqrt{\frac{4+2x^{2}}{3}}\right) \mathrm{a}
+\end{eqnarray*}
+
+\end{document}
--- a/doc/src/Eqs/cnp_cutoff2.jpg
+++ b/doc/src/Eqs/cnp_cutoff2.jpg
--- a/doc/src/Eqs/cnp_cutoff2.tex
+++ b/doc/src/Eqs/cnp_cutoff2.tex
@ -0,0 +1,12 @@
+\documentclass[12pt,article]{article}
+
+\usepackage{indentfirst}
+\usepackage{amsmath}
+
+\begin{document}
+
+$$
+  Rc + Rs > 2*{\rm cutoff}
+$$
+
+\end{document}
--- a/doc/src/Eqs/cnp_eq.jpg
+++ b/doc/src/Eqs/cnp_eq.jpg
--- a/doc/src/Eqs/cnp_eq.tex
+++ b/doc/src/Eqs/cnp_eq.tex
@ -0,0 +1,9 @@
+\documentclass[12pt]{article}
+
+\begin{document}
+
+$$
+   Q_{i} = \frac{1}{n_i}\sum_{j = 1}^{n_i} | \sum_{k = 1}^{n_{ij}}  \vec{R}_{ik} + \vec{R}_{jk} |^2
+$$
+
+\end{document}
--- a/doc/src/Eqs/pair_lj_sf.jpg
+++ b/doc/src/Eqs/pair_lj_sf.jpg
--- a/doc/src/Eqs/pair_lj_sf.tex
+++ b/doc/src/Eqs/pair_lj_sf.tex
@ -1,11 +0,0 @@
-\documentclass[12pt]{article}
-
-\begin{document}
-
-\begin{eqnarray*}
- F & = & F_{\mathrm{LJ}}(r) - F_{\mathrm{LJ}}(r_{\mathrm{c}}) \qquad r < r_{\mathrm{c}} \\
- E & = & E_{\mathrm{LJ}}(r) - E_{\mathrm{LJ}}(r_{\mathrm{c}}) + (r - r_{\mathrm{c}}) F_{\mathrm{LJ}}(r_{\mathrm{c}}) \qquad r < r_{\mathrm{c}} \\
- \mathrm{with} \qquad E_{\mathrm{LJ}}(r) & = & 4 \epsilon \left[ \left(\frac{\sigma}{r}\right)^{12} - \left(\frac{\sigma}{r}\right)^6 \right] \qquad \mathrm{and} \qquad F_{\mathrm{LJ}}(r) = - E^\prime_{\mathrm{LJ}}(r)
-\end{eqnarray*}                           
-
-\end{document}
--- a/doc/src/JPG/user_intel.png
+++ b/doc/src/JPG/user_intel.png
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="19 May 2017 version">
+<META NAME="docnumber" CONTENT="20 Jun 2017 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@ -21,7 +21,7 @@
 <H1></H1>

 LAMMPS Documentation :c,h3
-19 May 2017 version :c,h4
+20 Jun 2017 version :c,h4

 Version info: :h4

--- a/doc/src/Section_commands.txt
+++ b/doc/src/Section_commands.txt
@ -717,7 +717,7 @@ package"_Section_start.html#start_3.
 "phonon"_fix_phonon.html,
 "pimd"_fix_pimd.html,
 "qbmsst"_fix_qbmsst.html,
-"qeq/reax"_fix_qeq_reax.html,
+"qeq/reax (ko)"_fix_qeq_reax.html,
 "qmmm"_fix_qmmm.html,
 "qtb"_fix_qtb.html,
 "reax/c/bonds"_fix_reax_bonds.html,
@ -831,6 +831,7 @@ package"_Section_start.html#start_3.

 "ackland/atom"_compute_ackland_atom.html,
 "basal/atom"_compute_basal_atom.html,
+"cnp/atom"_compute_cnp_atom.html,
 "dpd"_compute_dpd.html,
 "dpd/atom"_compute_dpd_atom.html,
 "fep"_compute_fep.html,
@ -963,7 +964,7 @@ KOKKOS, o = USER-OMP, t = OPT.
 "lj/expand (gko)"_pair_lj_expand.html,
 "lj/gromacs (gko)"_pair_gromacs.html,
 "lj/gromacs/coul/gromacs (ko)"_pair_gromacs.html,
-"lj/long/coul/long (o)"_pair_lj_long.html,
+"lj/long/coul/long (io)"_pair_lj_long.html,
 "lj/long/dipole/long"_pair_dipole.html,
 "lj/long/tip4p/long"_pair_lj_long.html,
 "lj/smooth (o)"_pair_lj_smooth.html,
@ -1038,7 +1039,6 @@ package"_Section_start.html#start_3.
 "lj/sdk (gko)"_pair_sdk.html,
 "lj/sdk/coul/long (go)"_pair_sdk.html,
 "lj/sdk/coul/msm (o)"_pair_sdk.html,
-"lj/sf (o)"_pair_lj_sf.html,
 "meam/spline (o)"_pair_meam_spline.html,
 "meam/sw/spline"_pair_meam_sw_spline.html,
 "mgpt"_pair_mgpt.html,
@ -1057,7 +1057,7 @@ package"_Section_start.html#start_3.
 "oxdna2/excv"_pair_oxdna2.html,
 "oxdna2/stk"_pair_oxdna2.html,
 "quip"_pair_quip.html,
-"reax/c (k)"_pair_reaxc.html,
+"reax/c (ko)"_pair_reaxc.html,
 "smd/hertz"_pair_smd_hertz.html,
 "smd/tlsph"_pair_smd_tlsph.html,
 "smd/triangulated/surface"_pair_smd_triangulated_surface.html,
@ -1225,7 +1225,7 @@ USER-OMP, t = OPT.
 "msm/cg (o)"_kspace_style.html,
 "pppm (go)"_kspace_style.html,
 "pppm/cg (o)"_kspace_style.html,
-"pppm/disp"_kspace_style.html,
+"pppm/disp (i)"_kspace_style.html,
 "pppm/disp/tip4p"_kspace_style.html,
 "pppm/stagger"_kspace_style.html,
 "pppm/tip4p (o)"_kspace_style.html :tb(c=4,ea=c)
--- a/doc/src/Section_errors.txt
+++ b/doc/src/Section_errors.txt
@ -8890,6 +8890,14 @@ This is a requirement to use this potential. :dd

 See the newton command.  This is a restriction to use this potential. :dd

+{Pair style vashishta/gpu requires atom IDs} :dt
+
+This is a requirement to use this potential. :dd
+
+{Pair style vashishta/gpu requires newton pair off} :dt
+
+See the newton command.  This is a restriction to use this potential. :dd
+
 {Pair style tersoff/gpu requires atom IDs} :dt

 This is a requirement to use the tersoff/gpu potential. :dd
--- a/doc/src/Section_packages.txt
+++ b/doc/src/Section_packages.txt
@ -1502,7 +1502,7 @@ oxDNA model of Doye, Louis and Ouldridge at the University of Oxford.
 This includes Langevin-type rigid-body integrators with improved
 stability.

-[Author:] Oliver Henrich (University of Edinburgh).
+[Author:] Oliver Henrich (University of Strathclyde, Glasgow).

 [Install or un-install:]
  
@ -2027,8 +2027,8 @@ algorithm to formulate single-particle constraint functions
 g(xi,yi,zi) = 0 and their derivative (i.e. the normal of the manifold)
 n = grad(g).

-[Author:] Stefan Paquay (Eindhoven University of Technology (TU/e), The
-Netherlands)
+[Author:] Stefan Paquay (until 2017: Eindhoven University of Technology (TU/e), The
+Netherlands; since 2017: Brandeis University, Waltham, MA, USA)

 [Install or un-install:]
  
--- a/doc/src/accelerate_intel.txt
+++ b/doc/src/accelerate_intel.txt
@ -30,8 +30,8 @@ Dihedral Styles: charmm, harmonic, opls :l
 Fixes: nve, npt, nvt, nvt/sllod :l
 Improper Styles: cvff, harmonic :l
 Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
-charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l
-K-Space Styles: pppm :l
+charmm/coul/long, lj/cut, lj/cut/coul/long, lj/long/coul/long, sw, tersoff :l
+K-Space Styles: pppm, pppm/disp :l
 :ule

 [Speed-ups to expect:]
@ -42,62 +42,88 @@ precision mode. Performance improvements are shown compared to
 LAMMPS {without using other acceleration packages} as these are
 under active development (and subject to performance changes). The
 measurements were performed using the input files available in
-the src/USER-INTEL/TEST directory. These are scalable in size; the
-results given are with 512K particles (524K for Liquid Crystal).
-Most of the simulations are standard LAMMPS benchmarks (indicated
-by the filename extension in parenthesis) with modifications to the
-run length and to add a warmup run (for use with offload
-benchmarks).
+the src/USER-INTEL/TEST directory with the provided run script. 
+These are scalable in size; the results given are with 512K 
+particles (524K for Liquid Crystal). Most of the simulations are 
+standard LAMMPS benchmarks (indicated by the filename extension in
+parenthesis) with modifications to the run length and to add a 
+warmup run (for use with offload benchmarks).

 :c,image(JPG/user_intel.png)

 Results are speedups obtained on Intel Xeon E5-2697v4 processors
 (code-named Broadwell) and Intel Xeon Phi 7250 processors
-(code-named Knights Landing) with "18 Jun 2016" LAMMPS built with
-Intel Parallel Studio 2016 update 3. Results are with 1 MPI task
+(code-named Knights Landing) with "June 2017" LAMMPS built with
+Intel Parallel Studio 2017 update 2. Results are with 1 MPI task
 per physical core. See {src/USER-INTEL/TEST/README} for the raw
 simulation rates and instructions to reproduce.

 :line

+[Accuracy and order of operations:]
+
+In most molecular dynamics software, parallelization parameters
+(# of MPI, OpenMP, and vectorization) can change the results due
+to changing the order of operations with finite-precision 
+calculations. The USER-INTEL package is deterministic. This means
+that the results should be reproducible from run to run with the
+{same} parallel configurations and when using deterministic 
+libraries or library settings (MPI, OpenMP, FFT). However, there
+are differences in the USER-INTEL package that can change the
+order of operations compared to LAMMPS without acceleration:
+
+Neighbor lists can be created in a different order :ulb,l
+Bins used for sorting atoms can be oriented differently :l
+The default stencil order for PPPM is 7. By default, LAMMPS will 
+calculate other PPPM parameters to fit the desired accuracy with 
+this order :l
+The {newton} setting applies to all atoms, not just atoms shared
+between MPI tasks :l
+Vectorization can change the order for adding pairwise forces :l
+:ule
+
+The precision mode (described below) used with the USER-INTEL 
+package can change the {accuracy} of the calculations. For the 
+default {mixed} precision option, calculations between pairs or 
+triplets of atoms are performed in single precision, intended to 
+be within the inherent error of MD simulations. All accumulation
+is performed in double precision to prevent the error from growing 
+with the number of atoms in the simulation. {Single} precision
+mode should not be used without appropriate validation.
+
+:line
+
 [Quick Start for Experienced Users:]

 LAMMPS should be built with the USER-INTEL package installed.
 Simulations should be run with 1 MPI task per physical {core},
 not {hardware thread}.

-For Intel Xeon CPUs:
-
 Edit src/MAKE/OPTIONS/Makefile.intel_cpu_intelmpi as necessary. :ulb,l
-If using {kspace_style pppm} in the input script, add "neigh_modify binsize cutoff" and "kspace_modify diff ad" to the input script for better
-performance.  Cutoff should be roughly the neighbor list cutoff.  By
-default the binsize is half the neighbor list cutoff.  :l
-"-pk intel 0 omp 2 -sf intel" added to LAMMPS command-line :l
+Set the environment variable KMP_BLOCKTIME=0 :l
+"-pk intel 0 omp $t -sf intel" added to LAMMPS command-line :l
+$t should be 2 for Intel Xeon CPUs and 2 or 4 for Intel Xeon Phi :l
+For some of the simple 2-body potentials without long-range
+electrostatics, performance and scalability can be better with
+the "newton off" setting added to the input script :l
+If using {kspace_style pppm} in the input script, add 
+"kspace_modify diff ad" for better performance :l
 :ule

-For Intel Xeon Phi CPUs for simulations without {kspace_style
-pppm} in the input script :
+For Intel Xeon Phi CPUs:

-Edit src/MAKE/OPTIONS/Makefile.knl as necessary. :ulb,l
-Runs should be performed using MCDRAM. :l
-"-pk intel 0 omp 2 -sf intel" {or} "-pk intel 0 omp 4 -sf intel"
-should be added to the LAMMPS command-line. Choice for best
-performance will depend on the simulation. :l
+Runs should be performed using MCDRAM. :ulb,l
 :ule

-For Intel Xeon Phi CPUs for simulations with {kspace_style
-pppm} in the input script:
+For simulations using {kspace_style pppm} on Intel CPUs 
+supporting AVX-512:

-Edit src/MAKE/OPTIONS/Makefile.knl as necessary. :ulb,l
-Runs should be performed using MCDRAM. :l
-Add "neigh_modify binsize 3" to the input script for better
-performance. :l
-Add "kspace_modify diff ad" to the input script for better
-performance. :l
-export KMP_AFFINITY=none :l
-"-pk intel 0 omp 3 lrt yes -sf intel" or "-pk intel 0 omp 1 lrt yes
-sf intel" added to LAMMPS command-line. Choice for best performance
-will depend on the simulation. :l
+Add "kspace_modify diff ad" to the input script :ulb,l
+The command-line option should be changed to 
+"-pk intel 0 omp $r lrt yes -sf intel" where $r is the number of 
+threads minus 1. :l
+Do not use thread affinity (set KMP_AFFINITY=none) :l
+The "newton off" setting may provide better scalability :l
 :ule

 For Intel Xeon Phi coprocessors (Offload):
@ -169,6 +195,10 @@ cat /proc/cpuinfo :pre

 [Building LAMMPS with the USER-INTEL package:]

+NOTE: See the src/USER-INTEL/README file for additional flags that
+might be needed for best performance on Intel server processors
+code-named "Skylake".
+
 The USER-INTEL package must be installed into the source directory:

 make yes-user-intel :pre
@ -322,8 +352,8 @@ follow in the input script.

 NOTE: The USER-INTEL package will perform better with modifications
 to the input script when "PPPM"_kspace_style.html is used:
-"kspace_modify diff ad"_kspace_modify.html and "neigh_modify binsize
-3"_neigh_modify.html should be added to the input script.
+"kspace_modify diff ad"_kspace_modify.html should be added to the 
+input script.

 Long-Range Thread (LRT) mode is an option to the "package
 intel"_package.html command that can improve performance when using
@ -342,6 +372,10 @@ would normally perform best with "-pk intel 0 omp 4", instead use
 environment variable "KMP_AFFINITY=none". LRT mode is not supported
 when using offload.

+NOTE: Changing the "newton"_newton.html setting to off can improve
+performance and/or scalability for simple 2-body potentials such as
+lj/cut or when using LRT mode on processors supporting AVX-512.
+
 Not all styles are supported in the USER-INTEL package. You can mix
 the USER-INTEL package with styles from the "OPT"_accelerate_opt.html
 package or the "USER-OMP package"_accelerate_omp.html. Of course,
@ -467,7 +501,7 @@ supported.

 Brown, W.M., Carrillo, J.-M.Y., Mishra, B., Gavhane, N., Thakker, F.M., De Kraker, A.R., Yamada, M., Ang, J.A., Plimpton, S.J., "Optimizing Classical Molecular Dynamics in LAMMPS," in Intel Xeon Phi Processor High Performance Programming: Knights Landing Edition, J. Jeffers, J. Reinders, A. Sodani, Eds. Morgan Kaufmann. :ulb,l

-Brown, W. M., Semin, A., Hebenstreit, M., Khvostov, S., Raman, K., Plimpton, S.J. Increasing Molecular Dynamics Simulation Rates with an 8-Fold Increase in Electrical Power Efficiency. 2016 International Conference for High Performance Computing. In press. :l
+Brown, W. M., Semin, A., Hebenstreit, M., Khvostov, S., Raman, K., Plimpton, S.J. "Increasing Molecular Dynamics Simulation Rates with an 8-Fold Increase in Electrical Power Efficiency."_http://dl.acm.org/citation.cfm?id=3014915 2016 High Performance Computing, Networking, Storage and Analysis, SC16: International Conference (pp. 82-95). :l

 Brown, W.M., Carrillo, J.-M.Y., Gavhane, N., Thakkar, F.M., Plimpton, S.J. Optimizing Legacy Molecular Dynamics Software with Directive-Based Offload. Computer Physics Communications. 2015. 195: p. 95-101. :l
 :ule
--- a/doc/src/compute_cna_atom.txt
+++ b/doc/src/compute_cna_atom.txt
@ -26,7 +26,7 @@ Define a computation that calculates the CNA (Common Neighbor
 Analysis) pattern for each atom in the group.  In solid-state systems
 the CNA pattern is a useful measure of the local crystal structure
 around an atom.  The CNA methodology is described in "(Faken)"_#Faken
-and "(Tsuzuki)"_#Tsuzuki.
+and "(Tsuzuki)"_#Tsuzuki1.

 Currently, there are five kinds of CNA patterns LAMMPS recognizes:

@ -93,5 +93,5 @@ above.
 :link(Faken)
 [(Faken)] Faken, Jonsson, Comput Mater Sci, 2, 279 (1994).

-:link(Tsuzuki)
+:link(Tsuzuki1)
 [(Tsuzuki)] Tsuzuki, Branicio, Rino, Comput Phys Comm, 177, 518 (2007).
--- a/doc/src/compute_cnp_atom.txt
+++ b/doc/src/compute_cnp_atom.txt
@ -0,0 +1,111 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+compute cnp/atom command :h3
+
+[Syntax:]
+
+compute ID group-ID cnp/atom cutoff :pre
+
+ID, group-ID are documented in "compute"_compute.html command
+cnp/atom = style name of this compute command
+cutoff = cutoff distance for nearest neighbors (distance units) :ul
+
+[Examples:]
+
+compute 1 all cnp/atom 3.08 :pre
+
+[Description:]
+
+Define a computation that calculates the Common Neighborhood
+Parameter (CNP) for each atom in the group.  In solid-state systems
+the CNP is a useful measure of the local crystal structure
+around an atom and can be used to characterize whether the
+atom is part of a perfect lattice, a local defect (e.g. a dislocation
+or stacking fault), or at a surface.
+
+The value of the CNP parameter will be 0.0 for atoms not in the
+specified compute group.  Note that normally a CNP calculation should
+only be performed on single component systems.
+
+This parameter is computed using the following formula from
+"(Tsuzuki)"_#Tsuzuki2
+
+:c,image(Eqs/cnp_eq.jpg)
+
+where the index {j} goes over the {n}i nearest neighbors of atom
+{i}, and the index {k} goes over the {n}ij common nearest neighbors
+between atom {i} and atom {j}. Rik and Rjk are the vectors connecting atom
+{k} to atoms {i} and {j}.  The quantity in the double sum is computed
+for each atom. 
+
+The CNP calculation is sensitive to the specified cutoff value.
+You should ensure that the appropriate nearest neighbors of an atom are
+found within the cutoff distance for the presumed crystal structure.
+E.g. 12 nearest neighbors for perfect FCC and HCP crystals, 14 nearest
+neighbors for perfect BCC crystals.  These formulas can be used to
+obtain a good cutoff distance:
+
+:c,image(Eqs/cnp_cutoff.jpg)
+
+where a is the lattice constant for the crystal structure concerned
+and in the HCP case, x = (c/a) / 1.633, where 1.633 is the ideal c/a
+for HCP crystals.
+
+Also note that since the CNP calculation in LAMMPS uses the neighbors
+of an owned atom to find the nearest neighbors of a ghost atom, the
+following relation should also be satisfied:
+
+:c,image(Eqs/cnp_cutoff2.jpg)
+
+where Rc is the cutoff distance of the potential, Rs is the skin
+distance as specified by the "neighbor"_neighbor.html command, and
+cutoff is the argument used with the compute cnp/atom command.  LAMMPS
+will issue a warning if this is not the case.
+
+The neighbor list needed to compute this quantity is constructed each
+time the calculation is performed (e.g. each time a snapshot of atoms
+is dumped).  Thus it can be inefficient to compute/dump this quantity
+too frequently or to have multiple compute/dump commands, each with a
+{cnp/atom} style.
+
+[Output info:]
+
+This compute calculates a per-atom vector, which can be accessed by
+any command that uses per-atom values from a compute as input.  See
+"Section 6.15"_Section_howto.html#howto_15 for an overview of
+LAMMPS output options.
+
+The per-atom vector values will be real positive numbers. Some typical CNP
+values:
+
+FCC lattice = 0.0
+BCC lattice = 0.0
+HCP lattice = 4.4 :pre
+
+FCC (111) surface ~ 13.0
+FCC (100) surface ~ 26.5
+FCC dislocation core ~ 11 :pre
+
+[Restrictions:]
+
+This compute is part of the USER-MISC package.  It is only enabled if
+LAMMPS was built with that package.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
+
+[Related commands:]
+
+"compute cna/atom"_compute_cna_atom.html
+"compute centro/atom"_compute_centro_atom.html
+
+[Default:] none
+
+:line
+
+:link(Tsuzuki2)
+[(Tsuzuki)] Tsuzuki, Branicio, Rino, Comput Phys Comm, 177, 518 (2007).
--- a/doc/src/computes.txt
+++ b/doc/src/computes.txt
@ -17,6 +17,7 @@ Computes :h1
   compute_chunk_atom
   compute_cluster_atom
   compute_cna_atom
+   compute_cnp_atom
   compute_com
   compute_com_chunk
   compute_contact_atom
--- a/doc/src/dump_modify.txt
+++ b/doc/src/dump_modify.txt
@ -16,7 +16,8 @@ dump-ID = ID of dump to modify :ulb,l
 one or more keyword/value pairs may be appended :l
 these keywords apply to various dump styles :l
 keyword = {append} or {buffer} or {element} or {every} or {fileper} or {first} or {flush} or {format} or {image} or {label} or {nfile} or {pad} or {precision} or {region} or {scale} or {sort} or {thresh} or {unwrap} :l
-  {append} arg = {yes} or {no}
+  {append} arg = {yes} or {no} or {at} N
+    N = index of frame written upon first dump
  {buffer} arg = {yes} or {no}
  {element} args = E1 E2 ... EN, where N = # of atom types
    E1,...,EN = element name, e.g. C or Fe or Ga
@ -41,6 +42,7 @@ keyword = {append} or {buffer} or {element} or {every} or {fileper} or {first} o
  {region} arg = region-ID or "none"
  {scale} arg = {yes} or {no}
  {sfactor} arg = coordinate scaling factor (> 0.0)
+  {thermo} arg = {yes} or {no}
  {tfactor} arg = time scaling factor (> 0.0)
  {sort} arg = {off} or {id} or N or -N
     off = no sorting of per-atom lines within a snapshot
@ -139,12 +141,13 @@ and {dcd}.  It also applies only to text output files, not to binary
 or gzipped or image/movie files.  If specified as {yes}, then dump
 snapshots are appended to the end of an existing dump file.  If
 specified as {no}, then a new dump file will be created which will
-overwrite an existing file with the same name.  This keyword can only
-take effect if the dump_modify command is used after the
-"dump"_dump.html command, but before the first command that causes
-dump snapshots to be output, e.g. a "run"_run.html or
-"minimize"_minimize.html command.  Once the dump file has been opened,
-this keyword has no further effect.
+overwrite an existing file with the same name.  If the {at} option is present
+({netcdf} only), then the frame to append to can be specified.  Negative values
+are counted from the end of the file.  This keyword can only take effect if the
+dump_modify command is used after the "dump"_dump.html command, but before the
+first command that causes dump snapshots to be output, e.g. a "run"_run.html or
+"minimize"_minimize.html command.  Once the dump file has been opened, this
+keyword has no further effect.

 :line

@ -413,6 +416,13 @@ most effective when the typical magnitude of position data is between

 :line

+The {thermo} keyword ({netcdf} only) triggers writing of "thermo"_thermo.html
+information to the dump file alongside per-atom data. The data included in the
+dump file is identical to the data specified by
+"thermo_style"_thermo_style.html.
+
+:line
+
 The {region} keyword only applies to the dump {custom}, {cfg},
 {image}, and {movie} styles.  If specified, only atoms in the region
 will be written to the dump file or included in the image/movie.  Only
--- a/doc/src/dump_netcdf.txt
+++ b/doc/src/dump_netcdf.txt
@ -24,7 +24,7 @@ args = list of atom attributes, same as for "dump_style custom"_dump.html :l,ule
 [Examples:]

 dump 1 all netcdf 100 traj.nc type x y z vx vy vz
-dump_modify 1 append yes at -1 global c_thermo_pe c_thermo_temp c_thermo_press
+dump_modify 1 append yes at -1 thermo yes
 dump 1 all netcdf/mpiio 1000 traj.nc id type x y z :pre

 [Description:]
@ -44,7 +44,7 @@ rank.
 NetCDF files can be directly visualized via the following tools:

 Ovito (http://www.ovito.org/). Ovito supports the AMBER convention and
-all of the above extensions. :ule,b
+all extensions of this dump style. :ule,b

 VMD (http://www.ks.uiuc.edu/Research/vmd/). :l

@ -52,15 +52,9 @@ AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye
 contains a NetCDF reader that is not present in the standard
 distribution of AtomEye. :l,ule

-In addition to per-atom data, global data can be included in the dump
-file, which are the kinds of values output by the
-"thermo_style"_thermo_style.html command .  See "Section howto
-6.15"_Section_howto.html#howto_15 for an explanation of per-atom
-versus global data.  The global output written into the dump file can
-be from computes, fixes, or variables, by prefixing the compute/fix ID
-or variable name with "c_" or "f_" or "v_" respectively, as in the
-example above.  These global values are specified via the "dump_modify
-global"_dump_modify.html command.
+In addition to per-atom data, "thermo"_thermo.html data can be included in the
+dump file. The data included in the dump file is identical to the data specified
+by "thermo_style"_thermo_style.html.

 :link(netcdf-home,http://www.unidata.ucar.edu/software/netcdf/)
 :link(pnetcdf-home,http://trac.mcs.anl.gov/projects/parallel-netcdf/)
--- a/doc/src/fix_adapt.txt
+++ b/doc/src/fix_adapt.txt
@ -47,7 +47,7 @@ keyword = {scale} or {reset} :l
 fix 1 all adapt 1 pair soft a 1 1 v_prefactor
 fix 1 all adapt 1 pair soft a 2* 3 v_prefactor
 fix 1 all adapt 1 pair lj/cut epsilon * * v_scale1 coul/cut scale 3 3 v_scale2 scale yes reset yes
-fix 1 all adapt 10 atom diameter v_size
+fix 1 all adapt 10 atom diameter v_size :pre

 variable ramp_up equal "ramp(0.01,0.5)"
 fix stretch all adapt 1 bond harmonic r0 1 v_ramp_up :pre
--- a/doc/src/fix_deform.txt
+++ b/doc/src/fix_deform.txt
@ -565,8 +565,10 @@ more instructions on how to use the accelerated styles effectively.

 [Restart, fix_modify, output, run start/stop, minimize info:]

-No information about this fix is written to "binary restart
-files"_restart.html.  None of the "fix_modify"_fix_modify.html options
+This fix will restore the initial box settings from "binary restart
+files"_restart.html, which allows the fix to properly continue
+deformation, when using the start/stop options of the "run"_run.html
+command.  None of the "fix_modify"_fix_modify.html options
 are relevant to this fix.  No global or per-atom quantities are stored
 by this fix for access by various "output
 commands"_Section_howto.html#howto_15.
--- a/doc/src/fix_neb.txt
+++ b/doc/src/fix_neb.txt
@ -10,68 +10,183 @@ fix neb command :h3

 [Syntax:]

-fix ID group-ID neb Kspring :pre
+fix ID group-ID neb Kspring keyword value :pre

-ID, group-ID are documented in "fix"_fix.html command
-neb = style name of this fix command
-Kspring = inter-replica spring constant (force/distance units) :ul
+ID, group-ID are documented in "fix"_fix.html command :ulb,l
+neb = style name of this fix command :l
+Kspring = spring constant for parallel nudging force (force/distance units or force units, see parallel keyword) :l
+zero or more keyword/value pairs may be appended :l
+keyword = {parallel} or {perp} or {end} :l
+  {parallel} value = {neigh} or {ideal}
+    {neigh} = parallel nudging force based on distance to neighbor replicas (Kspring = force/distance units)
+    {ideal} = parallel nudging force based on interpolated ideal position (Kspring = force units)
+  {perp} value = {Kspring2}
+    {Kspring2} = spring constant for perpendicular nudging force (force/distance units)
+  {end} values = estyle Kspring3
+    {estyle} = {first} or {last} or {last/efirst} or {last/efirst/middle}
+      {first} = apply force to first replica
+      {last} = apply force to last replica
+      {last/efirst} = apply force to last replica and set its target energy to that of first replica
+      {last/efirst/middle} = same as {last/efirst} plus prevent middle replicas having lower energy than first replica
+    {Kspring3} = spring constant for target energy term (1/distance units) :pre,ule

 [Examples:]

-fix 1 active neb 10.0 :pre
+fix 1 active neb 10.0
+fix 2 all neb 1.0 perp 1.0 end last
+fix 2 all neb 1.0 perp 1.0 end first 1.0 end last 1.0
+fix 1 all neb 1.0 parallel ideal end last/efirst 1 :pre

 [Description:]

-Add inter-replica forces to atoms in the group for a multi-replica
+Add nudging forces to atoms in the group for a multi-replica
 simulation run via the "neb"_neb.html command to perform a nudged
-elastic band (NEB) calculation for transition state finding.  Hi-level
-explanations of NEB are given with the "neb"_neb.html command and in
-"Section 6.5"_Section_howto.html#howto_5 of the manual.  The fix
-neb command must be used with the "neb" command to define how
-inter-replica forces are computed.
+elastic band (NEB) calculation for finding the transition state.
+High-level explanations of NEB are given with the "neb"_neb.html command
+and in "Section_howto 5"_Section_howto.html#howto_5 of the manual.
+The fix neb command must be used with the "neb" command and defines
+how inter-replica nudging forces are computed.  A NEB calculation is
+divided into two stages. In the first stage n replicas are relaxed
+toward a MEP until convergence.  In the second stage, the climbing
+image scheme (see "(Henkelman2)"_#Henkelman2) is enabled, so that the
+replica having the highest energy relaxes toward the saddle point
+(i.e. the point of highest energy along the MEP), and a second
+relaxation is performed.

-Only the N atoms in the fix group experience inter-replica forces.
-Atoms in the two end-point replicas do not experience these forces,
-but those in intermediate replicas do.  During the initial stage of
-NEB, the 3N-length vector of interatomic forces Fi = -Grad(V) acting
-on the atoms of each intermediate replica I is altered, as described
-in the "(Henkelman1)"_#Henkelman1 paper, to become:
+A key purpose of the nudging forces is to keep the replicas equally
+spaced.  During the NEB calculation, the 3N-length vector of
+interatomic force Fi = -Grad(V) for each replica I is altered.  For
+all intermediate replicas (i.e. for 1 < I < N, except the climbing
+replica) the force vector becomes:

-Fi = -Grad(V) + (Grad(V) dot That) That + Kspring (| Ri+i - Ri | - | Ri - Ri-1 |) That :pre
+Fi = -Grad(V) + (Grad(V) dot T') T' + Fnudge_parallel + Fnudge_perp :pre

-Ri are the atomic coordinates of replica I; Ri-1 and Ri+1 are the
-coordinates of its neighbor replicas.  That (t with a hat over it) is
-the unit "tangent" vector for replica I which is a function of Ri,
+T' is the unit "tangent" vector for replica I and is a function of Ri,
 Ri-1, Ri+1, and the potential energy of the 3 replicas; it points
 roughly in the direction of (Ri+i - Ri-1); see the
-"(Henkelman1)"_#Henkelman1 paper for details.
+"(Henkelman1)"_#Henkelman1 paper for details.  Ri are the atomic
+coordinates of replica I; Ri-1 and Ri+1 are the coordinates of its
+neighbor replicas.  The term (Grad(V) dot T') is used to remove the
+component of the gradient parallel to the path which would tend to
+distribute the replica unevenly along the path.  Fnudge_parallel is an
+artificial nudging force which is applied only in the tangent
+direction and which maintains the equal spacing between replicas (see
+below for more information).  Fnudge_perp is an optional artificial
+spring which is applied in a direction perpendicular to the tangent
+direction and which prevents the paths from forming acute kinks (see
+below for more information).

-The first two terms in the above equation are the component of the
-interatomic forces perpendicular to the tangent vector.  The last term
-is a spring force between replica I and its neighbors, parallel to the
-tangent vector direction with the specified spring constant {Kspring}.
+In the second stage of the NEB calculation, the interatomic force Fi
+for the climbing replica (the replica of highest energy after the
+first stage) is changed to:

-The effect of the first two terms is to push the atoms of each replica
-toward the minimum energy path (MEP) of conformational states that
-transition over the energy barrier.  The MEP for an energy barrier is
-defined as a sequence of 3N-dimensional states which cross the barrier
-at its saddle point, each of which has a potential energy gradient
-parallel to the MEP itself.
+Fi = -Grad(V) + 2 (Grad(V) dot T') T' :pre

-The effect of the last term is to push each replica away from its two
-neighbors in a direction along the MEP, so that the final set of
-states are equidistant from each other.
+and the relaxation procedure is continued to a new converged MEP.

-During the second stage of NEB, the forces on the N atoms in the
-replica nearest the top of the energy barrier are altered so that it
-climbs to the top of the barrier and finds the saddle point.  The
-forces on atoms in this replica are described in the
-"(Henkelman2)"_#Henkelman2 paper, and become:
+:line

-Fi = -Grad(V) + 2 (Grad(V) dot That) That :pre
+The keyword {parallel} specifies how the parallel nudging force is
+computed.  With a value of {neigh}, the parallel nudging force is
+computed as in "(Henkelman1)"_#Henkelman1 by connecting each
+intermediate replica with the previous and the next image:

-The inter-replica forces for the other replicas are unchanged from the
-first equation.
+Fnudge_parallel = {Kspring} * (|Ri+1 - Ri| - |Ri - Ri-1|) :pre
+
+Note that in this case the specified {Kspring} is in force/distance
+units.
+
+With a value of {ideal}, the spring force is computed as suggested in
+"(WeinenE)"_#WeinenE :
+
+Fnudge_parallel = -{Kspring} * (RD-RDideal) / (2 * meanDist) :pre
+
+where RD is the "reaction coordinate" (see the "neb"_neb.html doc
+page), and RDideal is the ideal RD for which all the images are
+equally spaced, i.e. RDideal = (I-1)*meanDist when the climbing
+replica is off, where I is the replica number.  The meanDist is the
+average distance between replicas.  Note that in this case the
+specified {Kspring} is in force units.
+
+Note that the {ideal} form of nudging can often be more effective at
+keeping the replicas equally spaced.
+
+:line
+
+The keyword {perp} specifies if and how a perpendicular nudging force
+is computed.  It adds a spring force perpendicular to the path in
+order to prevent the path from becoming too kinky.  It can
+significantly improve the convergence of the NEB calculation when the
+resolution is poor.  I.e. when few replicas are used; see
+"(Maras)"_#Maras1 for details.
+
+The perpendicular spring force is given by
+
+Fnudge_perp = {Kspring2} * F(Ri-1,Ri,Ri+1) (Ri+1 + Ri-1 - 2 Ri) :pre
+
+where {Kspring2} is the specified value.  F(Ri-1,Ri,Ri+1) is a smooth
+scalar function of the angle Ri-1 Ri Ri+1.  It is equal to 0.0 when
+the path is straight and is equal to 1 when the angle Ri-1 Ri Ri+1 is
+acute.  F(Ri-1,Ri,Ri+1) is defined in "(Jonsson)"_#Jonsson.
+
+If {Kspring2} is set to 0.0 (the default) then no perpendicular spring
+force is added.
+
+:line
+
+By default, no additional forces act on the first and last replicas
+during the NEB relaxation, so these replicas simply relax toward their
+respective local minima.  By using the keyword {end}, additional
+forces can be applied to the first and/or last replicas, to enable
+them to relax toward a MEP while constraining their energy.
+
+The interatomic force Fi for the specified replica becomes:
+
+Fi = -Grad(V) + (Grad(V) dot T' + (E-ETarget)*Kspring3) T',  {when} Grad(V) dot T' < 0
+Fi = -Grad(V) + (Grad(V) dot T' + (ETarget- E)*Kspring3) T', {when} Grad(V) dot T' > 0
+:pre
+
+where E is the current energy of the replica and ETarget is the target
+energy.  The "spring" constant on the difference in energies is the
+specified {Kspring3} value.
+
+When {estyle} is specified as {first}, the force is applied to the
+first replica.  When {estyle} is specified as {last}, the force is
+applied to the last replica.  Note that the {end} keyword can be used
+twice to add forces to both the first and last replicas.
+
+For both these {estyle} settings, the target energy {ETarget} is set
+to the initial energy of the replica (at the start of the NEB
+calculation).
+
+If the {estyle} is specified as {last/efirst} or {last/efirst/middle},
+force is applied to the last replica, but the target energy {ETarget}
+is continuously set to the energy of the first replica, as it evolves
+during the NEB relaxation.
+
+The difference between these two {estyle} options is as follows.  When
+{estyle} is specified as {last/efirst}, no change is made to the
+inter-replica force applied to the intermediate replicas (neither
+first nor last).  If the initial path is too far from the MEP, an
+intermediate replica may relax "faster" and reach a lower energy than
+the last replica.  In this case the intermediate replica will be
+relaxing toward its own local minimum.  This behavior can be prevented
+by specifying {estyle} as {last/efirst/middle} which will alter the
+inter-replica force applied to intermediate replicas by removing the
+contribution of the gradient to the inter-replica force.  This will
+only be done if a particular intermediate replica has a lower energy
+than the first replica.  This should effectively prevent the
+intermediate replicas from over-relaxing.
+
+After converging a NEB calculation using an {estyle} of
+{last/efirst/middle}, you should check that all intermediate replicas
+have a larger energy than the first replica. If this is not the case,
+the path is probably not a MEP.
+
+Finally, note that if the last replica converges toward a local
+minimum which has a larger energy than the energy of the first
+replica, a NEB calculation using an {estyle} of {last/efirst} or
+{last/efirst/middle} cannot reach final convergence.

 [Restart, fix_modify, output, run start/stop, minimize info:]

@ -96,7 +211,12 @@ for more info on packages.

 "neb"_neb.html

-[Default:] none
+[Default:]
+
+The option defaults are parallel = neigh, perp = 0.0, and end is not
+specified (no inter-replica force on the end replicas).
+
+:line

 :link(Henkelman1)
 [(Henkelman1)] Henkelman and Jonsson, J Chem Phys, 113, 9978-9985 (2000).
@ -104,3 +224,15 @@ for more info on packages.
 :link(Henkelman2)
 [(Henkelman2)] Henkelman, Uberuaga, Jonsson, J Chem Phys, 113,
 9901-9904 (2000).
+
+:link(WeinenE)
+[(WeinenE)] E, Ren, Vanden-Eijnden, Phys Rev B, 66, 052301 (2002).
+
+:link(Jonsson)
+[(Jonsson)] Jonsson, Mills and Jacobsen, in Classical and Quantum
+Dynamics in Condensed Phase Simulations, edited by Berne, Ciccotti,
+and Coker World Scientific, Singapore, 1998, p 385.
+
+:link(Maras1)
+[(Maras)] Maras, Trushin, Stukowski, Ala-Nissila, Jonsson,
+Comp Phys Comm, 205, 13-21 (2016).
--- a/doc/src/fix_qeq_reax.txt
+++ b/doc/src/fix_qeq_reax.txt
@ -8,17 +8,19 @@

 fix qeq/reax command :h3
 fix qeq/reax/kk command :h3
+fix qeq/reax/omp command :h3

 [Syntax:]

-fix ID group-ID qeq/reax Nevery cutlo cuthi tolerance params :pre
+fix ID group-ID qeq/reax Nevery cutlo cuthi tolerance params args :pre

 ID, group-ID are documented in "fix"_fix.html command
 qeq/reax = style name of this fix command
 Nevery = perform QEq every this many steps
 cutlo,cuthi = lo and hi cutoff for Taper radius
 tolerance = precision to which charges will be equilibrated
-params = reax/c or a filename :ul
+params = reax/c or a filename
+args   = {dual} (optional) :ul

 [Examples:]

@ -59,6 +61,10 @@ potential file, except that eta is defined here as twice the eta value
 in the ReaxFF file. Note that unlike the rest of LAMMPS, the units
 of this fix are hard-coded to be A, eV, and electronic charge.

+The optional {dual} keyword allows the optimization of the S and T
+matrices to be performed in parallel. This is only supported for
+the {qeq/reax/omp} style. Otherwise they are processed separately.
+
 [Restart, fix_modify, output, run start/stop, minimize info:]

 No information about this fix is written to "binary restart
--- a/doc/src/fix_rigid.txt
+++ b/doc/src/fix_rigid.txt
@ -31,11 +31,12 @@ bodystyle = {single} or {molecule} or {group} :l
    groupID1, groupID2, ... = list of N group IDs :pre

 zero or more keyword/value pairs may be appended :l
-keyword = {langevin} or {temp} or {iso} or {aniso} or {x} or {y} or {z} or {couple} or {tparam} or {pchain} or {dilate} or {force} or {torque} or {infile} :l
+keyword = {langevin} or {reinit} or {temp} or {iso} or {aniso} or {x} or {y} or {z} or {couple} or {tparam} or {pchain} or {dilate} or {force} or {torque} or {infile} :l
  {langevin} values = Tstart Tstop Tperiod seed
    Tstart,Tstop = desired temperature at start/stop of run (temperature units)
    Tdamp = temperature damping parameter (time units)
    seed = random number seed to use for white noise (positive integer)
+  {reinit} = {yes} or {no}
  {temp} values = Tstart Tstop Tdamp
    Tstart,Tstop = desired temperature at start/stop of run (temperature units)
    Tdamp = temperature damping parameter (time units)
@ -68,10 +69,10 @@ keyword = {langevin} or {temp} or {iso} or {aniso} or {x} or {y} or {z} or {coup

 [Examples:]

-fix 1 clump rigid single
+fix 1 clump rigid single reinit yes
 fix 1 clump rigid/small molecule
 fix 1 clump rigid single force 1 off off on langevin 1.0 1.0 1.0 428984
-fix 1 polychains rigid/nvt molecule temp 1.0 1.0 5.0
+fix 1 polychains rigid/nvt molecule temp 1.0 1.0 5.0 reinit no
 fix 1 polychains rigid molecule force 1*5 off off off force 6*10 off off on
 fix 1 polychains rigid/small molecule langevin 1.0 1.0 1.0 428984
 fix 2 fluid rigid group 3 clump1 clump2 clump3 torque * off off off
@ -87,7 +88,12 @@ means that each timestep the total force and torque on each rigid body
 is computed as the sum of the forces and torques on its constituent
 particles.  The coordinates, velocities, and orientations of the atoms
 in each body are then updated so that the body moves and rotates as a
-single entity.
+single entity.  This is implemented by creating internal data structures
+for each rigid body and performing time integration on these data
+structures.  Positions, velocities, and orientations of the constituent
+particles are regenerated from the rigid body data structures in every
+time step. This restricts which operations and fixes can be applied to
+rigid bodies. See below for a detailed discussion.

 Examples of large rigid bodies are a colloidal particle, or portions
 of a biomolecule such as a protein.
@ -148,8 +154,9 @@ differences may accumulate to produce divergent trajectories.

 NOTE: You should not update the atoms in rigid bodies via other
 time-integration fixes (e.g. "fix nve"_fix_nve.html, "fix
-nvt"_fix_nh.html, "fix npt"_fix_nh.html), or you will be integrating
-their motion more than once each timestep.  When performing a hybrid
+nvt"_fix_nh.html, "fix npt"_fix_nh.html, "fix move"_fix_move.html),
+or you will have conflicting updates to positions and velocities
+resulting in unphysical behavior in most cases. When performing a hybrid
 simulation with some atoms in rigid bodies, and some not, a separate
 time integration fix like "fix nve"_fix_nve.html or "fix
 nvt"_fix_nh.html should be used for the non-rigid particles.
@ -165,23 +172,29 @@ setting the force on them to 0.0 (via the "fix
 setforce"_fix_setforce.html command), and integrating them as usual
 (e.g. via the "fix nve"_fix_nve.html command).

-NOTE: The aggregate properties of each rigid body are calculated one
-time at the start of the first simulation run after these fixes are
-specified.  The properties include the position and velocity of the
-center-of-mass of the body, its moments of inertia, and its angular
-momentum.  This is done using the properties of the constituent atoms
-of the body at that point in time (or see the {infile} keyword
-option).  Thereafter, changing properties of individual atoms in the
-body will have no effect on a rigid body's dynamics, unless they
-affect the "pair_style"_pair_style.html interactions that individual
-particles are part of.  For example, you might think you could
-displace the atoms in a body or add a large velocity to each atom in a
-body to make it move in a desired direction before a 2nd run is
+IMPORTANT NOTE: The aggregate properties of each rigid body are
+calculated at the start of a simulation run and are maintained in
+internal data structures. The properties include the position and
+velocity of the center-of-mass of the body, its moments of inertia, and
+its angular momentum.  This is done using the properties of the
+constituent atoms of the body at that point in time (or see the {infile}
+keyword option).  Thereafter, changing these properties of individual
+atoms in the body will have no effect on a rigid body's dynamics, unless
+they affect any computation of per-atom forces or torques. If the
+keyword {reinit} is set to {yes} (the default), the rigid body data
+structures will be recreated at the beginning of each {run} command;
+if the keyword {reinit} is set to {no}, the rigid body data structures
+will be built only at the very first {run} command and maintained for
+as long as the rigid fix is defined. For example, you might think you
+could displace the atoms in a body or add a large velocity to each atom
+in a body to make it move in a desired direction before a 2nd run is
 performed, using the "set"_set.html or
 "displace_atoms"_displace_atoms.html or "velocity"_velocity.html
-command.  But these commands will not affect the internal attributes
-of the body, and the position and velocity of individual atoms in the
-body will be reset when time integration starts.
+commands.  But these commands will not affect the internal attributes
+of the body unless {reinit} is set to {yes}. With {reinit} set to {no}
+(or using the {infile} option, which implies {reinit} {no}) the position
+and velocity of individual atoms in the body will be reset when time
+integration starts again.

 :line

@ -401,6 +414,14 @@ couple none :pre

 The keyword/value option pairs are used in the following ways.

+The {reinit} keyword determines whether the rigid body properties
+are reinitialized between run commands. With the option {yes} (the
+default) this is done, with the option {no} this is not done. Turning
+off the reinitialization can be helpful to protect rigid bodies against
+unphysical manipulations between runs or when properties cannot be
+easily recomputed (e.g. when read from a file). When using the {infile}
+keyword, the {reinit} option is automatically set to {no}.
+
 The {langevin} and {temp} and {tparam} keywords perform thermostatting
 of the rigid bodies, altering both their translational and rotational
 degrees of freedom.  What is meant by "temperature" of a collection of
@ -778,7 +799,7 @@ exclude, "fix shake"_fix_shake.html

 The option defaults are force * on on on and torque * on on on,
 meaning all rigid bodies are acted on by center-of-mass force and
-torque.  Also Tchain = Pchain = 10, Titer = 1, Torder = 3.
+torque.  Also Tchain = Pchain = 10, Titer = 1, Torder = 3, reinit = yes.

 :line

--- a/doc/src/kspace_modify.txt
+++ b/doc/src/kspace_modify.txt
@ -308,7 +308,8 @@ The option defaults are mesh = mesh/disp = 0 0 0, order = order/disp =
 gewald = gewald/disp = 0.0, slab = 1.0, compute = yes, cutoff/adjust =
 yes (MSM), pressure/scalar = yes (MSM), fftbench = yes (PPPM), diff = ik
 (PPPM), mix/disp = pair, force/disp/real = -1.0, force/disp/kspace = -1.0,
-split = 0, tol = 1.0e-6, and disp/auto = no.
+split = 0, tol = 1.0e-6, and disp/auto = no. For pppm/intel, order =
+order/disp = 7.

 :line

--- a/doc/src/kspace_style.txt
+++ b/doc/src/kspace_style.txt
@ -33,12 +33,16 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
    accuracy = desired relative error in forces
  {pppm/gpu} value = accuracy
    accuracy = desired relative error in forces
+  {pppm/intel} value = accuracy
+    accuracy = desired relative error in forces
  {pppm/kk} value = accuracy
    accuracy = desired relative error in forces
  {pppm/omp} value = accuracy
    accuracy = desired relative error in forces
  {pppm/cg/omp} value = accuracy
    accuracy = desired relative error in forces
+  {pppm/disp/intel} value = accuracy
+    accuracy = desired relative error in forces
  {pppm/tip4p/omp} value = accuracy
    accuracy = desired relative error in forces
  {pppm/stagger} value = accuracy
--- a/doc/src/lammps.book
+++ b/doc/src/lammps.book
@ -301,6 +301,7 @@ compute_centro_atom.html
 compute_chunk_atom.html
 compute_cluster_atom.html
 compute_cna_atom.html
+compute_cnp_atom.html
 compute_com.html
 compute_com_chunk.html
 compute_contact_atom.html
@ -446,7 +447,6 @@ pair_lj96.html
 pair_lj_cubic.html
 pair_lj_expand.html
 pair_lj_long.html
-pair_lj_sf.html
 pair_lj_smooth.html
 pair_lj_smooth_linear.html
 pair_lj_soft.html
--- a/doc/src/manifolds.txt
+++ b/doc/src/manifolds.txt
@ -24,8 +24,9 @@ to the relevant fixes.
 {manifold} @ {parameters} @ {equation} @ {description}
 cylinder @ R @ x^2 + y^2 - R^2 = 0 @ Cylinder along z-axis, axis going through (0,0,0)
 cylinder_dent @ R l a @ x^2 + y^2 - r(z)^2 = 0, r(x) = R if | z | > l, r(z) = R - a*(1 + cos(z/l))/2 otherwise @ A cylinder with a dent around z = 0
-dumbbell @ a A B c @ -( x^2 + y^2 ) * (a^2 - z^2/c^2) * ( 1 + (A*sin(B*z^2))^4) = 0 @ A dumbbell @
+dumbbell @ a A B c @ -( x^2 + y^2 ) + (a^2 - z^2/c^2) * ( 1 + (A*sin(B*z^2))^4) = 0 @ A dumbbell
 ellipsoid @ a  b c @ (x/a)^2 + (y/b)^2 + (z/c)^2 = 0 @ An ellipsoid
+gaussian_bump @ A l rc1 rc2 @ if( x < rc1) -z + A * exp( -x^2 / (2 l^2) ); else if( x < rc2 ) -z + a + b*x + c*x^2 + d*x^3; else z @ A Gaussian bump at x = y = 0, smoothly tapered to a flat plane z = 0.
 plane @ a b c x0 y0 z0 @ a*(x-x0) + b*(y-y0) + c*(z-z0) = 0 @ A plane with normal (a,b,c) going through point (x0,y0,z0)
 plane_wiggle @ a w @ z - a*sin(w*x) = 0 @ A plane with a sinusoidal modulation on z along x.
 sphere @ R @ x^2 + y^2 + z^2 - R^2 = 0 @ A sphere of radius R
--- a/doc/src/neb.txt
+++ b/doc/src/neb.txt
@ -10,28 +10,31 @@ neb command :h3

 [Syntax:]

-neb etol ftol N1 N2 Nevery file-style arg :pre
+neb etol ftol N1 N2 Nevery file-style arg keyword :pre

 etol = stopping tolerance for energy (energy units) :ulb,l
 ftol = stopping tolerance for force (force units) :l
 N1 = max # of iterations (timesteps) to run initial NEB :l
 N2 = max # of iterations (timesteps) to run barrier-climbing NEB :l
 Nevery = print replica energies and reaction coordinates every this many timesteps :l
-file-style= {final} or {each} or {none} :l
+file-style = {final} or {each} or {none} :l
  {final} arg = filename
    filename = file with initial coords for final replica
-      coords for intermediate replicas are linearly interpolated between first and last replica
+      coords for intermediate replicas are linearly interpolated
+      between first and last replica
  {each} arg = filename
-    filename = unique filename for each replica (except first) with its initial coords
-  {none} arg = no argument
-    all replicas assumed to already have their initial coords :pre
+    filename = unique filename for each replica (except first)
+      with its initial coords
+  {none} arg = no argument
+    all replicas assumed to already have their initial coords :pre
+keyword = {verbose}
 :ule

 [Examples:]

 neb 0.1 0.0 1000 500 50 final coords.final
 neb 0.0 0.001 1000 500 50 each coords.initial.$i
-neb 0.0 0.001 1000 500 50 none :pre
+neb 0.0 0.001 1000 500 50 none verbose :pre

 [Description:]

@ -43,8 +46,8 @@ NEB is a method for finding both the atomic configurations and height
 of the energy barrier associated with a transition state, e.g. for an
 atom to perform a diffusive hop from one energy basin to another in a
 coordinated fashion with its neighbors.  The implementation in LAMMPS
-follows the discussion in these 3 papers: "(HenkelmanA)"_#HenkelmanA,
-"(HenkelmanB)"_#HenkelmanB, and "(Nakano)"_#Nakano3.
+follows the discussion in these 4 papers: "(HenkelmanA)"_#HenkelmanA,
+"(HenkelmanB)"_#HenkelmanB, "(Nakano)"_#Nakano3 and "(Maras)"_#Maras2.

 Each replica runs on a partition of one or more processors.  Processor
 partitions are defined at run-time using the -partition command-line
@ -70,18 +73,17 @@ I.e. the simulation domain, the number of atoms, the interaction
 potentials, and the starting configuration when the neb command is
 issued should be the same for every replica.

-In a NEB calculation each atom in a replica is connected to the same
-atom in adjacent replicas by springs, which induce inter-replica
-forces.  These forces are imposed by the "fix neb"_fix_neb.html
-command, which must be used in conjunction with the neb command.  The
-group used to define the fix neb command defines the NEB atoms which
-are the only ones that inter-replica springs are applied to.  If the
-group does not include all atoms, then non-NEB atoms have no
-inter-replica springs and the forces they feel and their motion is
-computed in the usual way due only to other atoms within their
-replica.  Conceptually, the non-NEB atoms provide a background force
-field for the NEB atoms.  They can be allowed to move during the NEB
-minimization procedure (which will typically induce different
+In a NEB calculation each replica is connected to other replicas by
+inter-replica nudging forces.  These forces are imposed by the "fix
+neb"_fix_neb.html command, which must be used in conjunction with the
+neb command.  The group used to define the fix neb command defines the
+NEB atoms which are the only ones that inter-replica springs are
+applied to.  If the group does not include all atoms, then non-NEB
+atoms have no inter-replica springs and the forces they feel and their
+motion is computed in the usual way due only to other atoms within
+their replica.  Conceptually, the non-NEB atoms provide a background
+force field for the NEB atoms.  They can be allowed to move during the
+NEB minimization procedure (which will typically induce different
 coordinates for non-NEB atoms in different replicas), or held fixed
 using other LAMMPS commands such as "fix setforce"_fix_setforce.html.
 Note that the "partition"_partition.html command can be used to invoke
@ -93,33 +95,18 @@ specified in different manners via the {file-style} setting, as
 discussed below.  Only atoms whose initial coordinates should differ
 from the current configuration need be specified.

-Conceptually, the initial configuration for the first replica should
-be a state with all the atoms (NEB and non-NEB) having coordinates on
-one side of the energy barrier.  A perfect energy minimum is not
-required, since atoms in the first replica experience no spring forces
-from the 2nd replica.  Thus the damped dynamics minimization will
-drive the first replica to an energy minimum if it is not already
-there.  However, you will typically get better convergence if the
-initial state is already at a minimum.  For example, for a system with
-a free surface, the surface should be fully relaxed before attempting
-a NEB calculation.
-
-Likewise, the initial configuration of the final replica should be a
-state with all the atoms (NEB and non-NEB) on the other side of the
-energy barrier.  Again, a perfect energy minimum is not required,
-since the atoms in the last replica also experience no spring forces
-from the next-to-last replica, and thus the damped dynamics
-minimization will drive it to an energy minimum.
+Conceptually, the initial configurations of the first and last
+replicas should be states on either side of an energy barrier.

 As explained below, the initial configurations of intermediate
 replicas can be atomic coordinates interpolated in a linear fashion
-between the first and last replicas.  This is often adequate state for
+between the first and last replicas.  This is often adequate for
 simple transitions.  For more complex transitions, it may lead to slow
 convergence or even bad results if the minimum energy path (MEP, see
 below) of states over the barrier cannot be correctly converged to
-from such an initial configuration.  In this case, you will want to
-generate initial states for the intermediate replicas that are
-geometrically closer to the MEP and read them in.
+from such an initial path.  In this case, you will want to generate
+initial states for the intermediate replicas that are geometrically
+closer to the MEP and read them in.

 :line

@ -135,10 +122,11 @@ is assigned to be a fraction of the distance.  E.g. if there are 10
 replicas, the 2nd replica will assign a position that is 10% of the
 distance along a line between the starting and final point, and the
 9th replica will assign a position that is 90% of the distance along
-the line.  Note that this procedure to produce consistent coordinates
-across all the replicas, the current coordinates need to be the same
-in all replicas.  LAMMPS does not check for this, but invalid initial
-configurations will likely result if it is not the case.
+the line.  Note that for this procedure to produce consistent
+coordinates across all the replicas, the current coordinates need to
+be the same in all replicas.  LAMMPS does not check for this, but
+invalid initial configurations will likely result if it is not the
+case.

 NOTE: The "distance" between the starting and final point is
 calculated in a minimum-image sense for a periodic simulation box.
@ -150,8 +138,8 @@ interpolation is outside the periodic box, the atom will be wrapped
 back into the box when the NEB calculation begins.

 For a {file-style} setting of {each}, a filename is specified which is
-assumed to be unique to each replica.  This can be done by
-using a variable in the filename, e.g.
+assumed to be unique to each replica.  This can be done by using a
+variable in the filename, e.g.

 variable i equal part
 neb 0.0 0.001 1000 500 50 each coords.initial.$i :pre
@ -198,11 +186,10 @@ The minimizer tolerances for energy and force are set by {etol} and
 A non-zero {etol} means that the NEB calculation will terminate if the
 energy criterion is met by every replica.  The energies being compared
 to {etol} do not include any contribution from the inter-replica
-forces, since these are non-conservative.  A non-zero {ftol} means
-that the NEB calculation will terminate if the force criterion is met
-by every replica.  The forces being compared to {ftol} include the
-inter-replica forces between an atom and its images in adjacent
-replicas.
+nudging forces, since these are non-conservative.  A non-zero {ftol}
+means that the NEB calculation will terminate if the force criterion
+is met by every replica.  The forces being compared to {ftol} include
+the inter-replica nudging forces.

 The maximum number of iterations in each stage is set by {N1} and
 {N2}.  These are effectively timestep counts since each iteration of
@ -220,27 +207,27 @@ finding a good energy barrier.  {N1} and {N2} must both be multiples
 of {Nevery}.

 In the first stage of NEB, the set of replicas should converge toward
-the minimum energy path (MEP) of conformational states that transition
-over the barrier.  The MEP for a barrier is defined as a sequence of
-3N-dimensional states that cross the barrier at its saddle point, each
-of which has a potential energy gradient parallel to the MEP itself.
-The replica states will also be roughly equally spaced along the MEP
-due to the inter-replica spring force added by the "fix
-neb"_fix_neb.html command.
+a minimum energy path (MEP) of conformational states that transition
+over a barrier.  The MEP for a transition is defined as a sequence of
+3N-dimensional states, each of which has a potential energy gradient
+parallel to the MEP itself.  The configuration of highest energy along
+a MEP corresponds to a saddle point.  The replica states will also be
+roughly equally spaced along the MEP due to the inter-replica nudging
+force added by the "fix neb"_fix_neb.html command.

-In the second stage of NEB, the replica with the highest energy
-is selected and the inter-replica forces on it are converted to a
-force that drives its atom coordinates to the top or saddle point of
-the barrier, via the barrier-climbing calculation described in
+In the second stage of NEB, the replica with the highest energy is
+selected and the inter-replica forces on it are converted to a force
+that drives its atom coordinates to the top or saddle point of the
+barrier, via the barrier-climbing calculation described in
 "(HenkelmanB)"_#HenkelmanB.  As before, the other replicas rearrange
 themselves along the MEP so as to be roughly equally spaced.

 When both stages are complete, if the NEB calculation was successful,
-one of the replicas should be an atomic configuration at the top or
-saddle point of the barrier, the potential energies for the set of
-replicas should represent the energy profile of the barrier along the
-MEP, and the configurations of the replicas should be a sequence of
-configurations along the MEP.
+the configurations of the replicas should be along (close to) the MEP
+and the replica with the highest energy should be an atomic
+configuration at (close to) the saddle point of the transition. The
+potential energies for the set of replicas represent the energy
+profile of the transition along the MEP.

 :line

@ -284,9 +271,9 @@ ID2 x2 y2 z2
 ...
 IDN xN yN zN :pre

-The fields are the atom ID, followed by the x,y,z coordinates.
-The lines can be listed in any order.  Additional trailing information
-on the line is OK, such as a comment.
+The fields are the atom ID, followed by the x,y,z coordinates.  The
+lines can be listed in any order.  Additional trailing information on
+the line is OK, such as a comment.

 Note that for a typical NEB calculation you do not need to specify
 initial coordinates for very many atoms to produce differing starting
@ -310,38 +297,54 @@ this case), the print-out to the screen and master log.lammps file
 contains a line of output, printed once every {Nevery} timesteps.  It
 contains the timestep, the maximum force per replica, the maximum
 force per atom (in any replica), potential gradients in the initial,
-final, and climbing replicas, the forward and backward energy barriers,
-the total reaction coordinate (RDT), and the normalized reaction
-coordinate and potential energy of each replica.
+final, and climbing replicas, the forward and backward energy
+barriers, the total reaction coordinate (RDT), and the normalized
+reaction coordinate and potential energy of each replica.

-The "maximum force per replica" is
-the two-norm of the 3N-length force vector for the atoms in each
-replica, maximized across replicas, which is what the {ftol} setting
-is checking against.  In this case, N is all the atoms in each
-replica.  The "maximum force per atom" is the maximum force component
-of any atom in any replica.  The potential gradients are the two-norm
-of the 3N-length force vector solely due to the interaction potential i.e.
-without adding in inter-replica forces. Note that inter-replica forces
-are zero in the initial and final replicas, and only affect
-the direction in the climbing replica. For this reason, the "maximum
-force per replica" is often equal to the potential gradient in the
-climbing replica. In the first stage of NEB, there is no climbing
-replica, and so the potential gradient in the highest energy replica
-is reported, since this replica will become the climbing replica
-in the second stage of NEB.
+The "maximum force per replica" is the two-norm of the 3N-length force
+vector for the atoms in each replica, maximized across replicas, which
+is what the {ftol} setting is checking against.  In this case, N is
+all the atoms in each replica.  The "maximum force per atom" is the
+maximum force component of any atom in any replica.  The potential
+gradients are the two-norm of the 3N-length force vector solely due to
+the interaction potential i.e.  without adding in inter-replica
+forces.

-The "reaction coordinate" (RD) for each
-replica is the two-norm of the 3N-length vector of distances between
-its atoms and the preceding replica's atoms, added to the RD of the
-preceding replica. The RD of the first replica RD1 = 0.0;
-the RD of the final replica RDN = RDT, the total reaction coordinate.
-The normalized RDs are divided by RDT,
-so that they form a monotonically increasing sequence
-from zero to one. When computing RD, N only includes the atoms
-being operated on by the fix neb command.
+The "reaction coordinate" (RD) for each replica is the two-norm of the
+3N-length vector of distances between its atoms and the preceding
+replica's atoms, added to the RD of the preceding replica. The RD of
+the first replica RD1 = 0.0; the RD of the final replica RDN = RDT,
+the total reaction coordinate.  The normalized RDs are divided by RDT,
+so that they form a monotonically increasing sequence from zero to
+one. When computing RD, N only includes the atoms being operated on by
+the fix neb command.

-The forward (reverse) energy barrier is the potential energy of the highest
-replica minus the energy of the first (last) replica.
+The forward (reverse) energy barrier is the potential energy of the
+highest replica minus the energy of the first (last) replica.
+
+Supplementary information for all replicas can be printed out to the
+screen and master log.lammps file by adding the {verbose} keyword. This
+information includes the following.  The "path angle" (pathangle) for
+replica i is the angle between the 3N-length vectors (Ri-1 -
+Ri) and (Ri+1 - Ri) (where Ri is the atomic coordinates of replica
+i). A "path angle" of 180 indicates that replicas i-1, i and i+1 are
+aligned.  "angletangrad" is the angle between the 3N-length tangent
+vector and the 3N-length force vector at image i. The tangent vector
+is calculated as in "(HenkelmanA)"_#HenkelmanA for all intermediate
+replicas and at R2 - R1 and RM - RM-1 for the first and last replica,
+respectively.  "anglegrad" is the angle between the 3N-length energy
+gradient vector of replica i and that of replica i+1. It is not
+defined for the final replica and reads nan.  gradV is the norm of the
+energy gradient of image i.  ReplicaForce is the two-norm of the
+3N-length force vector (including nudging forces) for replica i.
+MaxAtomForce is the maximum force component of any atom in replica i.
+
+When a NEB calculation does not converge properly, this supplementary
+information can help in understanding what is going wrong. For
+instance, when the path angle becomes acute, the definition of the
+tangent used in the NEB calculation is questionable and the NEB
+calculation may diverge "(Maras)"_#Maras2.
+ 

 When running on multiple partitions, LAMMPS produces additional log
 files for each partition, e.g. log.lammps.0, log.lammps.1, etc.  For a
@ -396,12 +399,16 @@ This command can only be used if LAMMPS was built with the REPLICA
 package.  See the "Making LAMMPS"_Section_start.html#start_3 section
 for more info on packages.

+:line
+
 [Related commands:]

-"prd"_prd.html, "temper"_temper.html, "fix
-langevin"_fix_langevin.html, "fix viscous"_fix_viscous.html
+"prd"_prd.html, "temper"_temper.html, "fix langevin"_fix_langevin.html,
+"fix viscous"_fix_viscous.html

-[Default:] none
+[Default:]
+
+none

 :line

@ -414,3 +421,7 @@ langevin"_fix_langevin.html, "fix viscous"_fix_viscous.html

 :link(Nakano3)
 [(Nakano)] Nakano, Comp Phys Comm, 178, 280-289 (2008).
+
+:link(Maras2)
+[(Maras)] Maras, Trushin, Stukowski, Ala-Nissila, Jonsson,
+Comp Phys Comm, 205, 13-21 (2016).
--- a/doc/src/pair_lj_long.txt
+++ b/doc/src/pair_lj_long.txt
@ -7,6 +7,7 @@
 :line

 pair_style lj/long/coul/long command :h3
+pair_style lj/long/coul/long/intel command :h3
 pair_style lj/long/coul/long/omp command :h3
 pair_style lj/long/coul/long/opt command :h3
 pair_style lj/long/tip4p/long command :h3
--- a/doc/src/pair_lj_sf.txt
+++ b/doc/src/pair_lj_sf.txt
@ -1,114 +0,0 @@
-"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
-
-:link(lws,http://lammps.sandia.gov)
-:link(ld,Manual.html)
-:link(lc,Section_commands.html#comm)
-
-:line
-
-pair_style lj/sf command :h3
-pair_style lj/sf/omp command :h3
-
-[Syntax:]
-
-pair_style lj/sf cutoff :pre
-
-cutoff = global cutoff for Lennard-Jones interactions (distance units) :ul
-
-[Examples:]
-
-pair_style lj/sf 2.5
-pair_coeff * * 1.0 1.0
-pair_coeff 1 1 1.0 1.0 3.0 :pre
-
-[Description:]
-
-Style {lj/sf} computes a truncated and force-shifted LJ interaction
-(Shifted Force Lennard-Jones), so that both the potential and the
-force go continuously to zero at the cutoff "(Toxvaerd)"_#Toxvaerd:
-
-:c,image(Eqs/pair_lj_sf.jpg)
-
-The following coefficients must be defined for each pair of atoms
-types via the "pair_coeff"_pair_coeff.html command as in the examples
-above, or in the data file or restart files read by the
-"read_data"_read_data.html or "read_restart"_read_restart.html
-commands, or by mixing as described below:
-
-epsilon (energy units)
-sigma (distance units)
-cutoff (distance units) :ul
-
-The last coefficient is optional. If not specified, the global
-LJ cutoff specified in the pair_style command is used.
-
-:line
-
-Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
-functionally the same as the corresponding style without the suffix.
-They have been optimized to run faster, depending on your available
-hardware, as discussed in "Section 5"_Section_accelerate.html
-of the manual.  The accelerated styles take the same arguments and
-should produce the same results, except for round-off and precision
-issues.
-
-These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
-USER-OMP and OPT packages, respectively.  They are only enabled if
-LAMMPS was built with those packages.  See the "Making
-LAMMPS"_Section_start.html#start_3 section for more info.
-
-You can specify the accelerated styles explicitly in your input script
-by including their suffix, or you can use the "-suffix command-line
-switch"_Section_start.html#start_7 when you invoke LAMMPS, or you can
-use the "suffix"_suffix.html command in your input script.
-
-See "Section 5"_Section_accelerate.html of the manual for
-more instructions on how to use the accelerated styles effectively.
-
-:line
-
-[Mixing, shift, table, tail correction, restart, rRESPA info]:
-
-For atom type pairs I,J and I != J, the epsilon and sigma
-coefficients and cutoff distance for this pair style can be mixed.
-Rin is a cutoff value and is mixed like the cutoff. The
-default mix value is {geometric}.  See the "pair_modify" command for
-details.
-
-The "pair_modify"_pair_modify.html shift option is not relevant for
-this pair style, since the pair interaction goes to 0.0 at the cutoff.
-
-The "pair_modify"_pair_modify.html table option is not relevant
-for this pair style.
-
-This pair style does not support the "pair_modify"_pair_modify.html
-tail option for adding long-range tail corrections to energy and
-pressure, since the energy of the pair interaction is smoothed to 0.0
-at the cutoff.
-
-This pair style writes its information to "binary restart
-files"_restart.html, so pair_style and pair_coeff commands do not need
-to be specified in an input script that reads a restart file.
-
-This pair style can only be used via the {pair} keyword of the
-"run_style respa"_run_style.html command.  It does not support the
-{inner}, {middle}, {outer} keywords.
-
-:line
-
-[Restrictions:]
-
-This pair style is part of the USER-MISC package.  It is only enabled
-if LAMMPS was built with that package.  See the "Making
-LAMMPS"_Section_start.html#start_3 section for more info.
-
-[Related commands:]
-
-"pair_coeff"_pair_coeff.html
-
-[Default:] none
-
-:line
-
-:link(Toxvaerd)
-[(Toxvaerd)] Toxvaerd, Dyre, J Chem Phys, 134, 081102 (2011).
--- a/doc/src/pair_lj_smooth_linear.txt
+++ b/doc/src/pair_lj_smooth_linear.txt
@ -11,26 +11,26 @@ pair_style lj/smooth/linear/omp command :h3

 [Syntax:]

-pair_style lj/smooth/linear Rc :pre
+pair_style lj/smooth/linear cutoff :pre

-Rc = cutoff for lj/smooth/linear interactions (distance units) :ul
+cutoff = global cutoff for Lennard-Jones interactions (distance units) :ul

 [Examples:]

-pair_style lj/smooth/linear 5.456108274435118
-pair_coeff * * 0.7242785984051078 2.598146797350056
-pair_coeff 1 1 20.0 1.3 9.0 :pre
+pair_style lj/smooth/linear 2.5
+pair_coeff * * 1.0 1.0
+pair_coeff 1 1 0.3 3.0 9.0 :pre

 [Description:]

-Style {lj/smooth/linear} computes a LJ interaction that combines the
-standard 12/6 Lennard-Jones function and subtracts a linear term that
-includes the cutoff distance Rc, as in this formula:
+Style {lj/smooth/linear} computes a truncated and force-shifted LJ
+interaction (aka Shifted Force Lennard-Jones) that combines the
+standard 12/6 Lennard-Jones function and subtracts a linear term based
+on the cutoff distance, so that both the potential and the force go
+continuously to zero at the cutoff Rc "(Toxvaerd)"_#Toxvaerd:

 :c,image(Eqs/pair_lj_smooth_linear.jpg)

-At the cutoff Rc, the energy and force (its 1st derivative) will be 0.0.
-
 The following coefficients must be defined for each pair of atoms
 types via the "pair_coeff"_pair_coeff.html command as in the examples
 above, or in the data file or restart files read by the
@ -41,8 +41,8 @@ epsilon (energy units)
 sigma (distance units)
 cutoff (distance units) :ul

-The last coefficient is optional.  If not specified, the global value
-for Rc is used.
+The last coefficient is optional. If not specified, the global
+LJ cutoff specified in the pair_style command is used.

 :line

@ -76,10 +76,11 @@ and cutoff distance can be mixed. The default mix value is geometric.
 See the "pair_modify" command for details.

 This pair style does not support the "pair_modify"_pair_modify.html
-shift option for the energy of the pair interaction.
+shift option for the energy of the pair interaction, since it goes
+to 0.0 at the cutoff by construction.

-The "pair_modify"_pair_modify.html table option is not relevant for
-this pair style.
+The "pair_modify"_pair_modify.html table option is not relevant
+for this pair style.

 This pair style does not support the "pair_modify"_pair_modify.html
 tail option for adding long-range tail corrections to energy and
@ -103,3 +104,8 @@ This pair style can only be used via the {pair} keyword of the
 "pair_coeff"_pair_coeff.html, "pair lj/smooth"_pair_lj_smooth.html

 [Default:] none
+
+:line
+
+:link(Toxvaerd)
+[(Toxvaerd)] Toxvaerd, Dyre, J Chem Phys, 134, 081102 (2011).
--- a/doc/src/pair_reaxc.txt
+++ b/doc/src/pair_reaxc.txt
@ -8,6 +8,7 @@

 pair_style reax/c command :h3
 pair_style reax/c/kk command :h3
+pair_style reax/c/omp command :h3

 [Syntax:]

--- a/doc/src/pair_vashishta.txt
+++ b/doc/src/pair_vashishta.txt
@ -7,6 +7,7 @@
 :line

 pair_style vashishta command :h3
+pair_style vashishta/gpu command :h3
 pair_style vashishta/omp command :h3
 pair_style vashishta/kk command :h3
 pair_style vashishta/table command :h3
--- a/doc/src/pairs.txt
+++ b/doc/src/pairs.txt
@ -49,7 +49,6 @@ Pair Styles :h1
   pair_lj_cubic
   pair_lj_expand
   pair_lj_long
-   pair_lj_sf
   pair_lj_smooth
   pair_lj_smooth_linear
   pair_lj_soft
--- a/doc/src/set.txt
+++ b/doc/src/set.txt
@ -80,6 +80,7 @@ keyword = {type} or {type/fraction} or {mol} or {x} or {y} or {z} or \
    value can be an atom-style variable (see below)
  {image} nx ny nz
    nx,ny,nz = which periodic image of the simulation box the atom is in
+    any of nx,ny,nz can be an atom-style variable (see below)
  {bond} value = bond type for all bonds between selected atoms
  {angle} value = angle type for all angles between selected atoms
  {dihedral} value = dihedral type for all dihedrals between selected atoms
@ -363,9 +364,8 @@ A value of -1 means subtract 1 box length to get the true value.
 LAMMPS updates these flags as atoms cross periodic boundaries during
 the simulation.  The flags can be output with atom snapshots via the
 "dump"_dump.html command.  If a value of NULL is specified for any of
-nx,ny,nz, then the current image value for that dimension is
-unchanged.  For non-periodic dimensions only a value of 0 can be
-specified.  This keyword does not allow use of atom-style variables.
+nx,ny,nz, then the current image value for that dimension is unchanged.
+For non-periodic dimensions only a value of 0 can be specified.
 This command can be useful after a system has been equilibrated and
 atoms have diffused one or more box lengths in various directions.
 This command can then reset the image values for atoms so that they
--- a/doc/src/special_bonds.txt
+++ b/doc/src/special_bonds.txt
@ -65,7 +65,13 @@ sense to define permanent bonds between atoms that interact via these
 potentials, though such bonds may exist elsewhere in your system,
 e.g. when using the "pair_style hybrid"_pair_hybrid.html command.
 Thus LAMMPS ignores special_bonds settings when manybody potentials
-are calculated.
+are calculated.  Please note that the existence of explicit bonds
+for atoms that are described by a manybody potential will alter the
+neighbor list and thus can render the computation of those interactions
+invalid, since those pairs are not only used to determine direct
+pairwise interactions but also neighbors of neighbors and more.
+The recommended course of action is to remove such bonds, or - if
+that is not possible - use a special bonds setting of 1.0 1.0 1.0.

 NOTE: Unlike some commands in LAMMPS, you cannot use this command
 multiple times in an incremental fashion: e.g. to first set the LJ
--- a/examples/USER/cgdna/util/generate.py
+++ b/examples/USER/cgdna/util/generate.py
@ -14,7 +14,7 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Oliver Henrich (EPCC, University of Edinburgh)
+   Contributing author: Oliver Henrich (University of Strathclyde, Glasgow)
 ------------------------------------------------------------------------- */
 """

--- a/examples/USER/misc/cnp/Cu_Mishin1.eam
+++ b/examples/USER/misc/cnp/Cu_Mishin1.eam
--- a/examples/USER/misc/cnp/in.cnp
+++ b/examples/USER/misc/cnp/in.cnp
@ -0,0 +1,51 @@
+# Generation and relaxation of a partial dislocation in Cu perfect FCC crystal
+
+# Initialization
+units           metal
+boundary        p p p
+atom_style      atomic
+
+# create simulation box and system
+lattice         fcc 3.615  origin 0.01 0.01 0.01 orient x -1 -1 2 orient y 1 1 1 orient z -1 1 0 
+region          mdbox  block 0 3 0.0 14.0 0 84 units lattice
+region          system block 0 3 1.1 13.1 0 84 units lattice
+create_box      2 mdbox
+create_atoms    1 region system
+
+# Define atoms mass and force field
+mass            *  63.54                     
+pair_style      eam/alloy
+pair_coeff      * * Cu_Mishin1.eam Cu Cu
+
+# Delete a plane of atoms along the z direction to generate a partial dislocation
+region          dislocation_atoms block 0 3 7 14 41.9 42.1 units lattice
+delete_atoms    region dislocation_atoms
+region          quarter_up block 0 3 7 11 0 84 units lattice
+group           middle region quarter_up
+
+# specify simulation parameters
+timestep        0.004
+
+# Relax configuration using conjugate gradient
+#min_style cg
+#minimize 1.0e-4 1.0e-6 100 1000
+
+# Setup calculations 
+compute         1 all cnp/atom 3.086
+compute         2 all cna/atom 3.086
+compute         3 all centro/atom fcc
+compute         4 all coord/atom cutoff 3.086
+dump            1 all custom 100 dump.lammpstrj id type xu yu zu c_1 c_2 c_3 c_4 
+
+### Set up thermo display
+thermo          10
+thermo_style    custom step atoms temp press pe ke etotal
+
+# Relax the system performing a langevin dynamics (freeze motion along y 111 direction)
+fix             1 all nve
+fix             2 all langevin 50 1 0.1 699483
+fix             3 all setforce NULL 0.0 NULL
+fix             4 middle setforce 0.0  0.0 0.0
+run             100
+unfix           4
+run             200
--- a/examples/USER/misc/cnp/log.31May17.cnp.g++.4
+++ b/examples/USER/misc/cnp/log.31May17.cnp.g++.4
@ -0,0 +1,185 @@
+LAMMPS (19 May 2017)
+OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (../comm.cpp:90)
+  using 1 OpenMP thread(s) per MPI task
+# Generation and relaxation of a partial dislocation in Cu perfect FCC crystal
+
+# Initialization
+units           metal
+boundary        p p p
+atom_style      atomic
+
+# create simulation box and system
+lattice         fcc 3.615  origin 0.01 0.01 0.01 orient x -1 -1 2 orient y 1 1 1 orient z -1 1 0
+Lattice spacing in x,y,z = 5.90327 6.26136 5.11238
+region          mdbox  block 0 3 0.0 14.0 0 84 units lattice
+region          system block 0 3 1.1 13.1 0 84 units lattice
+create_box      2 mdbox
+Created orthogonal box = (0 0 0) to (17.7098 87.6591 429.44)
+  1 by 1 by 4 MPI processor grid
+create_atoms    1 region system
+Created 48384 atoms
+
+# Define atoms mass and force field
+mass            *  63.54
+pair_style      eam/alloy
+pair_coeff      * * Cu_Mishin1.eam Cu Cu
+
+# Delete a plane of atoms along the z direction to generate a partial dislocation
+region          dislocation_atoms block 0 3 7 14 41.9 42.1 units lattice
+delete_atoms    region dislocation_atoms
+Deleted 76 atoms, new total = 48308
+region          quarter_up block 0 3 7 11 0 84 units lattice
+group           middle region quarter_up
+16080 atoms in group middle
+
+# specify simulation parameters
+timestep        0.004
+
+# Relax configuration using conjugate gradient
+#min_style cg
+#minimize 1.0e-4 1.0e-6 100 1000
+
+# Setup calculations
+compute         1 all cnp/atom 3.086
+compute         2 all cna/atom 3.086
+compute         3 all centro/atom fcc
+compute         4 all coord/atom cutoff 3.086
+dump            1 all custom 100 dump.lammpstrj id type xu yu zu c_1 c_2 c_3 c_4
+
+### Set up thermo display
+thermo          10
+thermo_style    custom step atoms temp press pe ke etotal
+
+# Relax the system performing a langevin dynamics (freeze motion along y 111 direction)
+fix             1 all nve
+fix             2 all langevin 50 1 0.1 699483
+fix             3 all setforce NULL 0.0 NULL
+fix             4 middle setforce 0.0  0.0 0.0
+run             100
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 7.50679
+  ghost atom cutoff = 7.50679
+  binsize = 3.75339, bins = 5 24 115
+  5 neighbor lists, perpetual/occasional/extra = 1 4 0
+  (1) pair eam/alloy, perpetual
+      attributes: half, newton on
+      pair build: half/bin/atomonly/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute cnp/atom, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+  (3) compute cna/atom, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+  (4) compute centro/atom, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+  (5) compute coord/atom, occasional
+      attributes: full, newton on
+      pair build: full/bin/atomonly
+      stencil: full/bin/3d
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 45.41 | 45.41 | 45.41 Mbytes
+Step Atoms Temp Press PotEng KinEng TotEng 
+       0    48308            0   -3388.0911   -169746.07            0   -169746.07 
+      10    48308      7.35092   -3091.0864   -169715.96    45.900393   -169670.05 
+      20    48308    9.9162268   -2822.7045   -169678.51    61.918604   -169616.59 
+      30    48308    12.351316   -2726.7195   -169666.35    77.123716   -169589.23 
+      40    48308    13.302856    -2703.586    -169662.9     83.06529   -169579.83 
+      50    48308    12.782228   -2706.8662   -169662.36    79.814401   -169582.55 
+      60    48308    12.198179   -2772.4206   -169670.02    76.167503   -169593.86 
+      70    48308    10.663322   -2841.3384   -169677.48    66.583595    -169610.9 
+      80    48308    9.1169804   -2932.3896   -169687.85    56.927974   -169630.92 
+      90    48308    7.2905076   -3029.9433   -169699.09    45.523167   -169653.56 
+     100    48308    5.4063635   -3139.4496   -169711.65    33.758252   -169677.89 
+Loop time of 10.9003 on 4 procs for 100 steps with 48308 atoms
+
+Performance: 3.171 ns/day, 7.570 hours/ns, 9.174 timesteps/s
+31.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 9.8764     | 9.9587     | 10.021     |   1.6 | 91.36
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.1232     | 0.18385    | 0.26683    |  12.1 |  1.69
+Output  | 0.45385    | 0.45451    | 0.45634    |   0.2 |  4.17
+Modify  | 0.25026    | 0.2537     | 0.25744    |   0.5 |  2.33
+Other   |            | 0.04949    |            |       |  0.45
+
+Nlocal:    12077 ave 12096 max 12020 min
+Histogram: 1 0 0 0 0 0 0 0 0 3
+Nghost:    14204 ave 14261 max 14109 min
+Histogram: 1 0 0 0 0 1 0 0 0 2
+Neighs:    814050 ave 818584 max 809212 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+FullNghs:  1.6281e+06 ave 1.63296e+06 max 1.61808e+06 min
+Histogram: 1 0 0 0 0 0 1 0 0 2
+
+Total # of neighbors = 6512400
+Ave neighs/atom = 134.81
+Neighbor list builds = 0
+Dangerous builds = 0
+unfix           4
+run             200
+Per MPI rank memory allocation (min/avg/max) = 45.41 | 45.41 | 45.41 Mbytes
+Step Atoms Temp Press PotEng KinEng TotEng 
+     100    48308    5.4063635   -3139.4496   -169711.65    33.758252   -169677.89 
+     110    48308    15.260795    -2793.119   -169677.24    95.290993   -169581.95 
+     120    48308    18.548656   -2433.1584   -169624.79    115.82096   -169508.97 
+     130    48308     22.15831    -2276.626   -169604.28    138.36025   -169465.92 
+     140    48308    24.393841   -2208.1771   -169596.16    152.31929   -169443.84 
+     150    48308    24.797558   -2173.3145   -169591.43    154.84016   -169436.59 
+     160    48308     24.73371    -2188.909   -169593.08    154.44148   -169438.64 
+     170    48308    24.128467   -2220.3404   -169596.96    150.66225   -169446.29 
+     180    48308    22.975708   -2275.1244   -169602.72    143.46422   -169459.26 
+     190    48308    21.936324   -2348.3762   -169610.59    136.97413   -169473.61 
+     200    48308    20.516249   -2432.8447   -169619.98    128.10694   -169491.87 
+     210    48308    19.000566   -2510.2915   -169628.58    118.64276   -169509.93 
+     220    48308    17.490407    -2597.299   -169638.24    109.21307   -169529.03 
+     230    48308    16.062482   -2684.1203   -169648.31    100.29687   -169548.01 
+     240    48308    14.360342   -2768.2313    -169657.7    89.668411   -169568.03 
+     250    48308    12.802315   -2852.6965   -169666.99    79.939831   -169587.05 
+     260    48308    11.258205   -2944.4533   -169677.52    70.298142   -169607.23 
+     270    48308    9.6159129   -3038.6304   -169688.06    60.043393   -169628.02 
+     280    48308     7.972425   -3129.0826   -169698.03    49.781176   -169648.25 
+     290    48308    6.3752377   -3219.2054   -169708.23    39.808067   -169668.42 
+     300    48308    4.7374688   -3306.1468   -169718.27     29.58156   -169688.69 
+Loop time of 23.0164 on 4 procs for 200 steps with 48308 atoms
+
+Performance: 3.003 ns/day, 7.992 hours/ns, 8.689 timesteps/s
+31.8% CPU use with 4 MPI tasks x 1 OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 20.221     | 20.423     | 20.57      |   3.1 | 88.73
+Neigh   | 0          | 0          | 0          |   0.0 |  0.00
+Comm    | 0.27748    | 0.42603    | 0.62832    |  21.4 |  1.85
+Output  | 1.5454     | 1.5473     | 1.5529     |   0.3 |  6.72
+Modify  | 0.48886    | 0.49773    | 0.50842    |   1.1 |  2.16
+Other   |            | 0.1221     |            |       |  0.53
+
+Nlocal:    12077 ave 12096 max 12020 min
+Histogram: 1 0 0 0 0 0 0 0 0 3
+Nghost:    14204 ave 14261 max 14109 min
+Histogram: 1 0 0 0 0 1 0 0 0 2
+Neighs:    814094 ave 818584 max 809212 min
+Histogram: 1 0 0 0 0 2 0 0 0 1
+FullNghs:  1.62852e+06 ave 1.63296e+06 max 1.61892e+06 min
+Histogram: 1 0 0 0 0 0 0 1 0 2
+
+Total # of neighbors = 6514094
+Ave neighs/atom = 134.845
+Neighbor list builds = 0
+Dangerous builds = 0
+Total wall time: 0:00:35
--- a/examples/USER/misc/grem/lj-temper/0/log.lammps.0
+++ b/examples/USER/misc/grem/lj-temper/0/log.lammps.0
--- a/examples/USER/misc/grem/lj-temper/1/log.lammps.1
+++ b/examples/USER/misc/grem/lj-temper/1/log.lammps.1
--- a/examples/USER/misc/grem/lj-temper/2/log.lammps.2
+++ b/examples/USER/misc/grem/lj-temper/2/log.lammps.2
--- a/examples/USER/misc/grem/lj-temper/3/log.lammps.3
+++ b/examples/USER/misc/grem/lj-temper/3/log.lammps.3
--- a/examples/USER/misc/grem/lj-temper/in.gREM-temper
+++ b/examples/USER/misc/grem/lj-temper/in.gREM-temper
@ -18,14 +18,14 @@ read_data       ${rep}/lj.data

 #dump            dump all xyz 1000 ${rep}/dump.xyz

-thermo          10
-thermo_style    custom step temp pe etotal press vol
-timestep        1.0
-
 fix             fxnpt all npt temp ${T0} ${T0} 1000.0 iso ${press} ${press} 10000.0 
 fix             fxgREM all grem ${lambda} -.03 -30000 fxnpt
-thermo_modify   press fxgREM_press

-temper/grem            10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker}
+thermo          10
+thermo_style    custom step temp f_fxgREM pe etotal press vol
+thermo_modify   press fxgREM_press
+timestep        1.0
+
+temper/grem     10000 100 ${lambda} fxgREM fxnpt 10294 98392 #${walker}

 #write_data      ${rep}/lj-out.data
--- a/examples/USER/tally/log.12Jun17.force.1
+++ b/examples/USER/tally/log.12Jun17.force.1
@ -0,0 +1,177 @@
+LAMMPS (19 May 2017)
+
+units		real
+atom_style	full
+
+read_data	data.spce
+  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
+  1 by 1 by 1 MPI processor grid
+  reading atoms ...
+  4500 atoms
+  scanning bonds ...
+  2 = max bonds/atom
+  scanning angles ...
+  1 = max angles/atom
+  reading bonds ...
+  3000 bonds
+  reading angles ...
+  1500 angles
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  1 = max # of 1-4 neighbors
+  2 = max # of special neighbors
+
+pair_style	lj/cut/coul/long 12.0 12.0
+kspace_style	pppm 1.0e-4
+
+pair_coeff	1 1 0.15535 3.166
+pair_coeff	* 2 0.0000 0.0000
+
+bond_style	harmonic
+angle_style	harmonic
+dihedral_style	none
+improper_style	none
+
+bond_coeff	1 1000.00 1.000
+angle_coeff	1 100.0 109.47
+
+special_bonds   lj/coul 0.0 0.0 1.0
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  2 = max # of special neighbors
+
+neighbor        2.0 bin
+
+fix		1 all shake 0.0001 20 0 b 1 a 1
+  0 = # of size 2 clusters
+  0 = # of size 3 clusters
+  0 = # of size 4 clusters
+  1500 = # of frozen angles
+fix		2 all nvt temp 300.0 300.0 100.0
+
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 3e-06 on 1 procs for 0 steps with 4500 atoms
+
+
+group		one molecule 1 2
+6 atoms in group one
+
+# the following section shows equivalences between using the force/tally compute and other computes and thermo keywords
+
+# compute pairwise force between two molecules and everybody
+compute		fpa one group/group all pair yes kspace no boundary no
+# tally pairwise force between two molecules and the all molecules
+compute		c1 one force/tally all
+# tally the force of all with all (should be zero)
+compute		c2 all force/tally all
+# collect per atom data. only reduce over the first group.
+compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
+compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
+# determine magnitude of force
+variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
+variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
+# round to 10**-10 absolute precision.
+variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
+variable	all equal round(1e10*c_c2)*1e-10
+
+velocity	all create 300 432567 dist uniform
+
+timestep	2.0
+
+# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
+thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all
+thermo		10
+
+run 50
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 28.47 | 28.47 | 28.47 Mbytes
+Step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all 
+       0      22.7331      22.7331      22.7331   -17.068295   -17.068295   -8.8348335   -8.8348334   -12.141369   -12.141369            0            0 
+      10    11.736901    11.736901    11.736901   -3.3897029   -3.3897029    9.1193856    9.1193856   -6.5651786   -6.5651786            0            0 
+      20    5.6120339    5.6120339    5.6120339  -0.60046861  -0.60046861   -4.4481306   -4.4481306    3.3687528    3.3687528            0            0 
+      30     17.29261     17.29261     17.29261     6.179302     6.179302   -10.593979   -10.593979    12.190906    12.190906            0            0 
+      40    18.664433    18.664433    18.664433    5.4727782    5.4727782   -6.9329319   -6.9329319    16.442148    16.442148            0            0 
+      50    12.130407    12.130407    12.130407   -1.0321196   -1.0321196    8.0035558    8.0035558   -9.0567428   -9.0567428            0            0 
+Loop time of 13.9507 on 1 procs for 50 steps with 4500 atoms
+
+Performance: 0.619 ns/day, 38.752 hours/ns, 3.584 timesteps/s
+32.0% CPU use with 1 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 12.594     | 12.594     | 12.594     |   0.0 | 90.27
+Bond    | 7.3e-05    | 7.3e-05    | 7.3e-05    |   0.0 |  0.00
+Kspace  | 0.56296    | 0.56296    | 0.56296    |   0.0 |  4.04
+Neigh   | 0.65858    | 0.65858    | 0.65858    |   0.0 |  4.72
+Comm    | 0.019093   | 0.019093   | 0.019093   |   0.0 |  0.14
+Output  | 0.055025   | 0.055025   | 0.055025   |   0.0 |  0.39
+Modify  | 0.057276   | 0.057276   | 0.057276   |   0.0 |  0.41
+Other   |            | 0.004003   |            |       |  0.03
+
+Nlocal:    4500 ave 4500 max 4500 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Nghost:    21131 ave 21131 max 21131 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+Neighs:    2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min
+Histogram: 1 0 0 0 0 0 0 0 0 0
+
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
+Ave special neighs/atom = 2
+Neighbor list builds = 4
+Dangerous builds = 1
+
+Total wall time: 0:00:15
--- a/examples/USER/tally/log.12Jun17.force.4
+++ b/examples/USER/tally/log.12Jun17.force.4
@ -0,0 +1,177 @@
+LAMMPS (19 May 2017)
+
+units		real
+atom_style	full
+
+read_data	data.spce
+  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
+  2 by 2 by 1 MPI processor grid
+  reading atoms ...
+  4500 atoms
+  scanning bonds ...
+  2 = max bonds/atom
+  scanning angles ...
+  1 = max angles/atom
+  reading bonds ...
+  3000 bonds
+  reading angles ...
+  1500 angles
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  1 = max # of 1-4 neighbors
+  2 = max # of special neighbors
+
+pair_style	lj/cut/coul/long 12.0 12.0
+kspace_style	pppm 1.0e-4
+
+pair_coeff	1 1 0.15535 3.166
+pair_coeff	* 2 0.0000 0.0000
+
+bond_style	harmonic
+angle_style	harmonic
+dihedral_style	none
+improper_style	none
+
+bond_coeff	1 1000.00 1.000
+angle_coeff	1 100.0 109.47
+
+special_bonds   lj/coul 0.0 0.0 1.0
+  2 = max # of 1-2 neighbors
+  1 = max # of 1-3 neighbors
+  2 = max # of special neighbors
+
+neighbor        2.0 bin
+
+fix		1 all shake 0.0001 20 0 b 1 a 1
+  0 = # of size 2 clusters
+  0 = # of size 3 clusters
+  0 = # of size 4 clusters
+  1500 = # of frozen angles
+fix		2 all nvt temp 300.0 300.0 100.0
+
+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 4.5e-06 on 4 procs for 0 steps with 4500 atoms
+
+
+group		one molecule 1 2
+6 atoms in group one
+
+# the following section shows equivalences between using the force/tally compute and other computes and thermo keywords
+
+# compute pairwise force between two molecules and everybody
+compute		fpa one group/group all pair yes kspace no boundary no
+# tally pairwise force between two molecules and the all molecules
+compute		c1 one force/tally all
+# tally the force of all with all (should be zero)
+compute		c2 all force/tally all
+# collect per atom data. only reduce over the first group.
+compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
+compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
+# determine magnitude of force
+variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
+variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
+# round to 10**-10 absolute precision.
+variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
+variable	all equal round(1e10*c_c2)*1e-10
+
+velocity	all create 300 432567 dist uniform
+
+timestep	2.0
+
+# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
+thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all
+thermo		10
+
+run 50
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:77)
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 11.58 | 11.59 | 11.6 Mbytes
+Step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref v_all 
+       0      22.7331      22.7331      22.7331   -17.068295   -17.068295   -8.8348335   -8.8348334   -12.141369   -12.141369            0            0 
+      10    11.736901    11.736901    11.736901   -3.3897029   -3.3897029    9.1193856    9.1193856   -6.5651786   -6.5651786            0            0 
+      20    5.6120339    5.6120339    5.6120339  -0.60046861  -0.60046861   -4.4481306   -4.4481306    3.3687528    3.3687528            0            0 
+      30     17.29261     17.29261     17.29261     6.179302     6.179302   -10.593979   -10.593979    12.190906    12.190906            0            0 
+      40    18.664433    18.664433    18.664433    5.4727782    5.4727782   -6.9329319   -6.9329319    16.442148    16.442148            0            0 
+      50    12.130407    12.130407    12.130407   -1.0321196   -1.0321196    8.0035558    8.0035558   -9.0567428   -9.0567428            0            0 
+Loop time of 4.31614 on 4 procs for 50 steps with 4500 atoms
+
+Performance: 2.002 ns/day, 11.989 hours/ns, 11.584 timesteps/s
+31.6% CPU use with 4 MPI tasks x no OpenMP threads
+
+MPI task timing breakdown:
+Section |  min time  |  avg time  |  max time  |%varavg| %total
+---------------------------------------------------------------
+Pair    | 3.5075     | 3.6114     | 3.7489     |   4.7 | 83.67
+Bond    | 8.6e-05    | 0.00010525 | 0.000141   |   0.0 |  0.00
+Kspace  | 0.2581     | 0.39489    | 0.49723    |  14.2 |  9.15
+Neigh   | 0.19826    | 0.19888    | 0.19918    |   0.1 |  4.61
+Comm    | 0.034639   | 0.037137   | 0.038938   |   0.9 |  0.86
+Output  | 0.025465   | 0.025997   | 0.027558   |   0.6 |  0.60
+Modify  | 0.044022   | 0.044175   | 0.044407   |   0.1 |  1.02
+Other   |            | 0.003593   |            |       |  0.08
+
+Nlocal:    1125 ave 1148 max 1097 min
+Histogram: 1 0 0 1 0 0 0 0 1 1
+Nghost:    12212.5 ave 12269 max 12162 min
+Histogram: 1 0 0 1 0 1 0 0 0 1
+Neighs:    650496 ave 675112 max 631353 min
+Histogram: 1 0 0 1 1 0 0 0 0 1
+
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
+Ave special neighs/atom = 2
+Neighbor list builds = 4
+Dangerous builds = 1
+
+Total wall time: 0:00:04
--- a/examples/USER/tally/log.12Jun17.pe.1
+++ b/examples/USER/tally/log.12Jun17.pe.1
@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)

 units		real
 atom_style	full
@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
  1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0

+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 1e-06 on 1 procs for 0 steps with 4500 atoms
+
+
 group		oxy type 1
 1500 atoms in group oxy
 group		hyd type 2
@ -88,6 +116,7 @@ thermo		10

 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
  G vector (1/distance) = 0.218482
  grid = 15 15 15
  stencil order = 5
@ -95,38 +124,49 @@ PPPM initialization ...
  estimated relative force accuracy = 9.61968e-05
  using double precision FFTs
  3d grid and FFT values/proc = 8000 3375
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
 Neighbor list info ...
-  2 neighbor list requests
  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 14
  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 17.381 Mbytes
-Step epa epa E_vdwl vdwl E_coul coul eref pe c2 pair 
-       0   -516632.19   -516632.19    3169.9382    3169.9382    46213.889    46213.889    49383.827    49383.827    49383.827    49383.827 
-      10   -517027.36   -517027.36    3099.1322    3099.1322     45891.84     45891.84    48990.972    48990.972    48990.972    48990.972 
-      20   -516828.06   -516828.06    3101.4321    3101.4321     45884.14     45884.14    48985.572    48985.572    48985.572    48985.572 
-      30    -517032.1    -517032.1    3198.5939    3198.5939    45793.571    45793.571    48992.165    48992.165    48992.165    48992.165 
-      40   -517095.56   -517095.56    3244.0797    3244.0797    45715.265    45715.265    48959.345    48959.345    48959.345    48959.345 
-      50   -517273.54   -517273.54    3274.9142    3274.9142    45665.997    45665.997    48940.911    48940.911    48940.911    48940.911 
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 29.08 | 29.08 | 29.08 Mbytes
+Step c_epa v_epa E_vdwl v_vdwl E_coul v_coul v_eref v_pe c_c2 v_pair 
+       0   -516634.27   -516634.27    3169.9427    3169.9427    46212.482    46212.482    49382.425    49382.425    49382.425    49382.425 
+      10   -517027.35   -517027.35    3099.1374    3099.1374    45891.866    45891.866    48991.003    48991.003    48991.003    48991.003 
+      20   -516828.05   -516828.05    3101.4373    3101.4373    45884.156    45884.156    48985.594    48985.594    48985.594    48985.594 
+      30   -517032.07   -517032.07    3198.5951    3198.5951    45793.595    45793.595    48992.191    48992.191    48992.191    48992.191 
+      40   -517095.54   -517095.54    3244.0771    3244.0771    45715.292    45715.292    48959.369    48959.369    48959.369    48959.369 
+      50    -517273.5    -517273.5    3274.9097    3274.9097    45666.025    45666.025    48940.935    48940.935    48940.935    48940.935 
+Loop time of 15.3339 on 1 procs for 50 steps with 4500 atoms

-Loop time of 4.31105 on 1 procs for 50 steps with 4500 atoms
-100.1% CPU use with 1 MPI tasks x 1 OpenMP threads
-Performance: 2.004 ns/day  11.975 hours/ns  11.598 timesteps/s
+Performance: 0.563 ns/day, 42.594 hours/ns, 3.261 timesteps/s
+32.0% CPU use with 1 MPI tasks x no OpenMP threads

-MPI task timings breakdown:
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 3.5071     | 3.5071     | 3.5071     |   0.0 | 81.35
-Bond    | 0.00025034 | 0.00025034 | 0.00025034 |   0.0 |  0.01
-Kspace  | 0.19991    | 0.19991    | 0.19991    |   0.0 |  4.64
-Neigh   | 0.31459    | 0.31459    | 0.31459    |   0.0 |  7.30
-Comm    | 0.010338   | 0.010338   | 0.010338   |   0.0 |  0.24
-Output  | 0.24722    | 0.24722    | 0.24722    |   0.0 |  5.73
-Modify  | 0.029466   | 0.029466   | 0.029466   |   0.0 |  0.68
-Other   |            | 0.002182   |            |       |  0.05
+Pair    | 13.432     | 13.432     | 13.432     |   0.0 | 87.60
+Bond    | 0.000365   | 0.000365   | 0.000365   |   0.0 |  0.00
+Kspace  | 0.581      | 0.581      | 0.581      |   0.0 |  3.79
+Neigh   | 0.66081    | 0.66081    | 0.66081    |   0.0 |  4.31
+Comm    | 0.019908   | 0.019908   | 0.019908   |   0.0 |  0.13
+Output  | 0.57731    | 0.57731    | 0.57731    |   0.0 |  3.76
+Modify  | 0.058515   | 0.058515   | 0.058515   |   0.0 |  0.38
+Other   |            | 0.003889   |            |       |  0.03

 Nlocal:    4500 ave 4500 max 4500 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@ -135,10 +175,10 @@ Histogram: 1 0 0 0 0 0 0 0 0 0
 Neighs:    2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min
 Histogram: 1 0 0 0 0 0 0 0 0 0

-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
 Ave special neighs/atom = 2
 Neighbor list builds = 4
 Dangerous builds = 1

-Total wall time: 0:00:04
+Total wall time: 0:00:16
--- a/examples/USER/tally/log.12Jun17.pe.4
+++ b/examples/USER/tally/log.12Jun17.pe.4
@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)

 units		real
 atom_style	full
@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
  1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0

+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 1.75e-06 on 4 procs for 0 steps with 4500 atoms
+
+
 group		oxy type 1
 1500 atoms in group oxy
 group		hyd type 2
@ -88,6 +116,7 @@ thermo		10

 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
  G vector (1/distance) = 0.218482
  grid = 15 15 15
  stencil order = 5
@ -95,38 +124,49 @@ PPPM initialization ...
  estimated relative force accuracy = 9.61968e-05
  using double precision FFTs
  3d grid and FFT values/proc = 3380 960
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
-WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:75)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
+WARNING: Compute pe/tally only called from pair style (../compute_pe_tally.cpp:77)
 Neighbor list info ...
-  2 neighbor list requests
  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 14
  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 8.44413 Mbytes
-Step epa epa E_vdwl vdwl E_coul coul eref pe c2 pair 
-       0   -516632.19   -516632.19    3169.9382    3169.9382    46213.889    46213.889    49383.827    49383.827    49383.827    49383.827 
-      10   -517027.36   -517027.36    3099.1322    3099.1322     45891.84     45891.84    48990.972    48990.972    48990.972    48990.972 
-      20   -516828.06   -516828.06    3101.4321    3101.4321     45884.14     45884.14    48985.572    48985.572    48985.572    48985.572 
-      30    -517032.1    -517032.1    3198.5939    3198.5939    45793.571    45793.571    48992.165    48992.165    48992.165    48992.165 
-      40   -517095.56   -517095.56    3244.0797    3244.0797    45715.265    45715.265    48959.345    48959.345    48959.345    48959.345 
-      50   -517273.54   -517273.54    3274.9142    3274.9142    45665.997    45665.997    48940.911    48940.911    48940.911    48940.911 
+  binsize = 7, bins = 6 6 6
+  2 neighbor lists, perpetual/occasional/extra = 1 1 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) compute group/group, occasional, copy from (1)
+      attributes: half, newton on
+      pair build: copy
+      stencil: none
+      bin: none
+Per MPI rank memory allocation (min/avg/max) = 11.86 | 11.87 | 11.88 Mbytes
+Step c_epa v_epa E_vdwl v_vdwl E_coul v_coul v_eref v_pe c_c2 v_pair 
+       0   -516634.27   -516634.27    3169.9427    3169.9427    46212.482    46212.482    49382.425    49382.425    49382.425    49382.425 
+      10   -517027.35   -517027.35    3099.1374    3099.1374    45891.866    45891.866    48991.003    48991.003    48991.003    48991.003 
+      20   -516828.05   -516828.05    3101.4373    3101.4373    45884.156    45884.156    48985.594    48985.594    48985.594    48985.594 
+      30   -517032.07   -517032.07    3198.5951    3198.5951    45793.595    45793.595    48992.191    48992.191    48992.191    48992.191 
+      40   -517095.54   -517095.54    3244.0771    3244.0771    45715.292    45715.292    48959.369    48959.369    48959.369    48959.369 
+      50    -517273.5    -517273.5    3274.9097    3274.9097    45666.025    45666.025    48940.935    48940.935    48940.935    48940.935 
+Loop time of 2.32344 on 4 procs for 50 steps with 4500 atoms

-Loop time of 1.20533 on 4 procs for 50 steps with 4500 atoms
-100.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-Performance: 7.168 ns/day  3.348 hours/ns  41.482 timesteps/s
+Performance: 3.719 ns/day, 6.454 hours/ns, 21.520 timesteps/s
+64.0% CPU use with 4 MPI tasks x no OpenMP threads

-MPI task timings breakdown:
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.87053    | 0.90325    | 0.94364    |   2.8 | 74.94
-Bond    | 0.00015402 | 0.00018191 | 0.00020909 |   0.2 |  0.02
-Kspace  | 0.061657   | 0.10164    | 0.13394    |   8.4 |  8.43
-Neigh   | 0.088292   | 0.088332   | 0.088373   |   0.0 |  7.33
-Comm    | 0.017319   | 0.017806   | 0.018291   |   0.4 |  1.48
-Output  | 0.07067    | 0.070706   | 0.070813   |   0.0 |  5.87
-Modify  | 0.021655   | 0.021694   | 0.02173    |   0.0 |  1.80
-Other   |            | 0.001719   |            |       |  0.14
+Pair    | 1.5561     | 1.8883     | 2.0327     |  14.1 | 81.27
+Bond    | 8.8e-05    | 0.000116   | 0.000135   |   0.0 |  0.00
+Kspace  | 0.094718   | 0.1933     | 0.26055    |  14.1 |  8.32
+Neigh   | 0.085117   | 0.1073     | 0.1147     |   3.9 |  4.62
+Comm    | 0.014156   | 0.017907   | 0.020005   |   1.8 |  0.77
+Output  | 0.071634   | 0.090599   | 0.097665   |   3.6 |  3.90
+Modify  | 0.019447   | 0.024101   | 0.026277   |   1.8 |  1.04
+Other   |            | 0.001804   |            |       |  0.08

 Nlocal:    1125 ave 1148 max 1097 min
 Histogram: 1 0 0 1 0 0 0 0 1 1
@ -135,10 +175,10 @@ Histogram: 1 0 0 1 0 1 0 0 0 1
 Neighs:    650496 ave 675112 max 631353 min
 Histogram: 1 0 0 1 1 0 0 0 0 1

-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
+Total # of neighbors = 2601983
+Ave neighs/atom = 578.218
 Ave special neighs/atom = 2
 Neighbor list builds = 4
 Dangerous builds = 1

-Total wall time: 0:00:01
+Total wall time: 0:00:02
--- a/examples/USER/tally/log.12Jun17.stress.1
+++ b/examples/USER/tally/log.12Jun17.stress.1
@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)

 units		real
 atom_style	full
@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
  1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0

+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 8000 3375
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 26.54 | 26.54 | 26.54 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 2e-06 on 1 procs for 0 steps with 4500 atoms
+
+
 group		one molecule 1 2
 6 atoms in group one

@ -79,6 +107,7 @@ thermo		10

 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
  G vector (1/distance) = 0.218482
  grid = 15 15 15
  stencil order = 5
@ -86,38 +115,32 @@ PPPM initialization ...
  estimated relative force accuracy = 9.61968e-05
  using double precision FFTs
  3d grid and FFT values/proc = 8000 3375
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-Neighbor list info ...
-  1 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 24.631 Mbytes
-Step press spa press one ref 
-       0    26497.547    26497.547    26497.547   -2357033.6   -2357033.6 
-      10    23665.073    23665.073    23665.073   -2096057.3   -2096057.3 
-      20    23338.149    23338.149    23338.149     -2034283     -2034283 
-      30      25946.4      25946.4      25946.4     -2002817     -2002817 
-      40    27238.349    27238.349    27238.349   -2155411.5   -2155411.5 
-      50    27783.092    27783.092    27783.092   -1862190.3   -1862190.3 
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+Per MPI rank memory allocation (min/avg/max) = 35.9 | 35.9 | 35.9 Mbytes
+Step c_press v_spa v_press v_one v_ref 
+       0    26496.811    26496.811    26496.811   -2356992.7   -2356992.7 
+      10    23665.129    23665.129    23665.129     -2096059     -2096059 
+      20    23338.197    23338.197    23338.197   -2034284.1   -2034284.1 
+      30    25946.434    25946.434    25946.434   -2002815.3   -2002815.3 
+      40    27238.374    27238.374    27238.374   -2155408.7   -2155408.7 
+      50    27783.107    27783.107    27783.107   -1862191.5   -1862191.5 
+Loop time of 14.2089 on 1 procs for 50 steps with 4500 atoms

-Loop time of 4.15609 on 1 procs for 50 steps with 4500 atoms
-100.1% CPU use with 1 MPI tasks x 1 OpenMP threads
-Performance: 2.079 ns/day  11.545 hours/ns  12.031 timesteps/s
+Performance: 0.608 ns/day, 39.469 hours/ns, 3.519 timesteps/s
+32.0% CPU use with 1 MPI tasks x no OpenMP threads

-MPI task timings breakdown:
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 3.6444     | 3.6444     | 3.6444     |   0.0 | 87.69
-Bond    | 0.0016105  | 0.0016105  | 0.0016105  |   0.0 |  0.04
-Kspace  | 0.22345    | 0.22345    | 0.22345    |   0.0 |  5.38
-Neigh   | 0.23588    | 0.23588    | 0.23588    |   0.0 |  5.68
-Comm    | 0.010035   | 0.010035   | 0.010035   |   0.0 |  0.24
-Output  | 0.0084085  | 0.0084085  | 0.0084085  |   0.0 |  0.20
-Modify  | 0.029978   | 0.029978   | 0.029978   |   0.0 |  0.72
-Other   |            | 0.002368   |            |       |  0.06
+Pair    | 12.983     | 12.983     | 12.983     |   0.0 | 91.37
+Bond    | 0.002788   | 0.002788   | 0.002788   |   0.0 |  0.02
+Kspace  | 0.62745    | 0.62745    | 0.62745    |   0.0 |  4.42
+Neigh   | 0.49839    | 0.49839    | 0.49839    |   0.0 |  3.51
+Comm    | 0.018597   | 0.018597   | 0.018597   |   0.0 |  0.13
+Output  | 0.015852   | 0.015852   | 0.015852   |   0.0 |  0.11
+Modify  | 0.058415   | 0.058415   | 0.058415   |   0.0 |  0.41
+Other   |            | 0.004126   |            |       |  0.03

 Nlocal:    4500 ave 4500 max 4500 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@ -132,4 +155,4 @@ Ave special neighs/atom = 2
 Neighbor list builds = 3
 Dangerous builds = 0

-Total wall time: 0:00:04
+Total wall time: 0:00:15
--- a/examples/USER/tally/log.12Jun17.stress.4
+++ b/examples/USER/tally/log.12Jun17.stress.4
@ -1,5 +1,4 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
+LAMMPS (19 May 2017)

 units		real
 atom_style	full
@ -50,6 +49,35 @@ fix		1 all shake 0.0001 20 0 b 1 a 1
  1500 = # of frozen angles
 fix		2 all nvt temp 300.0 300.0 100.0

+# make certain that shake constraints are satisfied
+run 0 post no
+PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
+  G vector (1/distance) = 0.218482
+  grid = 15 15 15
+  stencil order = 5
+  estimated absolute RMS force accuracy = 0.0319435
+  estimated relative force accuracy = 9.61968e-05
+  using double precision FFTs
+  3d grid and FFT values/proc = 3380 960
+Neighbor list info ...
+  update every 1 steps, delay 10 steps, check yes
+  max neighbors/atom: 2000, page size: 100000
+  master list distance cutoff = 14
+  ghost atom cutoff = 14
+  binsize = 7, bins = 6 6 6
+  1 neighbor lists, perpetual/occasional/extra = 1 0 0
+  (1) pair lj/cut/coul/long, perpetual
+      attributes: half, newton on
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+Per MPI rank memory allocation (min/avg/max) = 10.6 | 10.61 | 10.61 Mbytes
+Step Temp E_pair E_mol TotEng Press 
+       0            0   -16692.358            0   -16692.358   -1289.8319 
+Loop time of 4e-06 on 4 procs for 0 steps with 4500 atoms
+
+
 group		one molecule 1 2
 6 atoms in group one

@ -79,6 +107,7 @@ thermo		10

 run 50
 PPPM initialization ...
+WARNING: Using 12-bit tables for long-range coulomb (../kspace.cpp:321)
  G vector (1/distance) = 0.218482
  grid = 15 15 15
  stencil order = 5
@ -86,44 +115,38 @@ PPPM initialization ...
  estimated relative force accuracy = 9.61968e-05
  using double precision FFTs
  3d grid and FFT values/proc = 3380 960
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:75)
-Neighbor list info ...
-  1 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 12.0691 Mbytes
-Step press spa press one ref 
-       0    26497.547    26497.547    26497.547   -2357033.6   -2357033.6 
-      10    23665.073    23665.073    23665.073   -2096057.3   -2096057.3 
-      20    23338.149    23338.149    23338.149     -2034283     -2034283 
-      30      25946.4      25946.4      25946.4     -2002817     -2002817 
-      40    27238.349    27238.349    27238.349   -2155411.5   -2155411.5 
-      50    27783.092    27783.092    27783.092   -1862190.3   -1862190.3 
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+WARNING: Compute stress/tally only called from pair style (../compute_stress_tally.cpp:79)
+Per MPI rank memory allocation (min/avg/max) = 15.25 | 15.26 | 15.27 Mbytes
+Step c_press v_spa v_press v_one v_ref 
+       0    26496.811    26496.811    26496.811   -2356992.7   -2356992.7 
+      10    23665.129    23665.129    23665.129     -2096059     -2096059 
+      20    23338.197    23338.197    23338.197   -2034284.1   -2034284.1 
+      30    25946.434    25946.434    25946.434   -2002815.3   -2002815.3 
+      40    27238.374    27238.374    27238.374   -2155408.7   -2155408.7 
+      50    27783.107    27783.107    27783.107   -1862191.5   -1862191.5 
+Loop time of 4.32017 on 4 procs for 50 steps with 4500 atoms

-Loop time of 1.17266 on 4 procs for 50 steps with 4500 atoms
-100.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-Performance: 7.368 ns/day  3.257 hours/ns  42.638 timesteps/s
+Performance: 2.000 ns/day, 12.000 hours/ns, 11.574 timesteps/s
+31.8% CPU use with 4 MPI tasks x no OpenMP threads

-MPI task timings breakdown:
+MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.89832    | 0.93222    | 0.98611    |   3.4 | 79.50
-Bond    | 0.00081754 | 0.00096095 | 0.0011327  |   0.4 |  0.08
-Kspace  | 0.068058   | 0.12154    | 0.15522    |   9.4 | 10.36
-Neigh   | 0.065756   | 0.065785   | 0.065824   |   0.0 |  5.61
-Comm    | 0.017489   | 0.017982   | 0.018623   |   0.4 |  1.53
-Output  | 0.010985   | 0.011017   | 0.011111   |   0.1 |  0.94
-Modify  | 0.021429   | 0.021491   | 0.021551   |   0.0 |  1.83
-Other   |            | 0.001671   |            |       |  0.14
+Pair    | 3.5816     | 3.6917     | 3.839      |   4.9 | 85.45
+Bond    | 0.001579   | 0.0016563  | 0.001709   |   0.1 |  0.04
+Kspace  | 0.22505    | 0.3716     | 0.48023    |  15.3 |  8.60
+Neigh   | 0.14558    | 0.14568    | 0.14575    |   0.0 |  3.37
+Comm    | 0.032009   | 0.03441    | 0.036274   |   0.8 |  0.80
+Output  | 0.02253    | 0.023115   | 0.024844   |   0.7 |  0.54
+Modify  | 0.046954   | 0.047086   | 0.047132   |   0.0 |  1.09
+Other   |            | 0.004935   |            |       |  0.11

 Nlocal:    1125 ave 1154 max 1092 min
 Histogram: 1 0 0 0 1 0 1 0 0 1
 Nghost:    12263.5 ave 12300 max 12219 min
 Histogram: 1 0 1 0 0 0 0 0 0 2
-Neighs:    650438 ave 678786 max 626279 min
+Neighs:    650438 ave 678787 max 626279 min
 Histogram: 1 0 0 1 1 0 0 0 0 1

 Total # of neighbors = 2601750
@ -132,4 +155,4 @@ Ave special neighs/atom = 2
 Neighbor list builds = 3
 Dangerous builds = 0

-Total wall time: 0:00:01
+Total wall time: 0:00:04
--- a/examples/USER/tally/log.21Aug15.force.1
+++ b/examples/USER/tally/log.21Aug15.force.1
@ -1,136 +0,0 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
-
-units		real
-atom_style	full
-
-read_data	data.spce
-  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
-  1 by 1 by 1 MPI processor grid
-  reading atoms ...
-  4500 atoms
-  scanning bonds ...
-  2 = max bonds/atom
-  scanning angles ...
-  1 = max angles/atom
-  reading bonds ...
-  3000 bonds
-  reading angles ...
-  1500 angles
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  1 = max # of 1-4 neighbors
-  2 = max # of special neighbors
-
-pair_style	lj/cut/coul/long 12.0 12.0
-kspace_style	pppm 1.0e-4
-
-pair_coeff	1 1 0.15535 3.166
-pair_coeff	* 2 0.0000 0.0000
-
-bond_style	harmonic
-angle_style	harmonic
-dihedral_style	none
-improper_style	none
-
-bond_coeff	1 1000.00 1.000
-angle_coeff	1 100.0 109.47
-
-special_bonds   lj/coul 0.0 0.0 1.0
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  2 = max # of special neighbors
-
-neighbor        2.0 bin
-
-fix		1 all shake 0.0001 20 0 b 1 a 1
-  0 = # of size 2 clusters
-  0 = # of size 3 clusters
-  0 = # of size 4 clusters
-  1500 = # of frozen angles
-fix		2 all nvt temp 300.0 300.0 100.0
-
-group		one molecule 1 2
-6 atoms in group one
-
-# the following section shows equivalences between using the pe/tally compute and other computes and thermo keywords
-
-# compute pairwise force between two molecules and everybody
-compute		fpa one group/group all pair yes kspace no boundary no
-# tally pairwise force between two molecules and the all molecules
-compute		c1 one force/tally all
-# tally the force of all with all (should be zero)
-compute		c2 all force/tally all
-# collect per atom data. only reduce over the first group.
-compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
-compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
-# determine magnitude of force
-variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
-variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
-# round to 10**-10 absolute precision.
-variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
-
-velocity	all create 300 432567 dist uniform
-
-timestep	2.0
-
-# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
-thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref  c_c2
-thermo		10
-
-run 50
-PPPM initialization ...
-  G vector (1/distance) = 0.218482
-  grid = 15 15 15
-  stencil order = 5
-  estimated absolute RMS force accuracy = 0.0319435
-  estimated relative force accuracy = 9.61968e-05
-  using double precision FFTs
-  3d grid and FFT values/proc = 8000 3375
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-Neighbor list info ...
-  2 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 16.7648 Mbytes
-Step fpa for c1 fpa[1] one[1] fpa[2] one[2] fpa[3] one[3] ref c2 
-       0    22.732789    22.732789    22.732789   -17.068392   -17.068392   -8.8345214   -8.8345214   -12.140878   -12.140878            0            0 
-      10    11.736915    11.736915    11.736915   -3.3898298   -3.3898298     9.119272     9.119272   -6.5652948   -6.5652948            0            0 
-      20    5.6119761    5.6119761    5.6119761  -0.60028931  -0.60028931   -4.4479886   -4.4479886     3.368876     3.368876            0            0 
-      30    17.292617    17.292617    17.292617    6.1793856    6.1793856   -10.593927   -10.593927    12.190919    12.190919            0            0 
-      40    18.664226    18.664226    18.664226    5.4725079    5.4725079    -6.933046    -6.933046    16.441955    16.441955            0            0 
-      50    12.130282    12.130282    12.130282   -1.0321244   -1.0321244    8.0032646    8.0032646   -9.0568326   -9.0568326            0            0 
-
-Loop time of 4.11825 on 1 procs for 50 steps with 4500 atoms
-100.0% CPU use with 1 MPI tasks x 1 OpenMP threads
-Performance: 2.098 ns/day  11.440 hours/ns  12.141 timesteps/s
-
-MPI task timings breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
---------------------------------------------------------------
-Pair    | 3.5286     | 3.5286     | 3.5286     |   0.0 | 85.68
-Bond    | 6.1274e-05 | 6.1274e-05 | 6.1274e-05 |   0.0 |  0.00
-Kspace  | 0.1937     | 0.1937     | 0.1937     |   0.0 |  4.70
-Neigh   | 0.31454    | 0.31454    | 0.31454    |   0.0 |  7.64
-Comm    | 0.01037    | 0.01037    | 0.01037    |   0.0 |  0.25
-Output  | 0.039355   | 0.039355   | 0.039355   |   0.0 |  0.96
-Modify  | 0.029273   | 0.029273   | 0.029273   |   0.0 |  0.71
-Other   |            | 0.002351   |            |       |  0.06
-
-Nlocal:    4500 ave 4500 max 4500 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Nghost:    21131 ave 21131 max 21131 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-Neighs:    2.60198e+06 ave 2.60198e+06 max 2.60198e+06 min
-Histogram: 1 0 0 0 0 0 0 0 0 0
-
-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
-Ave special neighs/atom = 2
-Neighbor list builds = 4
-Dangerous builds = 1
-
-Total wall time: 0:00:04
--- a/examples/USER/tally/log.21Aug15.force.4
+++ b/examples/USER/tally/log.21Aug15.force.4
@ -1,136 +0,0 @@
-LAMMPS (21 Aug 2015-ICMS)
-  using 1 OpenMP thread(s) per MPI task
-
-units		real
-atom_style	full
-
-read_data	data.spce
-  orthogonal box = (0.02645 0.02645 0.02641) to (35.5328 35.5328 35.4736)
-  2 by 2 by 1 MPI processor grid
-  reading atoms ...
-  4500 atoms
-  scanning bonds ...
-  2 = max bonds/atom
-  scanning angles ...
-  1 = max angles/atom
-  reading bonds ...
-  3000 bonds
-  reading angles ...
-  1500 angles
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  1 = max # of 1-4 neighbors
-  2 = max # of special neighbors
-
-pair_style	lj/cut/coul/long 12.0 12.0
-kspace_style	pppm 1.0e-4
-
-pair_coeff	1 1 0.15535 3.166
-pair_coeff	* 2 0.0000 0.0000
-
-bond_style	harmonic
-angle_style	harmonic
-dihedral_style	none
-improper_style	none
-
-bond_coeff	1 1000.00 1.000
-angle_coeff	1 100.0 109.47
-
-special_bonds   lj/coul 0.0 0.0 1.0
-  2 = max # of 1-2 neighbors
-  1 = max # of 1-3 neighbors
-  2 = max # of special neighbors
-
-neighbor        2.0 bin
-
-fix		1 all shake 0.0001 20 0 b 1 a 1
-  0 = # of size 2 clusters
-  0 = # of size 3 clusters
-  0 = # of size 4 clusters
-  1500 = # of frozen angles
-fix		2 all nvt temp 300.0 300.0 100.0
-
-group		one molecule 1 2
-6 atoms in group one
-
-# the following section shows equivalences between using the pe/tally compute and other computes and thermo keywords
-
-# compute pairwise force between two molecules and everybody
-compute		fpa one group/group all pair yes kspace no boundary no
-# tally pairwise force between two molecules and the all molecules
-compute		c1 one force/tally all
-# tally the force of all with all (should be zero)
-compute		c2 all force/tally all
-# collect per atom data. only reduce over the first group.
-compute		one one reduce sum c_c1[1] c_c1[2] c_c1[3]
-compute		red all reduce sum c_c2[1] c_c2[2] c_c2[3]
-# determine magnitude of force
-variable	fpa equal sqrt(c_fpa[1]*c_fpa[1]+c_fpa[2]*c_fpa[2]+c_fpa[3]*c_fpa[3])
-variable	for equal sqrt(c_one[1]*c_one[1]+c_one[2]*c_one[2]+c_one[3]*c_one[3])
-# round to 10**-10 absolute precision.
-variable	ref equal round(1e10*sqrt(c_red[1]*c_red[1]+c_red[2]*c_red[2]+c_red[3]*c_red[3]))*1e-10
-
-velocity	all create 300 432567 dist uniform
-
-timestep	2.0
-
-# v_fpa and v_for and c_c1, c_fpa[] and c_one[] should all each have the same value. v_ref and c_c2 should be zero
-thermo_style    custom step v_fpa v_for c_c1 c_fpa[1] c_one[1] c_fpa[2] c_one[2] c_fpa[3] c_one[3] v_ref  c_c2
-thermo		10
-
-run 50
-PPPM initialization ...
-  G vector (1/distance) = 0.218482
-  grid = 15 15 15
-  stencil order = 5
-  estimated absolute RMS force accuracy = 0.0319435
-  estimated relative force accuracy = 9.61968e-05
-  using double precision FFTs
-  3d grid and FFT values/proc = 3380 960
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-WARNING: Compute force/tally only called from pair style (../compute_force_tally.cpp:75)
-Neighbor list info ...
-  2 neighbor list requests
-  update every 1 steps, delay 10 steps, check yes
-  master list distance cutoff = 14
-  ghost atom cutoff = 14
-  binsize = 7 -> bins = 6 6 6
-Memory usage per processor = 8.16441 Mbytes
-Step fpa for c1 fpa[1] one[1] fpa[2] one[2] fpa[3] one[3] ref c2 
-       0    22.732789    22.732789    22.732789   -17.068392   -17.068392   -8.8345214   -8.8345214   -12.140878   -12.140878            0            0 
-      10    11.736915    11.736915    11.736915   -3.3898298   -3.3898298     9.119272     9.119272   -6.5652948   -6.5652948            0            0 
-      20    5.6119761    5.6119761    5.6119761  -0.60028931  -0.60028931   -4.4479886   -4.4479886     3.368876     3.368876            0            0 
-      30    17.292617    17.292617    17.292617    6.1793856    6.1793856   -10.593927   -10.593927    12.190919    12.190919            0            0 
-      40    18.664226    18.664226    18.664226    5.4725079    5.4725079    -6.933046    -6.933046    16.441955    16.441955            0            0 
-      50    12.130282    12.130282    12.130282   -1.0321244   -1.0321244    8.0032646    8.0032646   -9.0568326   -9.0568326            0            0 
-
-Loop time of 1.13658 on 4 procs for 50 steps with 4500 atoms
-100.0% CPU use with 4 MPI tasks x 1 OpenMP threads
-Performance: 7.602 ns/day  3.157 hours/ns  43.991 timesteps/s
-
-MPI task timings breakdown:
-Section |  min time  |  avg time  |  max time  |%varavg| %total
---------------------------------------------------------------
-Pair    | 0.85795    | 0.89088    | 0.93636    |   3.0 | 78.38
-Bond    | 3.4571e-05 | 4.4644e-05 | 5.4598e-05 |   0.1 |  0.00
-Kspace  | 0.059847   | 0.1051     | 0.1384     |   8.9 |  9.25
-Neigh   | 0.085891   | 0.085954   | 0.086      |   0.0 |  7.56
-Comm    | 0.01758    | 0.018091   | 0.019178   |   0.5 |  1.59
-Output  | 0.013697   | 0.013725   | 0.013805   |   0.0 |  1.21
-Modify  | 0.021068   | 0.021137   | 0.021205   |   0.0 |  1.86
-Other   |            | 0.001656   |            |       |  0.15
-
-Nlocal:    1125 ave 1148 max 1097 min
-Histogram: 1 0 0 1 0 0 0 0 1 1
-Nghost:    12212.5 ave 12269 max 12162 min
-Histogram: 1 0 0 1 0 1 0 0 0 1
-Neighs:    650496 ave 675112 max 631353 min
-Histogram: 1 0 0 1 1 0 0 0 0 1
-
-Total # of neighbors = 2601984
-Ave neighs/atom = 578.219
-Ave special neighs/atom = 2
-Neighbor list builds = 4
-Dangerous builds = 1
-
-Total wall time: 0:00:01
--- a/examples/neb/README
+++ b/examples/neb/README
@ -2,12 +2,16 @@ Run these examples as:

 mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop1
 mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop2
+mpirun -np 4 lmp_g++ -partition 4x1 -in in.neb.hop1.end
 mpirun -np 3 lmp_g++ -partition 3x1 -in in.neb.sivac

 mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop1
 mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop2
-mpirun -np 6 lmp_g++ -partition 3x2 -in in.neb.sivac
-mpirun -np 9 lmp_g++ -partition 3x3 -in in.neb.sivac
+mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.hop1.end
+mpirun -np 8 lmp_g++ -partition 4x2 -in in.neb.sivac
+
+Note that more than 4 replicas should be used for a precise estimate 
+of the activation energy corresponding to a transition.

 If you uncomment the dump command lines in the input scripts, you can
 create dump files to do visualization from via Python tools: (see
--- a/examples/neb/in.neb.hop1
+++ b/examples/neb/in.neb.hop1
@ -51,7 +51,7 @@ set		group nebatoms type 3
 group		nonneb subtract all nebatoms

 fix		1 lower setforce 0.0 0.0 0.0
-fix		2 nebatoms neb 1.0
+fix		2 nebatoms neb 1.0 parallel ideal
 fix		3 all enforce2d

 thermo		100
--- a/examples/neb/in.neb.hop1.end
+++ b/examples/neb/in.neb.hop1.end
@ -0,0 +1,56 @@
+# 2d NEB surface simulation, hop from surface to become adatom
+
+dimension	2
+boundary	p s p
+
+atom_style	atomic
+neighbor	0.3 bin
+neigh_modify	delay 5
+atom_modify	map array sort 0 0.0
+
+variable	u uloop 20
+
+# create geometry with flat surface
+
+lattice		hex 0.9
+region		box block 0 20 0 10 -0.25 0.25
+
+read_data        initial.hop1.end
+
+# LJ potentials
+
+pair_style	lj/cut 2.5
+pair_coeff	* * 1.0 1.0 2.5
+pair_modify	shift yes
+
+# define groups
+
+region	        1 block INF INF INF 1.25 INF INF
+group		lower region 1
+group		mobile subtract all lower
+set		group lower type 2
+
+timestep	0.05
+
+# group of NEB atoms - either block or single atom ID 412
+
+region		surround block 10 18 17 20 0 0 units box
+group		nebatoms region surround
+#group		nebatoms id 412
+set		group nebatoms type 3
+group		nonneb subtract all nebatoms
+
+fix		1 lower setforce 0.0 0.0 0.0
+fix		2 nebatoms neb 1.0 parallel ideal end first 1.0
+fix		3 all enforce2d
+
+thermo		100
+
+#dump		1 nebatoms atom 10 dump.neb.$u
+#dump		2 nonneb atom 10 dump.nonneb.$u
+
+# run NEB for 2000 steps or to force tolerance
+
+min_style	quickmin
+
+neb		0.0 0.1 1000 1000 100 final final.hop1
--- a/examples/neb/in.neb.hop2
+++ b/examples/neb/in.neb.hop2
@ -53,7 +53,7 @@ set		group nebatoms type 3
 group		nonneb subtract all nebatoms

 fix		1 lower setforce 0.0 0.0 0.0
-fix		2 nebatoms neb 1.0
+fix		2 nebatoms neb 1.0 
 fix		3 all enforce2d

 thermo		100
@ -65,4 +65,4 @@ thermo		100

 min_style	fire

-neb		0.0 0.01 1000 1000 100 final final.hop2
+neb		0.0 0.05 1000 1000 100 final final.hop2
--- a/examples/neb/in.neb.sivac
+++ b/examples/neb/in.neb.sivac
@ -66,7 +66,7 @@ minimize	1.0e-6 1.0e-4 1000 10000

 reset_timestep	0

-fix		1 all neb 1.0
+fix		1 all neb 1.0 

 thermo		100

--- a/examples/neb/initial.hop1.end
+++ b/examples/neb/initial.hop1.end
@ -0,0 +1,860 @@
+LAMMPS data file via write_data, version 4 May 2017, timestep = 155
+
+420 atoms
+3 atom types
+
+0.0000000000000000e+00 2.2653923264628304e+01 xlo xhi
+2.1918578738841410e-01 1.9932852254455714e+01 ylo yhi
+-2.8317404080785380e-01 2.8317404080785380e-01 zlo zhi
+
+Masses
+
+1 1
+2 1
+3 1
+
+Atoms # atomic
+
+1 2 0.0000000000000000e+00 2.2114806707013038e-01 0.0000000000000000e+00 0 0 0
+2 2 5.6634808161570760e-01 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0
+3 2 1.1326961632314152e+00 2.2114806707013018e-01 0.0000000000000000e+00 0 0 0
+4 2 1.6990442448471228e+00 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0
+5 2 2.2653923264628304e+00 2.2114806707013032e-01 0.0000000000000000e+00 0 0 0
+6 2 2.8317404080785380e+00 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0
+7 2 3.3980884896942456e+00 2.2114806707013024e-01 0.0000000000000000e+00 0 0 0
+8 2 3.9644365713099532e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+9 2 4.5307846529256608e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+10 2 5.0971327345413684e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+11 2 5.6634808161570760e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+12 2 6.2298288977727836e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+13 2 6.7961769793884912e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+14 2 7.3625250610041988e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+15 2 7.9288731426199064e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+16 2 8.4952212242356140e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+17 2 9.0615693058513216e+00 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+18 2 9.6279173874670292e+00 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+19 2 1.0194265469082737e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+20 2 1.0760613550698444e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+21 2 1.1326961632314152e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+22 2 1.1893309713929860e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+23 2 1.2459657795545567e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+24 2 1.3026005877161275e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+25 2 1.3592353958776982e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+26 2 1.4158702040392690e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+27 2 1.4725050122008398e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+28 2 1.5291398203624105e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+29 2 1.5857746285239813e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+30 2 1.6424094366855520e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+31 2 1.6990442448471228e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+32 2 1.7556790530086936e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+33 2 1.8123138611702643e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+34 2 1.8689486693318351e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+35 2 1.9255834774934058e+01 2.2114806707013010e-01 0.0000000000000000e+00 0 0 0
+36 2 1.9822182856549766e+01 1.1832938184587636e+00 0.0000000000000000e+00 0 0 0
+37 2 2.0388530938165474e+01 2.2114806707013024e-01 0.0000000000000000e+00 0 0 0
+38 2 2.0954879019781181e+01 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0
+39 2 2.1521227101396889e+01 2.2114806707013043e-01 0.0000000000000000e+00 0 0 0
+40 2 2.2087575183012596e+01 1.1832938184587634e+00 0.0000000000000000e+00 0 0 0
+41 2 5.5197595012095140e-17 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0
+42 1 5.6653050195082300e-01 3.1000166664180786e+00 0.0000000000000000e+00 0 0 0
+43 2 1.1326961632314152e+00 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0
+44 1 1.6992713312703549e+00 3.1000339212153092e+00 0.0000000000000000e+00 0 0 0
+45 2 2.2653923264628304e+00 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0
+46 1 2.8319979330663916e+00 3.1000568858502824e+00 0.0000000000000000e+00 0 0 0
+47 2 3.3980884896942456e+00 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0
+48 1 3.9647072056144004e+00 3.1000829051868171e+00 0.0000000000000000e+00 0 0 0
+49 2 4.5307846529256608e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+50 1 5.0973978903306154e+00 3.1001089282984520e+00 0.0000000000000000e+00 0 0 0
+51 2 5.6634808161570760e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+52 1 6.2300706856774344e+00 3.1001320005511488e+00 0.0000000000000000e+00 0 0 0
+53 2 6.7961769793884912e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+54 1 7.3627281418365298e+00 3.1001497026412643e+00 0.0000000000000000e+00 0 0 0
+55 2 7.9288731426199064e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+56 1 8.4953743353575657e+00 3.1001604410839558e+00 0.0000000000000000e+00 0 0 0
+57 2 9.0615693058513216e+00 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+58 1 9.6280143647524650e+00 3.1001635457640377e+00 0.0000000000000000e+00 0 0 0
+59 2 1.0194265469082737e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+60 1 1.0760653757776259e+01 3.1001591904894030e+00 0.0000000000000000e+00 0 0 0
+61 2 1.1326961632314152e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+62 1 1.1893297897551465e+01 3.1001481997229781e+00 0.0000000000000000e+00 0 0 0
+63 2 1.2459657795545567e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+64 1 1.3025951551034638e+01 3.1001318239711781e+00 0.0000000000000000e+00 0 0 0
+65 2 1.3592353958776982e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+66 1 1.4158618530491893e+01 3.1001115545681470e+00 0.0000000000000000e+00 0 0 0
+67 2 1.4725050122008398e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+68 1 1.5291301468763761e+01 3.1000890162853869e+00 0.0000000000000000e+00 0 0 0
+69 2 1.5857746285239813e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+70 1 1.6424001663467980e+01 3.1000659357603495e+00 0.0000000000000000e+00 0 0 0
+71 2 1.6990442448471228e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+72 1 1.7556718955895743e+01 3.1000441476131195e+00 0.0000000000000000e+00 0 0 0
+73 2 1.8123138611702643e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+74 1 1.8689451647636982e+01 3.1000255781755963e+00 0.0000000000000000e+00 0 0 0
+75 2 1.9255834774934058e+01 2.1414943053865132e+00 0.0000000000000000e+00 0 0 0
+76 1 1.9822196505112320e+01 3.1000121466922494e+00 0.0000000000000000e+00 0 0 0
+77 2 2.0388530938165474e+01 2.1414943053865136e+00 0.0000000000000000e+00 0 0 0
+78 1 2.0954948927196146e+01 3.1000055506449713e+00 0.0000000000000000e+00 0 0 0
+79 2 2.1521227101396889e+01 2.1414943053865141e+00 0.0000000000000000e+00 0 0 0
+80 1 2.2087703334137267e+01 3.1000069547492535e+00 0.0000000000000000e+00 0 0 0
+81 1 3.1056926716504509e-04 4.0585004644184055e+00 0.0000000000000000e+00 0 0 0
+82 1 5.6689331628382078e-01 5.0169857265632762e+00 0.0000000000000000e+00 0 0 0
+83 1 1.1331010876667682e+00 4.0585336877518543e+00 0.0000000000000000e+00 0 0 0
+84 1 1.6997107179473134e+00 5.0170595571637469e+00 0.0000000000000000e+00 0 0 0
+85 1 2.2658691471408239e+00 4.0585832735991989e+00 0.0000000000000000e+00 0 0 0
+86 1 2.8324913387275488e+00 5.0171576059016481e+00 0.0000000000000000e+00 0 0 0
+87 1 3.3986077265334802e+00 4.0586437183143182e+00 0.0000000000000000e+00 0 0 0
+88 1 3.9652274946581523e+00 5.0172690174612651e+00 0.0000000000000000e+00 0 0 0
+89 1 4.5313127263524615e+00 4.0587080892871539e+00 0.0000000000000000e+00 0 0 0
+90 1 5.0979153202534064e+00 5.0173813990872880e+00 0.0000000000000000e+00 0 0 0
+91 1 5.6639833195247755e+00 4.0587690704404489e+00 0.0000000000000000e+00 0 0 0
+92 1 6.2305551824295442e+00 5.0174824868813017e+00 0.0000000000000000e+00 0 0 0
+93 1 6.7966220748571669e+00 4.0588200390400129e+00 0.0000000000000000e+00 0 0 0
+94 1 7.3631519876339633e+00 5.0175617795367824e+00 0.0000000000000000e+00 0 0 0
+95 1 7.9292347620768062e+00 4.0588559557915787e+00 0.0000000000000000e+00 0 0 0
+96 1 8.4957150696300925e+00 5.0176118394895646e+00 0.0000000000000000e+00 0 0 0
+97 1 9.0618297669257259e+00 4.0588738859603266e+00 0.0000000000000000e+00 0 0 0
+98 1 9.6282574219214077e+00 5.0176289672989007e+00 0.0000000000000000e+00 0 0 0
+99 1 1.0194417159454611e+01 4.0588730767572860e+00 0.0000000000000000e+00 0 0 0
+100 1 1.0760794315385466e+01 5.0176131474245498e+00 0.0000000000000000e+00 0 0 0
+101 1 1.1327007580768864e+01 4.0588546552053515e+00 0.0000000000000000e+00 0 0 0
+102 1 1.1893341583868121e+01 5.0175674119454996e+00 0.0000000000000000e+00 0 0 0
+103 1 1.2459611156068675e+01 4.0588211205885418e+00 0.0000000000000000e+00 0 0 0
+104 1 1.3025913928919357e+01 5.0174969437432848e+00 0.0000000000000000e+00 0 0 0
+105 1 1.3592236588154931e+01 4.0587758328652299e+00 0.0000000000000000e+00 0 0 0
+106 1 1.4158523495745847e+01 5.0174082592346645e+00 0.0000000000000000e+00 0 0 0
+107 1 1.4724890484932756e+01 4.0587226477181808e+00 0.0000000000000000e+00 0 0 0
+108 1 1.5291178803597106e+01 5.0173086870307237e+00 0.0000000000000000e+00 0 0 0
+109 1 1.5857576888353359e+01 4.0586657476126140e+00 0.0000000000000000e+00 0 0 0
+110 1 1.6423884000624799e+01 5.0172061640888863e+00 0.0000000000000000e+00 0 0 0
+111 1 1.6990296946466405e+01 4.0586096139851531e+00 0.0000000000000000e+00 0 0 0
+112 1 1.7556638404998214e+01 5.0171091825602536e+00 0.0000000000000000e+00 0 0 0
+113 1 1.8123048711157228e+01 4.0585590083330025e+00 0.0000000000000000e+00 0 0 0
+114 1 1.8689436384449273e+01 5.0170266065355777e+00 0.0000000000000000e+00 0 0 0
+115 1 1.9255827121600600e+01 4.0585188068824696e+00 0.0000000000000000e+00 0 0 0
+116 1 1.9822267727126505e+01 5.0169670887341100e+00 0.0000000000000000e+00 0 0 0
+117 1 2.0388624292977298e+01 4.0584935738800203e+00 0.0000000000000000e+00 0 0 0
+118 1 2.0955118660666272e+01 5.0169379847636248e+00 0.0000000000000000e+00 0 0 0
+119 1 2.1521430213723754e+01 4.0584868720623906e+00 0.0000000000000000e+00 0 0 0
+120 1 2.2087973498256840e+01 5.0169439545250629e+00 0.0000000000000000e+00 0 0 0
+121 1 6.5693888433665819e-04 5.9753894955957820e+00 0.0000000000000000e+00 0 0 0
+122 1 5.6732815172745055e-01 6.9338399304063270e+00 0.0000000000000000e+00 0 0 0
+123 1 1.1335287178365945e+00 5.9754794631117711e+00 0.0000000000000000e+00 0 0 0
+124 1 1.7002170239497103e+00 6.9340002985609068e+00 0.0000000000000000e+00 0 0 0
+125 1 2.2663607603415961e+00 5.9756128140497200e+00 0.0000000000000000e+00 0 0 0
+126 1 2.8330530781363672e+00 6.9342120448719999e+00 0.0000000000000000e+00 0 0 0
+127 1 3.3991419489134609e+00 5.9757753571666763e+00 0.0000000000000000e+00 0 0 0
+128 1 3.9658260192240613e+00 6.9344537679961507e+00 0.0000000000000000e+00 0 0 0
+129 1 4.5318648361825700e+00 5.9759497256682410e+00 0.0000000000000000e+00 0 0 0
+130 1 5.0985283212441441e+00 6.9347011709619251e+00 0.0000000000000000e+00 0 0 0
+131 1 5.6645260085061278e+00 5.9761173336292988e+00 0.0000000000000000e+00 0 0 0
+132 1 6.2311560978848899e+00 6.9349290616610286e+00 0.0000000000000000e+00 0 0 0
+133 1 6.7971269443565747e+00 5.9762605495489280e+00 0.0000000000000000e+00 0 0 0
+134 1 7.3637111409380722e+00 6.9351139899270322e+00 0.0000000000000000e+00 0 0 0
+135 1 7.9296749255956041e+00 5.9763649377635293e+00 0.0000000000000000e+00 0 0 0
+136 1 8.4962030194710927e+00 6.9352373287964380e+00 0.0000000000000000e+00 0 0 0
+137 1 9.0621832354873675e+00 5.9764210443253543e+00 0.0000000000000000e+00 0 0 0
+138 1 9.6286498073427680e+00 6.9352878316174378e+00 0.0000000000000000e+00 0 0 0
+139 1 1.0194670187278859e+01 5.9764252099845692e+00 0.0000000000000000e+00 0 0 0
+140 1 1.0761076657788550e+01 6.9352626758233988e+00 0.0000000000000000e+00 0 0 0
+141 1 1.1327157075090911e+01 5.9763792530010624e+00 0.0000000000000000e+00 0 0 0
+142 1 1.1893512574619942e+01 6.9351667929726579e+00 0.0000000000000000e+00 0 0 0
+143 1 1.2459665575239395e+01 5.9762893326393627e+00 0.0000000000000000e+00 0 0 0
+144 1 1.3025986375565017e+01 6.9350110693313853e+00 0.0000000000000000e+00 0 0 0
+145 1 1.3592215193397568e+01 5.9761645662970668e+00 0.0000000000000000e+00 0 0 0
+146 1 1.4158522852873338e+01 6.9348103731164086e+00 0.0000000000000000e+00 0 0 0
+147 1 1.4724821146400661e+01 5.9760159211600943e+00 0.0000000000000000e+00 0 0 0
+148 1 1.5291139696354101e+01 6.9345821426564456e+00 0.0000000000000000e+00 0 0 0
+149 1 1.5857492801384174e+01 5.9758556179168476e+00 0.0000000000000000e+00 0 0 0
+150 1 1.6423845342403485e+01 6.9343457083902562e+00 0.0000000000000000e+00 0 0 0
+151 1 1.6990232659392920e+01 5.9756969240409656e+00 0.0000000000000000e+00 0 0 0
+152 1 1.7556637811168688e+01 6.9341219348234606e+00 0.0000000000000000e+00 0 0 0
+153 1 1.8123035927190724e+01 5.9755539362099377e+00 0.0000000000000000e+00 0 0 0
+154 1 1.8689504786395585e+01 6.9339324030079297e+00 0.0000000000000000e+00 0 0 0
+155 1 1.9255890870078105e+01 5.9754408656724385e+00 0.0000000000000000e+00 0 0 0
+156 1 1.9822425360651039e+01 6.9337974543626846e+00 0.0000000000000000e+00 0 0 0
+157 1 2.0388780199969101e+01 5.9753705251759808e+00 0.0000000000000000e+00 0 0 0
+158 1 2.0955373611320280e+01 6.9337330661143222e+00 0.0000000000000000e+00 0 0 0
+159 1 2.1521683507254988e+01 5.9753521824721574e+00 0.0000000000000000e+00 0 0 0
+160 1 2.2088323232189435e+01 6.9337475792566039e+00 0.0000000000000000e+00 0 0 0
+161 1 1.1151815023353693e-03 7.8921416571122727e+00 0.0000000000000000e+00 0 0 0
+162 1 5.6789887436851039e-01 8.8505576275120745e+00 0.0000000000000000e+00 0 0 0
+163 1 1.1340615020344891e+00 7.8923152028921146e+00 0.0000000000000000e+00 0 0 0
+164 1 1.7008494977197184e+00 8.8508369646616227e+00 0.0000000000000000e+00 0 0 0
+165 1 2.2669564852467339e+00 7.8925678787693965e+00 0.0000000000000000e+00 0 0 0
+166 1 2.8337429988374914e+00 8.8512007799959171e+00 0.0000000000000000e+00 0 0 0
+167 1 3.3997890834793392e+00 7.8928753791925752e+00 0.0000000000000000e+00 0 0 0
+168 1 3.9665700017177907e+00 8.8516188392723496e+00 0.0000000000000000e+00 0 0 0
+169 1 4.5325491541722158e+00 7.8932093579911635e+00 0.0000000000000000e+00 0 0 0
+170 1 5.0993179760197034e+00 8.8520570451664753e+00 0.0000000000000000e+00 0 0 0
+171 1 5.6652272696563086e+00 7.8935385042762318e+00 0.0000000000000000e+00 0 0 0
+172 1 6.2319722558852177e+00 8.8524768944511472e+00 0.0000000000000000e+00 0 0 0
+173 1 6.7978170214800082e+00 7.8938302754648539e+00 0.0000000000000000e+00 0 0 0
+174 1 7.3645207249719933e+00 8.8528366651387476e+00 0.0000000000000000e+00 0 0 0
+175 1 7.9303191911043118e+00 7.8940542651579788e+00 0.0000000000000000e+00 0 0 0
+176 1 8.4969615618418324e+00 8.8530963542120293e+00 0.0000000000000000e+00 0 0 0
+177 1 9.0627458585593441e+00 7.8941868850969135e+00 0.0000000000000000e+00 0 0 0
+178 1 9.6293104463590424e+00 8.8532254399208412e+00 0.0000000000000000e+00 0 0 0
+179 1 1.0195121730902658e+01 7.8942152485172352e+00 0.0000000000000000e+00 0 0 0
+180 1 1.0761602408503441e+01 8.8532092085980238e+00 0.0000000000000000e+00 0 0 0
+181 1 1.1327481649719793e+01 7.8941385508356099e+00 0.0000000000000000e+00 0 0 0
+182 1 1.1893886870241856e+01 8.8530505445055354e+00 0.0000000000000000e+00 0 0 0
+183 1 1.2459865179342737e+01 7.8939667557582798e+00 0.0000000000000000e+00 0 0 0
+184 1 1.3026218291904378e+01 8.8527674547956821e+00 0.0000000000000000e+00 0 0 0
+185 1 1.3592310202433307e+01 7.8937178025905181e+00 0.0000000000000000e+00 0 0 0
+186 1 1.4158645900042497e+01 8.8523887379317436e+00 0.0000000000000000e+00 0 0 0
+187 1 1.4724847145311326e+01 7.8934149070498600e+00 0.0000000000000000e+00 0 0 0
+188 1 1.5291205081244327e+01 8.8519503874602243e+00 0.0000000000000000e+00 0 0 0
+189 1 1.5857494607334019e+01 7.8930848995638652e+00 0.0000000000000000e+00 0 0 0
+190 1 1.6423911366860466e+01 8.8514936483282209e+00 0.0000000000000000e+00 0 0 0
+191 1 1.6990256625068444e+01 7.8927574412240151e+00 0.0000000000000000e+00 0 0 0
+192 1 1.7556757521848787e+01 8.8510636099500459e+00 0.0000000000000000e+00 0 0 0
+193 1 1.8123121878813144e+01 7.8924640508501298e+00 0.0000000000000000e+00 0 0 0
+194 1 1.8689714850348466e+01 8.8507060559423465e+00 0.0000000000000000e+00 0 0 0
+195 1 1.9256065579477248e+01 7.8922356001392169e+00 0.0000000000000000e+00 0 0 0
+196 1 1.9822740225596814e+01 8.8504608774193994e+00 0.0000000000000000e+00 0 0 0
+197 1 2.0389054599310764e+01 7.8920977743942782e+00 0.0000000000000000e+00 0 0 0
+198 1 2.0955788196198530e+01 8.8503534864083591e+00 0.0000000000000000e+00 0 0 0
+199 1 2.1522054950758765e+01 7.8920658349416701e+00 0.0000000000000000e+00 0 0 0
+200 1 2.2088823030833748e+01 8.8503894045591807e+00 0.0000000000000000e+00 0 0 0
+201 1 1.7402898961801966e-03 9.8087331458102049e+00 0.0000000000000000e+00 0 0 0
+202 1 5.6862550253253785e-01 1.0767129063577668e+01 0.0000000000000000e+00 0 0 0
+203 1 1.1347351125604563e+00 9.8090210312609756e+00 0.0000000000000000e+00 0 0 0
+204 1 1.7016010961270076e+00 1.0767553944884048e+01 0.0000000000000000e+00 0 0 0
+205 1 2.2676800733457139e+00 9.8094251915038573e+00 0.0000000000000000e+00 0 0 0
+206 1 2.8345388558320415e+00 1.0768094021206529e+01 0.0000000000000000e+00 0 0 0
+207 1 3.4005711921286008e+00 9.8099146303251388e+00 0.0000000000000000e+00 0 0 0
+208 1 3.9674359888022686e+00 1.0768719604543580e+01 0.0000000000000000e+00 0 0 0
+209 1 4.5333977826109315e+00 9.8104561733570019e+00 0.0000000000000000e+00 0 0 0
+210 1 5.1002760963180327e+00 1.0769398202643465e+01 0.0000000000000000e+00 0 0 0
+211 1 5.6661407887052828e+00 9.8110111848429966e+00 0.0000000000000000e+00 0 0 0
+212 1 6.2330282022400469e+00 1.0770087202120337e+01 0.0000000000000000e+00 0 0 0
+213 1 6.7987755062394477e+00 9.8115326503110527e+00 0.0000000000000000e+00 0 0 0
+214 1 7.3656514287550623e+00 1.0770727843890981e+01 0.0000000000000000e+00 0 0 0
+215 1 7.9312798141889260e+00 9.8119658218493768e+00 0.0000000000000000e+00 0 0 0
+216 1 8.4981076412551477e+00 1.0771244633836279e+01 0.0000000000000000e+00 0 0 0
+217 1 9.0636474998261161e+00 9.8122560909429151e+00 0.0000000000000000e+00 0 0 0
+218 1 9.6303843877347930e+00 1.0771559046035311e+01 0.0000000000000000e+00 0 0 0
+219 1 1.0195900672859819e+01 9.8123627359180627e+00 0.0000000000000000e+00 0 0 0
+220 1 1.0762516251278290e+01 1.0771614844517241e+01 0.0000000000000000e+00 0 0 0
+221 1 1.1328091472906591e+01 9.8122692653101016e+00 0.0000000000000000e+00 0 0 0
+222 1 1.1894584725285364e+01 1.0771394980275380e+01 0.0000000000000000e+00 0 0 0
+223 1 1.2460291956550108e+01 9.8119854743716211e+00 0.0000000000000000e+00 0 0 0
+224 1 1.3026697175518089e+01 1.0770922584297365e+01 0.0000000000000000e+00 0 0 0
+225 1 1.3592577560562113e+01 9.8115426529845742e+00 0.0000000000000000e+00 0 0 0
+226 1 1.4158957523975143e+01 1.0770251678533704e+01 0.0000000000000000e+00 0 0 0
+227 1 1.4725010595311739e+01 9.8109868569230709e+00 0.0000000000000000e+00 0 0 0
+228 1 1.5291439665423439e+01 1.0769456959141509e+01 0.0000000000000000e+00 0 0 0
+229 1 1.5857627568713173e+01 9.8103742214932304e+00 0.0000000000000000e+00 0 0 0
+230 1 1.6424169320270668e+01 1.0768628052568168e+01 0.0000000000000000e+00 0 0 0
+231 1 1.6990431516954079e+01 9.8097684628141781e+00 0.0000000000000000e+00 0 0 0
+232 1 1.7557116532362020e+01 1.0767864432631596e+01 0.0000000000000000e+00 0 0 0
+233 1 1.8123390991250901e+01 9.8092369760472078e+00 0.0000000000000000e+00 0 0 0
+234 1 1.8690204705628890e+01 1.0767262063551410e+01 0.0000000000000000e+00 0 0 0
+235 1 1.9256448808830498e+01 9.8088413825519911e+00 0.0000000000000000e+00 0 0 0
+236 1 1.9823340586830241e+01 1.0766888821404979e+01 0.0000000000000000e+00 0 0 0
+237 1 2.0389541413400988e+01 9.8086229912274785e+00 0.0000000000000000e+00 0 0 0
+238 1 2.0956458511796701e+01 1.0766759511236279e+01 0.0000000000000000e+00 0 0 0
+239 1 2.1522621458778595e+01 9.8085916713182311e+00 0.0000000000000000e+00 0 0 0
+240 1 2.2089529168272502e+01 1.0766851883618157e+01 0.0000000000000000e+00 0 0 0
+241 1 2.5440858595377333e-03 1.1725176449724485e+01 0.0000000000000000e+00 0 0 0
+242 1 5.6945959459694062e-01 1.2683596360703445e+01 0.0000000000000000e+00 0 0 0
+243 1 1.1355189649219313e+00 1.1725603142335736e+01 0.0000000000000000e+00 0 0 0
+244 1 1.7023827890664067e+00 1.2684167657575470e+01 0.0000000000000000e+00 0 0 0
+245 1 2.2684713496063051e+00 1.1726169790097240e+01 0.0000000000000000e+00 0 0 0
+246 1 2.8353214317297493e+00 1.2684869845626739e+01 0.0000000000000000e+00 0 0 0
+247 1 3.4014115221528614e+00 1.1726849793467629e+01 0.0000000000000000e+00 0 0 0
+248 1 3.9682847366436711e+00 1.2685690043118647e+01 0.0000000000000000e+00 0 0 0
+249 1 4.5343333925353440e+00 1.1727620546655658e+01 0.0000000000000000e+00 0 0 0
+250 1 5.1012595788864648e+00 1.2686617936467927e+01 0.0000000000000000e+00 0 0 0
+251 1 5.6672100999124009e+00 1.1728453321807010e+01 0.0000000000000000e+00 0 0 0
+252 1 6.2342050679378476e+00 1.2687631443781253e+01 0.0000000000000000e+00 0 0 0
+253 1 6.7999929539663801e+00 1.1729301393807379e+01 0.0000000000000000e+00 0 0 0
+254 1 7.3670487632296053e+00 1.2688678524169049e+01 0.0000000000000000e+00 0 0 0
+255 1 7.9326168577620031e+00 1.1730088752185795e+01 0.0000000000000000e+00 0 0 0
+256 1 8.4996909972151879e+00 1.2689657545646673e+01 0.0000000000000000e+00 0 0 0
+257 1 9.0650186324858186e+00 1.1730705889838760e+01 0.0000000000000000e+00 0 0 0
+258 1 9.6320279172941738e+00 1.2690401359419884e+01 0.0000000000000000e+00 0 0 0
+259 1 1.0197176988949883e+01 1.1731033591325737e+01 0.0000000000000000e+00 0 0 0
+260 1 1.0764025265158372e+01 1.2690719979755405e+01 0.0000000000000000e+00 0 0 0
+261 1 1.1329151471753224e+01 1.1730992082437087e+01 0.0000000000000000e+00 0 0 0
+262 1 1.1895793457864773e+01 1.2690499952724066e+01 0.0000000000000000e+00 0 0 0
+263 1 1.2461074954083520e+01 1.1730568008302011e+01 0.0000000000000000e+00 0 0 0
+264 1 1.3027557263784812e+01 1.2689744664661927e+01 0.0000000000000000e+00 0 0 0
+265 1 1.3593109813371450e+01 1.1729811224797992e+01 0.0000000000000000e+00 0 0 0
+266 1 1.4159562461497188e+01 1.2688557996910490e+01 0.0000000000000000e+00 0 0 0
+267 1 1.4725395146103379e+01 1.1728816806025771e+01 0.0000000000000000e+00 0 0 0
+268 1 1.5291979336937130e+01 1.2687112353846338e+01 0.0000000000000000e+00 0 0 0
+269 1 1.5858003969640130e+01 1.1727709969544065e+01 0.0000000000000000e+00 0 0 0
+270 1 1.6424834380846097e+01 1.2685627373535834e+01 0.0000000000000000e+00 0 0 0
+271 1 1.6990919595491782e+01 1.1726637864021814e+01 0.0000000000000000e+00 0 0 0
+272 1 1.7557997576834389e+01 1.2684345428666392e+01 0.0000000000000000e+00 0 0 0
+273 1 1.8124040077451223e+01 1.1725751812758334e+01 0.0000000000000000e+00 0 0 0
+274 1 1.8691233014266899e+01 1.2683475493290855e+01 0.0000000000000000e+00 0 0 0
+275 1 1.9257221467828444e+01 1.1725167602985902e+01 0.0000000000000000e+00 0 0 0
+276 1 1.9824402296022900e+01 1.2683041511515679e+01 0.0000000000000000e+00 0 0 0
+277 1 2.0390369399207284e+01 1.1724905550220807e+01 0.0000000000000000e+00 0 0 0
+278 1 2.0957468093457749e+01 1.2682973372169659e+01 0.0000000000000000e+00 0 0 0
+279 1 2.1523449818304549e+01 1.1724927159323300e+01 0.0000000000000000e+00 0 0 0
+280 1 2.2090449109149038e+01 1.2683182366055206e+01 0.0000000000000000e+00 0 0 0
+281 1 3.4599372752678664e-03 1.3641562113178441e+01 0.0000000000000000e+00 0 0 0
+282 1 5.7026645299712297e-01 1.4600072778762289e+01 0.0000000000000000e+00 0 0 0
+283 1 1.1363018176625184e+00 1.3642122621820601e+01 0.0000000000000000e+00 0 0 0
+284 1 1.7030304397851530e+00 1.4600744808517264e+01 0.0000000000000000e+00 0 0 0
+285 1 2.2691832178537314e+00 1.3642819174389231e+01 0.0000000000000000e+00 0 0 0
+286 1 2.8358920720180945e+00 1.4601540713388465e+01 0.0000000000000000e+00 0 0 0
+287 1 3.4021290160499604e+00 1.3643644649633430e+01 0.0000000000000000e+00 0 0 0
+288 1 3.9688713598453158e+00 1.4602477110803298e+01 0.0000000000000000e+00 0 0 0
+289 1 4.5351441054704758e+00 1.3644605343002951e+01 0.0000000000000000e+00 0 0 0
+290 1 5.1019693529741863e+00 1.4603583498422479e+01 0.0000000000000000e+00 0 0 0
+291 1 5.6682074099496385e+00 1.3645707993168530e+01 0.0000000000000000e+00 0 0 0
+292 1 6.2351598742721581e+00 1.4604888485699654e+01 0.0000000000000000e+00 0 0 0
+293 1 6.8012626955016664e+00 1.3646941178415421e+01 0.0000000000000000e+00 0 0 0
+294 1 7.3683745276621622e+00 1.4606397643582930e+01 0.0000000000000000e+00 0 0 0
+295 1 7.9342088141787288e+00 1.3648248663869856e+01 0.0000000000000000e+00 0 0 0
+296 1 8.5014805494154277e+00 1.4608055299059714e+01 0.0000000000000000e+00 0 0 0
+297 1 9.0668968877652869e+00 1.3649493406156790e+01 0.0000000000000000e+00 0 0 0
+298 1 9.6342620410232698e+00 1.4609684783907733e+01 0.0000000000000000e+00 0 0 0
+299 1 1.0199152905272882e+01 1.3650407439181874e+01 0.0000000000000000e+00 0 0 0
+300 1 1.0766413330496736e+01 1.4610836305969919e+01 0.0000000000000000e+00 0 0 0
+301 1 1.1330921232704116e+01 1.3650669393063648e+01 0.0000000000000000e+00 0 0 0
+302 1 1.1897841502623006e+01 1.4610930556808350e+01 0.0000000000000000e+00 0 0 0
+303 1 1.2462421601307861e+01 1.3650120591395567e+01 0.0000000000000000e+00 0 0 0
+304 1 1.3029018938638984e+01 1.4609761134444172e+01 0.0000000000000000e+00 0 0 0
+305 1 1.3594046992755665e+01 1.3648816127477271e+01 0.0000000000000000e+00 0 0 0
+306 1 1.4160647838817273e+01 1.4607529632893289e+01 0.0000000000000000e+00 0 0 0
+307 1 1.4726158301286814e+01 1.3646976680611393e+01 0.0000000000000000e+00 0 0 0
+308 1 1.5293172565463893e+01 1.4604737496674128e+01 0.0000000000000000e+00 0 0 0
+309 1 1.5858902039329786e+01 1.3644935977195637e+01 0.0000000000000000e+00 0 0 0
+310 1 1.6426542400228328e+01 1.4602024837155536e+01 0.0000000000000000e+00 0 0 0
+311 1 1.6992136000094352e+01 1.3643085312965626e+01 0.0000000000000000e+00 0 0 0
+312 1 1.7559955069272618e+01 1.4600235886953440e+01 0.0000000000000000e+00 0 0 0
+313 1 1.8125444003110619e+01 1.3641827838397322e+01 0.0000000000000000e+00 0 0 0
+314 1 1.8693119558449155e+01 1.4599324386000902e+01 0.0000000000000000e+00 0 0 0
+315 1 1.9258622464532973e+01 1.3641178402937728e+01 0.0000000000000000e+00 0 0 0
+316 1 1.9826034884164418e+01 1.4599040434292966e+01 0.0000000000000000e+00 0 0 0
+317 1 2.0391639577074734e+01 1.3641005830730871e+01 0.0000000000000000e+00 0 0 0
+318 1 2.0958793244869717e+01 1.4599158580846662e+01 0.0000000000000000e+00 0 0 0
+319 1 2.1524539566888354e+01 1.3641168546895004e+01 0.0000000000000000e+00 0 0 0
+320 1 2.2091487241099536e+01 1.4599530965127977e+01 0.0000000000000000e+00 0 0 0
+321 1 4.3165808837657372e-03 1.5558056864296654e+01 0.0000000000000000e+00 0 0 0
+322 1 5.7083714563799326e-01 1.6516708751979007e+01 0.0000000000000000e+00 0 0 0
+323 1 1.1369032012442155e+00 1.5558694023524415e+01 0.0000000000000000e+00 0 0 0
+324 1 1.7033478671452040e+00 1.6517390053019362e+01 0.0000000000000000e+00 0 0 0
+325 1 2.2696166894760093e+00 1.5559438272441989e+01 0.0000000000000000e+00 0 0 0
+326 1 2.8360390517176817e+00 1.6518173675862919e+01 0.0000000000000000e+00 0 0 0
+327 1 3.4024835472607537e+00 1.5560308985846410e+01 0.0000000000000000e+00 0 0 0
+328 1 3.9689280293273477e+00 1.6519102391285173e+01 0.0000000000000000e+00 0 0 0
+329 1 4.5355194771089229e+00 1.5561347597079759e+01 0.0000000000000000e+00 0 0 0
+330 1 5.1020300316691811e+00 1.6520241802728972e+01 0.0000000000000000e+00 0 0 0
+331 1 5.6687261068083830e+00 1.5562609975559823e+01 0.0000000000000000e+00 0 0 0
+332 1 6.2353553251066147e+00 1.6521676870813170e+01 0.0000000000000000e+00 0 0 0
+333 1 6.8020803209306022e+00 1.5564154380715671e+01 0.0000000000000000e+00 0 0 0
+334 1 7.3688969132482525e+00 1.6523504940606951e+01 0.0000000000000000e+00 0 0 0
+335 1 7.9355107433004930e+00 1.5566016713593251e+01 0.0000000000000000e+00 0 0 0
+336 1 8.5025988576228730e+00 1.6525815249413437e+01 0.0000000000000000e+00 0 0 0
+337 1 9.0688572861916743e+00 1.5568159644275148e+01 0.0000000000000000e+00 0 0 0
+338 1 9.6362867759002064e+00 1.6528628994685398e+01 0.0000000000000000e+00 0 0 0
+339 1 1.0201819017728946e+01 1.5570385216229328e+01 0.0000000000000000e+00 0 0 0
+340 1 1.0769583800973923e+01 1.6531845844888416e+01 0.0000000000000000e+00 0 0 0
+341 1 1.1333827071325254e+01 1.5571891707346142e+01 0.0000000000000000e+00 0 0 0
+342 1 1.1901384079738815e+01 1.6533906438348030e+01 0.0000000000000000e+00 0 0 0
+343 1 1.2464735369608531e+01 1.5571536667850689e+01 0.0000000000000000e+00 0 0 0
+344 1 1.3031616332415751e+01 1.6532264065445577e+01 0.0000000000000000e+00 0 0 0
+345 1 1.3595655007692846e+01 1.5569123466092078e+01 0.0000000000000000e+00 0 0 0
+346 1 1.4162673858547034e+01 1.6527452690896975e+01 0.0000000000000000e+00 0 0 0
+347 1 1.4727720485417455e+01 1.5565353830287787e+01 0.0000000000000000e+00 0 0 0
+348 1 1.5296922688141537e+01 1.6520776714395723e+01 0.0000000000000000e+00 0 0 0
+349 1 1.5861362778086731e+01 1.5561222799294468e+01 0.0000000000000000e+00 0 0 0
+350 1 1.6430601172841506e+01 1.6517208583528205e+01 0.0000000000000000e+00 0 0 0
+351 1 1.6994922716392164e+01 1.5558699760506759e+01 0.0000000000000000e+00 0 0 0
+352 1 1.7563499738678178e+01 1.6515675589984340e+01 0.0000000000000000e+00 0 0 0
+353 1 1.8128019408521833e+01 1.5557486534354460e+01 0.0000000000000000e+00 0 0 0
+354 1 1.8695872352982320e+01 1.6515215010141834e+01 0.0000000000000000e+00 0 0 0
+355 1 1.9260742856192188e+01 1.5557098710840322e+01 0.0000000000000000e+00 0 0 0
+356 1 1.9828039670247652e+01 1.6515278710047973e+01 0.0000000000000000e+00 0 0 0
+357 1 2.0393271644052639e+01 1.5557181842904438e+01 0.0000000000000000e+00 0 0 0
+358 1 2.0960186167839215e+01 1.6515618076673135e+01 0.0000000000000000e+00 0 0 0
+359 1 2.1525740973565441e+01 1.5557536533469163e+01 0.0000000000000000e+00 0 0 0
+360 1 2.2092409007659992e+01 1.6516113225524911e+01 0.0000000000000000e+00 0 0 0
+361 1 4.8529541639103424e-03 1.7474826118864232e+01 0.0000000000000000e+00 0 0 0
+362 1 5.7093835977538809e-01 1.8433654344787417e+01 0.0000000000000000e+00 0 0 0
+363 1 1.1371184592334547e+00 1.7475436697267657e+01 0.0000000000000000e+00 0 0 0
+364 1 1.7031721612449391e+00 1.8434245395923575e+01 0.0000000000000000e+00 0 0 0
+365 1 2.2695912343467985e+00 1.7476125500853652e+01 0.0000000000000000e+00 0 0 0
+366 1 2.8356286616018873e+00 1.8434926390559969e+01 0.0000000000000000e+00 0 0 0
+367 1 3.4022762478393873e+00 1.7476929029147396e+01 0.0000000000000000e+00 0 0 0
+368 1 3.9683006032101118e+00 1.8435747821965681e+01 0.0000000000000000e+00 0 0 0
+369 1 4.5351848152653442e+00 1.7477909278192492e+01 0.0000000000000000e+00 0 0 0
+370 1 5.1011966627212457e+00 1.8436786379097875e+01 0.0000000000000000e+00 0 0 0
+371 1 5.6683370882181823e+00 1.7479154484464996e+01 0.0000000000000000e+00 0 0 0
+372 1 6.2343468444568826e+00 1.8438149387991913e+01 0.0000000000000000e+00 0 0 0
+373 1 6.8017614989468314e+00 1.7480782103907771e+01 0.0000000000000000e+00 0 0 0
+374 1 7.3678106610096554e+00 1.8439986240742563e+01 0.0000000000000000e+00 0 0 0
+375 1 7.9354841298542631e+00 1.7482944033551004e+01 0.0000000000000000e+00 0 0 0
+376 1 8.5016878917981664e+00 1.8442513240991936e+01 0.0000000000000000e+00 0 0 0
+377 1 9.0694835466447152e+00 1.7485826099637269e+01 0.0000000000000000e+00 0 0 0
+378 1 9.6361298737773780e+00 1.8446058020412732e+01 0.0000000000000000e+00 0 0 0
+379 3 1.0203552137528503e+01 1.7489611887918201e+01 0.0000000000000000e+00 0 0 0
+380 3 1.0771411102042309e+01 1.8452757080501815e+01 0.0000000000000000e+00 0 0 0
+381 3 1.1337379036649208e+01 1.7494680029430171e+01 0.0000000000000000e+00 0 0 0
+382 3 1.1906745469447291e+01 1.8462022833637111e+01 0.0000000000000000e+00 0 0 0
+383 3 1.2469280552911147e+01 1.7497259990645460e+01 0.0000000000000000e+00 0 0 0
+384 3 1.3036336275216634e+01 1.8467761987124586e+01 0.0000000000000000e+00 0 0 0
+385 3 1.3598327635293121e+01 1.7492598542232617e+01 0.0000000000000000e+00 0 0 0
+386 3 1.4175415687021504e+01 1.8442378825657116e+01 0.0000000000000000e+00 0 0 0
+387 3 1.4734039006741700e+01 1.7480807237018496e+01 0.0000000000000000e+00 0 0 0
+388 3 1.5307067776131944e+01 1.8435324828663727e+01 0.0000000000000000e+00 0 0 0
+389 3 1.5867533841026015e+01 1.7475841850521086e+01 0.0000000000000000e+00 0 0 0
+390 3 1.6437492616293390e+01 1.8432690656917494e+01 0.0000000000000000e+00 0 0 0
+391 3 1.6999775067986949e+01 1.7473899503923125e+01 0.0000000000000000e+00 0 0 0
+392 3 1.7567888910690055e+01 1.8431965422256074e+01 0.0000000000000000e+00 0 0 0
+393 1 1.8131542880564929e+01 1.7473449678636637e+01 0.0000000000000000e+00 0 0 0
+394 1 1.8698655140485940e+01 1.8432031695013379e+01 0.0000000000000000e+00 0 0 0
+395 1 1.9263155459010161e+01 1.7473512474723787e+01 0.0000000000000000e+00 0 0 0
+396 1 1.9829735002682860e+01 1.8432284732233299e+01 0.0000000000000000e+00 0 0 0
+397 1 2.0394851027090610e+01 1.7473827778588312e+01 0.0000000000000000e+00 0 0 0
+398 1 2.0961149629160545e+01 1.8432663919612150e+01 0.0000000000000000e+00 0 0 0
+399 1 2.1526715232103822e+01 1.7474283624212315e+01 0.0000000000000000e+00 0 0 0
+400 1 2.2092870034810801e+01 1.8433127631588540e+01 0.0000000000000000e+00 0 0 0
+401 1 4.8013042136807321e-03 1.9396043340320471e+01 0.0000000000000000e+00 0 0 0
+402 1 1.1367941837037772e+00 1.9396530996055038e+01 0.0000000000000000e+00 0 0 0
+403 1 2.2690256110731926e+00 1.9397098404349112e+01 0.0000000000000000e+00 0 0 0
+404 1 3.4014603716469214e+00 1.9397780714315690e+01 0.0000000000000000e+00 0 0 0
+405 1 4.5340819027961929e+00 1.9398637844588755e+01 0.0000000000000000e+00 0 0 0
+406 1 5.6668967352607975e+00 1.9399756780683603e+01 0.0000000000000000e+00 0 0 0
+407 1 6.7999442980869498e+00 1.9401258783163115e+01 0.0000000000000000e+00 0 0 0
+408 1 7.9333186797521877e+00 1.9403316975932661e+01 0.0000000000000000e+00 0 0 0
+409 1 9.0672196895765094e+00 1.9406194727028712e+01 0.0000000000000000e+00 0 0 0
+410 3 1.0202094493927524e+01 1.9410357283161499e+01 0.0000000000000000e+00 0 0 0
+411 3 1.1344493619984819e+01 1.9424786980556338e+01 0.0000000000000000e+00 0 0 0
+412 3 1.2637742177785299e+01 1.9930889974773997e+01 0.0000000000000000e+00 0 0 0
+413 3 1.3631034994726834e+01 1.9413135103231994e+01 0.0000000000000000e+00 0 0 0
+414 3 1.4752642655140622e+01 1.9401337789912994e+01 0.0000000000000000e+00 0 0 0
+415 3 1.5877850085888255e+01 1.9396855373999653e+01 0.0000000000000000e+00 0 0 0
+416 3 1.7005401679016000e+01 1.9395287677046394e+01 0.0000000000000000e+00 0 0 0
+417 1 1.8134615217150579e+01 1.9394983390043944e+01 0.0000000000000000e+00 0 0 0
+418 1 1.9264825666738730e+01 1.9395059679793604e+01 0.0000000000000000e+00 0 0 0
+419 1 2.0395687437852093e+01 1.9395286212420899e+01 0.0000000000000000e+00 0 0 0
+420 1 2.1527023164623820e+01 1.9395624428209523e+01 0.0000000000000000e+00 0 0 0
+
+Velocities
+
+1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+11 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+12 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+13 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+14 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+15 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+16 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+17 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+18 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+19 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+20 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+21 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+22 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+23 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+24 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+25 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+26 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+27 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+28 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+29 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+30 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+31 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+32 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+33 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+34 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+35 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+36 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+37 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+38 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+39 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+40 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+41 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+42 1.4582830912290846e-05 8.8846352819582645e-06 0.0000000000000000e+00
+43 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+44 1.7312859198533731e-05 8.9315474912415886e-06 0.0000000000000000e+00
+45 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+46 1.9675072488620216e-05 9.1850485538530490e-06 0.0000000000000000e+00
+47 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+48 2.1433074233334628e-05 9.6286706528260491e-06 0.0000000000000000e+00
+49 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+50 2.2405931370299284e-05 1.0203419110643709e-05 0.0000000000000000e+00
+51 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+52 2.2491059900157929e-05 1.0820884817441812e-05 0.0000000000000000e+00
+53 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+54 2.1681031416712071e-05 1.1392109985469690e-05 0.0000000000000000e+00
+55 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+56 2.0060641760669130e-05 1.1861625865799975e-05 0.0000000000000000e+00
+57 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+58 1.7796484401635950e-05 1.2218953480137158e-05 0.0000000000000000e+00
+59 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+60 1.5112094617093786e-05 1.2477438154365889e-05 0.0000000000000000e+00
+61 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+62 1.2267198630926124e-05 1.2639445999779870e-05 0.0000000000000000e+00
+63 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+64 9.5350822789358476e-06 1.2675742401072055e-05 0.0000000000000000e+00
+65 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+66 7.1824119616190063e-06 1.2539493069881058e-05 0.0000000000000000e+00
+67 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+68 5.4428729520064613e-06 1.2202930059423273e-05 0.0000000000000000e+00
+69 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+70 4.4916342132800272e-06 1.1686680302800539e-05 0.0000000000000000e+00
+71 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+72 4.4211188598874964e-06 1.1059784694772424e-05 0.0000000000000000e+00
+73 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+74 5.2335171901646275e-06 1.0413115381938121e-05 0.0000000000000000e+00
+75 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+76 6.8418345483267640e-06 9.8270961040952689e-06 0.0000000000000000e+00
+77 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+78 9.0862581078626780e-06 9.3562441357185347e-06 0.0000000000000000e+00
+79 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+80 1.1750898218046424e-05 9.0341536486321376e-06 0.0000000000000000e+00
+81 2.6507896911975861e-05 1.6371545140358991e-05 0.0000000000000000e+00
+82 4.4071318998573086e-05 2.1693750644120881e-05 0.0000000000000000e+00
+83 3.1968282279407066e-05 1.6236651087870452e-05 0.0000000000000000e+00
+84 5.1796828963389391e-05 2.2204255617741203e-05 0.0000000000000000e+00
+85 3.6954144790935994e-05 1.6513220966131449e-05 0.0000000000000000e+00
+86 5.8466837911599906e-05 2.3765145047455889e-05 0.0000000000000000e+00
+87 4.0988263449448856e-05 1.7195597287912591e-05 0.0000000000000000e+00
+88 6.3421700984930829e-05 2.6202560196158383e-05 0.0000000000000000e+00
+89 4.3676514981254302e-05 1.8262201659237366e-05 0.0000000000000000e+00
+90 6.6189231995872341e-05 2.9208208903089284e-05 0.0000000000000000e+00
+91 4.4757820844412710e-05 1.9680681285465242e-05 0.0000000000000000e+00
+92 6.6503049702947023e-05 3.2363912757959332e-05 0.0000000000000000e+00
+93 4.4112984955796063e-05 2.1371026788621213e-05 0.0000000000000000e+00
+94 6.4345177020566643e-05 3.5287785267920645e-05 0.0000000000000000e+00
+95 4.1787976800244936e-05 2.3162941413835267e-05 0.0000000000000000e+00
+96 5.9928744845970383e-05 3.7723007380205399e-05 0.0000000000000000e+00
+97 3.7995079634412303e-05 2.4792875906820557e-05 0.0000000000000000e+00
+98 5.3679482622744082e-05 3.9576112916242527e-05 0.0000000000000000e+00
+99 3.3102802347870566e-05 2.5962643816709875e-05 0.0000000000000000e+00
+100 4.6196928202386916e-05 4.0841431876258162e-05 0.0000000000000000e+00
+101 2.7604731797030090e-05 2.6452929465938663e-05 0.0000000000000000e+00
+102 3.8198605610544056e-05 4.1488731650403403e-05 0.0000000000000000e+00
+103 2.2062125644364829e-05 2.6211631192676194e-05 0.0000000000000000e+00
+104 3.0461532031511585e-05 4.1405446569862951e-05 0.0000000000000000e+00
+105 1.7036890410487144e-05 2.5367151114186193e-05 0.0000000000000000e+00
+106 2.3743747198513856e-05 4.0442707371844690e-05 0.0000000000000000e+00
+107 1.3025410253023063e-05 2.4148417954514280e-05 0.0000000000000000e+00
+108 1.8714618137705143e-05 3.8535713097978544e-05 0.0000000000000000e+00
+109 1.0408634299164180e-05 2.2777378345262281e-05 0.0000000000000000e+00
+110 1.5879886471080462e-05 3.5803991688099920e-05 0.0000000000000000e+00
+111 9.4236282619897256e-06 2.1395799353203516e-05 0.0000000000000000e+00
+112 1.5528508660313246e-05 3.2559849894401442e-05 0.0000000000000000e+00
+113 1.0151636970505765e-05 2.0069631778223131e-05 0.0000000000000000e+00
+114 1.7697076907652322e-05 2.9219489273322326e-05 0.0000000000000000e+00
+115 1.2515393741428794e-05 1.8835255026813244e-05 0.0000000000000000e+00
+116 2.2166639673992795e-05 2.6190973817683679e-05 0.0000000000000000e+00
+117 1.6287686218571795e-05 1.7749195377506077e-05 0.0000000000000000e+00
+118 2.8490182736201632e-05 2.3793837746782787e-05 0.0000000000000000e+00
+119 2.1107512829311378e-05 1.6897890319175769e-05 0.0000000000000000e+00
+120 3.6037837106045466e-05 2.2245603004594772e-05 0.0000000000000000e+00
+121 5.3754680927831089e-05 2.5818243561840512e-05 0.0000000000000000e+00
+122 7.3852984703953750e-05 2.7472934391924166e-05 0.0000000000000000e+00
+123 6.4028996009385391e-05 2.5673459200729887e-05 0.0000000000000000e+00
+124 8.5908169593167111e-05 2.8779655657983422e-05 0.0000000000000000e+00
+125 7.3452059594226324e-05 2.6972883064637495e-05 0.0000000000000000e+00
+126 9.6325597970378315e-05 3.2438649799599460e-05 0.0000000000000000e+00
+127 8.1100712408732978e-05 2.9661132010523619e-05 0.0000000000000000e+00
+128 1.0416393664254911e-04 3.7996059391476322e-05 0.0000000000000000e+00
+129 8.6279187735689372e-05 3.3655093070911189e-05 0.0000000000000000e+00
+130 1.0867812733907078e-04 4.4662561906767820e-05 0.0000000000000000e+00
+131 8.8474833676771911e-05 3.8743354165649700e-05 0.0000000000000000e+00
+132 1.0953332540892396e-04 5.1642519960343192e-05 0.0000000000000000e+00
+133 8.7489301482666931e-05 4.4565301821478113e-05 0.0000000000000000e+00
+134 1.0664519563149365e-04 5.8191926603172465e-05 0.0000000000000000e+00
+135 8.3386685919597045e-05 5.0496180582897816e-05 0.0000000000000000e+00
+136 1.0029664202627839e-04 6.3854180722835299e-05 0.0000000000000000e+00
+137 7.6554421387387345e-05 5.5724916856637890e-05 0.0000000000000000e+00
+138 9.1026233328924885e-05 6.8337584627018583e-05 0.0000000000000000e+00
+139 6.7636782318513533e-05 5.9415263115523977e-05 0.0000000000000000e+00
+140 7.9687801662836277e-05 7.1465060890868459e-05 0.0000000000000000e+00
+141 5.7497112140338785e-05 6.0967609586633220e-05 0.0000000000000000e+00
+142 6.7357844628560681e-05 7.3031410372387953e-05 0.0000000000000000e+00
+143 4.7129030804640708e-05 6.0212629249287861e-05 0.0000000000000000e+00
+144 5.5250218292995579e-05 7.2797466423152327e-05 0.0000000000000000e+00
+145 3.7563119690651669e-05 5.7436898093437411e-05 0.0000000000000000e+00
+146 4.4567659752612482e-05 7.0555413628180241e-05 0.0000000000000000e+00
+147 2.9761856381290211e-05 5.3240161725660877e-05 0.0000000000000000e+00
+148 3.6379968408071024e-05 6.6254587745984145e-05 0.0000000000000000e+00
+149 2.4507249328654254e-05 4.8297754292616488e-05 0.0000000000000000e+00
+150 3.1513287993668526e-05 6.0149614548130757e-05 0.0000000000000000e+00
+151 2.2321658045569226e-05 4.3171432510556026e-05 0.0000000000000000e+00
+152 3.0489149535510922e-05 5.2826430989590112e-05 0.0000000000000000e+00
+153 2.3406691442805219e-05 3.8251620084219645e-05 0.0000000000000000e+00
+154 3.3434005351227340e-05 4.5149642749209296e-05 0.0000000000000000e+00
+155 2.7646263937105427e-05 3.3806816567877322e-05 0.0000000000000000e+00
+156 4.0072022728543377e-05 3.8051851023877120e-05 0.0000000000000000e+00
+157 3.4612453095456479e-05 3.0084179504664271e-05 0.0000000000000000e+00
+158 4.9715995320497382e-05 3.2366348850974144e-05 0.0000000000000000e+00
+159 4.3610755492928814e-05 2.7338724794948033e-05 0.0000000000000000e+00
+160 6.1380291935873857e-05 2.8717537667974358e-05 0.0000000000000000e+00
+161 8.1494982491825407e-05 2.8148953219575557e-05 0.0000000000000000e+00
+162 1.0332280599497646e-04 2.6201503829225565e-05 0.0000000000000000e+00
+163 9.5761847160871774e-05 2.7946357434336516e-05 0.0000000000000000e+00
+164 1.1864695815872809e-04 2.8356135457834722e-05 0.0000000000000000e+00
+165 1.0885399470738391e-04 3.0748146450938071e-05 0.0000000000000000e+00
+166 1.3196232705661449e-04 3.4623883717438410e-05 0.0000000000000000e+00
+167 1.1960145227112585e-04 3.6566084099414615e-05 0.0000000000000000e+00
+168 1.4195026861275745e-04 4.3922046815628174e-05 0.0000000000000000e+00
+169 1.2694731664409029e-04 4.5104351462522260e-05 0.0000000000000000e+00
+170 1.4795217588335909e-04 5.5130340832613979e-05 0.0000000000000000e+00
+171 1.3031757484079781e-04 5.5906601170376687e-05 0.0000000000000000e+00
+172 1.4942385512754647e-04 6.7008660580344934e-05 0.0000000000000000e+00
+173 1.2933895493945637e-04 6.7959205844179285e-05 0.0000000000000000e+00
+174 1.4640862693120205e-04 7.8808347871067721e-05 0.0000000000000000e+00
+175 1.2418770632620368e-04 7.9948632905278019e-05 0.0000000000000000e+00
+176 1.3899686227644703e-04 8.9704374162949410e-05 0.0000000000000000e+00
+177 1.1530175670323509e-04 9.0294899930563893e-05 0.0000000000000000e+00
+178 1.2781482749801704e-04 9.8919094420893419e-05 0.0000000000000000e+00
+179 1.0353662297703481e-04 9.7655023731693666e-05 0.0000000000000000e+00
+180 1.1373191796318627e-04 1.0541969215504335e-04 0.0000000000000000e+00
+181 8.9924310567240939e-05 1.0107679026970171e-04 0.0000000000000000e+00
+182 9.8144155759054283e-05 1.0842945197242355e-04 0.0000000000000000e+00
+183 7.5715426026251240e-05 1.0020899633485792e-04 0.0000000000000000e+00
+184 8.2585472575644195e-05 1.0759497773947524e-04 0.0000000000000000e+00
+185 6.2273313315312773e-05 9.5280171371694988e-05 0.0000000000000000e+00
+186 6.8676150395344254e-05 1.0303784847881596e-04 0.0000000000000000e+00
+187 5.0970916505555661e-05 8.7088088473526171e-05 0.0000000000000000e+00
+188 5.7773431465618642e-05 9.5162943772818259e-05 0.0000000000000000e+00
+189 4.3046171895633206e-05 7.6794608467548045e-05 0.0000000000000000e+00
+190 5.1018825812688962e-05 8.4471826448580955e-05 0.0000000000000000e+00
+191 3.9350805046373119e-05 6.5685772521718177e-05 0.0000000000000000e+00
+192 4.9118794840322114e-05 7.1798478806600616e-05 0.0000000000000000e+00
+193 4.0296544731752153e-05 5.4848218154306130e-05 0.0000000000000000e+00
+194 5.2373823993379091e-05 5.8337807582076184e-05 0.0000000000000000e+00
+195 4.5754994278033105e-05 4.5116316024063770e-05 0.0000000000000000e+00
+196 6.0480102795976155e-05 4.5653533328626764e-05 0.0000000000000000e+00
+197 5.5147574075957093e-05 3.7092945765346277e-05 0.0000000000000000e+00
+198 7.2607642712602124e-05 3.5308098903458904e-05 0.0000000000000000e+00
+199 6.7490113431981086e-05 3.1302088338741595e-05 0.0000000000000000e+00
+200 8.7399794370653100e-05 2.8568976378293145e-05 0.0000000000000000e+00
+201 1.0893722215793989e-04 2.3548242675947018e-05 0.0000000000000000e+00
+202 1.3096734611020011e-04 1.8222479559561189e-05 0.0000000000000000e+00
+203 1.2587405764826476e-04 2.3075086083653581e-05 0.0000000000000000e+00
+204 1.4794813469964918e-04 2.1310741972757686e-05 0.0000000000000000e+00
+205 1.4140062010542452e-04 2.7813335553710658e-05 0.0000000000000000e+00
+206 1.6232505969042551e-04 3.0392445992766945e-05 0.0000000000000000e+00
+207 1.5397074924697088e-04 3.7554013992723016e-05 0.0000000000000000e+00
+208 1.7307084911688810e-04 4.3967269632201604e-05 0.0000000000000000e+00
+209 1.6265299884253917e-04 5.1975640619979227e-05 0.0000000000000000e+00
+210 1.7920198664424654e-04 6.0190020213309052e-05 0.0000000000000000e+00
+211 1.6652150024137926e-04 6.9718144800536699e-05 0.0000000000000000e+00
+212 1.8081789854731273e-04 7.8360099041392683e-05 0.0000000000000000e+00
+213 1.6556181720350750e-04 8.9018344965798718e-05 0.0000000000000000e+00
+214 1.7752817256212197e-04 9.7575999242155437e-05 0.0000000000000000e+00
+215 1.5977208852566740e-04 1.0737397867986304e-04 0.0000000000000000e+00
+216 1.6979140654165145e-04 1.1697357648400367e-04 0.0000000000000000e+00
+217 1.4991716557360079e-04 1.2288188040735228e-04 0.0000000000000000e+00
+218 1.5773246565703171e-04 1.3407831270088809e-04 0.0000000000000000e+00
+219 1.3669127604115129e-04 1.3409320273816903e-04 0.0000000000000000e+00
+220 1.4251229678068216e-04 1.4611735938454550e-04 0.0000000000000000e+00
+221 1.2122307334826826e-04 1.4035076438954076e-04 0.0000000000000000e+00
+222 1.2529950168048559e-04 1.5081786662543979e-04 0.0000000000000000e+00
+223 1.0469592755885001e-04 1.4095013503932427e-04 0.0000000000000000e+00
+224 1.0808583047877408e-04 1.4794462550140816e-04 0.0000000000000000e+00
+225 8.8627241500263001e-05 1.3548056521811501e-04 0.0000000000000000e+00
+226 9.2458332531887566e-05 1.3885968265358318e-04 0.0000000000000000e+00
+227 7.4741877098264211e-05 1.2401626995789950e-04 0.0000000000000000e+00
+228 8.0205883060811833e-05 1.2520982577425913e-04 0.0000000000000000e+00
+229 6.4625586397733592e-05 1.0787656142000266e-04 0.0000000000000000e+00
+230 7.2455365263725255e-05 1.0828209149006050e-04 0.0000000000000000e+00
+231 5.9588283630412220e-05 8.9170324651604443e-05 0.0000000000000000e+00
+232 7.0192628935414336e-05 8.8909581629389317e-05 0.0000000000000000e+00
+233 6.0204784194593798e-05 7.0300012127155753e-05 0.0000000000000000e+00
+234 7.3736654845263383e-05 6.8446290010432812e-05 0.0000000000000000e+00
+235 6.6430610182749772e-05 5.3155748135392397e-05 0.0000000000000000e+00
+236 8.2877314926921305e-05 4.8905829630990080e-05 0.0000000000000000e+00
+237 7.7513488386102286e-05 3.9091588598701160e-05 0.0000000000000000e+00
+238 9.6520171103331445e-05 3.2781173935459572e-05 0.0000000000000000e+00
+239 9.2232416915701703e-05 2.8983592081175541e-05 0.0000000000000000e+00
+240 1.1323768745207866e-04 2.2099480605147771e-05 0.0000000000000000e+00
+241 1.3459865563319741e-04 1.2703227752398906e-05 0.0000000000000000e+00
+242 1.5474472990034682e-04 4.9028952578147883e-06 0.0000000000000000e+00
+243 1.5215092550023015e-04 1.2097129719583488e-05 0.0000000000000000e+00
+244 1.7049283829256650e-04 9.1706083284784945e-06 0.0000000000000000e+00
+245 1.6776008627299133e-04 1.9099097427610624e-05 0.0000000000000000e+00
+246 1.8333367243371162e-04 2.1604042292807520e-05 0.0000000000000000e+00
+247 1.8008195062651708e-04 3.3615378061081087e-05 0.0000000000000000e+00
+248 1.9189650531499411e-04 3.9685789135164573e-05 0.0000000000000000e+00
+249 1.8783573502138761e-04 5.4407048131235179e-05 0.0000000000000000e+00
+250 1.9614236422538478e-04 6.1776442206818580e-05 0.0000000000000000e+00
+251 1.9089458289321614e-04 7.9516349638183995e-05 0.0000000000000000e+00
+252 1.9558796322159504e-04 8.6892146170328155e-05 0.0000000000000000e+00
+253 1.8874887732394583e-04 1.0500710476222600e-04 0.0000000000000000e+00
+254 1.9152092472987035e-04 1.1597519058197392e-04 0.0000000000000000e+00
+255 1.8256170726520249e-04 1.2813543516461412e-04 0.0000000000000000e+00
+256 1.8317412822647789e-04 1.4660018932962857e-04 0.0000000000000000e+00
+257 1.7255263462072589e-04 1.4676532692364173e-04 0.0000000000000000e+00
+258 1.7180488878385214e-04 1.7463142934331309e-04 0.0000000000000000e+00
+259 1.5968255923162790e-04 1.6112886872639011e-04 0.0000000000000000e+00
+260 1.5736418072846288e-04 1.9373036611985696e-04 0.0000000000000000e+00
+261 1.4438478717201006e-04 1.7104686393958903e-04 0.0000000000000000e+00
+262 1.4172732865070544e-04 1.9993397606046031e-04 0.0000000000000000e+00
+263 1.2780764883044244e-04 1.7581062997197168e-04 0.0000000000000000e+00
+264 1.2551429537422006e-04 1.9326020335803872e-04 0.0000000000000000e+00
+265 1.1143818436667695e-04 1.7279653463973672e-04 0.0000000000000000e+00
+266 1.1129339275113247e-04 1.7696772682632424e-04 0.0000000000000000e+00
+267 9.7026315827934351e-05 1.6059526683700199e-04 0.0000000000000000e+00
+268 1.0011077369688101e-04 1.5518094661065159e-04 0.0000000000000000e+00
+269 8.6522173578507114e-05 1.3949676203897713e-04 0.0000000000000000e+00
+270 9.3664351269599769e-05 1.3012327167812935e-04 0.0000000000000000e+00
+271 8.1291625785936272e-05 1.1272276579557331e-04 0.0000000000000000e+00
+272 9.2500602391673231e-05 1.0286100151558550e-04 0.0000000000000000e+00
+273 8.2174156364853157e-05 8.4321872533994066e-05 0.0000000000000000e+00
+274 9.7130125497925732e-05 7.4496044974704109e-05 0.0000000000000000e+00
+275 8.9034151680274584e-05 5.8030339746749375e-05 0.0000000000000000e+00
+276 1.0678389790186836e-04 4.7638087115883802e-05 0.0000000000000000e+00
+277 1.0119535639935181e-04 3.6303198355693708e-05 0.0000000000000000e+00
+278 1.2094537118731950e-04 2.5060744325496045e-05 0.0000000000000000e+00
+279 1.1693017924424426e-04 2.0976803055913763e-05 0.0000000000000000e+00
+280 1.3749539877318216e-04 1.0260590705305633e-05 0.0000000000000000e+00
+281 1.5664700963922696e-04 -2.3143582219387062e-06 0.0000000000000000e+00
+282 1.7213521099798126e-04 -1.0893217653461935e-05 0.0000000000000000e+00
+283 1.7163725578470403e-04 -2.7032144307298500e-06 0.0000000000000000e+00
+284 1.8302101438290170e-04 -4.7015876043674941e-06 0.0000000000000000e+00
+285 1.8408986700031230e-04 7.3762135624856621e-06 0.0000000000000000e+00
+286 1.9034465344562048e-04 1.1634737927368466e-05 0.0000000000000000e+00
+287 1.9261711656854316e-04 2.7002357232706195e-05 0.0000000000000000e+00
+288 1.9361558083412500e-04 3.5128829947321527e-05 0.0000000000000000e+00
+289 1.9675951681764233e-04 5.4504542291358708e-05 0.0000000000000000e+00
+290 1.9229119872704733e-04 6.3160704993402156e-05 0.0000000000000000e+00
+291 1.9589287639141945e-04 8.5624062570252574e-05 0.0000000000000000e+00
+292 1.8763172746254775e-04 9.5926064062507038e-05 0.0000000000000000e+00
+293 1.9144012028784324e-04 1.1574950605207010e-04 0.0000000000000000e+00
+294 1.7925388721026504e-04 1.3336630569376064e-04 0.0000000000000000e+00
+295 1.8325338492376081e-04 1.3941523897602475e-04 0.0000000000000000e+00
+296 1.7111384196688562e-04 1.7606151476373124e-04 0.0000000000000000e+00
+297 1.7441139798144990e-04 1.5854503282415543e-04 0.0000000000000000e+00
+298 1.6032326888461984e-04 2.1446583056531008e-04 0.0000000000000000e+00
+299 1.6387438500404321e-04 1.7460023770546828e-04 0.0000000000000000e+00
+300 1.5074052192984994e-04 2.4020838682409438e-04 0.0000000000000000e+00
+301 1.5148519378157811e-04 1.8980377776341359e-04 0.0000000000000000e+00
+302 1.3966552521236660e-04 2.4774151750977874e-04 0.0000000000000000e+00
+303 1.3793365457372421e-04 2.0064007271268226e-04 0.0000000000000000e+00
+304 1.2999175496236775e-04 2.3635880684653166e-04 0.0000000000000000e+00
+305 1.2445809284142433e-04 2.0319207617434042e-04 0.0000000000000000e+00
+306 1.2058479570330461e-04 2.1223162942725083e-04 0.0000000000000000e+00
+307 1.1335261202324864e-04 1.9202689248144918e-04 0.0000000000000000e+00
+308 1.1489966476687653e-04 1.8116775432026056e-04 0.0000000000000000e+00
+309 1.0607147008139182e-04 1.6763704556867280e-04 0.0000000000000000e+00
+310 1.1336585045824133e-04 1.4707143380834897e-04 0.0000000000000000e+00
+311 1.0344655043634243e-04 1.3351953844142965e-04 0.0000000000000000e+00
+312 1.1682664536528110e-04 1.1117223278055075e-04 0.0000000000000000e+00
+313 1.0584011765559926e-04 9.5633260364903774e-05 0.0000000000000000e+00
+314 1.2284574193182076e-04 7.5987122603626839e-05 0.0000000000000000e+00
+315 1.1395109003961985e-04 5.9134539871496392e-05 0.0000000000000000e+00
+316 1.3307867183720012e-04 4.1776312299002252e-05 0.0000000000000000e+00
+317 1.2588269209880162e-04 2.9470198058979825e-05 0.0000000000000000e+00
+318 1.4544292811228404e-04 1.3605886591663147e-05 0.0000000000000000e+00
+319 1.4084591944156649e-04 8.4739193403876596e-06 0.0000000000000000e+00
+320 1.5916834934516234e-04 -4.8366910892216897e-06 0.0000000000000000e+00
+321 1.7356992275325834e-04 -1.8042780077684453e-05 0.0000000000000000e+00
+322 1.8147503918697350e-04 -2.4540925551368165e-05 0.0000000000000000e+00
+323 1.8163463266840930e-04 -1.7130129888846129e-05 0.0000000000000000e+00
+324 1.8253898079463108e-04 -1.5598547575766951e-05 0.0000000000000000e+00
+325 1.8657806229653399e-04 -3.4133038182733444e-06 0.0000000000000000e+00
+326 1.8037702210949025e-04 5.3434050311440554e-06 0.0000000000000000e+00
+327 1.8771879764928681e-04 2.1715518823040016e-05 0.0000000000000000e+00
+328 1.7471074558026713e-04 3.4539745108762290e-05 0.0000000000000000e+00
+329 1.8460873128030577e-04 5.4984524671188725e-05 0.0000000000000000e+00
+330 1.6600292929781353e-04 6.9008728811924969e-05 0.0000000000000000e+00
+331 1.7796512641162431e-04 9.1385034689547154e-05 0.0000000000000000e+00
+332 1.5410918238014520e-04 1.0736588228516046e-04 0.0000000000000000e+00
+333 1.6795483683047752e-04 1.2318778434343078e-04 0.0000000000000000e+00
+334 1.4157775831030313e-04 1.5014040855437159e-04 0.0000000000000000e+00
+335 1.5785083269945281e-04 1.4650075439474400e-04 0.0000000000000000e+00
+336 1.2665633684184470e-04 1.9516493438247949e-04 0.0000000000000000e+00
+337 1.4652933492756114e-04 1.5825492457397368e-04 0.0000000000000000e+00
+338 1.2214547186144981e-04 2.4167794353872549e-04 0.0000000000000000e+00
+339 1.4180074826065196e-04 1.7604592446691659e-04 0.0000000000000000e+00
+340 1.1255092108513610e-04 2.7260123374319155e-04 0.0000000000000000e+00
+341 1.3761399094902802e-04 1.9706727165017120e-04 0.0000000000000000e+00
+342 1.1522842530577449e-04 2.7911375117613133e-04 0.0000000000000000e+00
+343 1.2918411829601699e-04 2.1803343236056900e-04 0.0000000000000000e+00
+344 1.1734351128622201e-04 2.6571681944023359e-04 0.0000000000000000e+00
+345 1.2254276275087787e-04 2.2432204295978476e-04 0.0000000000000000e+00
+346 1.1974443872507096e-04 2.3680090433102696e-04 0.0000000000000000e+00
+347 1.2071540692597284e-04 2.1269652219509870e-04 0.0000000000000000e+00
+348 1.2270101616137648e-04 1.9743779542515523e-04 0.0000000000000000e+00
+349 1.2264642729289398e-04 1.8508739809863369e-04 0.0000000000000000e+00
+350 1.3383017178927455e-04 1.5156768459346495e-04 0.0000000000000000e+00
+351 1.2577540617175406e-04 1.4740321111045147e-04 0.0000000000000000e+00
+352 1.4340773421068627e-04 1.1293118958385223e-04 0.0000000000000000e+00
+353 1.3341749813102418e-04 1.0072504549123244e-04 0.0000000000000000e+00
+354 1.5386164243670204e-04 7.2227623415051933e-05 0.0000000000000000e+00
+355 1.4204167491417043e-04 5.6498808092830119e-05 0.0000000000000000e+00
+356 1.6279824866213127e-04 3.3429940217178515e-05 0.0000000000000000e+00
+357 1.5267578941533281e-04 1.9709016910970241e-05 0.0000000000000000e+00
+358 1.7110318234027366e-04 1.1467794122178855e-06 0.0000000000000000e+00
+359 1.6340539277356244e-04 -5.5189289771696855e-06 0.0000000000000000e+00
+360 1.7740714701955653e-04 -1.8977834327990608e-05 0.0000000000000000e+00
+361 1.8452802730369313e-04 -2.8832061973687458e-05 0.0000000000000000e+00
+362 1.8186625712888205e-04 -2.9558136605260799e-05 0.0000000000000000e+00
+363 1.8024969476821697e-04 -2.5772260635909678e-05 0.0000000000000000e+00
+364 1.6761747789816595e-04 -1.7724478134172529e-05 0.0000000000000000e+00
+365 1.7295655435232461e-04 -8.2023972909357763e-06 0.0000000000000000e+00
+366 1.5167563906890865e-04 7.1136721706312189e-06 0.0000000000000000e+00
+367 1.6300729193455892e-04 2.1421261796460145e-05 0.0000000000000000e+00
+368 1.3513262146713916e-04 4.1132163185070293e-05 0.0000000000000000e+00
+369 1.5092813590493438e-04 5.9201618093873702e-05 0.0000000000000000e+00
+370 1.1841008358865480e-04 8.0419467894957835e-05 0.0000000000000000e+00
+371 1.3728404354778657e-04 9.8997396367778159e-05 0.0000000000000000e+00
+372 1.0207956567589023e-04 1.2179874114308306e-04 0.0000000000000000e+00
+373 1.2285830843381253e-04 1.3408468250307866e-04 0.0000000000000000e+00
+374 8.5374614775879387e-05 1.6300686953091837e-04 0.0000000000000000e+00
+375 1.0823800239604665e-04 1.5689628308888026e-04 0.0000000000000000e+00
+376 7.0794245443767117e-05 2.0076846781781206e-04 0.0000000000000000e+00
+377 9.4916240542002045e-05 1.6757505383186989e-04 0.0000000000000000e+00
+378 5.3305776376960658e-05 2.3409206018084466e-04 0.0000000000000000e+00
+379 9.7006448817647123e-05 1.6923635825827552e-04 0.0000000000000000e+00
+380 7.3465624650027872e-05 2.8247344576399282e-04 0.0000000000000000e+00
+381 1.0549927592481066e-04 2.0928780749128950e-04 0.0000000000000000e+00
+382 8.2435739649036618e-05 2.8810010190574649e-04 0.0000000000000000e+00
+383 1.0402453921892486e-04 2.4985200604758925e-04 0.0000000000000000e+00
+384 1.0698549586748876e-04 2.9499323828433428e-04 0.0000000000000000e+00
+385 1.0406074064549273e-04 2.4902814952768373e-04 0.0000000000000000e+00
+386 1.2548092418124016e-04 2.5651719838597350e-04 0.0000000000000000e+00
+387 1.2351646868146525e-04 2.1977784401163054e-04 0.0000000000000000e+00
+388 1.5004190410192569e-04 2.0137571208681400e-04 0.0000000000000000e+00
+389 1.3393580166351108e-04 1.9255393507981809e-04 0.0000000000000000e+00
+390 1.6295883087441844e-04 1.5199213302359484e-04 0.0000000000000000e+00
+391 1.5063751113547077e-04 1.4694859067203417e-04 0.0000000000000000e+00
+392 1.8000897081745795e-04 1.0619343548646192e-04 0.0000000000000000e+00
+393 1.6561309378602883e-04 9.7037670677296156e-05 0.0000000000000000e+00
+394 1.9382745100814531e-04 6.5648980655922534e-05 0.0000000000000000e+00
+395 1.7695741527044807e-04 4.9716511735790200e-05 0.0000000000000000e+00
+396 2.0065188597728214e-04 2.5746708320466968e-05 0.0000000000000000e+00
+397 1.8297867804266331e-04 1.0615003992406336e-05 0.0000000000000000e+00
+398 1.9982611540716595e-04 -6.5682876870518107e-06 0.0000000000000000e+00
+399 1.8556554896400902e-04 -1.6509803537161082e-05 0.0000000000000000e+00
+400 1.9312536581833663e-04 -2.5912353602364910e-05 0.0000000000000000e+00
+401 1.9023801410824606e-04 -2.7464462701394523e-05 0.0000000000000000e+00
+402 1.6698242474704196e-04 -2.2099770957974276e-05 0.0000000000000000e+00
+403 1.4243470745724796e-04 -2.3480099448894497e-06 0.0000000000000000e+00
+404 1.1876992118927330e-04 2.8893674554990122e-05 0.0000000000000000e+00
+405 9.7519654816455303e-05 6.7473438674861880e-05 0.0000000000000000e+00
+406 7.9406575250692843e-05 1.0857040090545463e-04 0.0000000000000000e+00
+407 6.4539515581651943e-05 1.4692649268945184e-04 0.0000000000000000e+00
+408 5.1587395282262643e-05 1.7813848972085620e-04 0.0000000000000000e+00
+409 3.9112490387149122e-05 1.9658956051422694e-04 0.0000000000000000e+00
+410 5.0646024795521216e-05 2.1400372314275622e-04 0.0000000000000000e+00
+411 5.0846759072495365e-05 2.4269431977691632e-04 0.0000000000000000e+00
+412 -1.1482904584733112e-04 -1.4373303263865990e-04 0.0000000000000000e+00
+413 1.5511043799162975e-04 2.5875085720661746e-04 0.0000000000000000e+00
+414 1.6626514293530906e-04 2.1735425519646309e-04 0.0000000000000000e+00
+415 1.8641734272053080e-04 1.7687638118890660e-04 0.0000000000000000e+00
+416 2.0380463041431767e-04 1.3085055646827544e-04 0.0000000000000000e+00
+417 2.1756044245783355e-04 8.4550353671555104e-05 0.0000000000000000e+00
+418 2.2486305540664193e-04 4.3653832482456800e-05 0.0000000000000000e+00
+419 2.2188236489361566e-04 7.6524745655054321e-06 0.0000000000000000e+00
+420 2.0949238720629205e-04 -1.7218568434280989e-05 0.0000000000000000e+00
--- a/examples/neb/log.19Jun17.neb.hop1.end.g++.4
+++ b/examples/neb/log.19Jun17.neb.hop1.end.g++.4
@ -0,0 +1,11 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    229.26196    146.68251    2.9774577    4.4127369    233.11559  0.023301843    0.0224626    1.4763579            0    -3.048332   0.33333333   -3.0250302   0.66666667   -3.0291888            1   -3.0474928 
+100   0.11027532  0.085410308    3.0967938  0.024201563   0.38551033 0.0017583261 0.0021866943    1.7710358            0   -3.0483469   0.31192818   -3.0465886   0.61093022   -3.0466143            1   -3.0487752 
+130   0.09954083  0.075481108    3.0927626  0.015664388   0.37491833 0.0017573704 0.0021913201    1.7713726            0    -3.048342   0.31428487   -3.0465846   0.61762817   -3.0466296            1    -3.048776 
+Climbing replica = 2
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+130   0.37838747    0.3502435    3.0927626  0.015664388   0.37491833 0.0017573704 0.0021913201    1.7713726            0    -3.048342   0.31428487   -3.0465846   0.61762817   -3.0466296            1    -3.048776 
+230   0.22757286   0.12027481    3.1250243 0.0081260569   0.14019507 0.0018364585  0.002278918      1.76926            0   -3.0483347   0.39730698   -3.0464983   0.64450769   -3.0466973            1   -3.0487772 
+278  0.096184498  0.085088496    3.1405655 0.0068164307  0.093861113 0.0018426056  0.002286256    1.7684765            0   -3.0483338   0.41277997   -3.0464912   0.65562984   -3.0467294            1   -3.0487775 
--- a/examples/neb/log.19Jun17.neb.hop1.end.g++.8
+++ b/examples/neb/log.19Jun17.neb.hop1.end.g++.8
@ -0,0 +1,11 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    229.26196    146.68251    2.9774577    4.4127369    233.11559  0.023301843    0.0224626    1.4763579            0    -3.048332   0.33333333   -3.0250302   0.66666667   -3.0291888            1   -3.0474928 
+100   0.11375359  0.085350745    3.0966418    0.0236765   0.38531777 0.0017582606 0.0021868783    1.7710738            0   -3.0483467   0.31201141   -3.0465884   0.61117406   -3.0466149            1   -3.0487753 
+119   0.09996986  0.078639268    3.0937691  0.017444108    0.3780308 0.0017574935 0.0021899317    1.7713574            0   -3.0483433   0.31354192   -3.0465858   0.61555533   -3.0466249            1   -3.0487758 
+Climbing replica = 2
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+119    0.3793192   0.35281863    3.0937691  0.017444108    0.3780308 0.0017574935 0.0021899317    1.7713574            0   -3.0483433   0.31354192   -3.0465858   0.61555533   -3.0466249            1   -3.0487758 
+219   0.20159133   0.12247026    3.1244061 0.0085896057   0.13938632 0.0018362816 0.0022783681    1.7693295            0    -3.048335   0.39646633   -3.0464988   0.64277703   -3.0466925            1   -3.0487771 
+266  0.099868725  0.086180598    3.1401661 0.0070922949  0.095128081  0.001842608  0.002286044    1.7685191            0    -3.048334   0.41231024   -3.0464914   0.65425179   -3.0467252            1   -3.0487774 
--- a/examples/neb/log.19Jun17.neb.hop1.g++.4
+++ b/examples/neb/log.19Jun17.neb.hop1.g++.4
@ -0,0 +1,9 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    4327.2753    2746.3378  0.082169072    4.9967651    4514.5424   0.42933428   0.42323635    1.8941131            0   -3.0535948   0.33333333   -2.6242605   0.66666667   -2.7623811            1   -3.0474969 
+87  0.095951502  0.052720903  0.005588927  0.065110105   0.12467831 0.0071014928 0.0022798007    2.3003372            0   -3.0535967   0.32435271   -3.0473127   0.62805027   -3.0464952            1    -3.048775 
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+87   0.14137277   0.11108954  0.005588927  0.065110105   0.12467831 0.0071014928 0.0022798007    2.3003372            0   -3.0535967   0.32435271   -3.0473127   0.62805027   -3.0464952            1    -3.048775 
+124  0.099583263  0.085936899 0.0044220372  0.023873795  0.091308308 0.0071061754 0.0022863931     2.308121            0   -3.0535968   0.32223905   -3.0473329   0.61673898   -3.0464906            1    -3.048777 
--- a/examples/neb/log.19Jun17.neb.hop1.g++.8
+++ b/examples/neb/log.19Jun17.neb.hop1.g++.8
@ -0,0 +1,9 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    4327.2753    2746.3378  0.082169072    4.9967651    4514.5424   0.42933428   0.42323635    1.8941131            0   -3.0535948   0.33333333   -2.6242605   0.66666667   -2.7623811            1   -3.0474969 
+87  0.095951792  0.052720902 0.0055889267  0.065110091   0.12467831 0.0071014928 0.0022798007    2.3003372            0   -3.0535967   0.32435271   -3.0473127   0.62805027   -3.0464952            1    -3.048775 
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+87   0.14137297   0.11108954 0.0055889267  0.065110091   0.12467831 0.0071014928 0.0022798007    2.3003372            0   -3.0535967   0.32435271   -3.0473127   0.62805027   -3.0464952            1    -3.048775 
+124  0.099582186   0.08593683 0.0044220345  0.023873731  0.091308197 0.0071061754 0.0022863931    2.3081211            0   -3.0535968   0.32223904   -3.0473329   0.61673896   -3.0464906            1    -3.048777 
--- a/examples/neb/log.19Jun17.neb.hop2.g++.4
+++ b/examples/neb/log.19Jun17.neb.hop2.g++.4
@ -0,0 +1,12 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    14.104748    10.419633    0.1227071     4.999238    8.2087606 0.0018276223 0.00064050211   0.98401186            0   -3.0514921   0.33333333   -3.0496673   0.66666667   -3.0496645            1    -3.050305 
+100   0.24646695   0.10792196 0.0077146918  0.058733261   0.63504706  0.001516756 0.0015151635     1.165391            0   -3.0514939    0.2890334   -3.0503533   0.59718494   -3.0499771            1   -3.0514923 
+200  0.061777741  0.050288749 0.0047486883 0.0095236035   0.88698597 0.0014465772 0.0014462528    1.1692938            0   -3.0514941   0.29975094   -3.0503052   0.62768286   -3.0500476            1   -3.0514938 
+261  0.048699591  0.038138604 0.0040083594 0.0074854409   0.95722712 0.0014243579 0.0014241377    1.1696848            0   -3.0514942   0.30525481   -3.0502812    0.6357998   -3.0500698            1    -3.051494 
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+261   0.95753855   0.94297239 0.0040083594 0.0074854409   0.95722712 0.0014243579 0.0014241377    1.1696848            0   -3.0514942   0.30525481   -3.0502812    0.6357998   -3.0500698            1    -3.051494 
+361  0.072509627   0.06580631 0.0027545765 0.0044749366  0.016746483 0.0016018879 0.0016017805    1.1704611            0   -3.0514943   0.28176307   -3.0503855   0.50355454   -3.0498924            1   -3.0514942 
+381   0.04884836  0.040787876 0.0023445904 0.0035162935  0.017959209 0.0016017716 0.0016016898    1.1713862            0   -3.0514943   0.27120138   -3.0504399   0.50428218   -3.0498925            1   -3.0514942 
--- a/examples/neb/log.19Jun17.neb.hop2.g++.8
+++ b/examples/neb/log.19Jun17.neb.hop2.g++.8
@ -0,0 +1,12 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    14.104748    10.419633    0.1227071     4.999238    8.2087606 0.0018276223 0.00064050211   0.98401186            0   -3.0514921   0.33333333   -3.0496673   0.66666667   -3.0496645            1    -3.050305 
+100   0.24646695   0.10792196 0.0077146918  0.058733261   0.63504706  0.001516756 0.0015151635     1.165391            0   -3.0514939    0.2890334   -3.0503533   0.59718494   -3.0499771            1   -3.0514923 
+200  0.061777741  0.050288749 0.0047486883 0.0095236035   0.88698597 0.0014465772 0.0014462528    1.1692938            0   -3.0514941   0.29975094   -3.0503052   0.62768286   -3.0500476            1   -3.0514938 
+261  0.048699591  0.038138604 0.0040083594 0.0074854409   0.95722712 0.0014243579 0.0014241377    1.1696848            0   -3.0514942   0.30525481   -3.0502812    0.6357998   -3.0500698            1    -3.051494 
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+261   0.95753855   0.94297239 0.0040083594 0.0074854409   0.95722712 0.0014243579 0.0014241377    1.1696848            0   -3.0514942   0.30525481   -3.0502812    0.6357998   -3.0500698            1    -3.051494 
+361  0.072509627   0.06580631 0.0027545765 0.0044749366  0.016746483 0.0016018879 0.0016017805    1.1704611            0   -3.0514943   0.28176307   -3.0503855   0.50355454   -3.0498924            1   -3.0514942 
+381   0.04884836  0.040787876 0.0023445904 0.0035162935  0.017959209 0.0016017716 0.0016016898    1.1713862            0   -3.0514943   0.27120138   -3.0504399   0.50428218   -3.0498925            1   -3.0514942 
--- a/examples/neb/log.19Jun17.neb.sivac.g++.4
+++ b/examples/neb/log.19Jun17.neb.sivac.g++.4
@ -0,0 +1,17 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    7.5525391    1.6345605   0.16683659    7.5525391    7.5525391    1.5383951            0    1.6207355            0   -2213.3343   0.33333333   -2212.7428   0.66666667   -2212.2247            1   -2211.7959 
+10   0.24005275  0.036502104  0.036483049   0.24005275   0.68351722   0.42916118   0.41794425    1.6989349            0   -2213.3365   0.32909183   -2212.9587   0.65386736   -2212.9073            1   -2213.3253 
+20   0.07940898  0.016398055  0.024706844   0.07940898   0.71637784   0.41387872   0.41157886    1.7343662            0   -2213.3369   0.32478734   -2212.9621   0.65348766    -2212.923            1   -2213.3346 
+30  0.094973707 0.0083631681  0.015145947  0.035267404    0.7535772   0.40072717   0.40024605    1.7504612            0   -2213.3372   0.32705584   -2212.9584   0.65894506   -2212.9365            1   -2213.3367 
+40  0.027727472 0.0044528145  0.011618173  0.022562656   0.76133752   0.39614635   0.39591731    1.7547519            0   -2213.3373   0.32873163   -2212.9562   0.66124255   -2212.9411            1    -2213.337 
+50  0.019429348 0.0030110281 0.0087135563  0.015391975   0.76952681   0.39274846    0.3926388    1.7578616            0   -2213.3373   0.33022595   -2212.9543   0.66307279   -2212.9446            1   -2213.3372 
+60  0.019009471 0.0016234562 0.0053426307 0.0086166186   0.77759617   0.38936861   0.38933364    1.7610433            0   -2213.3374   0.33187548   -2212.9523   0.66497617    -2212.948            1   -2213.3373 
+63 0.0097365134 0.0012734598  0.004777604 0.0076121987   0.77865149   0.38888778   0.38886047    1.7615294            0   -2213.3374   0.33212107    -2212.952   0.66525385   -2212.9485            1   -2213.3373 
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+63   0.77865149   0.31085821  0.004777604 0.0076121987   0.77865149   0.38888778   0.38886047    1.7615294            0   -2213.3374   0.33212107    -2212.952   0.66525385   -2212.9485            1   -2213.3373 
+73  0.098175496  0.033609035 0.0027886955 0.0042742148  0.036594003   0.51024838   0.51023983    1.7607181            0   -2213.3374   0.27574151   -2213.0416   0.50432348   -2212.8271            1   -2213.3374 
+83   0.03341862  0.012760857 0.0020868177 0.0031625649  0.010189924   0.51014634   0.51014168    1.7602562            0   -2213.3374   0.26045338   -2213.0672   0.50355193   -2212.8272            1   -2213.3374 
+93 0.0097374358 0.0028416114 0.0014003718 0.0020986584 0.0053485291   0.51011052   0.51010848    1.7601202            0   -2213.3374   0.25397887   -2213.0783   0.50388111   -2212.8273            1   -2213.3374 
--- a/examples/neb/log.19Jun17.neb.sivac.g++.8
+++ b/examples/neb/log.19Jun17.neb.sivac.g++.8
@ -0,0 +1,18 @@
+LAMMPS (19 May 2017)
+Running on 4 partitions of processors
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+0    7.5525391    1.6345605   0.16683659    7.5525391    7.5525391    1.5383951            0    1.6207355            0   -2213.3343   0.33333333   -2212.7428   0.66666667   -2212.2247            1   -2211.7959 
+10   0.24005275  0.036502104  0.036483049   0.24005275   0.68351722   0.42916118   0.41794425    1.6989349            0   -2213.3365   0.32909183   -2212.9587   0.65386736   -2212.9073            1   -2213.3253 
+20   0.07940898  0.016398055  0.024706844   0.07940898   0.71637784   0.41387872   0.41157886    1.7343662            0   -2213.3369   0.32478734   -2212.9621   0.65348766    -2212.923            1   -2213.3346 
+30  0.094973708 0.0083631681  0.015145947  0.035267404    0.7535772   0.40072717   0.40024605    1.7504612            0   -2213.3372   0.32705584   -2212.9584   0.65894506   -2212.9365            1   -2213.3367 
+40  0.027727472 0.0044528144  0.011618173  0.022562656   0.76133752   0.39614635   0.39591731    1.7547519            0   -2213.3373   0.32873163   -2212.9562   0.66124255   -2212.9411            1    -2213.337 
+50  0.019429341 0.0030110281 0.0087135565  0.015391975    0.7695268   0.39274846    0.3926388    1.7578616            0   -2213.3373   0.33022595   -2212.9543   0.66307279   -2212.9446            1   -2213.3372 
+60  0.019048963 0.0016262345 0.0053426844 0.0086167196   0.77759655   0.38936867    0.3893337    1.7610433            0   -2213.3374   0.33187545   -2212.9523   0.66497615    -2212.948            1   -2213.3373 
+63 0.0097037048 0.0012761841 0.0047749367 0.0076075138   0.77865545   0.38888554   0.38885827    1.7615318            0   -2213.3374   0.33212221    -2212.952   0.66525512   -2212.9485            1   -2213.3373 
+Climbing replica = 3
+Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
+63   0.77865545    0.3108551 0.0047749367 0.0076075138   0.77865545   0.38888554   0.38885827    1.7615318            0   -2213.3374   0.33212221    -2212.952   0.66525512   -2212.9485            1   -2213.3373 
+73  0.098595989  0.033659485 0.0027927196 0.0042813387  0.038224344   0.51024759   0.51023901    1.7607156            0   -2213.3374   0.27595612   -2213.0413   0.50453988   -2212.8271            1   -2213.3374 
+83  0.033344977  0.012868685 0.0020880608 0.0031645847  0.010250413   0.51014677    0.5101421    1.7602601            0   -2213.3374   0.26053624    -2213.067   0.50358775   -2212.8272            1   -2213.3374 
+93  0.013254873 0.0038176141 0.0014928226 0.0022407967 0.0058577818   0.51011371   0.51011138    1.7601272            0   -2213.3374   0.25452741   -2213.0774   0.50382161   -2212.8273            1   -2213.3374 
+95 0.0099964951 0.0031053214 0.0014131665 0.0021184362 0.0053683638   0.51011105   0.51010897    1.7601232            0   -2213.3374    0.2540975   -2213.0781   0.50387313   -2212.8273            1   -2213.3374 
--- a/examples/neb/log.5Oct16.neb.hop1.g++.4
+++ b/examples/neb/log.5Oct16.neb.hop1.g++.4
@ -1,10 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0    4327.2753    2746.3378    0.3387091    5.0075576    4514.5424   0.42933428   0.42323635    1.8941131            0   -3.0535948   0.33333333   -2.6242605   0.66666667   -2.7623811            1   -3.0474969 
-100   0.10482184  0.085218486  0.014588241  0.066178594   0.19602237 0.0070900402 0.0022691875    2.3031875            0   -3.0535967   0.31839181   -3.0473647   0.63987598   -3.0465067            1   -3.0487759 
-111  0.096708467   0.07803707  0.013922973   0.05417562    0.2023467 0.0070871172 0.0022668002    2.3052945            0   -3.0535968   0.31853431   -3.0473633   0.64178871   -3.0465096            1   -3.0487764 
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-111    0.2023467    0.1777038  0.013922973   0.05417562    0.2023467 0.0070871172 0.0022668002    2.3052945            0   -3.0535968   0.31853431   -3.0473633   0.64178871   -3.0465096            1   -3.0487764 
-179  0.096874474  0.090676856   0.01040177  0.023364005  0.096874474 0.0071047642 0.0022856172    2.3122768            0   -3.0535969   0.31577311   -3.0473955   0.61798541   -3.0464922            1   -3.0487778 
--- a/examples/neb/log.5Oct16.neb.hop1.g++.8
+++ b/examples/neb/log.5Oct16.neb.hop1.g++.8
@ -1,10 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0    4327.2753    2746.3378    0.3387091    5.0075576    4514.5424   0.42933428   0.42323635    1.8941131            0   -3.0535948   0.33333333   -2.6242605   0.66666667   -2.7623811            1   -3.0474969 
-100   0.10482171  0.085218406  0.014588234  0.066178435   0.19602242 0.0070900401 0.0022691875    2.3031875            0   -3.0535967   0.31839181   -3.0473647     0.639876   -3.0465067            1   -3.0487759 
-111  0.096708718  0.078036984  0.013922966  0.054175505   0.20234693 0.0070871172 0.0022668002    2.3052946            0   -3.0535968   0.31853431   -3.0473633   0.64178873   -3.0465096            1   -3.0487764 
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-111   0.20234693   0.17770387  0.013922966  0.054175505   0.20234693 0.0070871172 0.0022668002    2.3052946            0   -3.0535968   0.31853431   -3.0473633   0.64178873   -3.0465096            1   -3.0487764 
-178   0.09975409  0.093814031  0.010577358  0.024247224   0.09975409 0.0071042931 0.0022851195     2.312004            0   -3.0535969   0.31607934   -3.0473923     0.618931   -3.0464926            1   -3.0487777 
--- a/examples/neb/log.5Oct16.neb.hop2.g++.4
+++ b/examples/neb/log.5Oct16.neb.hop2.g++.4
@ -1,18 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0    14.104748    10.419633   0.24852044    5.0039071    8.2116049 0.0018276223 0.00064050211   0.98401186            0   -3.0514921   0.33333333   -3.0496673   0.66666667   -3.0496645            1    -3.050305 
-100   0.24646695   0.10792196   0.01781018  0.098854684   0.63725646  0.001516756 0.0015151635     1.165391            0   -3.0514939    0.2890334   -3.0503533   0.59718494   -3.0499771            1   -3.0514923 
-200  0.061777741  0.050288749  0.012466513  0.020420207   0.88741041 0.0014465772 0.0014462528    1.1692938            0   -3.0514941   0.29975094   -3.0503052   0.62768286   -3.0500476            1   -3.0514938 
-300  0.056346766  0.030000618 0.0093152917  0.013765031    1.0101529 0.0014069751 0.0014068154    1.1699608            0   -3.0514942   0.30992449   -3.0502613   0.64174291   -3.0500873            1   -3.0514941 
-400  0.025589489  0.015671005 0.0061287063  0.008588518    1.1136424  0.001370987 0.0013709154    1.1704204            0   -3.0514943   0.32016645   -3.0502198   0.65324019   -3.0501233            1   -3.0514943 
-500  0.014778626 0.0092108366 0.0042668521 0.0059963914    1.1636579 0.0013527466 0.0013527072    1.1706283            0   -3.0514944   0.32550275   -3.0501993   0.65875414   -3.0501416            1   -3.0514943 
-600   0.08786211  0.020876327 0.0031421548 0.0051657363    1.1898894 0.0013430848 0.0013430599    1.1707681            0   -3.0514944   0.32831927   -3.0501889   0.66160681   -3.0501513            1   -3.0514944 
-633 0.0098132678 0.0055392541 0.0030063464 0.0043091323    1.1924486 0.0013420127 0.0013419893    1.1707818            0   -3.0514944   0.32862625   -3.0501878   0.66191769   -3.0501524            1   -3.0514944 
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-633    1.1924486    1.1648685 0.0030063464 0.0043091323    1.1924486 0.0013420127 0.0013419893    1.1707818            0   -3.0514944   0.32862625   -3.0501878   0.66191769   -3.0501524            1   -3.0514944 
-733  0.095331134  0.089136608 0.0021551441 0.0031844438  0.043042998 0.0016022317 0.0016022168     1.170789            0   -3.0514944   0.29157063   -3.0503375   0.50358402   -3.0498922            1   -3.0514944 
-833   0.10539135  0.030724373 0.0013749699  0.002221013   0.10539135 0.0016019798  0.001601971    1.1732118            0   -3.0514944   0.26249002   -3.0504848   0.50415223   -3.0498924            1   -3.0514944 
-933   0.01883894  0.011496399 0.0011058925 0.0018178041  0.014621806 0.0016018934 0.0016018865     1.173866            0   -3.0514944   0.25788763   -3.0505113   0.50466375   -3.0498925            1   -3.0514944 
-996 0.0082457876 0.0036336551 0.00077325986 0.0013910671 0.0068823708 0.0016018293 0.0016018244     1.174511            0   -3.0514944    0.2544553   -3.0505324   0.50520462   -3.0498926            1   -3.0514944 
--- a/examples/neb/log.5Oct16.neb.hop2.g++.8
+++ b/examples/neb/log.5Oct16.neb.hop2.g++.8
@ -1,18 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 4 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0    14.104748    10.419633   0.24852044    5.0039071    8.2116049 0.0018276223 0.00064050211   0.98401186            0   -3.0514921   0.33333333   -3.0496673   0.66666667   -3.0496645            1    -3.050305 
-100   0.24646695   0.10792196   0.01781018  0.098854684   0.63725646  0.001516756 0.0015151635     1.165391            0   -3.0514939    0.2890334   -3.0503533   0.59718494   -3.0499771            1   -3.0514923 
-200  0.061777741  0.050288749  0.012466513  0.020420207   0.88741041 0.0014465772 0.0014462528    1.1692938            0   -3.0514941   0.29975094   -3.0503052   0.62768286   -3.0500476            1   -3.0514938 
-300  0.056346766  0.030000618 0.0093152917  0.013765031    1.0101529 0.0014069751 0.0014068154    1.1699608            0   -3.0514942   0.30992449   -3.0502613   0.64174291   -3.0500873            1   -3.0514941 
-400  0.025589489  0.015671005 0.0061287063  0.008588518    1.1136424  0.001370987 0.0013709154    1.1704204            0   -3.0514943   0.32016645   -3.0502198   0.65324019   -3.0501233            1   -3.0514943 
-500  0.014778626 0.0092108366 0.0042668521 0.0059963914    1.1636579 0.0013527466 0.0013527072    1.1706283            0   -3.0514944   0.32550275   -3.0501993   0.65875414   -3.0501416            1   -3.0514943 
-600   0.08786211  0.020876327 0.0031421548 0.0051657363    1.1898894 0.0013430848 0.0013430599    1.1707681            0   -3.0514944   0.32831927   -3.0501889   0.66160681   -3.0501513            1   -3.0514944 
-633 0.0098132678 0.0055392541 0.0030063464 0.0043091323    1.1924486 0.0013420127 0.0013419893    1.1707818            0   -3.0514944   0.32862625   -3.0501878   0.66191769   -3.0501524            1   -3.0514944 
-Climbing replica = 3
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-633    1.1924486    1.1648685 0.0030063464 0.0043091323    1.1924486 0.0013420127 0.0013419893    1.1707818            0   -3.0514944   0.32862625   -3.0501878   0.66191769   -3.0501524            1   -3.0514944 
-733  0.095331134  0.089136608 0.0021551441 0.0031844438  0.043042998 0.0016022317 0.0016022168     1.170789            0   -3.0514944   0.29157063   -3.0503375   0.50358402   -3.0498922            1   -3.0514944 
-833   0.10539135  0.030724373 0.0013749699  0.002221013   0.10539135 0.0016019798  0.001601971    1.1732118            0   -3.0514944   0.26249002   -3.0504848   0.50415223   -3.0498924            1   -3.0514944 
-933   0.01883894  0.011496399 0.0011058925 0.0018178041  0.014621806 0.0016018934 0.0016018865     1.173866            0   -3.0514944   0.25788763   -3.0505113   0.50466375   -3.0498925            1   -3.0514944 
-996 0.0082457876 0.0036336551 0.00077325986 0.0013910671 0.0068823708 0.0016018293 0.0016018244     1.174511            0   -3.0514944    0.2544553   -3.0505324   0.50520462   -3.0498926            1   -3.0514944 
--- a/examples/neb/log.5Oct16.neb.sivac.g++.3
+++ b/examples/neb/log.5Oct16.neb.sivac.g++.3
@ -1,14 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 3 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0    7.5525391    1.6345605   0.16683659    7.5525391    7.5525391    1.5383951            0    1.6207355            0   -2213.3343          0.5   -2212.4096            1   -2211.7959 
-10   0.27332818  0.040944923  0.039164338   0.27332818   0.17804882   0.51235911     0.497084    1.6790474            0   -2213.3364   0.49024121    -2212.824            1   -2213.3211 
-20    0.1820396  0.018049916  0.024428411    0.1820396   0.08601739   0.51038174    0.5080746    1.7224961            0    -2213.337   0.49199582   -2212.8266            1   -2213.3347 
-30  0.043288796 0.0068108825  0.017372479  0.043288796  0.049466709   0.51032316    0.5095943    1.7304745            0   -2213.3371   0.49553568   -2212.8268            1   -2213.3364 
-40    0.0421393 0.0037035761   0.01173707    0.0421393  0.026104735   0.51022733    0.5100163    1.7366752            0   -2213.3373   0.49838067   -2212.8271            1   -2213.3371 
-50  0.025897844 0.0022804241 0.0081056535  0.025897844  0.016908913    0.5101712   0.51008591     1.739143            0   -2213.3373   0.49923344   -2212.8272            1   -2213.3373 
-59   0.00962839 0.0012946076  0.005657505  0.009365729  0.012040803   0.51014185   0.51010207    1.7404554            0   -2213.3374   0.49955698   -2212.8272            1   -2213.3373 
-Climbing replica = 2
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-59  0.012040803 0.0031505502  0.005657505  0.009365729  0.012040803   0.51014185   0.51010207    1.7404554            0   -2213.3374   0.49955698   -2212.8272            1   -2213.3373 
-63  0.009152118 0.0016692472 0.0049645771 0.0081967836  0.009152118   0.51013743   0.51010776    1.7409028            0   -2213.3374   0.50022239   -2212.8272            1   -2213.3373 
--- a/examples/neb/log.5Oct16.neb.sivac.g++.6
+++ b/examples/neb/log.5Oct16.neb.sivac.g++.6
@ -1,14 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 3 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0    7.5525391    1.6345605   0.16683659    7.5525391    7.5525391    1.5383951            0    1.6207355            0   -2213.3343          0.5   -2212.4096            1   -2211.7959 
-10   0.27332818  0.040944923  0.039164338   0.27332818   0.17804882   0.51235911     0.497084    1.6790474            0   -2213.3364   0.49024121    -2212.824            1   -2213.3211 
-20    0.1820396  0.018049916  0.024428411    0.1820396   0.08601739   0.51038174    0.5080746    1.7224961            0    -2213.337   0.49199582   -2212.8266            1   -2213.3347 
-30  0.043288796 0.0068108825  0.017372479  0.043288796  0.049466709   0.51032316    0.5095943    1.7304745            0   -2213.3371   0.49553568   -2212.8268            1   -2213.3364 
-40  0.042139305 0.0037035764   0.01173707  0.042139305  0.026104735   0.51022733    0.5100163    1.7366752            0   -2213.3373   0.49838067   -2212.8271            1   -2213.3371 
-50  0.025899631 0.0022805513 0.0081057075  0.025899631  0.016908929    0.5101712   0.51008591     1.739143            0   -2213.3373   0.49923345   -2212.8272            1   -2213.3373 
-59 0.0096285044 0.0012946258 0.0056576061 0.0093678253  0.012040919   0.51014185   0.51010207    1.7404554            0   -2213.3374   0.49955698   -2212.8272            1   -2213.3373 
-Climbing replica = 2
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-59  0.012040919 0.0031505771 0.0056576061 0.0093678253  0.012040919   0.51014185   0.51010207    1.7404554            0   -2213.3374   0.49955698   -2212.8272            1   -2213.3373 
-63 0.0091523813 0.0016692845 0.0049647607 0.0081998372 0.0091523813   0.51013743   0.51010775    1.7409028            0   -2213.3374   0.50022236   -2212.8272            1   -2213.3373 
--- a/examples/neb/log.5Oct16.neb.sivac.g++.9
+++ b/examples/neb/log.5Oct16.neb.sivac.g++.9
@ -1,14 +0,0 @@
-LAMMPS (5 Oct 2016)
-Running on 3 partitions of processors
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-0    7.5525391    1.6345605   0.16683659    7.5525391    7.5525391    1.5383951            0    1.6207355            0   -2213.3343          0.5   -2212.4096            1   -2211.7959 
-10   0.27332818  0.040944923  0.039164338   0.27332818   0.17804882   0.51235911     0.497084    1.6790474            0   -2213.3364   0.49024121    -2212.824            1   -2213.3211 
-20    0.1820396  0.018049916  0.024428411    0.1820396   0.08601739   0.51038174    0.5080746    1.7224961            0    -2213.337   0.49199582   -2212.8266            1   -2213.3347 
-30  0.043288796 0.0068108825  0.017372479  0.043288796  0.049466709   0.51032316    0.5095943    1.7304745            0   -2213.3371   0.49553568   -2212.8268            1   -2213.3364 
-40  0.042139318 0.0037035773  0.011737071  0.042139318  0.026104737   0.51022733    0.5100163    1.7366752            0   -2213.3373   0.49838067   -2212.8271            1   -2213.3371 
-50  0.025904121 0.0022808707 0.0081058431  0.025904121  0.016908969    0.5101712   0.51008591    1.7391431            0   -2213.3373   0.49923346   -2212.8272            1   -2213.3373 
-59 0.0096287928 0.0012946716  0.005657861 0.0093731008   0.01204121   0.51014185   0.51010207    1.7404554            0   -2213.3374   0.49955696   -2212.8272            1   -2213.3373 
-Climbing replica = 2
-Step MaxReplicaForce MaxAtomForce GradV0 GradV1 GradVc EBF EBR RDT RD1 PE1 RD2 PE2 ... RDN PEN
-59   0.01204121 0.0031506449  0.005657861 0.0093731008   0.01204121   0.51014185   0.51010207    1.7404554            0   -2213.3374   0.49955696   -2212.8272            1   -2213.3373 
-63 0.0091530442 0.0016693787 0.0049652227 0.0082075097 0.0091530442   0.51013743   0.51010775    1.7409027            0   -2213.3374   0.50022228   -2212.8272            1   -2213.3373 
--- a/lib/gpu/Makefile.linux.mixed
+++ b/lib/gpu/Makefile.linux.mixed
@ -8,7 +8,6 @@
 EXTRAMAKE = Makefile.lammps.standard

 CUDA_HOME = /usr/local/cuda
-CUDA_HOME = /home/projects/openmpi/1.8.1/intel/13.1.SP1.106/cuda/6.0.37
 NVCC = nvcc

 # Kepler CUDA
--- a/lib/gpu/Nvidia.makefile
+++ b/lib/gpu/Nvidia.makefile
@ -63,6 +63,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_ans.o \
       $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \
       $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \
       $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \
+       $(OBJ_DIR)/lal_vashishta.o $(OBJ_DIR)/lal_vashishta_ext.o \
       $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \
       $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
       $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
@ -117,6 +118,7 @@ CBNS = $(OBJ_DIR)/device.cubin $(OBJ_DIR)/device_cubin.h \
       $(OBJ_DIR)/lj_coul_debye.cubin $(OBJ_DIR)/lj_coul_debye_cubin.h \
       $(OBJ_DIR)/coul_dsf.cubin $(OBJ_DIR)/coul_dsf_cubin.h \
       $(OBJ_DIR)/sw.cubin $(OBJ_DIR)/sw_cubin.h \
+       $(OBJ_DIR)/vashishta.cubin $(OBJ_DIR)/vashishta_cubin.h \
       $(OBJ_DIR)/beck.cubin $(OBJ_DIR)/beck_cubin.h \
       $(OBJ_DIR)/mie.cubin $(OBJ_DIR)/mie_cubin.h \
       $(OBJ_DIR)/soft.cubin $(OBJ_DIR)/soft_cubin.h \
@ -613,6 +615,18 @@ $(OBJ_DIR)/lal_coul_dsf.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf.cpp $(OBJ_DIR)/c
 $(OBJ_DIR)/lal_coul_dsf_ext.o: $(ALL_H) lal_coul_dsf.h lal_coul_dsf_ext.cpp lal_base_charge.h
 	$(CUDR) -o $@ -c lal_coul_dsf_ext.cpp -I$(OBJ_DIR)

+$(OBJ_DIR)/vashishta.cubin: lal_vashishta.cu lal_precision.h lal_preprocessor.h  # kernel source -> cubin
+	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_vashishta.cu
+
+$(OBJ_DIR)/vashishta_cubin.h: $(OBJ_DIR)/vashishta.cubin  # cubin -> header via $(BIN2C)
+	$(BIN2C) -c -n vashishta $(OBJ_DIR)/vashishta.cubin > $(OBJ_DIR)/vashishta_cubin.h
+
+$(OBJ_DIR)/lal_vashishta.o: $(ALL_H) lal_vashishta.h lal_vashishta.cpp $(OBJ_DIR)/vashishta_cubin.h $(OBJ_DIR)/lal_base_three.o
+	$(CUDR) -o $@ -c lal_vashishta.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_vashishta_ext.o: $(ALL_H) lal_vashishta.h lal_vashishta_ext.cpp lal_base_three.h
+	$(CUDR) -o $@ -c lal_vashishta_ext.cpp -I$(OBJ_DIR)
+
 $(OBJ_DIR)/sw.cubin: lal_sw.cu lal_precision.h lal_preprocessor.h
 	$(CUDA) --cubin -DNV_KERNEL -o $@ lal_sw.cu

--- a/lib/gpu/Opencl.makefile
+++ b/lib/gpu/Opencl.makefile
@ -52,6 +52,7 @@ OBJS = $(OBJ_DIR)/lal_atom.o $(OBJ_DIR)/lal_answer.o \
       $(OBJ_DIR)/lal_lj_coul_debye.o $(OBJ_DIR)/lal_lj_coul_debye_ext.o \
       $(OBJ_DIR)/lal_coul_dsf.o $(OBJ_DIR)/lal_coul_dsf_ext.o \
       $(OBJ_DIR)/lal_sw.o $(OBJ_DIR)/lal_sw_ext.o \
+       $(OBJ_DIR)/lal_vashishta.o $(OBJ_DIR)/lal_vashishta_ext.o \
       $(OBJ_DIR)/lal_beck.o $(OBJ_DIR)/lal_beck_ext.o \
       $(OBJ_DIR)/lal_mie.o $(OBJ_DIR)/lal_mie_ext.o \
       $(OBJ_DIR)/lal_soft.o $(OBJ_DIR)/lal_soft_ext.o \
@ -92,7 +93,7 @@ KERS = $(OBJ_DIR)/device_cl.h $(OBJ_DIR)/atom_cl.h \
       $(OBJ_DIR)/tersoff_cl.h $(OBJ_DIR)/tersoff_zbl_cl.h \
       $(OBJ_DIR)/tersoff_mod_cl.h $(OBJ_DIR)/coul_cl.h \
       $(OBJ_DIR)/coul_debye_cl.h $(OBJ_DIR)/zbl_cl.h \
-       $(OBJ_DIR)/lj_cubic_cl.h
+       $(OBJ_DIR)/lj_cubic_cl.h $(OBJ_DIR)/vashishta_cl.h


 OCL_EXECS = $(BIN_DIR)/ocl_get_devices
@ -450,6 +451,15 @@ $(OBJ_DIR)/lal_sw.o: $(ALL_H) lal_sw.h lal_sw.cpp  $(OBJ_DIR)/sw_cl.h $(OBJ_DIR)
 $(OBJ_DIR)/lal_sw_ext.o: $(ALL_H) lal_sw.h lal_sw_ext.cpp lal_base_three.h
 	$(OCL) -o $@ -c lal_sw_ext.cpp -I$(OBJ_DIR)

+$(OBJ_DIR)/vashishta_cl.h: lal_vashishta.cu $(PRE1_H)  # header generated by geryon/file_to_cstr.sh
+	$(BSH) ./geryon/file_to_cstr.sh vashishta $(PRE1_H) lal_vashishta.cu $(OBJ_DIR)/vashishta_cl.h;
+
+$(OBJ_DIR)/lal_vashishta.o: $(ALL_H) lal_vashishta.h lal_vashishta.cpp $(OBJ_DIR)/vashishta_cl.h $(OBJ_DIR)/lal_base_three.o
+	$(OCL) -o $@ -c lal_vashishta.cpp -I$(OBJ_DIR)
+
+$(OBJ_DIR)/lal_vashishta_ext.o: $(ALL_H) lal_vashishta.h lal_vashishta_ext.cpp lal_base_three.h
+	$(OCL) -o $@ -c lal_vashishta_ext.cpp -I$(OBJ_DIR)
+
 $(OBJ_DIR)/beck_cl.h: lal_beck.cu $(PRE1_H)
 	$(BSH) ./geryon/file_to_cstr.sh beck $(PRE1_H) lal_beck.cu $(OBJ_DIR)/beck_cl.h;

--- a/lib/gpu/lal_vashishta.cpp
+++ b/lib/gpu/lal_vashishta.cpp
@ -0,0 +1,283 @@
+/***************************************************************************
+                                vashishta.cpp
+                             -------------------
+                            Anders Hafreager (UiO)
+
+  Class for acceleration of the vashishta pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : Mon June 12, 2017
+    email                : andershaf@gmail.com
+ ***************************************************************************/
+
+#if defined(USE_OPENCL)
+#include "vashishta_cl.h"
+#elif defined(USE_CUDART)
+const char *vashishta=0;  // null pointer handed to init_three() in place of a generated kernel header
+#else
+#include "vashishta_cubin.h"
+#endif
+
+#include "lal_vashishta.h"
+#include <cassert>
+using namespace LAMMPS_AL;
+#define VashishtaT Vashishta<numtyp, acctyp>  // shorthand used by the member definitions below
+
+extern Device<PRECISION,ACC_PRECISION> device;
+
+template <class numtyp, class acctyp>
+VashishtaT::Vashishta() : BaseThree<numtyp,acctyp>(), _allocated(false) {
+}  // only clears _allocated; the parameter buffers are allocated later by init()
+
+template <class numtyp, class acctyp>
+VashishtaT::~Vashishta() {
+  clear();  // clear() returns immediately when _allocated is false
+}
+
+template <class numtyp, class acctyp>
+int VashishtaT::bytes_per_atom(const int max_nbors) const {
+  return this->bytes_per_atom_atomic(max_nbors);  // forwarded unchanged to bytes_per_atom_atomic()
+}
+
+template <class numtyp, class acctyp>
+int VashishtaT::init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
+           const double cell_size, const double gpu_split, FILE *_screen,
+           int* host_map, const int nelements, int*** host_elem2param, const int nparams,
+           const double* cutsq, const double* r0,
+           const double* gamma, const double* eta,
+           const double* lam1inv, const double* lam4inv,
+           const double* zizj, const double* mbigd,
+           const double* dvrc, const double* big6w,
+           const double* heta, const double* bigh,
+           const double* bigw, const double* c0,
+           const double* costheta, const double* bigb,
+           const double* big2b, const double* bigc)
+{  // returns 0 on success, otherwise the non-zero code reported by init_three()
+  int success;
+  success=this->init_three(nlocal,nall,max_nbors,0,cell_size,gpu_split,
+                           _screen,vashishta,"k_vashishta","k_vashishta_three_center",
+                           "k_vashishta_three_end");
+  if (success!=0)
+    return success;
+
+  // Pad the type count up to max_shared_types when both the type count and block size allow it
+  int lj_types=ntypes;
+  shared_types=false;
+  int max_shared_types=this->device->max_shared_types();
+  if (lj_types<=max_shared_types && this->_block_size>=max_shared_types) {
+    lj_types=max_shared_types;
+    shared_types=true;
+  }
+  _lj_types=lj_types;
+
+  _nparams = nparams;
+  _nelements = nelements;
+
+  UCL_H_Vec<numtyp4> dview(nparams,*(this->ucl_device),
+                             UCL_WRITE_ONLY);
+
+  for (int i=0; i<nparams; i++) {
+    dview[i].x=(numtyp)0;
+    dview[i].y=(numtyp)0;
+    dview[i].z=(numtyp)0;
+    dview[i].w=(numtyp)0;
+  }
+
+  // Pack the per-parameter-set coefficients four at a time; dview is the reused host staging buffer
+  param1.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+  for (int i=0; i<nparams; i++) {  // param1 = (eta, lam1inv, lam4inv, zizj)
+    dview[i].x=static_cast<numtyp>(eta[i]);
+    dview[i].y=static_cast<numtyp>(lam1inv[i]);
+    dview[i].z=static_cast<numtyp>(lam4inv[i]);
+    dview[i].w=static_cast<numtyp>(zizj[i]);
+  }
+
+  ucl_copy(param1,dview,false);
+  param1_tex.get_texture(*(this->pair_program),"param1_tex");
+  param1_tex.bind_float(param1,4);
+
+  param2.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+  for (int i=0; i<nparams; i++) {  // param2 = (mbigd, dvrc, big6w, heta)
+    dview[i].x=static_cast<numtyp>(mbigd[i]);
+    dview[i].y=static_cast<numtyp>(dvrc[i]);
+    dview[i].z=static_cast<numtyp>(big6w[i]);
+    dview[i].w=static_cast<numtyp>(heta[i]);
+  }
+
+  ucl_copy(param2,dview,false);
+  param2_tex.get_texture(*(this->pair_program),"param2_tex");
+  param2_tex.bind_float(param2,4);
+
+  param3.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+  for (int i=0; i<nparams; i++) {  // param3 = (bigh, bigw, dvrc, c0)
+    dview[i].x=static_cast<numtyp>(bigh[i]);
+    dview[i].y=static_cast<numtyp>(bigw[i]);
+    dview[i].z=static_cast<numtyp>(dvrc[i]);  // NOTE(review): dvrc is also param2.y -- confirm the duplicate is intended
+    dview[i].w=static_cast<numtyp>(c0[i]);
+  }
+
+  ucl_copy(param3,dview,false);
+  param3_tex.get_texture(*(this->pair_program),"param3_tex");
+  param3_tex.bind_float(param3,4);
+
+  param4.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+  for (int i=0; i<nparams; i++) {  // param4 = (r0^2 - 1e-4, gamma, cutsq, r0)
+    double r0sq = r0[i]*r0[i]-1e-4; // TODO: should we have the 1e-4?
+
+    dview[i].x=static_cast<numtyp>(r0sq);
+    dview[i].y=static_cast<numtyp>(gamma[i]);
+    dview[i].z=static_cast<numtyp>(cutsq[i]);
+    dview[i].w=static_cast<numtyp>(r0[i]);
+  }
+
+  ucl_copy(param4,dview,false);
+  param4_tex.get_texture(*(this->pair_program),"param4_tex");
+  param4_tex.bind_float(param4,4);
+
+  param5.alloc(nparams,*(this->ucl_device),UCL_READ_ONLY);
+
+  for (int i=0; i<nparams; i++) {  // param5 = (bigc, costheta, bigb, big2b)
+    dview[i].x=static_cast<numtyp>(bigc[i]);
+    dview[i].y=static_cast<numtyp>(costheta[i]);
+    dview[i].z=static_cast<numtyp>(bigb[i]);
+    dview[i].w=static_cast<numtyp>(big2b[i]);
+  }
+
+  ucl_copy(param5,dview,false);
+  param5_tex.get_texture(*(this->pair_program),"param5_tex");
+  param5_tex.bind_float(param5,4);
+
+  UCL_H_Vec<int> dview_elem2param(nelements*nelements*nelements,
+                           *(this->ucl_device), UCL_WRITE_ONLY);
+
+  elem2param.alloc(nelements*nelements*nelements,*(this->ucl_device),
+                   UCL_READ_ONLY);
+
+  for (int i = 0; i < nelements; i++)  // flatten host_elem2param[i][j][k] with k varying fastest
+    for (int j = 0; j < nelements; j++)
+      for (int k = 0; k < nelements; k++) {
+         int idx = i*nelements*nelements+j*nelements+k;
+         dview_elem2param[idx] = host_elem2param[i][j][k];
+      }
+
+  ucl_copy(elem2param,dview_elem2param,false);
+
+  UCL_H_Vec<int> dview_map(lj_types, *(this->ucl_device), UCL_WRITE_ONLY);
+  for (int i = 0; i < lj_types; i++)  // lj_types >= ntypes: zero the padding instead of copying uninitialised host memory
+    dview_map[i] = (i < ntypes) ? host_map[i] : 0;
+
+  map.alloc(lj_types,*(this->ucl_device), UCL_READ_ONLY);
+  ucl_copy(map,dview_map,false);
+
+  _allocated=true;
+  this->_max_bytes=param1.row_bytes()+param2.row_bytes()+param3.row_bytes()+param4.row_bytes()+param5.row_bytes()+
+    map.row_bytes()+elem2param.row_bytes();
+  return 0;
+}
+
+template <class numtyp, class acctyp>
+void VashishtaT::clear() {
+  if (!_allocated)  // nothing to release unless init() ran to completion
+    return;
+  _allocated=false;  // a second clear() call therefore does nothing
+
+  param1.clear();
+  param2.clear();
+  param3.clear();
+  param4.clear();
+  param5.clear();
+  map.clear();
+  elem2param.clear();
+  this->clear_atomic();
+}
+
+template <class numtyp, class acctyp>
+double VashishtaT::host_memory_usage() const {
+  return this->host_memory_usage_atomic()+sizeof(Vashishta<numtyp,acctyp>);  // helper's figure plus the size of this object
+}
+
+#define KTHREADS this->_threads_per_atom
+#define JTHREADS this->_threads_per_atom
+// ---------------------------------------------------------------------------
+// Launch k_pair, then k_three_center, then k_three_end (or k_three_end_vatom)
+// ---------------------------------------------------------------------------
+template <class numtyp, class acctyp>
+void VashishtaT::loop(const bool _eflag, const bool _vflag, const int evatom) {
+  // Compute the block size and grid size to keep all cores busy
+  int BX=this->block_pair();
+  int eflag, vflag;  // int copies (0/1) of the bool flags, passed to the kernels by address
+  if (_eflag)
+    eflag=1;
+  else
+    eflag=0;
+
+  if (_vflag)
+    vflag=1;
+  else
+    vflag=0;
+
+  int GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                               (BX/this->_threads_per_atom)));
+
+  // this->_nbor_data == nbor->dev_packed for gpu_nbor == 0 and tpa > 1
+  // this->_nbor_data == nbor->dev_nbor for gpu_nbor == 1 or tpa == 1
+  int ainum=this->ans->inum();
+  int nbor_pitch=this->nbor->nbor_pitch();
+  this->time_pair.start();  // time_pair brackets all kernel launches below
+
+  this->k_pair.set_size(GX,BX);
+  this->k_pair.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5,
+                   &map, &elem2param, &_nelements,
+                   &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                   &this->ans->force, &this->ans->engv,
+                   &eflag, &vflag, &ainum, &nbor_pitch,
+                   &this->_threads_per_atom);
+
+  BX=this->block_size();
+  GX=static_cast<int>(ceil(static_cast<double>(this->ans->inum())/
+                           (BX/(KTHREADS*JTHREADS))));
+  // remaining launches reuse this GX/BX, sized with block_size() and KTHREADS*JTHREADS
+  this->k_three_center.set_size(GX,BX);
+  this->k_three_center.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5,
+                           &map, &elem2param, &_nelements,
+                           &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                           &this->ans->force, &this->ans->engv, &eflag, &vflag, &ainum,
+                           &nbor_pitch, &this->_threads_per_atom, &evatom);
+  Answer<numtyp,acctyp> *end_ans;  // output object for the "end" kernel: ans2 under THREE_CONCURRENT, else ans
+  #ifdef THREE_CONCURRENT
+  end_ans=this->ans2;
+  #else
+  end_ans=this->ans;
+  #endif
+  if (evatom!=0) {
+    // evatom != 0 selects the k_three_end_vatom variant; arguments match the else branch
+    this->k_three_end_vatom.set_size(GX,BX);
+    this->k_three_end_vatom.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5,
+                          &map, &elem2param, &_nelements,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->nbor->dev_acc,
+                          &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
+                          &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
+  } else {
+    // evatom == 0: plain k_three_end
+    this->k_three_end.set_size(GX,BX);
+    this->k_three_end.run(&this->atom->x, &param1, &param2, &param3, &param4, &param5,
+                          &map, &elem2param, &_nelements,
+                          &this->nbor->dev_nbor, &this->_nbor_data->begin(),
+                          &this->nbor->dev_acc,
+                          &end_ans->force, &end_ans->engv, &eflag, &vflag, &ainum,
+                          &nbor_pitch, &this->_threads_per_atom, &this->_gpu_nbor);
+  }
+
+  this->time_pair.stop();
+}
+
+template class Vashishta<PRECISION,ACC_PRECISION>;  // explicit instantiation for the PRECISION/ACC_PRECISION macro types
+
--- a/lib/gpu/lal_vashishta.cu
+++ b/lib/gpu/lal_vashishta.cu
@ -0,0 +1,744 @@
+// **************************************************************************
+//                                 vashishta.cu
+//                             -------------------
+//                           Anders Hafreager (UiO)
+//
+//  Device code for acceleration of the vashishta pair style
+//
+// __________________________________________________________________________
+//    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+// __________________________________________________________________________
+//
+//    begin                : Mon June 12, 2017
+//    email                : andershaf@gmail.com
+// ***************************************************************************/
+
+#ifdef NV_KERNEL  // NV_KERNEL build: positions and parameter tables get the texture references declared below
+#include "lal_aux_fun1.h"
+
+#ifndef _DOUBLE_DOUBLE  // texture element type is float4 unless _DOUBLE_DOUBLE is defined
+texture<float4> pos_tex;
+texture<float4> param1_tex;
+texture<float4> param2_tex;
+texture<float4> param3_tex;
+texture<float4> param4_tex;
+texture<float4> param5_tex;
+#else  // _DOUBLE_DOUBLE: the same textures are declared with int4 elements
+texture<int4,1> pos_tex;
+texture<int4> param1_tex;
+texture<int4> param2_tex;
+texture<int4> param3_tex;
+texture<int4> param4_tex;
+texture<int4> param5_tex;
+#endif
+
+#else  // no NV_KERNEL: the *_tex names alias the kernel pointer arguments, so fetch4() call sites are unchanged
+#define pos_tex x_
+#define param1_tex param1
+#define param2_tex param2
+#define param3_tex param3
+#define param4_tex param4
+#define param5_tex param5
+#endif
+
+#define THIRD (numtyp)0.66666666666666666667  // NOTE: despite the name the value is 2/3; store_answers_p multiplies by a further 0.5, giving 1/3
+
+//#define THREE_CONCURRENT   (when defined, the three_end kernels call store_answers instead of store_answers_p)
+
+#if (ARCH < 300)  // store_answers_p, variant 1: per-thread partial sums are combined through a __local array
+
+#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, offset, \
+                      eflag, vflag, ans, engv)                              \
+  if (t_per_atom>1) { /* combine the partial sums of one atom's threads */  \
+    __local acctyp red_acc[6][BLOCK_ELLIPSE];                               \
+    red_acc[0][tid]=f.x;                                                    \
+    red_acc[1][tid]=f.y;                                                    \
+    red_acc[2][tid]=f.z;                                                    \
+    red_acc[3][tid]=energy;                                                 \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) { /* halving tree */      \
+      if (offset < s) {                                                     \
+        for (int r=0; r<4; r++)                                             \
+          red_acc[r][tid] += red_acc[r][tid+s];                             \
+      }                                                                     \
+    }                                                                       \
+    f.x=red_acc[0][tid];                                                    \
+    f.y=red_acc[1][tid];                                                    \
+    f.z=red_acc[2][tid];                                                    \
+    energy=red_acc[3][tid];                                                 \
+    if (vflag>0) { /* red_acc is reused for the six virial terms */         \
+      for (int r=0; r<6; r++)                                               \
+        red_acc[r][tid]=virial[r];                                          \
+      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
+        if (offset < s) {                                                   \
+          for (int r=0; r<6; r++)                                           \
+            red_acc[r][tid] += red_acc[r][tid+s];                           \
+        }                                                                   \
+      }                                                                     \
+      for (int r=0; r<6; r++)                                               \
+        virial[r]=red_acc[r][tid];                                          \
+    }                                                                       \
+  }                                                                         \
+  if (offset==0) { /* only the offset-0 thread of each atom writes */       \
+    int ei=ii;                                                              \
+    if (eflag>0) {                                                          \
+      engv[ei]+=energy*(acctyp)0.5; /* accumulates (+=), scaled by 0.5 */   \
+      ei+=inum; /* engv entries are inum apart */                           \
+    }                                                                       \
+    if (vflag>0) {                                                          \
+      for (int i=0; i<6; i++) {                                             \
+        engv[ei]+=virial[i]*(acctyp)0.5;                                    \
+        ei+=inum;                                                           \
+      }                                                                     \
+    }                                                                       \
+    acctyp4 old=ans[ii]; /* force is added to the value already stored */   \
+    old.x+=f.x;                                                             \
+    old.y+=f.y;                                                             \
+    old.z+=f.z;                                                             \
+    ans[ii]=old;                                                            \
+  }
+
+#else  // store_answers_p, variant 2: per-thread partial sums are combined with shfl_xor
+
+#define store_answers_p(f, energy, virial, ii, inum, tid, t_per_atom, offset, \
+                      eflag, vflag, ans, engv)                              \
+  if (t_per_atom>1) { /* combine the partial sums of one atom's threads */  \
+    for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                         \
+        f.x += shfl_xor(f.x, s, t_per_atom);                                \
+        f.y += shfl_xor(f.y, s, t_per_atom);                                \
+        f.z += shfl_xor(f.z, s, t_per_atom);                                \
+        energy += shfl_xor(energy, s, t_per_atom);                          \
+    }                                                                       \
+    if (vflag>0) {                                                          \
+      for (unsigned int s=t_per_atom/2; s>0; s>>=1) {                       \
+          for (int r=0; r<6; r++)                                           \
+            virial[r] += shfl_xor(virial[r], s, t_per_atom);                \
+      }                                                                     \
+    }                                                                       \
+  }                                                                         \
+  if (offset==0) { /* only the offset-0 thread of each atom writes */       \
+    int ei=ii;                                                              \
+    if (eflag>0) {                                                          \
+      engv[ei]+=energy*(acctyp)0.5; /* accumulates (+=), scaled by 0.5 */   \
+      ei+=inum; /* engv entries are inum apart */                           \
+    }                                                                       \
+    if (vflag>0) {                                                          \
+      for (int i=0; i<6; i++) {                                             \
+        engv[ei]+=virial[i]*(acctyp)0.5;                                    \
+        ei+=inum;                                                           \
+      }                                                                     \
+    }                                                                       \
+    acctyp4 old=ans[ii]; /* force is added to the value already stored */   \
+    old.x+=f.x;                                                             \
+    old.y+=f.y;                                                             \
+    old.z+=f.z;                                                             \
+    ans[ii]=old;                                                            \
+  }
+
+#endif
+
+
+// Two-body part of the Vashishta pair style.
+//
+// Each atom ii<inum is processed by t_per_atom threads that stride over its
+// neighbor list (set up by nbor_info); every thread accumulates a partial
+// force, energy and virial which are handed to store_answers() at the end
+// (store_answers is not defined in this file).
+//
+// Parameter tables are indexed with ijparam = elem2param[(itype,jtype,jtype)]
+// and unpacked as:
+//   param1 = (eta, lam1inv, lam4inv, zizj)
+//   param2 = (mbigd, dvrc, big6w, heta)
+//   param3 = (bigh, bigw, dvrc, c0)
+//   param4.z = squared pair cutoff
+// param5 is not read by this kernel.
+//
+// Floating-point literals are cast to numtyp so that a build where numtyp is
+// float does not silently promote the force expression to double.
+__kernel void k_vashishta(const __global numtyp4 *restrict x_,
+                   const __global numtyp4 *restrict param1,
+                   const __global numtyp4 *restrict param2,
+                   const __global numtyp4 *restrict param3,
+                   const __global numtyp4 *restrict param4,
+                   const __global numtyp4 *restrict param5,
+                   const __global int *restrict map,
+                   const __global int *restrict elem2param,
+                   const int nelements,
+                   const __global int * dev_nbor,
+                   const __global int * dev_packed,
+                   __global acctyp4 *restrict ans,
+                   __global acctyp *restrict engv,
+                   const int eflag, const int vflag, const int inum,
+                   const int nbor_pitch, const int t_per_atom) {
+  __local int n_stride;
+  int tid, ii, offset;
+  atom_info(t_per_atom,ii,tid,offset);
+
+  // Per-thread accumulators
+  acctyp energy=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int nbor, nbor_end;
+    int i, numj;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset,i,numj,
+              n_stride,nbor_end,nbor);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    // The w component of a position carries the atom type; map[] converts it
+    // to the element index used by elem2param.
+    int itype=ix.w;
+    itype=map[itype];
+
+    for ( ; nbor<nbor_end; nbor+=n_stride) {
+
+      int j=dev_packed[nbor];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      jtype=map[jtype];
+
+      // Pair parameters are stored at the (i,j,j) entry of elem2param
+      int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];
+
+      // Compute r12
+      numtyp delx = ix.x-jx.x;
+      numtyp dely = ix.y-jx.y;
+      numtyp delz = ix.z-jx.z;
+      numtyp rsq = delx*delx+dely*dely+delz*delz;
+
+      if (rsq<param4[ijparam].z) { // cutsq = param4[ijparam].z
+        numtyp4 param1_ijparam; fetch4(param1_ijparam,ijparam,param1_tex);
+        numtyp param1_eta=param1_ijparam.x;
+        numtyp param1_lam1inv=param1_ijparam.y;
+        numtyp param1_lam4inv=param1_ijparam.z;
+        numtyp param1_zizj=param1_ijparam.w;
+
+        numtyp4 param2_ijparam; fetch4(param2_ijparam,ijparam,param2_tex);
+        numtyp param2_mbigd=param2_ijparam.x;
+        numtyp param2_dvrc =param2_ijparam.y;
+        numtyp param2_big6w=param2_ijparam.z;
+        numtyp param2_heta =param2_ijparam.w;
+
+        numtyp4 param3_ijparam; fetch4(param3_ijparam,ijparam,param3_tex);
+        numtyp param3_bigh=param3_ijparam.x;
+        numtyp param3_bigw=param3_ijparam.y;
+        numtyp param3_dvrc=param3_ijparam.z;
+        numtyp param3_c0  =param3_ijparam.w;
+
+        numtyp r=sqrt(rsq);
+        numtyp rinvsq=(numtyp)1.0/rsq;
+        numtyp r4inv = rinvsq*rinvsq;
+        numtyp r6inv = rinvsq*r4inv;
+
+        numtyp reta = pow(r,-param1_eta);
+        numtyp lam1r = r*param1_lam1inv;
+        numtyp lam4r = r*param1_lam4inv;
+        numtyp vc2 = param1_zizj * exp(-lam1r)/r;
+        numtyp vc3 = param2_mbigd * r4inv*exp(-lam4r);
+
+        // Scalar force factor; multiplied by the separation vector below
+        numtyp force = (param2_dvrc*r
+            - ((numtyp)4.0*vc3 + lam4r*vc3+param2_big6w*r6inv
+               - param2_heta*reta - vc2 - lam1r*vc2)
+            ) * rinvsq;
+
+        f.x+=delx*force;
+        f.y+=dely*force;
+        f.z+=delz*force;
+        if (eflag>0)
+          energy += (param3_bigh*reta+vc2-vc3-param3_bigw*r6inv-r*param3_dvrc+param3_c0);
+
+        if (vflag>0) {
+          virial[0] += delx*delx*force;
+          virial[1] += dely*dely*force;
+          virial[2] += delz*delz*force;
+          virial[3] += delx*dely*force;
+          virial[4] += delx*delz*force;
+          virial[5] += dely*delz*force;
+        }
+      }
+    } // for nbor
+
+    store_answers(f,energy,virial,ii,inum,tid,t_per_atom,offset,eflag,vflag,
+                  ans,engv);
+  } // if ii
+
+}
+
+// threebody(delr1x, delr1y, delr1z, eflag, energy)
+//
+// Evaluates one three-body term for the two bond vectors delr1 and delr2.
+// Besides its arguments the macro reads these names from the enclosing scope:
+//   rsq1, rsq2, delr2x, delr2y, delr2z,
+//   param_r0_ij, param_gamma_ij, param_r0_ik, param_gamma_ik,
+//   param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk,
+//   vflag, virial
+// and assigns fjx,fjy,fjz and fkx,fky,fkz, which the caller must have declared.
+// When eflag>0 the term's energy (facrad) is added to `energy`; when vflag>0
+// the six virial components are accumulated.
+// The literal 1.0 is cast to numtyp so a float build stays in single precision.
+#define threebody(delr1x, delr1y, delr1z, eflag, energy)                     \
+{                                                                            \
+  numtyp r1 = ucl_sqrt(rsq1);                                                \
+  numtyp rinvsq1 = ucl_recip(rsq1);                                          \
+  numtyp rainv1 = ucl_recip(r1 - param_r0_ij);                               \
+  numtyp gsrainv1 = param_gamma_ij * rainv1;                                 \
+  numtyp gsrainvsq1 = gsrainv1*rainv1/r1;                                    \
+  numtyp expgsrainv1 = ucl_exp(gsrainv1);                                    \
+                                                                             \
+  numtyp r2 = ucl_sqrt(rsq2);                                                \
+  numtyp rinvsq2 = ucl_recip(rsq2);                                          \
+  numtyp rainv2 = ucl_recip(r2 - param_r0_ik);                               \
+  numtyp gsrainv2 = param_gamma_ik * rainv2;                                 \
+  numtyp gsrainvsq2 = gsrainv2*rainv2/r2;                                    \
+  numtyp expgsrainv2 = ucl_exp(gsrainv2);                                    \
+                                                                             \
+  numtyp rinv12 = ucl_recip(r1*r2);                                          \
+  numtyp cs = (delr1x*delr2x + delr1y*delr2y + delr1z*delr2z) * rinv12;      \
+  numtyp delcs = cs - param_costheta_ijk;                                    \
+  numtyp delcssq = delcs*delcs;                                              \
+  numtyp pcsinv = param_bigc_ijk*delcssq+(numtyp)1.0;                        \
+  numtyp pcsinvsq = pcsinv*pcsinv;                                           \
+  numtyp pcs = delcssq/pcsinv;                                               \
+                                                                             \
+  numtyp facexp = expgsrainv1*expgsrainv2;                                   \
+                                                                             \
+  numtyp facrad = param_bigb_ijk * facexp*pcs;                               \
+  numtyp frad1 = facrad*gsrainvsq1;                                          \
+  numtyp frad2 = facrad*gsrainvsq2;                                          \
+  numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq;                   \
+  numtyp facang12 = rinv12*facang;                                           \
+  numtyp csfacang = cs*facang;                                               \
+  numtyp csfac1 = rinvsq1*csfacang;                                          \
+                                                                             \
+  fjx = delr1x*(frad1+csfac1)-delr2x*facang12;                               \
+  fjy = delr1y*(frad1+csfac1)-delr2y*facang12;                               \
+  fjz = delr1z*(frad1+csfac1)-delr2z*facang12;                               \
+                                                                             \
+  numtyp csfac2 = rinvsq2*csfacang;                                          \
+                                                                             \
+  fkx = delr2x*(frad2+csfac2)-delr1x*facang12;                               \
+  fky = delr2y*(frad2+csfac2)-delr1y*facang12;                               \
+  fkz = delr2z*(frad2+csfac2)-delr1z*facang12;                               \
+                                                                             \
+  if (eflag>0)                                                               \
+    energy+=facrad;                                                          \
+  if (vflag>0) {                                                             \
+    virial[0] += delr1x*fjx + delr2x*fkx;                                    \
+    virial[1] += delr1y*fjy + delr2y*fky;                                    \
+    virial[2] += delr1z*fjz + delr2z*fkz;                                    \
+    virial[3] += delr1x*fjy + delr2x*fky;                                    \
+    virial[4] += delr1x*fjz + delr2x*fkz;                                    \
+    virial[5] += delr1y*fjz + delr2y*fkz;                                    \
+  }                                                                          \
+}
+
+// threebody_half(delr1x, delr1y, delr1z)
+//
+// Reduced form of the three-body term: it assigns only fjx,fjy,fjz (the
+// force along/against the first bond vector) and computes neither the
+// second-bond force, the energy nor the virial.
+// Besides its arguments the macro reads these names from the enclosing scope:
+//   rsq1, rsq2, delr2x, delr2y, delr2z,
+//   param_r0_ij, param_gamma_ij, param_r0_ik, param_gamma_ik,
+//   param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk
+// The literal 1.0 is cast to numtyp so a float build stays in single precision.
+#define threebody_half(delr1x, delr1y, delr1z)                               \
+{                                                                            \
+  numtyp r1 = ucl_sqrt(rsq1);                                                \
+  numtyp rinvsq1 = ucl_recip(rsq1);                                          \
+  numtyp rainv1 = ucl_recip(r1 - param_r0_ij);                               \
+  numtyp gsrainv1 = param_gamma_ij * rainv1;                                 \
+  numtyp gsrainvsq1 = gsrainv1*rainv1/r1;                                    \
+  numtyp expgsrainv1 = ucl_exp(gsrainv1);                                    \
+                                                                             \
+  numtyp r2 = ucl_sqrt(rsq2);                                                \
+  numtyp rainv2 = ucl_recip(r2 - param_r0_ik);                               \
+  numtyp gsrainv2 = param_gamma_ik * rainv2;                                 \
+  numtyp expgsrainv2 = ucl_exp(gsrainv2);                                    \
+                                                                             \
+  numtyp rinv12 = ucl_recip(r1*r2);                                          \
+  numtyp cs = (delr1x*delr2x + delr1y*delr2y + delr1z*delr2z) * rinv12;      \
+  numtyp delcs = cs - param_costheta_ijk;                                    \
+  numtyp delcssq = delcs*delcs;                                              \
+  numtyp pcsinv = param_bigc_ijk*delcssq+(numtyp)1.0;                        \
+  numtyp pcsinvsq = pcsinv*pcsinv;                                           \
+  numtyp pcs = delcssq/pcsinv;                                               \
+                                                                             \
+  numtyp facexp = expgsrainv1*expgsrainv2;                                   \
+                                                                             \
+  numtyp facrad = param_bigb_ijk * facexp*pcs;                               \
+  numtyp frad1 = facrad*gsrainvsq1;                                          \
+  numtyp facang = param_big2b_ijk * facexp*delcs/pcsinvsq;                   \
+  numtyp facang12 = rinv12*facang;                                           \
+  numtyp csfacang = cs*facang;                                               \
+  numtyp csfac1 = rinvsq1*csfacang;                                          \
+                                                                             \
+  fjx = delr1x*(frad1+csfac1)-delr2x*facang12;                               \
+  fjy = delr1y*(frad1+csfac1)-delr2y*facang12;                               \
+  fjz = delr1z*(frad1+csfac1)-delr2z*facang12;                               \
+}
+
+// Three-body contribution to the atom at the centre of each j-i-k triplet.
+//
+// Each atom ii<inum is processed by t_per_atom*t_per_atom threads: offset_j
+// selects the thread's slice of the j loop and offset_k its slice of the
+// k loop. offset_k is computed with a bit mask, which presumably assumes
+// t_per_atom is a power of two -- TODO confirm against the host code.
+// The k loop starts at list positions after nbor_j, so each unordered pair
+// (j,k) of i's neighbors is visited once.
+//
+// Parameters read here:
+//   param4[(i,j,j)] / param4[(i,k,k)] = (r0sq, gamma, -, r0)
+//   param5[(i,j,k)]                   = (bigc, costheta, bigb, big2b)
+// param1..param3 are not read by this kernel.
+//
+// Energy and virial are scaled by THIRD when evatom==1 and by 2 otherwise
+// before store_answers_p (which applies a further 0.5 and accumulates).
+__kernel void k_vashishta_three_center(const __global numtyp4 *restrict x_,
+                                const __global numtyp4 *restrict param1,
+                                const __global numtyp4 *restrict param2,
+                                const __global numtyp4 *restrict param3,
+                                const __global numtyp4 *restrict param4,
+                                const __global numtyp4 *restrict param5,
+                                const __global int *restrict map,
+                                const __global int *restrict elem2param,
+                                const int nelements,
+                                const __global int * dev_nbor,
+                                const __global int * dev_packed,
+                                __global acctyp4 *restrict ans,
+                                __global acctyp *restrict engv,
+                                const int eflag, const int vflag,
+                                const int inum,  const int nbor_pitch,
+                                const int t_per_atom, const int evatom) {
+  __local int tpa_sq, n_stride;
+  tpa_sq=fast_mul(t_per_atom,t_per_atom);
+  // These names are read implicitly by the threebody macro
+  numtyp param_gamma_ij, param_r0sq_ij, param_r0_ij, param_gamma_ik, param_r0sq_ik, param_r0_ik;
+  numtyp param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk;
+
+  int tid, ii, offset;
+  atom_info(tpa_sq,ii,tid,offset);
+
+  acctyp energy=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int i, numj, nbor_j, nbor_end;
+
+    int offset_j=offset/t_per_atom;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
+              n_stride,nbor_end,nbor_j);
+    int offset_k=tid & (t_per_atom-1);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    itype=map[itype];
+
+    for ( ; nbor_j<nbor_end; nbor_j+=n_stride) {
+
+      int j=dev_packed[nbor_j];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      jtype=map[jtype];
+
+      // Compute r12
+      numtyp delr1x = jx.x-ix.x;
+      numtyp delr1y = jx.y-ix.y;
+      numtyp delr1z = jx.z-ix.z;
+      numtyp rsq1 = delr1x*delr1x+delr1y*delr1y+delr1z*delr1z;
+
+      int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];
+
+      numtyp4 param4_ijparam; fetch4(param4_ijparam,ijparam,param4_tex);
+      param_r0sq_ij=param4_ijparam.x;
+      // Skip at the cutoff too: the ik test below is a strict '<', and at
+      // rsq1 == r0sq the macro's ucl_recip(r1 - r0) could divide by zero.
+      if (rsq1 >= param_r0sq_ij) continue;
+      param_gamma_ij=param4_ijparam.y;
+      param_r0_ij=param4_ijparam.w;
+
+      // Start k at this thread's slot, strictly after j in the list
+      int nbor_k=nbor_j-offset_j+offset_k;
+      if (nbor_k<=nbor_j)
+        nbor_k+=n_stride;
+
+      for ( ; nbor_k<nbor_end; nbor_k+=n_stride) {
+        int k=dev_packed[nbor_k];
+        k &= NEIGHMASK;
+
+        numtyp4 kx; fetch4(kx,k,pos_tex);
+        int ktype=kx.w;
+        ktype=map[ktype];
+        int ikparam=elem2param[itype*nelements*nelements+ktype*nelements+ktype];
+        numtyp4 param4_ikparam; fetch4(param4_ikparam,ikparam,param4_tex);
+
+        numtyp delr2x = kx.x-ix.x;
+        numtyp delr2y = kx.y-ix.y;
+        numtyp delr2z = kx.z-ix.z;
+        numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z;
+
+        param_r0sq_ik=param4_ikparam.x;
+        if (rsq2 < param_r0sq_ik) {
+          param_gamma_ik=param4_ikparam.y;
+          param_r0_ik=param4_ikparam.w;
+
+          int ijkparam=elem2param[itype*nelements*nelements+jtype*nelements+ktype];
+          numtyp4 param5_ijkparam; fetch4(param5_ijkparam,ijkparam,param5_tex);
+          param_bigc_ijk=param5_ijkparam.x;
+          param_bigb_ijk=param5_ijkparam.z;
+          param_big2b_ijk=param5_ijkparam.w;
+          param_costheta_ijk=param5_ijkparam.y;
+
+          numtyp fjx, fjy, fjz, fkx, fky, fkz;
+          threebody(delr1x,delr1y,delr1z,eflag,energy);
+
+          // The centre atom receives the negative sum of the two end forces
+          f.x -= fjx + fkx;
+          f.y -= fjy + fky;
+          f.z -= fjz + fkz;
+        }
+      }
+    } // for nbor
+
+    // THIRD is 2/3 and store_answers_p halves again, so evatom==1 credits
+    // one third of each term here; otherwise the full term (2 * 0.5).
+    numtyp pre;
+    if (evatom==1)
+      pre=THIRD;
+    else
+      pre=(numtyp)2.0;
+    energy*=pre;
+    for (int i=0; i<6; i++)
+      virial[i]*=pre;
+
+    store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+                    eflag,vflag,ans,engv);
+
+  } // if ii
+}
+
+// Three-body force on atom i when i is an END atom of a triplet centred on
+// one of its neighbors j.
+//
+// For every neighbor j of i within the ij cutoff, the kernel walks j's own
+// neighbor list (skipping k==i) and, for each k within the jk cutoff, adds
+// the force returned by threebody_half to i. Only forces are produced:
+// energy and virial stay at zero in this kernel.
+//
+// Where j's neighbor list starts depends on whether dev_nbor and dev_packed
+// are the same buffer and on gpu_nbor (see the two branches below).
+//
+// Parameters read here (the *_ik variables hold the j-k pair values):
+//   param4[(i,j,j)] / param4[(j,k,k)] = (r0sq, gamma, -, r0)
+//   param5[(j,i,k)]                   = (bigc, costheta, bigb, big2b)
+// param1..param3 are not read by this kernel.
+//
+// Thread layout: t_per_atom*t_per_atom threads per atom; offset_k uses a bit
+// mask, which presumably assumes t_per_atom is a power of two -- TODO confirm.
+__kernel void k_vashishta_three_end(const __global numtyp4 *restrict x_,
+                             const __global numtyp4 *restrict param1,
+                             const __global numtyp4 *restrict param2,
+                             const __global numtyp4 *restrict param3,
+                             const __global numtyp4 *restrict param4,
+                             const __global numtyp4 *restrict param5,
+                             const __global int *restrict map,
+                             const __global int *restrict elem2param,
+                             const int nelements,
+                             const __global int * dev_nbor,
+                             const __global int * dev_packed,
+                             const __global int * dev_acc,
+                             __global acctyp4 *restrict ans,
+                             __global acctyp *restrict engv,
+                             const int eflag, const int vflag,
+                             const int inum,  const int nbor_pitch,
+                             const int t_per_atom, const int gpu_nbor) {
+  __local int tpa_sq, n_stride;
+  tpa_sq=fast_mul(t_per_atom,t_per_atom);
+  // These names are read implicitly by the threebody_half macro
+  numtyp param_gamma_ij, param_r0sq_ij, param_r0_ij, param_gamma_ik, param_r0sq_ik, param_r0_ik;
+  numtyp param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk;
+
+  int tid, ii, offset;
+  atom_info(tpa_sq,ii,tid,offset);
+
+  acctyp energy=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int i, numj, nbor_j, nbor_end, k_end;
+
+    int offset_j=offset/t_per_atom;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
+              n_stride,nbor_end,nbor_j);
+    int offset_k=tid & (t_per_atom-1);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    itype=map[itype];
+
+    for ( ; nbor_j<nbor_end; nbor_j+=n_stride) {
+      int j=dev_packed[nbor_j];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      jtype=map[jtype];
+
+      // Compute r12 (vector from the centre j to the end atom i)
+      numtyp delr1x = ix.x-jx.x;
+      numtyp delr1y = ix.y-jx.y;
+      numtyp delr1z = ix.z-jx.z;
+      numtyp rsq1 = delr1x*delr1x+delr1y*delr1y+delr1z*delr1z;
+
+      int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];
+      numtyp4 param4_ijparam; fetch4(param4_ijparam,ijparam,param4_tex);
+      param_r0sq_ij = param4_ijparam.x;
+      // Skip at the cutoff too: the jk test below is a strict '<', and at
+      // rsq1 == r0sq the macro's ucl_recip(r1 - r0) could divide by zero.
+      if (rsq1 >= param_r0sq_ij) continue;
+
+      param_gamma_ij=param4_ijparam.y;
+      param_r0_ij = param4_ijparam.w;
+
+      // Locate the neighbor list of j
+      int nbor_k,numk;
+      if (dev_nbor==dev_packed) {
+        if (gpu_nbor) nbor_k=j+nbor_pitch;
+        else nbor_k=dev_acc[j]+nbor_pitch;
+        numk=dev_nbor[nbor_k];
+        nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
+        k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
+        nbor_k+=offset_k;
+      } else {
+        nbor_k=dev_acc[j]+nbor_pitch;
+        numk=dev_nbor[nbor_k];
+        nbor_k+=nbor_pitch;
+        nbor_k=dev_nbor[nbor_k];
+        k_end=nbor_k+numk;
+        nbor_k+=offset_k;
+      }
+
+      for ( ; nbor_k<k_end; nbor_k+=n_stride) {
+        int k=dev_packed[nbor_k];
+        k &= NEIGHMASK;
+
+        // i itself appears in j's list; it cannot be both end atoms
+        if (k == i) continue;
+
+        numtyp4 kx; fetch4(kx,k,pos_tex);
+        int ktype=kx.w;
+        ktype=map[ktype];
+        int ikparam=elem2param[jtype*nelements*nelements+ktype*nelements+ktype]; //jk
+
+        numtyp delr2x = kx.x - jx.x;
+        numtyp delr2y = kx.y - jx.y;
+        numtyp delr2z = kx.z - jx.z;
+        numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z;
+        numtyp4 param4_ikparam; fetch4(param4_ikparam,ikparam,param4_tex);
+        param_r0sq_ik=param4_ikparam.x;
+
+        if (rsq2 < param_r0sq_ik) {
+          param_gamma_ik=param4_ikparam.y;
+          param_r0_ik=param4_ikparam.w;
+
+          int ijkparam=elem2param[jtype*nelements*nelements+itype*nelements+ktype]; //jik
+          numtyp4 param5_ijkparam; fetch4(param5_ijkparam,ijkparam,param5_tex);
+          param_bigc_ijk=param5_ijkparam.x;
+          param_costheta_ijk=param5_ijkparam.y;
+          param_bigb_ijk=param5_ijkparam.z;
+          param_big2b_ijk=param5_ijkparam.w;
+
+          // Force-only evaluation; k_vashishta_three_end_vatom is the
+          // variant that also accumulates energy and virial.
+          numtyp fjx, fjy, fjz;
+          threebody_half(delr1x,delr1y,delr1z);
+
+          f.x += fjx;
+          f.y += fjy;
+          f.z += fjz;
+        }
+      }
+
+    } // for nbor
+    #ifdef THREE_CONCURRENT
+    store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+                  eflag,vflag,ans,engv);
+    #else
+    store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+                    eflag,vflag,ans,engv);
+    #endif
+  } // if ii
+}
+
+// Variant of k_vashishta_three_end that also accumulates energy and virial.
+//
+// For every neighbor j of i within the ij cutoff, the kernel walks j's own
+// neighbor list (skipping k==i) and evaluates the full threebody macro for
+// the triplet centred on j. The force on i (fjx,fjy,fjz) is accumulated;
+// fkx,fky,fkz are computed by the macro but only enter the virial.
+// Energy and virial are scaled by THIRD (2/3) before the store macro;
+// store_answers_p applies a further 0.5.
+//
+// Parameters read here (the *_ik variables hold the j-k pair values):
+//   param4[(i,j,j)] / param4[(j,k,k)] = (r0sq, gamma, -, r0)
+//   param5[(j,i,k)]                   = (bigc, costheta, bigb, big2b)
+// param1..param3 are not read by this kernel.
+//
+// Thread layout: t_per_atom*t_per_atom threads per atom; offset_k uses a bit
+// mask, which presumably assumes t_per_atom is a power of two -- TODO confirm.
+__kernel void k_vashishta_three_end_vatom(const __global numtyp4 *restrict x_,
+                             const __global numtyp4 *restrict param1,
+                             const __global numtyp4 *restrict param2,
+                             const __global numtyp4 *restrict param3,
+                             const __global numtyp4 *restrict param4,
+                             const __global numtyp4 *restrict param5,
+                             const __global int *restrict map,
+                             const __global int *restrict elem2param,
+                             const int nelements,
+                             const __global int * dev_nbor,
+                             const __global int * dev_packed,
+                             const __global int * dev_acc,
+                             __global acctyp4 *restrict ans,
+                             __global acctyp *restrict engv,
+                             const int eflag, const int vflag,
+                             const int inum,  const int nbor_pitch,
+                             const int t_per_atom, const int gpu_nbor) {
+  __local int tpa_sq, n_stride;
+  tpa_sq=fast_mul(t_per_atom,t_per_atom);
+  // These names are read implicitly by the threebody macro
+  numtyp param_gamma_ij, param_r0sq_ij, param_r0_ij, param_gamma_ik, param_r0sq_ik, param_r0_ik;
+  numtyp param_costheta_ijk, param_bigc_ijk, param_bigb_ijk, param_big2b_ijk;
+
+  int tid, ii, offset;
+  atom_info(tpa_sq,ii,tid,offset);
+
+  acctyp energy=(acctyp)0;
+  acctyp4 f;
+  f.x=(acctyp)0; f.y=(acctyp)0; f.z=(acctyp)0;
+  acctyp virial[6];
+  for (int i=0; i<6; i++)
+    virial[i]=(acctyp)0;
+
+  __syncthreads();
+
+  if (ii<inum) {
+    int i, numj, nbor_j, nbor_end, k_end;
+
+    int offset_j=offset/t_per_atom;
+    nbor_info(dev_nbor,dev_packed,nbor_pitch,t_per_atom,ii,offset_j,i,numj,
+              n_stride,nbor_end,nbor_j);
+    int offset_k=tid & (t_per_atom-1);
+
+    numtyp4 ix; fetch4(ix,i,pos_tex); //x_[i];
+    int itype=ix.w;
+    itype=map[itype];
+
+    for ( ; nbor_j<nbor_end; nbor_j+=n_stride) {
+      int j=dev_packed[nbor_j];
+      j &= NEIGHMASK;
+
+      numtyp4 jx; fetch4(jx,j,pos_tex); //x_[j];
+      int jtype=jx.w;
+      jtype=map[jtype];
+
+      // Compute r12 (vector from the centre j to the end atom i)
+      numtyp delr1x = ix.x-jx.x;
+      numtyp delr1y = ix.y-jx.y;
+      numtyp delr1z = ix.z-jx.z;
+      numtyp rsq1 = delr1x*delr1x+delr1y*delr1y+delr1z*delr1z;
+
+      int ijparam=elem2param[itype*nelements*nelements+jtype*nelements+jtype];
+      numtyp4 param4_ijparam; fetch4(param4_ijparam,ijparam,param4_tex);
+      param_r0sq_ij=param4_ijparam.x;
+      // Skip at the cutoff too: the jk test below is a strict '<', and at
+      // rsq1 == r0sq the macro's ucl_recip(r1 - r0) could divide by zero.
+      if (rsq1 >= param_r0sq_ij) continue;
+
+      param_gamma_ij=param4_ijparam.y;
+      param_r0_ij=param4_ijparam.w;
+
+      // Locate the neighbor list of j
+      int nbor_k,numk;
+      if (dev_nbor==dev_packed) {
+        if (gpu_nbor) nbor_k=j+nbor_pitch;
+        else nbor_k=dev_acc[j]+nbor_pitch;
+        numk=dev_nbor[nbor_k];
+        nbor_k+=nbor_pitch+fast_mul(j,t_per_atom-1);
+        k_end=nbor_k+fast_mul(numk/t_per_atom,n_stride)+(numk & (t_per_atom-1));
+        nbor_k+=offset_k;
+      } else {
+        nbor_k=dev_acc[j]+nbor_pitch;
+        numk=dev_nbor[nbor_k];
+        nbor_k+=nbor_pitch;
+        nbor_k=dev_nbor[nbor_k];
+        k_end=nbor_k+numk;
+        nbor_k+=offset_k;
+      }
+
+      for ( ; nbor_k<k_end; nbor_k+=n_stride) {
+        int k=dev_packed[nbor_k];
+        k &= NEIGHMASK;
+
+        // i itself appears in j's list; it cannot be both end atoms
+        if (k == i) continue;
+
+        numtyp4 kx; fetch4(kx,k,pos_tex);
+        int ktype=kx.w;
+        ktype=map[ktype];
+        int ikparam=elem2param[jtype*nelements*nelements+ktype*nelements+ktype]; // jk
+        numtyp4 param4_ikparam; fetch4(param4_ikparam,ikparam,param4_tex);
+
+        numtyp delr2x = kx.x - jx.x;
+        numtyp delr2y = kx.y - jx.y;
+        numtyp delr2z = kx.z - jx.z;
+        numtyp rsq2 = delr2x*delr2x + delr2y*delr2y + delr2z*delr2z;
+        param_r0sq_ik=param4_ikparam.x;
+
+        if (rsq2 < param_r0sq_ik) {
+          param_gamma_ik=param4_ikparam.y;
+          param_r0_ik=param4_ikparam.w;
+
+          int ijkparam=elem2param[jtype*nelements*nelements+itype*nelements+ktype]; // jik
+          numtyp4 param5_ijkparam; fetch4(param5_ijkparam,ijkparam,param5_tex);
+          param_bigc_ijk=param5_ijkparam.x;
+          param_costheta_ijk=param5_ijkparam.y;
+          param_bigb_ijk=param5_ijkparam.z;
+          param_big2b_ijk=param5_ijkparam.w;
+
+          numtyp fjx, fjy, fjz, fkx, fky, fkz;
+          threebody(delr1x,delr1y,delr1z,eflag,energy);
+
+          f.x += fjx;
+          f.y += fjy;
+          f.z += fjz;
+        }
+      }
+
+    } // for nbor
+    // THIRD is 2/3; together with the 0.5 in store_answers_p each term
+    // contributes one third of its energy and virial here.
+    energy*=THIRD;
+    for (int i=0; i<6; i++)
+      virial[i]*=THIRD;
+    #ifdef THREE_CONCURRENT
+    store_answers(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+                  eflag,vflag,ans,engv);
+    #else
+    store_answers_p(f,energy,virial,ii,inum,tid,tpa_sq,offset,
+                    eflag,vflag,ans,engv);
+    #endif
+  } // if ii
+}
+
--- a/lib/gpu/lal_vashishta.h
+++ b/lib/gpu/lal_vashishta.h
@ -0,0 +1,97 @@
+/***************************************************************************
+                                vashishta.h
+                             -------------------
+                            Anders Hafreager (UiO)
+
+  Class for acceleration of the vashishta pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : Mon June 12, 2017
+    email                : andershaf@gmail.com
+ ***************************************************************************/
+
+#ifndef LAL_VASHISHTA_H
+#define LAL_VASHISHTA_H
+
+#include "lal_base_three.h"
+
+namespace LAMMPS_AL {
+
+/// Class for acceleration of the vashishta pair style, built on BaseThree.
+/** Holds the packed per-parameter-set constants (param1..param5), the
+  * type/element lookup tables and the texture handles read by the kernels. **/
+template <class numtyp, class acctyp>
+class Vashishta : public BaseThree<numtyp, acctyp> {
+ public:
+  Vashishta();
+  ~Vashishta();
+
+  /// Clear any previous data and set up for a new LAMMPS run
+  /** \param max_nbors initial number of rows in the neighbor matrix
+    * \param cell_size cutoff + skin
+    * \param gpu_split fraction of particles handled by device
+    * \param host_map presumably maps atom type -> element index (the kernel
+    *        does ktype=map[ktype]) -- TODO confirm against caller
+    * \param host_elem2param presumably element triplet -> parameter-set index;
+    *        the kernel reads it flattened as [a*nelements*nelements+b*nelements+c]
+    * \param nparams presumably the length of the per-parameter-set arrays
+    *        (cutsq ... bigc) -- TODO confirm
+    *
+    * Returns:
+    * -  0 if successful
+    * - -1 if fix gpu not found
+    * - -3 if there is an out of memory error
+    * - -4 if the GPU library was not compiled for GPU
+    * - -5 Double precision is not supported on card **/
+  int init(const int ntypes, const int nlocal, const int nall, const int max_nbors,
+           const double cell_size, const double gpu_split, FILE *screen,
+           int* host_map, const int nelements, int*** host_elem2param, const int nparams,
+           const double* cutsq, const double* r0, 
+           const double* gamma, const double* eta,
+           const double* lam1inv, const double* lam4inv,
+           const double* zizj, const double* mbigd,
+           const double* dvrc, const double* big6w, 
+           const double* heta, const double* bigh,
+           const double* bigw, const double* c0,
+           const double* costheta, const double* bigb,
+           const double* big2b, const double* bigc);
+
+  /// Clear all host and device data
+  /** \note This is called at the beginning of the init() routine **/
+  void clear();
+
+  /// Returns memory usage on device per atom
+  int bytes_per_atom(const int max_nbors) const;
+
+  /// Total host memory used by library for pair style
+  double host_memory_usage() const;
+
+  // --------------------------- TYPE DATA --------------------------
+
+  /// If atom type constants fit in shared memory, use fast kernels
+  bool shared_types;
+
+  /// Number of atom types
+  int _lj_types;
+
+  /// param1.x = eta, param1.y = lam1inv, param1.z = lam4inv, param1.w = zizj
+  UCL_D_Vec<numtyp4> param1;
+  /// param2.x = mbigd, param2.y = dvrc, param2.z = big6w, param2.w = heta
+  UCL_D_Vec<numtyp4> param2;
+  /// param3.x = bigh, param3.y = bigw, param3.z = dvrc, param3.w = c0
+  /// NOTE(review): dvrc is listed for both param2.y and param3.z -- confirm
+  UCL_D_Vec<numtyp4> param3;
+  /// param4.x = r0sq, param4.y = gamma, param4.z = cutsq, param4.w = r0
+  UCL_D_Vec<numtyp4> param4;
+  /// param5.x = bigc, param5.y = costheta, param5.z = bigb, param5.w = big2b
+  UCL_D_Vec<numtyp4> param5;
+
+  /// Flattened lookup table; the kernels index it with three element indices
+  /// as [a*nelements*nelements + b*nelements + c] to get a parameter-set index
+  UCL_D_Vec<int> elem2param;
+  /// Per-atom-type lookup applied to the type read from position .w in the kernels
+  UCL_D_Vec<int> map;
+  /// Presumably the nparams / nelements values passed to init() -- TODO confirm
+  int _nparams,_nelements;
+
+  /// Texture handles, one per paramN vector (the kernels fetch via paramN_tex)
+  UCL_Texture param1_tex, param2_tex, param3_tex, param4_tex, param5_tex;
+
+ private:
+  /// NOTE(review): presumably set once init() has allocated data -- confirm
+  bool _allocated;
+  /// NOTE(review): presumably the BaseThree hook that launches the force
+  /// kernels for one step -- confirm against lal_base_three.h
+  void loop(const bool _eflag, const bool _vflag, const int evatom);
+
+};
+
+}
+
+#endif
+
--- a/lib/gpu/lal_vashishta_ext.cpp
+++ b/lib/gpu/lal_vashishta_ext.cpp
@ -0,0 +1,134 @@
+/***************************************************************************
+                              vashishta_ext.cpp
+                             -------------------
+                            Anders Hafreager (UiO)
+
+  Class for acceleration of the vashishta pair style.
+
+ __________________________________________________________________________
+    This file is part of the LAMMPS Accelerator Library (LAMMPS_AL)
+ __________________________________________________________________________
+
+    begin                : Mon June 12, 2017
+    email                : andershaf@gmail.com
+ ***************************************************************************/
+
+#include <iostream>
+#include <cassert>
+#include <math.h>
+
+#include "lal_vashishta.h"
+using namespace LAMMPS_AL;
+
+// File-local library instance used by every vashishta_gpu_* wrapper below.
+static Vashishta<PRECISION,ACC_PRECISION> VashishtaMF;
+
+// ---------------------------------------------------------------------------
+// Allocate memory on host and device and copy constants to device.
+//
+// Clears the library instance, reads the device configuration, then calls
+// Vashishta::init() first on world rank 0 and afterwards once per rank that
+// shares a GPU, with barriers in between. Progress is printed to `screen`
+// by replica rank 0 when `screen` is non-null.
+//
+// gpu_mode is an output: it receives device->gpu_mode().
+// Returns -8 if the particle split is not exactly 1.0 (host/device split is
+// disabled for this style); otherwise the value init() returned on this
+// process, or 0 if init() was not invoked on this process.
+// NOTE(review): max_nbors is not forwarded; init() always receives 500 --
+// confirm this is intended.
+// ---------------------------------------------------------------------------
+int vashishta_gpu_init(const int ntypes, const int inum, const int nall, const int max_nbors,
+                const double cell_size, int &gpu_mode, FILE *screen,
+                int* host_map, const int nelements, int*** host_elem2param, const int nparams,
+                const double* cutsq, const double* r0,
+                const double* gamma, const double* eta,
+                const double* lam1inv, const double* lam4inv,
+                const double* zizj, const double* mbigd,
+                const double* dvrc, const double* big6w,
+                const double* heta, const double* bigh,
+                const double* bigw, const double* c0,
+                const double* costheta, const double* bigb,
+                const double* big2b, const double* bigc)
+{
+  VashishtaMF.clear();
+
+  // Snapshot the device configuration (same query order as before).
+  gpu_mode=VashishtaMF.device->gpu_mode();
+  double split=VashishtaMF.device->particle_split();
+  int dev_first=VashishtaMF.device->first_device();
+  int dev_last=VashishtaMF.device->last_device();
+  int my_world_rank=VashishtaMF.device->world_me();
+  int my_gpu_rank=VashishtaMF.device->gpu_rank();
+  int ranks_per_gpu=VashishtaMF.device->procs_per_gpu();
+
+  // disable host/device split for now
+  if (split != 1.0) {
+    return -8;
+  }
+
+  VashishtaMF.device->init_message(screen,"vashishta/gpu",dev_first,dev_last);
+
+  // Only replica rank 0 with a valid stream reports progress.
+  const bool verbose=(VashishtaMF.device->replica_me()==0 && screen);
+
+  if (verbose) {
+    fprintf(screen,"Initializing Device and compiling on process 0...");
+    fflush(screen);
+  }
+
+  // World rank 0 initializes first; every rank then waits at the barrier.
+  int status=0;
+  if (my_world_rank==0) {
+    status=VashishtaMF.init(ntypes, inum, nall, 500, cell_size, split, screen,
+                            host_map, nelements, host_elem2param, nparams,
+                            cutsq, r0, gamma, eta, lam1inv, lam4inv, zizj,
+                            mbigd, dvrc, big6w, heta, bigh, bigw, c0,
+                            costheta, bigb, big2b, bigc);
+  }
+
+  VashishtaMF.device->world_barrier();
+  if (verbose) {
+    fprintf(screen,"Done.\n");
+  }
+
+  // Remaining ranks take turns, one GPU-rank slot per iteration.
+  for (int turn=0; turn<ranks_per_gpu; turn++) {
+    if (verbose) {
+      if (dev_last==dev_first) {
+        fprintf(screen,"Initializing Device %d on core %d...",dev_first,turn);
+      } else {
+        fprintf(screen,"Initializing Devices %d-%d on core %d...",dev_first,
+                dev_last,turn);
+      }
+      fflush(screen);
+    }
+
+    if (my_gpu_rank==turn && my_world_rank!=0) {
+      status=VashishtaMF.init(ntypes, inum, nall, 500, cell_size, split, screen,
+                              host_map, nelements, host_elem2param, nparams,
+                              cutsq, r0, gamma, eta, lam1inv, lam4inv, zizj,
+                              mbigd, dvrc, big6w, heta, bigh, bigw, c0,
+                              costheta, bigb, big2b, bigc);
+    }
+
+    VashishtaMF.device->gpu_barrier();
+    if (verbose) {
+      fprintf(screen,"Done.\n");
+    }
+  }
+  if (verbose) {
+    fprintf(screen,"\n");
+  }
+
+  if (status!=0) {
+    return status;
+  }
+
+  VashishtaMF.estimate_gpu_overhead();
+  return status;
+}
+
+// Clear all host and device data held by the library instance.
+void vashishta_gpu_clear()
+{
+  VashishtaMF.clear();
+}
+
+// Forwards every argument unchanged to the Vashishta::compute() overload that
+// takes sub-domain bounds, tags and special lists, and returns its result.
+// NOTE(review): presumably the variant used when neighbor lists are built by
+// the library (ilist/jnum are outputs) -- confirm against the caller.
+int ** vashishta_gpu_compute_n(const int ago, const int inum_full,
+                        const int nall, double **host_x, int *host_type,
+                        double *sublo, double *subhi, tagint *tag, int **nspecial,
+                        tagint **special, const bool eflag, const bool vflag,
+                        const bool eatom, const bool vatom, int &host_start,
+                        int **ilist, int **jnum, const double cpu_time,
+                        bool &success)
+{
+  int **result=VashishtaMF.compute(ago, inum_full, nall, host_x, host_type,
+                                   sublo, subhi, tag, nspecial, special,
+                                   eflag, vflag, eatom, vatom, host_start,
+                                   ilist, jnum, cpu_time, success);
+  return result;
+}
+
+// Forwards every argument unchanged to the Vashishta::compute() overload that
+// takes caller-supplied neighbor data (ilist, numj, firstneigh); any return
+// value of that call is discarded.
+void vashishta_gpu_compute(const int ago, const int nlocal, const int nall,
+                    const int nlist, double **host_x, int *host_type,
+                    int *ilist, int *numj, int **firstneigh, const bool eflag,
+                    const bool vflag, const bool eatom, const bool vatom,
+                    int &host_start, const double cpu_time, bool &success)
+{
+  VashishtaMF.compute(ago, nlocal, nall, nlist, host_x, host_type,
+                      ilist, numj, firstneigh,
+                      eflag, vflag, eatom, vatom,
+                      host_start, cpu_time, success);
+}
+
+// Total host memory used by the library instance for this pair style.
+double vashishta_gpu_bytes()
+{
+  const double host_bytes=VashishtaMF.host_memory_usage();
+  return host_bytes;
+}
+
+
--- a/lib/kokkos/CHANGELOG.md
+++ b/lib/kokkos/CHANGELOG.md
@ -1,5 +1,53 @@
 # Change Log

+
+## [2.03.05](https://github.com/kokkos/kokkos/tree/2.03.05) (2017-05-27)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.00...2.03.05)
+
+**Implemented enhancements:**
+
+- Harmonize Custom Reductions over nesting levels [\#802](https://github.com/kokkos/kokkos/issues/802)
+- Prevent users directly including KokkosCore\_config.h [\#815](https://github.com/kokkos/kokkos/issues/815)
+- DualView aborts on concurrent host/device modify \(in debug mode\) [\#814](https://github.com/kokkos/kokkos/issues/814)
+- Abort when running on a NVIDIA CC5.0 or higher architecture with code compiled for CC \< 5.0 [\#813](https://github.com/kokkos/kokkos/issues/813)
+- Add "name" function to ExecSpaces [\#806](https://github.com/kokkos/kokkos/issues/806)
+- Allow null Future in task spawn dependences [\#795](https://github.com/kokkos/kokkos/issues/795)
+- Add Unit Tests for Kokkos::complex [\#785](https://github.com/kokkos/kokkos/issues/785)
+- Add pow function for Kokkos::complex [\#784](https://github.com/kokkos/kokkos/issues/784)
+- Square root of a complex [\#729](https://github.com/kokkos/kokkos/issues/729)
+- Command line processing of --threads argument prevents users from having any commandline arguments starting with --threads [\#760](https://github.com/kokkos/kokkos/issues/760)
+- Protected deprecated API with appropriate macro [\#756](https://github.com/kokkos/kokkos/issues/756)
+- Allow task scheduler memory pool to be used by tasks [\#747](https://github.com/kokkos/kokkos/issues/747)
+- View bounds checking on host-side performance: constructing a std::string [\#723](https://github.com/kokkos/kokkos/issues/723)
+- Add check for AppleClang as compiler distinct from check for Clang. [\#705](https://github.com/kokkos/kokkos/issues/705)
+- Uninclude source files for specific configurations to prevent link warning. [\#701](https://github.com/kokkos/kokkos/issues/701)
+- Add --small option to snapshot script [\#697](https://github.com/kokkos/kokkos/issues/697)
+- CMake Standalone Support [\#674](https://github.com/kokkos/kokkos/issues/674)
+- CMake build unit test and install [\#808](https://github.com/kokkos/kokkos/issues/808)
+- CMake: Fix having kokkos as a subdirectory in a pure cmake project [\#629](https://github.com/kokkos/kokkos/issues/629)
+- Tribits macro assumes build directory is in top level source directory [\#654](https://github.com/kokkos/kokkos/issues/654)
+- Use bin/nvcc\_wrapper, not config/nvcc\_wrapper [\#562](https://github.com/kokkos/kokkos/issues/562)
+- Allow MemoryPool::allocate\(\) to be called from multiple threads per warp. [\#487](https://github.com/kokkos/kokkos/issues/487)
+- Allow MemoryPool::allocate\(\) to be called from multiple threads per warp. [\#487](https://github.com/kokkos/kokkos/issues/487)
+- Move OpenMP 4.5 OpenMPTarget backend into Develop [\#456](https://github.com/kokkos/kokkos/issues/456)
+- Testing on ARM testbed [\#288](https://github.com/kokkos/kokkos/issues/288)
+
+**Fixed bugs:**
+
+- Fix label in OpenMP parallel\_reduce verify\_initialized [\#834](https://github.com/kokkos/kokkos/issues/834)
+- TeamScratch Level 1 on Cuda hangs [\#820](https://github.com/kokkos/kokkos/issues/820)
+- \[bug\] memory pool. [\#786](https://github.com/kokkos/kokkos/issues/786)
+- Some Reduction Tests fail on Intel 18 with aggressive vectorization on [\#774](https://github.com/kokkos/kokkos/issues/774)
+- Error copying dynamic view on copy of memory pool [\#773](https://github.com/kokkos/kokkos/issues/773)
+- CUDA stack overflow with TaskDAG test [\#758](https://github.com/kokkos/kokkos/issues/758)
+- ThreadVectorRange Customized Reduction Bug [\#739](https://github.com/kokkos/kokkos/issues/739)
+- set\_scratch\_size overflows  [\#726](https://github.com/kokkos/kokkos/issues/726)
+- Get wrong results for compiler checks in Makefile on OS X. [\#706](https://github.com/kokkos/kokkos/issues/706)
+- Fix check if multiple host architectures enabled. [\#702](https://github.com/kokkos/kokkos/issues/702)
+- Threads Backend Does not Pass on Cray Compilers [\#609](https://github.com/kokkos/kokkos/issues/609)
+- Rare bug in memory pool where allocation can finish on superblock in empty state [\#452](https://github.com/kokkos/kokkos/issues/452)
+- LDFLAGS in core/unit\_test/Makefile: potential "undefined reference" to pthread lib [\#148](https://github.com/kokkos/kokkos/issues/148)
+
 ## [2.03.00](https://github.com/kokkos/kokkos/tree/2.03.00) (2017-04-25)
 [Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.15...2.03.00)

--- a/lib/kokkos/CMakeLists.txt
+++ b/lib/kokkos/CMakeLists.txt
@ -5,11 +5,12 @@ ELSE()
 ENDIF()

 IF(NOT KOKKOS_HAS_TRILINOS)
-  CMAKE_MINIMUM_REQUIRED(VERSION 2.8.11 FATAL_ERROR)
-  INCLUDE(cmake/tribits.cmake)
-  SET(CMAKE_CXX_STANDARD 11)
-ENDIF()
+  cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
+  project(Kokkos CXX)

+  INCLUDE(cmake/kokkos.cmake)
+ELSE()
+#------------------------------------------------------------------------------
 #
 # A) Forward declare the package so that certain options are also defined for
 # subpackages
@ -17,14 +18,13 @@ ENDIF()

 TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)

+
 #------------------------------------------------------------------------------
 #
 # B) Define the common options for Kokkos first so they can be used by
 # subpackages as well.
 #

-
-
 # mfh 01 Aug 2016: See Issue #61:
 #
 # https://github.com/kokkos/kokkos/issues/61
@ -83,10 +83,10 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
  )

 ASSERT_DEFINED(TPL_ENABLE_Pthread)
-IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
+IF(Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
  MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF).  This is not allowed.  Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
-ENDIF ()
-IF (NOT TPL_ENABLE_Pthread)
+ENDIF()
+IF(NOT TPL_ENABLE_Pthread)
  ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0)
 ENDIF()

@ -98,12 +98,13 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
  )

 TRIBITS_ADD_OPTION_AND_DEFINE(
-  Kokkos_ENABLE_Qthreads
+  Kokkos_ENABLE_QTHREAD
  KOKKOS_HAVE_QTHREADS
  "Enable Qthreads support in Kokkos."
-  "${TPL_ENABLE_QTHREADS}"
+  "${TPL_ENABLE_QTHREAD}"
  )

+# TODO: No longer an option in Kokkos.  Needs to be removed.
 TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_CXX11
  KOKKOS_HAVE_CXX11
@ -118,6 +119,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
  "${TPL_ENABLE_HWLOC}"
  )

+# TODO: This is currently not used in Kokkos.  Should it be removed?
 TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_MPI
  KOKKOS_HAVE_MPI
@ -154,13 +156,27 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
  "${Kokkos_ENABLE_Debug_Bounds_Check_DEFAULT}"
  )

+TRIBITS_ADD_OPTION_AND_DEFINE(
+  Kokkos_ENABLE_Debug_DualView_Modify_Check
+  KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK
+  "Enable abort when Kokkos::DualView modified on host and device without sync."
+  "${Kokkos_ENABLE_DEBUG}"
+  )
+
 TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Profiling
-  KOKKOS_ENABLE_PROFILING_INTERNAL
+  KOKKOS_ENABLE_PROFILING
  "Enable KokkosP profiling support for kernel data collections."
  "${TPL_ENABLE_DLlib}"
  )

+TRIBITS_ADD_OPTION_AND_DEFINE(
+  Kokkos_ENABLE_Profiling_Load_Print
+  KOKKOS_ENABLE_PROFILING_LOAD_PRINT
+  "Print to standard output which profiling library was loaded."
+  OFF
+  )
+
 # placeholder for future device...
 TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Winthread
@ -169,6 +185,7 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
  "${TPL_ENABLE_Winthread}"
  )

+# TODO: No longer an option in Kokkos.  Needs to be removed.
 # use new/old View
 TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_USING_DEPRECATED_VIEW
@ -177,12 +194,12 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
  OFF
  )

+
 #------------------------------------------------------------------------------
 #
 # C) Install Kokkos' executable scripts
 #

-
 # nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler.
 # Kokkos needs nvcc_wrapper in order to build.  Other libraries and
 # executables also need nvcc_wrapper.  Thus, we need to install it.
@ -199,6 +216,8 @@ INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin)

 TRIBITS_PROCESS_SUBPACKAGES()

+
+#------------------------------------------------------------------------------
 #
 # E) If Kokkos itself is enabled, process the Kokkos package
 #
@ -213,3 +232,4 @@ TRIBITS_EXCLUDE_FILES(
  )

 TRIBITS_PACKAGE_POSTPROCESS()
+ENDIF()
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@ -35,23 +35,26 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "e
 # Check for advanced settings.
 KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "aggressive_vectorization" | wc -l))
 KOKKOS_INTERNAL_DISABLE_PROFILING := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_profiling" | wc -l))
+KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "disable_dualview_modify_check" | wc -l))
+KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(strip $(shell echo $(KOKKOS_OPTIONS) | grep "enable_profile_load_print" | wc -l))
 KOKKOS_INTERNAL_CUDA_USE_LDG := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "use_ldg" | wc -l))
 KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "force_uvm" | wc -l))
 KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
 KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))

 # Check for Kokkos Host Execution Spaces one of which must be on.
-KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMP | wc -l))
+KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
+KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
 KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
 KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
 KOKKOS_INTERNAL_USE_SERIAL := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Serial | wc -l))

 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 0)
-ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
-ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0)
-  KOKKOS_INTERNAL_USE_SERIAL := 1
-endif
-endif
+  ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
+    ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 0)
+      KOKKOS_INTERNAL_USE_SERIAL := 1
+    endif
+  endif
 endif

 # Check for other Execution Spaces.
@ -64,24 +67,25 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 endif

 # Check OS.
-KOKKOS_OS                      := $(shell uname -s)
-KOKKOS_INTERNAL_OS_CYGWIN      := $(shell uname -s | grep CYGWIN | wc -l)
-KOKKOS_INTERNAL_OS_LINUX       := $(shell uname -s | grep Linux  | wc -l)
-KOKKOS_INTERNAL_OS_DARWIN      := $(shell uname -s | grep Darwin | wc -l)
+KOKKOS_OS                      := $(strip $(shell uname -s))
+KOKKOS_INTERNAL_OS_CYGWIN      := $(strip $(shell uname -s | grep CYGWIN | wc -l))
+KOKKOS_INTERNAL_OS_LINUX       := $(strip $(shell uname -s | grep Linux  | wc -l))
+KOKKOS_INTERNAL_OS_DARWIN      := $(strip $(shell uname -s | grep Darwin | wc -l))

 # Check compiler.
-KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version       2>&1 | grep "Intel Corporation" | wc -l)
-KOKKOS_INTERNAL_COMPILER_PGI   := $(shell $(CXX) --version       2>&1 | grep PGI                 | wc -l)
-KOKKOS_INTERNAL_COMPILER_XL    := $(shell $(CXX) -qversion       2>&1 | grep XL                  | wc -l)
-KOKKOS_INTERNAL_COMPILER_CRAY  := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-"               | wc -l)
-KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(CXX) --version       2>&1 | grep "nvcc"              | wc -l)
+KOKKOS_INTERNAL_COMPILER_INTEL       := $(strip $(shell $(CXX) --version       2>&1 | grep "Intel Corporation" | wc -l))
+KOKKOS_INTERNAL_COMPILER_PGI         := $(strip $(shell $(CXX) --version       2>&1 | grep PGI                 | wc -l))
+KOKKOS_INTERNAL_COMPILER_XL          := $(strip $(shell $(CXX) -qversion       2>&1 | grep XL                  | wc -l))
+KOKKOS_INTERNAL_COMPILER_CRAY        := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-"               | wc -l))
+KOKKOS_INTERNAL_COMPILER_NVCC        := $(strip $(shell $(CXX) --version       2>&1 | grep nvcc                | wc -l))
+KOKKOS_INTERNAL_COMPILER_CLANG       := $(strip $(shell $(CXX) --version       2>&1 | grep clang               | wc -l))
+KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version       2>&1 | grep "apple-darwin"      | wc -l))
 ifneq ($(OMPI_CXX),)
-  KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(OMPI_CXX) --version   2>&1 | grep "nvcc" | wc -l)
+  KOKKOS_INTERNAL_COMPILER_NVCC  := $(strip $(shell $(OMPI_CXX) --version   2>&1 | grep "nvcc" | wc -l))
 endif
 ifneq ($(MPICH_CXX),)
-  KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(MPICH_CXX) --version  2>&1 | grep "nvcc" | wc -l)
+  KOKKOS_INTERNAL_COMPILER_NVCC  := $(strip $(shell $(MPICH_CXX) --version  2>&1 | grep "nvcc" | wc -l))
 endif
-KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version       2>&1 | grep "clang"             | wc -l)

 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
  KOKKOS_INTERNAL_COMPILER_CLANG = 1
@ -90,6 +94,11 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2)
  KOKKOS_INTERNAL_COMPILER_XL = 1
 endif

+# Apple Clang passes both clang and apple clang tests, so turn off clang.
+ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
+  KOKKOS_INTERNAL_COMPILER_CLANG = 0
+endif
+
 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')

@ -97,29 +106,43 @@ ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
      $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
    endif
+
    KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
  endif
 endif

+# Set OpenMP flags.
 ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -mp
 else
  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
-      KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
+      KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
    else
-      ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-        # OpenMP is turned on by default in Cray compiler environment.
-        KOKKOS_INTERNAL_OPENMP_FLAG :=
+      ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
+        KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
      else
-        KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
+        ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+          # OpenMP is turned on by default in Cray compiler environment.
+          KOKKOS_INTERNAL_OPENMP_FLAG :=
+        else
+          KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
+        endif
      endif
    endif
  endif
 endif
+ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
+  KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_IBM_XL_OMP45_WORKAROUND -qsmp=omp -qoffload -qnoeh
+else
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    KOKKOS_INTERNAL_OPENMPTARGET_FLAG := -DKOKKOS_BUG_WORKAROUND_IBM_CLANG_OMP45_VIEW_INIT -fopenmp-implicit-declare-target -fopenmp-targets=nvptx64-nvidia-cuda -fopenmp -fopenmp=libomp
+  endif
+endif

+# Set C++11 flags.
 ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_CXX11_FLAG := --c++11
 else
@ -146,7 +169,7 @@ KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX |
 KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))

 # NVIDIA based.
-NVCC_WRAPPER := $(KOKKOS_PATH)/config/nvcc_wrapper
+NVCC_WRAPPER := $(KOKKOS_PATH)/bin/nvcc_wrapper
 KOKKOS_INTERNAL_USE_ARCH_KEPLER30 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler30 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_KEPLER32 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler32 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_KEPLER35 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kepler35 | wc -l))
@ -180,10 +203,20 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
                                                        + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 1)
+  ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+      KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
+      CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
+      KOKKOS_INTERNAL_OPENMPTARGET_FLAG := $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG) --cuda-path=$(CUDA_PATH)
+    endif
+  endif
+endif
 # ARM based.
 KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc))

 # IBM based.
 KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
@ -206,8 +239,11 @@ KOKKOS_INTERNAL_USE_ISA_X86_64    := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_
 KOKKOS_INTERNAL_USE_ISA_KNC       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))

+# Decide whether we can support transactional memory
+KOKKOS_INTERNAL_USE_TM            := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
+
 # Incompatible flags?
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
+KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC)+$(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_ARM)>1" | bc ))
 KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -240,12 +276,22 @@ tmp := $(shell echo "Makefile constructed configuration:" >> KokkosCore_config.t
 tmp := $(shell date >> KokkosCore_config.tmp)
 tmp := $(shell echo "----------------------------------------------*/" >> KokkosCore_config.tmp)

+tmp := $(shell echo '\#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)' >> KokkosCore_config.tmp)
+tmp := $(shell echo '\#error "Do not include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."' >> KokkosCore_config.tmp)
+tmp := $(shell echo '\#else' >> KokkosCore_config.tmp)
+tmp := $(shell echo '\#define KOKKOS_CORE_CONFIG_H' >> KokkosCore_config.tmp)
+tmp := $(shell echo '\#endif' >> KokkosCore_config.tmp)
+
 tmp := $(shell echo "/* Execution Spaces */" >> KokkosCore_config.tmp)

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
+        tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
  tmp := $(shell echo '\#define KOKKOS_HAVE_OPENMP 1' >> KokkosCore_config.tmp)
 endif
@ -262,6 +308,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
  tmp := $(shell echo "\#define KOKKOS_HAVE_SERIAL 1" >> KokkosCore_config.tmp )
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_TM), 1)
+  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
+  tmp := $(shell echo "\#define KOKKOS_ENABLE_TM" >> KokkosCore_config.tmp )
+  tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
  tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
  tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
@ -293,13 +345,21 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
-ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-  KOKKOS_CXXFLAGS += -lineinfo
-endif
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+    KOKKOS_CXXFLAGS += -lineinfo
+  endif
+
  KOKKOS_CXXFLAGS += -g
  KOKKOS_LDFLAGS += -g -ldl
  tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_BOUNDS_CHECK 1" >> KokkosCore_config.tmp )
  tmp := $(shell echo "\#define KOKKOS_HAVE_DEBUG 1" >> KokkosCore_config.tmp )
+  ifeq ($(KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK), 0)
+    tmp := $(shell echo "\#define KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK 1" >> KokkosCore_config.tmp )
+  endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
+  tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING_LOAD_PRINT 1" >> KokkosCore_config.tmp )
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
@ -311,8 +371,6 @@ endif

 ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
  tmp := $(shell echo "\#define KOKKOS_USE_LIBRT 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define PREC_TIMER 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOSP_ENABLE_RTLIB 1" >> KokkosCore_config.tmp )
  KOKKOS_LIBS += -lrt
 endif

@ -323,8 +381,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
  tmp := $(shell echo "\#define KOKKOS_HAVE_HBWSPACE 1" >> KokkosCore_config.tmp )
 endif

-ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1)
-  tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING 0" >> KokkosCore_config.tmp )
+ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
+  tmp := $(shell echo "\#define KOKKOS_ENABLE_PROFILING" >> KokkosCore_config.tmp )
 endif

 tmp := $(shell echo "/* Optimization Settings */" >> KokkosCore_config.tmp)
@ -336,39 +394,44 @@ endif
 tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
+    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
+  else
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+      tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
+    endif
+  endif

-ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
-  tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
-endif
+  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
+    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
+  endif

-ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
-  tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
-endif
+  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
+    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
+    KOKKOS_CXXFLAGS += --relocatable-device-code=true
+    KOKKOS_LDFLAGS += --relocatable-device-code=true
+  endif

-ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
-  tmp := $(shell echo "\#define KOKKOS_CUDA_USE_RELOCATABLE_DEVICE_CODE 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += --relocatable-device-code=true
-  KOKKOS_LDFLAGS += --relocatable-device-code=true
-endif
+  ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+      ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
+        tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
+        KOKKOS_CXXFLAGS += -expt-extended-lambda
+      else
+        $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
+      endif
+    endif

-ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
      tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
-      KOKKOS_CXXFLAGS += -expt-extended-lambda
-    else
-      $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
    endif
  endif

  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_CUDA_CLANG_WORKAROUND" >> KokkosCore_config.tmp )
  endif
 endif

-endif
-
 # Add Architecture flags.

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
@ -469,7 +532,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER9), 1)
  endif
 endif

-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_HSW), 1)
  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )

  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
@ -491,6 +554,28 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
  endif
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_BDW), 1)
+  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX2 1" >> KokkosCore_config.tmp )
+
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+    KOKKOS_CXXFLAGS += -xCORE-AVX2
+    KOKKOS_LDFLAGS  += -xCORE-AVX2
+  else
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+
+    else
+      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+        KOKKOS_CXXFLAGS += -tp=haswell
+        KOKKOS_LDFLAGS  += -tp=haswell
+      else
+        # Assume that this is really a GNU compiler.
+        KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2 -mrtm
+        KOKKOS_LDFLAGS  += -march=core-avx2 -mtune=core-avx2 -mrtm
+      endif
+    endif
+  endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
  tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512MIC 1" >> KokkosCore_config.tmp )

@ -501,12 +586,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)

    else
-       ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+      ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)

      else
        # Assume that this is really a GNU compiler.
-        KOKKOS_CXXFLAGS += -march=knl
-        KOKKOS_LDFLAGS  += -march=knl
+        KOKKOS_CXXFLAGS += -march=knl -mtune=knl
+        KOKKOS_LDFLAGS  += -march=knl -mtune=knl
      endif
    endif
  endif
@ -526,8 +611,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)

      else
        # Assume that this is really a GNU compiler.
-        KOKKOS_CXXFLAGS += -march=skylake-avx512
-        KOKKOS_LDFLAGS  += -march=skylake-avx512
+        KOKKOS_CXXFLAGS += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
+        KOKKOS_LDFLAGS  += -march=skylake-avx512 -mtune=skylake-avx512 -mrtm
      endif
    endif
  endif
@ -541,70 +626,67 @@ endif

 # Figure out the architecture flag for Cuda.
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
+  endif
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
+    KOKKOS_CXXFLAGS += -x cuda
+  endif

-ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
-  KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch
-endif
-ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
-  KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=--cuda-gpu-arch
-  KOKKOS_CXXFLAGS += -x cuda
-endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_30
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_32
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_35
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_37
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_50
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_52
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_53
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_60
+  endif
+  ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
+    KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
+  endif

-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
-endif
-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
-  tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
-  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
-endif
+  ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
+    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)

+    ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+      KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
+    endif
+  endif
 endif

 KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h)
@ -630,9 +712,24 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
  KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
-  KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
+  KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
  KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
  KOKKOS_LIBS += -lcudart -lcuda
+
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
+  endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
+  KOKKOS_SRC += $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
+  KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMPTarget/*.hpp)
+  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+    KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
+  else
+    KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
+  endif
+  KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMPTARGET_FLAG)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -666,10 +763,27 @@ endif
 ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
  KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
  KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
-  KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC
+  KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
  KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
 endif

+# Don't include Kokkos_HBWSpace.cpp if not using MEMKIND to avoid a link warning.
+ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
+  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
+endif
+
+# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning.
+ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1)
+  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC))
+endif
+
+# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
+# device to avoid a link warning.
+ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
+  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp,$(KOKKOS_SRC))
+  KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp,$(KOKKOS_SRC))
+endif
+
 # With Cygwin functions such as fdopen and fileno are not defined
 # when strict ansi is enabled. strict ansi gets enabled with --std=c++11
 # though. So we hard undefine it here. Not sure if that has any bad side effects
--- a/lib/kokkos/Makefile.targets
+++ b/lib/kokkos/Makefile.targets
@ -53,11 +53,20 @@ Kokkos_Qthreads_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Qthreads/K
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
-Kokkos_OpenMPexec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMPexec.cpp
+Kokkos_OpenMP_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Exec.cpp
 Kokkos_OpenMP_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMP/Kokkos_OpenMP_Task.cpp
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
+Kokkos_OpenMPTarget_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Exec.cpp
+Kokkos_OpenMPTargetSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTargetSpace.cpp
+#Kokkos_OpenMPTarget_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
+#       $(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/OpenMPTarget/Kokkos_OpenMPTarget_Task.cpp
+endif
+
 Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
--- a/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp
+++ b/lib/kokkos/algorithms/src/KokkosAlgorithms_dummy.cpp
@ -0,0 +1 @@
+void KOKKOS_ALGORITHMS_SRC_DUMMY_PREVENT_LINK_ERROR() {}
--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@ -674,7 +674,7 @@ namespace Kokkos {
        const double V = 2.0*drand() - 1.0;
        S = U*U+V*V;
      }
-      return U*sqrt(-2.0*log(S)/S);
+      return U*std::sqrt(-2.0*log(S)/S);
    }

    KOKKOS_INLINE_FUNCTION
@ -917,7 +917,7 @@ namespace Kokkos {
        const double V = 2.0*drand() - 1.0;
        S = U*U+V*V;
      }
-      return U*sqrt(-2.0*log(S)/S);
+      return U*std::sqrt(-2.0*log(S)/S);
    }

    KOKKOS_INLINE_FUNCTION
@ -1171,7 +1171,7 @@ namespace Kokkos {
        const double V = 2.0*drand() - 1.0;
        S = U*U+V*V;
      }
-      return U*sqrt(-2.0*log(S)/S);
+      return U*std::sqrt(-2.0*log(S)/S);
    }

    KOKKOS_INLINE_FUNCTION
--- a/lib/kokkos/algorithms/unit_tests/Makefile
+++ b/lib/kokkos/algorithms/unit_tests/Makefile
@ -8,7 +8,7 @@ default: build_all
 	echo "End Build"

 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
+  CXX = $(KOKKOS_PATH)/bin/nvcc_wrapper
 else
  CXX = g++
 endif
@ -21,8 +21,8 @@ include $(KOKKOS_PATH)/Makefile.kokkos

 KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests

-TEST_TARGETS = 
-TARGETS = 
+TEST_TARGETS =
+TARGETS =

 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	OBJ_CUDA = TestCuda.o UnitTestMain.o gtest-all.o
@ -49,16 +49,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
 endif

 KokkosAlgorithms_UnitTest_Cuda: $(OBJ_CUDA) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Cuda
+	$(LINK) $(EXTRA_PATH) $(OBJ_CUDA) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Cuda

 KokkosAlgorithms_UnitTest_Threads: $(OBJ_THREADS) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Threads
+	$(LINK) $(EXTRA_PATH) $(OBJ_THREADS) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Threads

 KokkosAlgorithms_UnitTest_OpenMP: $(OBJ_OPENMP) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_OpenMP
+	$(LINK) $(EXTRA_PATH) $(OBJ_OPENMP) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_OpenMP

 KokkosAlgorithms_UnitTest_Serial: $(OBJ_SERIAL) $(KOKKOS_LINK_DEPENDS)
-	$(LINK) $(KOKKOS_LDFLAGS) $(LDFLAGS) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) -o KokkosAlgorithms_UnitTest_Serial
+	$(LINK) $(EXTRA_PATH) $(OBJ_SERIAL) $(KOKKOS_LIBS) $(LIB) $(KOKKOS_LDFLAGS) $(LDFLAGS) -o KokkosAlgorithms_UnitTest_Serial

 test-cuda: KokkosAlgorithms_UnitTest_Cuda
 	./KokkosAlgorithms_UnitTest_Cuda
@ -76,7 +76,7 @@ build_all: $(TARGETS)

 test: $(TEST_TARGETS)

-clean: kokkos-clean 
+clean: kokkos-clean
 	rm -f *.o $(TARGETS)

 # Compilation rules
@ -84,6 +84,5 @@ clean: kokkos-clean
 %.o:%.cpp $(KOKKOS_CPP_DEPENDS)
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

-gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc 
+gtest-all.o:$(GTEST_PATH)/gtest/gtest-all.cc
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $(GTEST_PATH)/gtest/gtest-all.cc
-
--- a/lib/kokkos/algorithms/unit_tests/TestCuda.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestCuda.cpp
@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@ -36,12 +36,15 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */

-#include <stdint.h>
+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_CUDA
+
+#include <cstdint>
 #include <iostream>
 #include <iomanip>

@ -49,8 +52,6 @@

 #include <Kokkos_Core.hpp>

-#ifdef KOKKOS_ENABLE_CUDA
-
 #include <TestRandom.hpp>
 #include <TestSort.hpp>

@ -105,6 +106,7 @@ CUDA_SORT_UNSIGNED(171)
 #undef CUDA_RANDOM_XORSHIFT1024
 #undef CUDA_SORT_UNSIGNED
 }
-
+#else
+void KOKKOS_ALGORITHMS_UNITTESTS_TESTCUDA_PREVENT_LINK_ERROR() {}
 #endif  /* #ifdef KOKKOS_ENABLE_CUDA */

--- a/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestOpenMP.cpp
@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@ -36,13 +36,16 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */

-#include <gtest/gtest.h>

+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_OPENMP
+
+#include <gtest/gtest.h>
 #include <Kokkos_Core.hpp>

 //----------------------------------------------------------------------------
@ -52,7 +55,6 @@

 namespace Test {

-#ifdef KOKKOS_ENABLE_OPENMP
 class openmp : public ::testing::Test {
 protected:
  static void SetUpTestCase()
@ -97,6 +99,8 @@ OPENMP_SORT_UNSIGNED(171)
 #undef OPENMP_RANDOM_XORSHIFT64
 #undef OPENMP_RANDOM_XORSHIFT1024
 #undef OPENMP_SORT_UNSIGNED
-#endif
 } // namespace test
+#else
+void KOKKOS_ALGORITHMS_UNITTESTS_TESTOPENMP_PREVENT_LINK_ERROR() {}
+#endif

--- a/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestRandom.hpp
@ -295,7 +295,7 @@ struct test_random_scalar {
      parallel_reduce (num_draws/1024, functor_type (pool, density_1d, density_3d), result);

      //printf("Result: %lf %lf %lf\n",result.mean/num_draws/3,result.variance/num_draws/3,result.covariance/num_draws/2);
-      double tolerance = 1.6*sqrt(1.0/num_draws);
+      double tolerance = 1.6*std::sqrt(1.0/num_draws);
      double mean_expect = 0.5*Kokkos::rand<rnd_type,Scalar>::max();
      double variance_expect = 1.0/3.0*mean_expect*mean_expect;
      double mean_eps = mean_expect/(result.mean/num_draws/3)-1.0;
@ -321,7 +321,7 @@ struct test_random_scalar {
      typedef test_histogram1d_functor<typename RandomGenerator::device_type> functor_type;
      parallel_reduce (HIST_DIM1D, functor_type (density_1d, num_draws), result);

-      double tolerance = 6*sqrt(1.0/HIST_DIM1D);
+      double tolerance = 6*std::sqrt(1.0/HIST_DIM1D);
      double mean_expect = 1.0*num_draws*3/HIST_DIM1D;
      double variance_expect = 1.0*num_draws*3/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
      double covariance_expect = -1.0*num_draws*3/HIST_DIM1D/HIST_DIM1D;
@ -354,7 +354,7 @@ struct test_random_scalar {
      typedef test_histogram3d_functor<typename RandomGenerator::device_type> functor_type;
      parallel_reduce (HIST_DIM1D, functor_type (density_3d, num_draws), result);

-      double tolerance = 6*sqrt(1.0/HIST_DIM1D);
+      double tolerance = 6*std::sqrt(1.0/HIST_DIM1D);
      double mean_expect = 1.0*num_draws/HIST_DIM1D;
      double variance_expect = 1.0*num_draws/HIST_DIM1D*(1.0-1.0/HIST_DIM1D);
      double covariance_expect = -1.0*num_draws/HIST_DIM1D/HIST_DIM1D;
--- a/lib/kokkos/algorithms/unit_tests/TestSerial.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestSerial.cpp
@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@ -36,11 +36,14 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */

+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_SERIAL
+
 #include <gtest/gtest.h>

 #include <Kokkos_Core.hpp>
@ -55,7 +58,6 @@

 namespace Test {

-#ifdef KOKKOS_ENABLE_SERIAL
 class serial : public ::testing::Test {
 protected:
  static void SetUpTestCase()
@ -93,7 +95,9 @@ SERIAL_SORT_UNSIGNED(171)
 #undef SERIAL_RANDOM_XORSHIFT1024
 #undef SERIAL_SORT_UNSIGNED

-#endif // KOKKOS_ENABLE_SERIAL
 } // namespace Test
+#else
+void KOKKOS_ALGORITHMS_UNITTESTS_TESTSERIAL_PREVENT_LINK_ERROR() {}
+#endif // KOKKOS_ENABLE_SERIAL


--- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp
@ -1,12 +1,12 @@
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@ -35,12 +35,12 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER

-#ifndef TESTSORT_HPP_
-#define TESTSORT_HPP_
+#ifndef KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP
+#define KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP

 #include <gtest/gtest.h>
 #include<Kokkos_Core.hpp>
@ -212,7 +212,12 @@ void test_dynamic_view_sort(unsigned int n )
  const size_t upper_bound = 2 * n ;

  typename KeyDynamicViewType::memory_pool
-    pool( memory_space() , 2 * n * sizeof(KeyType) );
+    pool( memory_space()
+        , n * sizeof(KeyType) * 1.2
+        ,     500 /* min block size in bytes */
+        ,   30000 /* max block size in bytes */
+        , 1000000 /* min superblock size in bytes */
+        );

  KeyDynamicViewType keys("Keys",pool,upper_bound);

@ -272,4 +277,4 @@ void test_sort(unsigned int N)

 }
 }
-#endif /* TESTSORT_HPP_ */
+#endif /* KOKKOS_ALGORITHMS_UNITTESTS_TESTSORT_HPP */
--- a/lib/kokkos/algorithms/unit_tests/TestThreads.cpp
+++ b/lib/kokkos/algorithms/unit_tests/TestThreads.cpp
@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@ -36,11 +36,14 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */

+#include <Kokkos_Macros.hpp>
+#ifdef KOKKOS_ENABLE_THREADS
+
 #include <gtest/gtest.h>

 #include <Kokkos_Core.hpp>
@ -55,7 +58,6 @@

 namespace Test {

-#ifdef KOKKOS_ENABLE_PTHREAD
 class threads : public ::testing::Test {
 protected:
  static void SetUpTestCase()
@ -107,7 +109,9 @@ THREADS_SORT_UNSIGNED(171)
 #undef THREADS_RANDOM_XORSHIFT1024
 #undef THREADS_SORT_UNSIGNED

-#endif
 } // namespace Test
+#else
+void KOKKOS_ALGORITHMS_UNITTESTS_TESTTHREADS_PREVENT_LINK_ERROR() {}
+#endif


--- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile
+++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile
@ -7,7 +7,7 @@ default: build
 	echo "Start Build"

 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 EXE = bytes_and_flops.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
@ -22,7 +22,7 @@ CXXFLAGS = -O3 -g

 DEPFLAGS = -M
 LINK = ${CXX}
-LINKFLAGS =  
+LINKFLAGS =

 OBJ = $(SRC:.cpp=.o)
 LIB =
@ -34,7 +34,7 @@ build: $(EXE)
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

-clean: kokkos-clean 
+clean: kokkos-clean
 	rm -f *.o *.cuda *.host

 # Compilation rules
--- a/lib/kokkos/benchmarks/gather/Makefile
+++ b/lib/kokkos/benchmarks/gather/Makefile
@ -7,7 +7,7 @@ default: build
 	echo "Start Build"

 ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
-CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
+CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
 EXE = gather.cuda
 KOKKOS_DEVICES = "Cuda,OpenMP"
 KOKKOS_ARCH = "SNB,Kepler35"
@ -22,7 +22,7 @@ CXXFLAGS = -O3 -g

 DEPFLAGS = -M
 LINK = ${CXX}
-LINKFLAGS =  
+LINKFLAGS =

 OBJ = $(SRC:.cpp=.o)
 LIB =
@ -35,10 +35,10 @@ build: $(EXE)
 $(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
 	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)

-clean: kokkos-clean 
+clean: kokkos-clean
 	rm -f *.o *.cuda *.host

 # Compilation rules

-%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp 
+%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
--- a/lib/kokkos/cmake/KokkosConfig.cmake.in
+++ b/lib/kokkos/cmake/KokkosConfig.cmake.in
@ -0,0 +1,18 @@
+# - Config file for the Kokkos package
+# It defines the following variables
+#  Kokkos_INCLUDE_DIRS - include directories for Kokkos
+#  Kokkos_LIBRARIES    - libraries to link against

+# Compute paths: Kokkos_CMAKE_DIR is the directory that holds this config file.
+GET_FILENAME_COMPONENT(Kokkos_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
+SET(Kokkos_INCLUDE_DIRS "@CONF_INCLUDE_DIRS@")

+# Our library dependencies (contains definitions for IMPORTED targets).
+# Skipped when the kokkos target already exists or Kokkos_BINARY_DIR is set.
+IF(NOT TARGET kokkos AND NOT Kokkos_BINARY_DIR)
+  INCLUDE("${Kokkos_CMAKE_DIR}/KokkosTargets.cmake")
+ENDIF()

+# The library directory is quoted so a path with spaces stays one value; the library names are IMPORTED targets created by KokkosTargets.cmake
+SET(Kokkos_LIBRARY_DIRS "@INSTALL_LIB_DIR@")
+SET(Kokkos_LIBRARIES @Kokkos_LIBRARIES_NAMES@)
+SET(Kokkos_TPL_LIBRARIES @KOKKOS_LIBS@)
--- a/lib/kokkos/cmake/Modules/FindHWLOC.cmake
+++ b/lib/kokkos/cmake/Modules/FindHWLOC.cmake
@ -0,0 +1,20 @@
+#.rst:
+# FindHWLOC
+# ---------
+#
+# Locate the HWLOC header and library.
+#
+# Result variables set by this module:
+#
+#   HWLOC_FOUND       - set when both the header and the library were located
+#   HWLOC_INCLUDE_DIR - directory containing hwloc.h
+#   HWLOC_LIBRARIES   - the hwloc library to link against

+find_path(HWLOC_INCLUDE_DIR NAMES hwloc.h)
+find_library(HWLOC_LIBRARIES NAMES hwloc)

+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+  HWLOC DEFAULT_MSG HWLOC_INCLUDE_DIR HWLOC_LIBRARIES)

+mark_as_advanced(HWLOC_INCLUDE_DIR HWLOC_LIBRARIES)
--- a/lib/kokkos/cmake/Modules/FindMemkind.cmake
+++ b/lib/kokkos/cmake/Modules/FindMemkind.cmake
@ -0,0 +1,20 @@
+#.rst:
+# FindMemkind
+# -----------
+#
+# Locate the Memkind header and library.
+#
+# Result variables set by this module:
+#
+#   MEMKIND_FOUND       - set when both the header and the library were located
+#   MEMKIND_INCLUDE_DIR - directory containing memkind.h
+#   MEMKIND_LIBRARIES   - the memkind library to link against

+find_path(MEMKIND_INCLUDE_DIR NAMES memkind.h)
+find_library(MEMKIND_LIBRARIES NAMES memkind)

+include(FindPackageHandleStandardArgs)
+find_package_handle_standard_args(
+  Memkind DEFAULT_MSG MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES)

+mark_as_advanced(MEMKIND_INCLUDE_DIR MEMKIND_LIBRARIES)
--- a/Show More
+++ b/Show More
				`@ -0,0 +1 @@`
				`void KOKKOS_ALGORITHMS_SRC_DUMMY_PREVENT_LINK_ERROR() {}`