sync GHub with SVN

Merge pull request #354 from stanmoore1/kokkos_bugfixes
Kokkos bugfixes
2017-01-26 14:06:43 -07:00 · 2017-01-26 13:51:47 -07:00 · 2017-01-26 13:50:13 -07:00 · 2017-01-26 13:49:56 -07:00 · 2017-01-26 13:47:12 -07:00 · 2017-01-26 13:45:45 -07:00
566 changed files with 58045 additions and 28520 deletions
--- a/doc/src/Eqs/bond_oxdna_fene.jpg
+++ b/doc/src/Eqs/bond_oxdna_fene.jpg
--- a/doc/src/Eqs/bond_oxdna_fene.tex
+++ b/doc/src/Eqs/bond_oxdna_fene.tex
@ -0,0 +1,10 @@
+\documentclass[12pt]{article}
+\pagestyle{empty}
+
+\begin{document}
+
+$$ 
+  E = - \frac{\epsilon}{2} \ln \left[ 1 - \left(\frac{r-r_0}{\Delta}\right)^2\right]
+$$
+
+\end{document}
--- a/doc/src/JPG/tutorial_merged.png
+++ b/doc/src/JPG/tutorial_merged.png
--- a/doc/src/JPG/tutorial_reverse_pull_request7.png
+++ b/doc/src/JPG/tutorial_reverse_pull_request7.png
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="6 Jan 2017 version">
+<META NAME="docnumber" CONTENT="26 Jan 2017 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@ -21,7 +21,7 @@
 <H1></H1>

 LAMMPS Documentation :c,h3
-6 Jan 2017 version :c,h4
+26 Jan 2017 version :c,h4

 Version info: :h4

--- a/doc/src/PDF/USER-CGDNA-overview.pdf
+++ b/doc/src/PDF/USER-CGDNA-overview.pdf
--- a/doc/src/Section_commands.txt
+++ b/doc/src/Section_commands.txt
@ -583,6 +583,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
 "lineforce"_fix_lineforce.html,
 "momentum (k)"_fix_momentum.html,
 "move"_fix_move.html,
+"mscg"_fix_mscg.html,
 "msst"_fix_msst.html,
 "neb"_fix_neb.html,
 "nph (ko)"_fix_nh.html,
@ -701,6 +702,8 @@ package"_Section_start.html#start_3.
 "meso"_fix_meso.html,
 "manifoldforce"_fix_manifoldforce.html,
 "meso/stationary"_fix_meso_stationary.html,
+"nve/dot"_fix_nve_dot.html,
+"nve/dotc/langevin"_fix_nve_dotc_langevin.html,
 "nve/manifold/rattle"_fix_nve_manifold_rattle.html,
 "nvk"_fix_nvk.html,
 "nvt/manifold/rattle"_fix_nvt_manifold_rattle.html,
@ -918,7 +921,7 @@ KOKKOS, o = USER-OMP, t = OPT.
 "dpd (go)"_pair_dpd.html,
 "dpd/tstat (go)"_pair_dpd.html,
 "dsmc"_pair_dsmc.html,
-"eam (gkot)"_pair_eam.html,
+"eam (gkiot)"_pair_eam.html,
 "eam/alloy (gkot)"_pair_eam.html,
 "eam/fs (gkot)"_pair_eam.html,
 "eim (o)"_pair_eim.html,
@ -1034,6 +1037,11 @@ package"_Section_start.html#start_3.
 "morse/soft"_pair_morse.html,
 "multi/lucy"_pair_multi_lucy.html,
 "multi/lucy/rx"_pair_multi_lucy_rx.html,
+"oxdna/coaxstk"_pair_oxdna.html,
+"oxdna/excv"_pair_oxdna.html,
+"oxdna/hbond"_pair_oxdna.html,
+"oxdna/stk"_pair_oxdna.html,
+"oxdna/xstk"_pair_oxdna.html,
 "quip"_pair_quip.html,
 "reax/c (k)"_pair_reax_c.html,
 "smd/hertz"_pair_smd_hertz.html,
@ -1082,7 +1090,8 @@ if "LAMMPS is built with the appropriate
 package"_Section_start.html#start_3.

 "harmonic/shift (o)"_bond_harmonic_shift.html,
-"harmonic/shift/cut (o)"_bond_harmonic_shift_cut.html :tb(c=4,ea=c)
+"harmonic/shift/cut (o)"_bond_harmonic_shift_cut.html,
+"oxdna/fene"_bond_oxdna_fene.html :tb(c=4,ea=c)

 :line

--- a/doc/src/Section_errors.txt
+++ b/doc/src/Section_errors.txt
@ -55,12 +55,13 @@ LAMMPS errors are detected at setup time; others like a bond
 stretching too far may not occur until the middle of a run.

 LAMMPS tries to flag errors and print informative error messages so
-you can fix the problem.  Of course, LAMMPS cannot figure out your
-physics or numerical mistakes, like choosing too big a timestep,
-specifying erroneous force field coefficients, or putting 2 atoms on
-top of each other!  If you run into errors that LAMMPS doesn't catch
-that you think it should flag, please send an email to the
-"developers"_http://lammps.sandia.gov/authors.html.
+you can fix the problem.  For most errors it will also print the last
+input script command that it was processing.  Of course, LAMMPS cannot
+figure out your physics or numerical mistakes, like choosing too big a
+timestep, specifying erroneous force field coefficients, or putting 2
+atoms on top of each other!  If you run into errors that LAMMPS
+doesn't catch that you think it should flag, please send an email to
+the "developers"_http://lammps.sandia.gov/authors.html.

 If you get an error message about an invalid command in your input
 script, you can determine what command is causing the problem by
--- a/doc/src/Section_packages.txt
+++ b/doc/src/Section_packages.txt
@ -84,7 +84,7 @@ Package, Description, Author(s), Doc page, Example, Library
 "PERI"_#PERI, Peridynamics models, Mike Parks (Sandia), "pair_style peri"_pair_peri.html, peri, -
 "POEMS"_#POEMS, coupled rigid body motion, Rudra Mukherjee (JPL), "fix poems"_fix_poems.html, rigid, lib/poems
 "PYTHON"_#PYTHON, embed Python code in an input script, -, "python"_python.html, python, lib/python
-"REAX"_#REAX, ReaxFF potential, Aidan Thompson (Sandia), "pair_style reax"_pair_reax.html, reax,  lib/reax
+"REAX"_#REAX, ReaxFF potential, Aidan Thompson (Sandia), "pair_style reax"_pair_reax.html, reax, lib/reax
 "REPLICA"_#REPLICA, multi-replica methods, -, "Section 6.6.5"_Section_howto.html#howto_5, tad, -
 "RIGID"_#RIGID, rigid bodies, -, "fix rigid"_fix_rigid.html, rigid, -
 "SHOCK"_#SHOCK, shock loading methods, -, "fix msst"_fix_msst.html, -, -
@ -1140,6 +1140,7 @@ Package, Description, Author(s), Doc page, Example, Pic/movie, Library
 "USER-ATC"_#USER-ATC, atom-to-continuum coupling, Jones & Templeton & Zimmerman (1), "fix atc"_fix_atc.html, USER/atc, "atc"_atc, lib/atc
 "USER-AWPMD"_#USER-AWPMD, wave-packet MD, Ilya Valuev (JIHT), "pair_style awpmd/cut"_pair_awpmd.html, USER/awpmd, -, lib/awpmd
 "USER-CG-CMM"_#USER-CG-CMM, coarse-graining model, Axel Kohlmeyer (Temple U), "pair_style lj/sdk"_pair_sdk.html, USER/cg-cmm, "cg"_cg, -
+"USER-CGDNA"_#USER-CGDNA, coarse-grained DNA force fields, Oliver Henrich (U Edinburgh), src/USER-CGDNA/README, USER/cgdna, -, -
 "USER-COLVARS"_#USER-COLVARS, collective variables, Fiorin & Henin & Kohlmeyer (2), "fix colvars"_fix_colvars.html, USER/colvars, "colvars"_colvars, lib/colvars
 "USER-DIFFRACTION"_#USER-DIFFRACTION, virutal x-ray and electron diffraction, Shawn Coleman (ARL),"compute xrd"_compute_xrd.html, USER/diffraction, -, -
 "USER-DPD"_#USER-DPD, reactive dissipative particle dynamics (DPD), Larentzos & Mattox & Brennan (5), src/USER-DPD/README, USER/dpd, -, -
@ -1153,7 +1154,7 @@ Package, Description, Author(s), Doc page, Example, Pic/movie, Library
 "USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER-MISC/README, -, -, -
 "USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surface, Stefan Paquay (Eindhoven U of Technology), "fix manifoldforce"_fix_manifoldforce.html, USER/manifold, "manifold"_manifold, -
 "USER-MOLFILE"_#USER-MOLFILE, "VMD"_VMD molfile plug-ins, Axel Kohlmeyer (Temple U), "dump molfile"_dump_molfile.html, -, -, VMD-MOLFILE
-"USER-NC-DUMP"_#USER-NC-DUMP, dump output via NetCDF, Lars Pastewka (Karlsruhe Institute of Technology, KIT), "dump nc, dump nc/mpiio"_dump_nc.html, -, -, lib/netcdf
+"USER-NC-DUMP"_#USER-NC-DUMP, dump output via NetCDF, Lars Pastewka (Karlsruhe Institute of Technology, KIT), "dump nc / dump nc/mpiio"_dump_nc.html, -, -, lib/netcdf
 "USER-OMP"_#USER-OMP, OpenMP threaded styles, Axel Kohlmeyer (Temple U), "Section 5.3.4"_accelerate_omp.html, -, -, -
 "USER-PHONON"_#USER-PHONON, phonon dynamical matrix, Ling-Ti Kong (Shanghai Jiao Tong U), "fix phonon"_fix_phonon.html, USER/phonon, -, -
 "USER-QMMM"_#USER-QMMM, QM/MM coupling, Axel Kohlmeyer (Temple U), "fix qmmm"_fix_qmmm.html, USER/qmmm, -, lib/qmmm
@ -1284,6 +1285,31 @@ him directly if you have questions.

 :line

+USER-CGDNA package :link(USER-CGDNA),h5
+
+Contents: The CGDNA package implements coarse-grained force fields for
+single- and double-stranded DNA. This is at the moment mainly the
+oxDNA model, developed by Doye, Louis and Ouldridge at the University
+of Oxford.  The package also contains Langevin-type rigid-body
+integrators with improved stability.
+
+See these doc pages to get started:
+
+"bond_style oxdna_fene"_bond_oxdna_fene.html
+"pair_style oxdna_excv"_pair_oxdna_excv.html
+"fix nve/dotc/langevin"_fix_nve_dotc_langevin.html :ul
+
+Supporting info: /src/USER-CGDNA/README, "bond_style
+oxdna_fene"_bond_oxdna_fene.html, "pair_style
+oxdna_excv"_pair_oxdna_excv.html, "fix
+nve/dotc/langevin"_fix_nve_dotc_langevin.html
+
+Author: Oliver Henrich at the University of Edinburgh, UK (o.henrich
+at epcc.ed.ac.uk or ohenrich at ph.ed.ac.uk).  Contact him directly if
+you have any questions.
+
+:line
+
 USER-COLVARS package :link(USER-COLVARS),h5

 Contents: COLVARS stands for collective variables which can be used to
@ -1610,11 +1636,12 @@ and a "dump nc/mpiio"_dump_nc.html command to output LAMMPS snapshots
 in this format.  See src/USER-NC-DUMP/README for more details.

 NetCDF files can be directly visualized with the following tools:
+
 Ovito (http://www.ovito.org/). Ovito supports the AMBER convention
-  and all of the above extensions. :ulb,l
+and all of the above extensions. :ulb,l
 VMD (http://www.ks.uiuc.edu/Research/vmd/) :l
 AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye contains
-  a NetCDF reader that is not present in the standard distribution of AtomEye :l,ule
+a NetCDF reader that is not present in the standard distribution of AtomEye :l,ule

 The person who created these files is Lars Pastewka at
 Karlsruhe Institute of Technology (lars.pastewka at kit.edu).
--- a/doc/src/Section_start.txt
+++ b/doc/src/Section_start.txt
@ -1727,7 +1727,7 @@ thermodynamic state and a total run time for the simulation.  It then
 appends statistics about the CPU time and storage requirements for the
 simulation.  An example set of statistics is shown here:

-Loop time of 2.81192 on 4 procs for 300 steps with 2004 atoms
+Loop time of 2.81192 on 4 procs for 300 steps with 2004 atoms :pre

 Performance: 18.436 ns/day  1.302 hours/ns  106.689 timesteps/s
 97.0% CPU use with 4 MPI tasks x no OpenMP threads :pre
@ -1757,14 +1757,14 @@ Ave special neighs/atom = 2.34032
 Neighbor list builds = 26
 Dangerous builds = 0 :pre

-The first section provides a global loop timing summary. The loop time
+The first section provides a global loop timing summary. The {loop time}
 is the total wall time for the section.  The {Performance} line is
 provided for convenience to help predicting the number of loop
-continuations required and for comparing performance with other
-similar MD codes.  The CPU use line provides the CPU utilzation per
+continuations required and for comparing performance with other,
+similar MD codes.  The {CPU use} line provides the CPU utilization per
 MPI task; it should be close to 100% times the number of OpenMP
-threads (or 1). Lower numbers correspond to delays due to file I/O or
-insufficient thread utilization.
+threads (or 1 if no OpenMP). Lower numbers correspond to delays due
+to file I/O or insufficient thread utilization.

 The MPI task section gives the breakdown of the CPU run time (in
 seconds) into major categories:
@ -1791,7 +1791,7 @@ is present that also prints the CPU utilization in percent. In
 addition, when using {timer full} and the "package omp"_package.html
 command are active, a similar timing summary of time spent in threaded
 regions to monitor thread utilization and load balance is provided. A
-new entry is the {Reduce} section, which lists the time spend in
+new entry is the {Reduce} section, which lists the time spent in
 reducing the per-thread data elements to the storage for non-threaded
 computation. These thread timings are taking from the first MPI rank
 only and and thus, as the breakdown for MPI tasks can change from MPI
--- a/doc/src/accelerate_intel.txt
+++ b/doc/src/accelerate_intel.txt
@ -29,7 +29,7 @@ Bond Styles: fene, harmonic :l
 Dihedral Styles: charmm, harmonic, opls :l
 Fixes: nve, npt, nvt, nvt/sllod :l
 Improper Styles: cvff, harmonic :l
-Pair Styles: buck/coul/cut, buck/coul/long, buck, gayberne,
+Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
 charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l
 K-Space Styles: pppm :l
 :ule
--- a/doc/src/accelerate_kokkos.txt
+++ b/doc/src/accelerate_kokkos.txt
@ -110,14 +110,14 @@ mpirun -np 96 -ppn 12 lmp_g++ -k on t 20 -sf kk -in in.lj   # ditto on 8 Phis :p
 [Required hardware/software:]

 Kokkos support within LAMMPS must be built with a C++11 compatible
-compiler.  If using gcc, version 4.8.1 or later is required.
+compiler.  If using gcc, version 4.7.2 or later is required.

 To build with Kokkos support for CPUs, your compiler must support the
 OpenMP interface.  You should have one or more multi-core CPUs so that
 multiple threads can be launched by each MPI task running on a CPU.

 To build with Kokkos support for NVIDIA GPUs, NVIDIA Cuda software
-version 6.5 or later must be installed on your system.  See the
+version 7.5 or later must be installed on your system.  See the
 discussion for the "GPU"_accelerate_gpu.html package for details of
 how to check and do this.

--- a/doc/src/bond_oxdna_fene.txt
+++ b/doc/src/bond_oxdna_fene.txt
@ -0,0 +1,70 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+bond_style oxdna_fene command :h3
+
+[Syntax:]
+
+bond_style oxdna_fene :pre
+
+[Examples:]
+
+bond_style oxdna_fene
+bond_coeff * 2.0 0.25 0.7525 :pre
+
+[Description:]
+
+The {oxdna_fene} bond style uses the potential
+
+:c,image(Eqs/bond_oxdna_fene.jpg)
+
+to define a modified finite extensible nonlinear elastic (FENE) potential
+"(Ouldridge)"_#oxdna_fene to model the connectivity of the phosphate backbone
+in the oxDNA force field for coarse-grained modelling of DNA. 
+
+The following coefficients must be defined for the bond type via the
+"bond_coeff"_bond_coeff.html command as given in the above example, or in
+the data file or restart files read by the "read_data"_read_data.html
+or "read_restart"_read_restart.html commands:
+
+epsilon (energy)
+Delta (distance)
+r0 (distance) :ul
+
+NOTE: This bond style has to be used together with the corresponding oxDNA pair styles
+for excluded volume interaction {oxdna_excv}, stacking {oxdna_stk}, cross-stacking {oxdna_xstk}
+and coaxial stacking interaction {oxdna_coaxstk} as well as hydrogen-bonding interaction {oxdna_hbond} (see also documentation of 
+"pair_style oxdna_excv"_pair_oxdna_excv.html). The coefficients 
+in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model.
+
+Example input and data files can be found in /examples/USER/cgdna/examples/duplex1/ and /duplex2/.
+A simple python setup tool which creates single straight or helical DNA strands,
+DNA duplexes or arrays of DNA duplexes can be found in /examples/USER/cgdna/util/.
+A technical report with more information on the model, the structure of the input file,
+the setup tool and the performance of the LAMMPS-implementation of oxDNA
+can be found "here"_PDF/USER-CGDNA-overview.pdf.
+
+:line
+
+[Restrictions:]
+
+This bond style can only be used if LAMMPS was built with the
+USER-CGDNA package and the MOLECULE and ASPHERE packages.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info on packages.
+
+
+[Related commands:]
+
+"pair_style oxdna_excv"_pair_oxdna_excv.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "bond_coeff"_bond_coeff.html 
+
+[Default:] none
+
+:line
+
+:link(oxdna_fene)
+[(Ouldridge)] T.E. Ouldridge, A.A. Louis, J.P.K. Doye, J. Chem. Phys. 134, 085101 (2011).
--- a/doc/src/bonds.txt
+++ b/doc/src/bonds.txt
@ -15,6 +15,7 @@ Bond Styles :h1
   bond_morse
   bond_none
   bond_nonlinear
+   bond_oxdna_fene
   bond_quartic
   bond_table
   bond_zero
--- a/doc/src/commands.txt
+++ b/doc/src/commands.txt
@ -91,6 +91,7 @@ Commands :h1
   suffix
   tad
   temper
+   temper_grem
   thermo
   thermo_modify
   thermo_style
--- a/doc/src/compute_coord_atom.txt
+++ b/doc/src/compute_coord_atom.txt
@ -10,34 +10,43 @@ compute coord/atom command :h3

 [Syntax:]

-compute ID group-ID coord/atom cutoff type1 type2 ... :pre
+compute ID group-ID coord/atom cstyle args ... :pre

-ID, group-ID are documented in "compute"_compute.html command
-coord/atom = style name of this compute command
-cutoff = distance within which to count coordination neighbors (distance units)
-typeN = atom type for Nth coordination count (see asterisk form below) :ul
+ID, group-ID are documented in "compute"_compute.html command :ulb,l
+coord/atom = style name of this compute command :l
+cstyle = {cutoff} or {orientorder} :l
+  {cutoff} args = cutoff typeN
+    cutoff = distance within which to count coordination neighbors (distance units)
+    typeN = atom type for Nth coordination count (see asterisk form below)
+  {orientorder} args = orientorderID threshold
+    orientorderID = ID of an orientorder/atom compute
+    threshold = minimum value of the scalar product of the Ybar_lm vectors of two atoms for them to be "connected" :pre
+:ule

 [Examples:]

-compute 1 all coord/atom 2.0
-compute 1 all coord/atom 6.0 1 2
-compute 1 all coord/atom 6.0 2*4 5*8 * :pre
+compute 1 all coord/atom cutoff 2.0
+compute 1 all coord/atom cutoff 6.0 1 2
+compute 1 all coord/atom cutoff 6.0 2*4 5*8 *
+compute 1 all coord/atom orientorder 2 0.5 :pre

 [Description:]

-Define a computation that calculates one or more coordination numbers
-for each atom in a group.
+This compute performs calculations between neighboring atoms to
+determine a coordination value.  The specific calculation and the
+meaning of the resulting value depend on the {cstyle} keyword used.

-A coordination number is defined as the number of neighbor atoms with
-specified atom type(s) that are within the specified cutoff distance
-from the central atom.  Atoms not in the group are included in a
-coordination number of atoms in the group.
+The {cutoff} cstyle calculates one or more traditional coordination
+numbers for each atom.  A coordination number is defined as the number
+of neighbor atoms with specified atom type(s) that are within the
+specified cutoff distance from the central atom.  Atoms not in the
+specified group are included in the coordination number tally.

-The {typeN} keywords allow you to specify which atom types contribute
-to each coordination number.  One coordination number is computed for
-each of the {typeN} keywords listed.  If no {typeN} keywords are
-listed, a single coordination number is calculated, which includes
-atoms of all types (same as the "*" format, see below).
+The {typeN} keywords allow specification of which atom types
+contribute to each coordination number.  One coordination number is
+computed for each of the {typeN} keywords listed.  If no {typeN}
+keywords are listed, a single coordination number is calculated, which
+includes atoms of all types (same as the "*" format, see below).

 The {typeN} keywords can be specified in one of two ways.  An explicit
 numeric value can be used, as in the 2nd example above.  Or a
@ -49,8 +58,27 @@ from 1 to N.  A leading asterisk means all types from 1 to n
 (inclusive).  A middle asterisk means all types from m to n
 (inclusive).

-The value of all coordination numbers will be 0.0 for atoms not in the
-specified compute group.
+The {orientorder} cstyle calculates the number of "connected" neighbor
+atoms J around each central atom I.  For this {cstyle}, connected is
+defined by the orientational order parameter calculated by the
+"compute orientorder/atom"_compute_orientorder_atom.html command.
+This {cstyle} thus allows one to apply ten Wolde's criterion to
+identify crystal-like atoms in a system, as discussed in "ten
+Wolde"_#tenWolde.
+
+The ID of the previously specified "compute
+orientorder/atom"_compute_orientorder_atom.html command is specified as
+{orientorderID}.  The compute must invoke its {components} option to
+calculate components of the {Ybar_lm} vector for each atom, as
+described in its documentation.  Note that the orientorder/atom compute
+defines its own criteria for identifying neighboring atoms.  If the
+scalar product ({Ybar_lm(i)},{Ybar_lm(j)}), calculated by the
+orientorder/atom compute is larger than the specified {threshold},
+then I and J are connected, and the coordination value of I is
+incremented by one.
+
+For all {cstyle} settings, all coordination values will be 0.0 for
+atoms not in the specified compute group.

 The neighbor list needed to compute this quantity is constructed each
 time the calculation is performed (i.e. each time a snapshot of atoms
@ -72,11 +100,16 @@ the neighbor list.

 [Output info:]

-If single {type1} keyword is specified (or if none are specified),
-this compute calculates a per-atom vector.  If multiple {typeN}
-keywords are specified, this compute calculates a per-atom array, with
-N columns.  These values can be accessed by any command that uses
-per-atom values from a compute as input.  See "Section
+For {cstyle} cutoff, this compute can calculate a per-atom vector or
+array.  If a single {type1} keyword is specified (or if none are
+specified), this compute calculates a per-atom vector.  If multiple
+{typeN} keywords are specified, this compute calculates a per-atom
+array, with N columns.
+
+For {cstyle} orientorder, this compute calculates a per-atom vector.
+
+These values can be accessed by any command that uses per-atom values
+from a compute as input.  See "Section
 6.15"_Section_howto.html#howto_15 for an overview of LAMMPS output
 options.

@ -88,5 +121,12 @@ explained above.
 [Related commands:]

 "compute cluster/atom"_compute_cluster_atom.html
+"compute orientorder/atom"_compute_orientorder_atom.html

 [Default:] none
+
+:line
+
+:link(tenWolde)
+[(tenWolde)] P. R. ten Wolde, M. J. Ruiz-Montero, D. Frenkel,
+J. Chem. Phys. 104, 9932 (1996).
--- a/doc/src/compute_orientorder_atom.txt
+++ b/doc/src/compute_orientorder_atom.txt
@ -15,17 +15,19 @@ compute ID group-ID orientorder/atom keyword values ... :pre
 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 orientorder/atom = style name of this compute command :l
 one or more keyword/value pairs may be appended :l
-keyword = {cutoff} or {nnn} or {degrees}
+keyword = {cutoff} or {nnn} or {degrees} or {components}
  {cutoff} value = distance cutoff
  {nnn} value = number of nearest neighbors
-  {degrees} values = nlvalues, l1, l2,...  :pre
+  {degrees} values = nlvalues, l1, l2,...
+  {components} value = ldegree  :pre

 :ule

 [Examples:]

 compute 1 all orientorder/atom
-compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5 :pre
+compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5
+compute 1 all orientorder/atom degrees 4 6 components 6 nnn NULL cutoff 3.0 :pre

 [Description:]

@ -62,14 +64,21 @@ specified distance cutoff are used.
 The optional keyword {degrees} defines the list of order parameters to
 be computed.  The first argument {nlvalues} is the number of order
 parameters. This is followed by that number of integers giving the
-degree of each order parameter. Because {Q}2 and all odd-degree
-order parameters are zero for atoms in cubic crystals
-(see "Steinhardt"_#Steinhardt), the default order parameters
-are {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12. For the
-FCC crystal with {nnn}=12, {Q}4 = sqrt(7/3)/8 = 0.19094....
-The numerical values of all order parameters up to {Q}12
-for a range of commonly encountered high-symmetry structures are given
-in Table I of "Mickel et al."_#Mickel.
+degree of each order parameter. Because {Q}2 and all odd-degree order
+parameters are zero for atoms in cubic crystals (see
+"Steinhardt"_#Steinhardt), the default order parameters are {Q}4,
+{Q}6, {Q}8, {Q}10, and {Q}12. For the FCC crystal with {nnn}=12, {Q}4
+= sqrt(7/3)/8 = 0.19094....  The numerical values of all order
+parameters up to {Q}12 for a range of commonly encountered
+high-symmetry structures are given in Table I of "Mickel et
+al."_#Mickel.
+
+The optional keyword {components} will output the components of the
+normalized complex vector {Ybar_lm} of degree {ldegree}, which must be
+explicitly included in the keyword {degrees}. This option can be used
+in conjunction with "compute coord/atom"_compute_coord_atom.html to
+apply ten Wolde's criterion to identify crystal-like
+particles, as discussed in "ten Wolde"_#tenWolde.

 The value of {Ql} is set to zero for atoms not in the
 specified compute group, as well as for atoms that have less than
@ -95,8 +104,16 @@ the neighbor list.

 [Output info:]

-This compute calculates a per-atom array with {nlvalues} columns, giving the
-{Ql} values for each atom, which are real numbers on the range 0 <= {Ql} <= 1.
+This compute calculates a per-atom array with {nlvalues} columns,
+giving the {Ql} values for each atom, which are real numbers on the
+range 0 <= {Ql} <= 1.
+
+If the keyword {components} is set, then the real and imaginary parts
+of each component of (normalized) {Ybar_lm} will be added to the
+output array in the following order: Re({Ybar_-m}) Im({Ybar_-m})
+Re({Ybar_-m+1}) Im({Ybar_-m+1}) ... Re({Ybar_m}) Im({Ybar_m}).  This
+way, the per-atom array will have a total of {nlvalues}+2*(2{l}+1)
+columns.

 These values can be accessed by any command that uses
 per-atom values from a compute as input.  See "Section
@ -107,15 +124,25 @@ options.

 [Related commands:]

-"compute coord/atom"_compute_coord_atom.html, "compute centro/atom"_compute_centro_atom.html, "compute hexorder/atom"_compute_hexorder_atom.html
+"compute coord/atom"_compute_coord_atom.html, "compute
+centro/atom"_compute_centro_atom.html, "compute
+hexorder/atom"_compute_hexorder_atom.html

 [Default:]

-The option defaults are {cutoff} = pair style cutoff, {nnn} = 12, {degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12.
+The option defaults are {cutoff} = pair style cutoff, {nnn} = 12,
+{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12.

 :line

 :link(Steinhardt)
-[(Steinhardt)] P. Steinhardt, D. Nelson, and M. Ronchetti, Phys. Rev. B 28, 784 (1983).
+[(Steinhardt)] P. Steinhardt, D. Nelson, and M. Ronchetti,
+Phys. Rev. B 28, 784 (1983).
+
 :link(Mickel)
-[(Mickel)] W. Mickel, S. C. Kapfer, G. E. Schroeder-Turkand, K. Mecke, J. Chem. Phys. 138, 044501 (2013).
+[(Mickel)] W. Mickel, S. C. Kapfer, G. E. Schroeder-Turkand, K. Mecke,
+J. Chem. Phys. 138, 044501 (2013).
+
+:link(tenWolde)
+[(tenWolde)] P. R. ten Wolde, M. J. Ruiz-Montero, D. Frenkel,
+J. Chem. Phys. 104, 9932 (1996).
--- a/doc/src/computes.txt
+++ b/doc/src/computes.txt
@ -35,6 +35,7 @@ Computes :h1
   compute_erotate_sphere_atom
   compute_event_displace
   compute_fep
+   compute_global_atom
   compute_group_group
   compute_gyration
   compute_gyration_chunk
--- a/doc/src/fix_flow_gauss.txt
+++ b/doc/src/fix_flow_gauss.txt
@ -151,7 +151,7 @@ The option default for the {energy} keyword is energy = no.
 :line

 :link(Strong)
-[(Strong)] Strong and Eaves, J. Phys. Chem. Lett. 7, 1907 (2016).
+[(Strong)] Strong and Eaves, J. Phys. Chem. B 121, 189 (2017).

 :link(Evans)
 [(Evans)] Evans and Morriss, Phys. Rev. Lett. 56, 2172 (1986).
--- a/doc/src/fix_grem.txt
+++ b/doc/src/fix_grem.txt
@ -29,7 +29,7 @@ fix             fxgREM all grem 502 -0.15 -80000 fxnvt :pre
 [Description:]

 This fix implements the molecular dynamics version of the generalized
-replica exchange method (gREM) originally developed by "(Kim)"_#Kim,
+replica exchange method (gREM) originally developed by "(Kim)"_#Kim2010,
 which uses non-Boltzmann ensembles to sample over first order phase
 transitions. The is done by defining replicas with an enthalpy
 dependent effective temperature
@ -103,7 +103,7 @@ npt"_fix_nh.html, "thermo_modify"_thermo_modify.html

 :line

-:link(Kim)
+:link(Kim2010)
 [(Kim)] Kim, Keyes, Straub, J Chem. Phys, 132, 224107 (2010).

 :link(Malolepsza)
--- a/doc/src/fix_mscg.txt
+++ b/doc/src/fix_mscg.txt
@ -0,0 +1,130 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+fix mscg command :h3
+
+[Syntax:]
+
+fix ID group-ID mscg N keyword args ... :pre
+
+ID, group-ID are documented in "fix"_fix.html command :ulb,l
+mscg = style name of this fix command :l
+N = invoke this fix every this many timesteps :l
+zero or more keyword/value pairs may be appended :l
+keyword = {range} or {name} or {max} :l
+  {range} arg = {on} or {off}
+    {on} = range finding functionality is performed
+    {off} = force matching functionality is performed
+  {name} args = name1 ... nameN
+    name1,...,nameN = string names for each atom type (1-Ntype)
+  {max} args = maxb maxa maxd
+    maxb,maxa,maxd = maximum bonds/angles/dihedrals per atom :pre
+:ule
+
+[Examples:]
+
+fix 1 all mscg 1
+fix 1 all mscg 1 range on name A B
+fix 1 all mscg 1 max 4 8 20 :pre
+
+[Description:]
+
+This fix applies the Multi-Scale Coarse-Graining (MSCG) method to
+snapshots from a dump file to generate potentials for coarse-grained
+simulations from all-atom simulations, using a force-matching
+technique ("Izvekov"_#Izvekov, "Noid"_#Noid).
+
+It makes use of the MS-CG library, written and maintained by Greg
+Voth's group at the University of Chicago, which is freely available
+on their "MS-CG GitHub
+site"_https://github.com/uchicago-voth/MSCG-release.  See instructions
+on obtaining and installing the MS-CG library in the src/MSCG/README
+file, which must be done before you build LAMMPS with this fix command
+and use the command in a LAMMPS input script.
+
+An example script using this fix is provided in the examples/mscg
+directory.
+
+The general workflow for using LAMMPS in conjunction with the MS-CG
+library to create a coarse-grained model and run coarse-grained
+simulations is as follows:
+
+Perform all-atom simulations on the system to be coarse grained.
+Generate a trajectory mapped to the coarse-grained model.
+Create input files for the MS-CG library.
+Run the range finder functionality of the MS-CG library.  
+Run the force matching functionality of the MS-CG library.
+Check the results of the force matching.
+Run coarse-grained simulations using the new coarse-grained potentials. :ol
+
+This fix can perform the range finding and force matching steps 4 and
+5 of the above workflow when used in conjunction with the
+"rerun"_rerun.html command.  It does not perform steps 1-3 and 6-7.
+
+Step 2 can be performed using a Python script (what is the name?)
+provided with the MS-CG library which defines the coarse-grained model
+and converts a standard LAMMPS dump file for an all-atom simulation
+(step 1) into a LAMMPS dump file which has the positions of and forces
+on the coarse-grained beads.  
+
+In step 3, an input file named "control.in" is needed by the MS-CG
+library which sets parameters for the range finding and force matching
+functionalities.  See the examples/mscg/control.in file as an example.
+And see the documentation provided with the MS-CG library for more
+info on this file.
+
+When this fix is used to perform steps 4 and 5, the MS-CG library also
+produces additional output files.  The range finder functionality
+(step 4) outputs files defining pair and bonded interaction ranges.
+The force matching functionality (step 5) outputs tabulated force
+files for every interaction in the system. Other diagnostic files can
+also be output depending on the parameters in the MS-CG library input
+script.  Again, see the documentation provided with the MS-CG library
+for more info.
+
+:line
+
+The {range} keyword specifies which MS-CG library functionality should
+be invoked. If {on}, the step 4 range finder functionality is invoked.
+If {off}, the step 5 force matching functionality is invoked.
+
+If the {name} keyword is used, string names are defined to associate
+with the integer atom types in LAMMPS.  {Ntype} names must be
+provided, one for each atom type (1-Ntype).
+
+The {max} keyword specifies the maximum number of bonds, angles, and
+dihedrals a bead can have in the coarse-grained model.
+
+[Restrictions:]
+
+This fix is part of the MSCG package. It is only enabled if LAMMPS was
+built with that package.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info.
+
+The MS-CG library uses C++11, which may not be supported by older
+compilers. The MS-CG library also has some additional numeric library
+dependencies, which are described in its documentation.
+
+Currently, the MS-CG library is not set up to run in parallel with MPI,
+so this fix can only be used in a serial LAMMPS build and run
+on a single processor.
+
+[Related commands:] none
+
+[Default:]
+
+The default keyword settings are range off, max 4 12 36.
+
+:line
+
+:link(Izvekov)
+[(Izvekov)] Izvekov, Voth, J Chem Phys 123, 134105 (2005).
+
+:link(Noid)
+[(Noid)] Noid, Chu, Ayton, Krishna, Izvekov, Voth, Das, Andersen, J
+Chem Phys 128, 244114 (2008).
--- a/doc/src/fix_nve_dot.txt
+++ b/doc/src/fix_nve_dot.txt
@ -0,0 +1,61 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+fix nve/dot command :h3
+
+[Syntax:]
+
+fix ID group-ID nve/dot :pre
+
+ID, group-ID are documented in "fix"_fix.html command :ulb,l
+nve/dot = style name of this fix command :l
+:ule
+
+[Examples:]
+
+fix 1 all nve/dot :pre
+
+[Description:]
+
+Apply a rigid-body integrator as described in "(Davidchack)"_#Davidchack
+to a group of atoms, but without Langevin dynamics. 
+This command performs molecular dynamics (MD)
+via a velocity-Verlet algorithm and an evolution operator that rotates 
+the quaternion degrees of freedom, similar to the scheme outlined in "(Miller)"_#Miller. 
+
+This command is the equivalent of the "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html
+without damping and noise and can be used to determine the stability range 
+in a NVE ensemble prior to using the Langevin-type DOTC-integrator
+(see also "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html).
+The command is equivalent to the "fix nve"_fix_nve.html.
+The particles are always considered to have a finite size.
+
+An example input file can be found in /examples/USER/cgdna/examples/duplex1/.
+A technical report with more information on this integrator can be found
+"here"_PDF/USER-CGDNA-overview.pdf.
+
+:line
+
+[Restrictions:]
+
+This fix can only be used if LAMMPS was built with the
+USER-CGDNA package and the MOLECULE and ASPHERE packages.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info on packages.
+
+[Related commands:]
+
+"fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "fix nve"_fix_nve.html
+
+[Default:] none
+
+:line
+
+:link(Davidchack)
+[(Davidchack)] R.L. Davidchack, T.E. Ouldridge, and M.V. Tretyakov. J. Chem. Phys. 142, 144114 (2015).
+:link(Miller)
+[(Miller)] T. F. Miller III, M. Eleftheriou, P. Pattnaik, A. Ndirango, G. J. Martyna, J. Chem. Phys., 116, 8649-8659 (2002).
--- a/doc/src/fix_nve_dotc_langevin.txt
+++ b/doc/src/fix_nve_dotc_langevin.txt
@ -0,0 +1,134 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+fix nve/dotc/langevin command :h3
+
+[Syntax:]
+
+fix ID group-ID nve/dotc/langevin Tstart Tstop damp seed keyword value :pre
+
+ID, group-ID are documented in "fix"_fix.html command :ulb,l
+nve/dotc/langevin = style name of this fix command :l
+Tstart,Tstop = desired temperature at start/end of run (temperature units) :l
+damp = damping parameter (time units) :l
+seed = random number seed to use for white noise (positive integer) :l
+keyword = {angmom} :l
+  {angmom} value = factor
+    factor = do thermostat rotational degrees of freedom via the angular momentum and apply numeric scale factor as discussed below :pre
+:ule
+
+[Examples:]
+
+fix 1 all nve/dotc/langevin 1.0 1.0 0.03 457145 angmom 10 :pre
+
+[Description:]
+
+Apply a rigid-body Langevin-type integrator of the kind "Langevin C" 
+as described in "(Davidchack)"_#Davidchack
+to a group of atoms, which models an interaction with an implicit background
+solvent.  This command performs Brownian dynamics (BD)
+via a technique that splits the integration into a deterministic Hamiltonian 
+part and the Ornstein-Uhlenbeck process for noise and damping. 
+The quaternion degrees of freedom are updated through an evolution
+operator which performs a rotation in quaternion space, preserves
+the quaternion norm and is akin to "(Miller)"_#Miller.
+
+In terms of syntax this command has been closely modelled on the 
+"fix langevin"_fix_langevin.html and its {angmom} option. But it combines 
+the "fix nve"_fix_nve.html and the "fix langevin"_fix_langevin.html in 
+one single command. The main feature is improved stability 
+over the standard integrator, permitting slightly larger timestep sizes.
+
+NOTE: Unlike the "fix langevin"_fix_langevin.html this command performs
+also time integration of the translational and quaternion degrees of freedom.
+
+The total force on each atom will have the form:
+
+F = Fc + Ff + Fr
+Ff = - (m / damp) v
+Fr is proportional to sqrt(Kb T m / (dt damp)) :pre
+
+Fc is the conservative force computed via the usual inter-particle
+interactions ("pair_style"_pair_style.html,
+"bond_style"_bond_style.html, etc).
+
+The Ff and Fr terms are implicitly taken into account by this fix 
+on a per-particle basis.
+
+Ff is a frictional drag or viscous damping term proportional to the
+particle's velocity.  The proportionality constant for each atom is
+computed as m/damp, where m is the mass of the particle and damp is
+the damping factor specified by the user.
+
+Fr is a force due to solvent atoms at a temperature T randomly bumping
+into the particle.  As derived from the fluctuation/dissipation
+theorem, its magnitude as shown above is proportional to sqrt(Kb T m /
+(dt damp)), where Kb is the Boltzmann constant, T is the desired
+temperature, m is the mass of the particle, dt is the timestep size,
+and damp is the damping factor.  Random numbers are used to randomize
+the direction and magnitude of this force as described in
+"(Dunweg)"_#Dunweg, where a uniform random number is used (instead of
+a Gaussian random number) for speed.
+
+:line
+
+{Tstart} and {Tstop} have to be constant values, i.e. they cannot 
+be variables.
+
+The {damp} parameter is specified in time units and determines how
+rapidly the temperature is relaxed.  For example, a value of 0.03
+means to relax the temperature in a timespan of (roughly) 0.03 time
+units tau (see the "units"_units.html command).
+The damp factor can be thought of as inversely related to the
+viscosity of the solvent, i.e. a small relaxation time implies a
+high-viscosity solvent and vice versa.  See the discussion about gamma
+and viscosity in the documentation for the "fix
+viscous"_fix_viscous.html command for more details.
+
+The random # {seed} must be a positive integer. A Marsaglia random
+number generator is used.  Each processor uses the input seed to
+generate its own unique seed and its own stream of random numbers.
+Thus the dynamics of the system will not be identical on two runs on
+different numbers of processors.
+
+The keyword/value option has to be used in the following way:
+
+This fix has to be used together with the {angmom} keyword. The 
+particles are always considered to have a finite size. 
+The keyword {angmom} enables thermostatting of the rotational degrees of 
+freedom in addition to the usual translational degrees of freedom. 
+
+The scale factor after the {angmom} keyword gives the ratio of the rotational to 
+the translational friction coefficient.
+
+An example input file can be found in /examples/USER/cgdna/examples/duplex2/.
+A technical report with more information on this integrator can be found 
+"here"_PDF/USER-CGDNA-overview.pdf.
+
+:line
+
+[Restrictions:]
+
+This fix can only be used if LAMMPS was built with the
+USER-CGDNA package and the MOLECULE and ASPHERE packages.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info on packages.
+
+[Related commands:]
+
+"fix nve"_fix_nve.html, "fix langevin"_fix_langevin.html, "fix nve/dot"_fix_nve_dot.html
+
+[Default:] none
+
+:line
+
+:link(Davidchack)
+[(Davidchack)] R.L. Davidchack, T.E. Ouldridge, M.V. Tretyakov. J. Chem. Phys. 142, 144114 (2015).
+:link(Miller)
+[(Miller)] T. F. Miller III, M. Eleftheriou, P. Pattnaik, A. Ndirango, G. J. Martyna, J. Chem. Phys., 116, 8649-8659 (2002).
+:link(Dunweg)
+[(Dunweg)] B. Dunweg, W. Paul, Int. J. Mod. Phys. C, 2, 817-27 (1991).
--- a/doc/src/fix_spring.txt
+++ b/doc/src/fix_spring.txt
@ -89,11 +89,7 @@ NOTE: The center of mass of a group of atoms is calculated in
 group can straddle a periodic boundary.  See the "dump"_dump.html doc
 page for a discussion of unwrapped coordinates.  It also means that a
 spring connecting two groups or a group and the tether point can cross
-a periodic boundary and its length be calculated correctly.  One
-exception is for rigid bodies, which should not be used with the fix
-spring command, if the rigid body will cross a periodic boundary.
-This is because image flags for rigid bodies are used in a different
-way, as explained on the "fix rigid"_fix_rigid.html doc page.
+a periodic boundary and its length be calculated correctly.  

 [Restart, fix_modify, output, run start/stop, minimize info:]

--- a/doc/src/fixes.txt
+++ b/doc/src/fixes.txt
@ -68,6 +68,7 @@ Fixes :h1
   fix_meso_stationary
   fix_momentum
   fix_move
+   fix_mscg
   fix_msst
   fix_neb
   fix_nh
@ -83,6 +84,8 @@ Fixes :h1
   fix_nve_asphere
   fix_nve_asphere_noforce
   fix_nve_body
+   fix_nve_dot
+   fix_nve_dotc_langevin
   fix_nve_eff
   fix_nve_limit
   fix_nve_line
--- a/doc/src/kspace_style.txt
+++ b/doc/src/kspace_style.txt
@ -229,11 +229,16 @@ dramatically in z.  For example, for a triclinic system with all three
 tilt factors set to the maximum limit, the PPPM grid should be
 increased roughly by a factor of 1.5 in the y direction and 2.0 in the
 z direction as compared to the same system using a cubic orthogonal
-simulation cell. One way to ensure the accuracy requirement is being
-met is to run a short simulation at the maximum expected tilt or
-length, note the required grid size, and then use the
+simulation cell.  One way to handle this issue if you have a long
+simulation where the box size changes dramatically, is to break it
+into shorter simulations (multiple "run"_run.html commands).  This
+works because the grid size is re-computed at the beginning of each
+run.  Another way to ensure the desired accuracy requirement is met
+is to run a short simulation at the maximum expected tilt or length,
+note the required grid size, and then use the
 "kspace_modify"_kspace_modify.html {mesh} command to manually set the
-PPPM grid size to this value.
+PPPM grid size to this value for the long run.  The simulation then
+will be "too accurate" for some portion of the run.

 RMS force errors in real space for {ewald} and {pppm} are estimated
 using equation 18 of "(Kolafa)"_#Kolafa, which is also referenced as
@ -285,6 +290,8 @@ LAMMPS"_Section_start.html#start_3 section for more info.
 See "Section 5"_Section_accelerate.html of the manual for
 more instructions on how to use the accelerated styles effectively.

+:line
+
 [Restrictions:]

 Note that the long-range electrostatic solvers in LAMMPS assume conducting
--- a/doc/src/lammps.book
+++ b/doc/src/lammps.book
@ -23,6 +23,7 @@ Section_history.html

 tutorial_drude.html
 tutorial_github.html
+tutorial_pylammps.html

 body.html
 manifolds.html
@ -113,6 +114,7 @@ special_bonds.html
 suffix.html
 tad.html
 temper.html
+temper_grem.html
 thermo.html
 thermo_modify.html
 thermo_style.html
@ -207,6 +209,8 @@ fix_nve.html
 fix_nve_asphere.html
 fix_nve_asphere_noforce.html
 fix_nve_body.html
+fix_nve_dot.html
+fix_nve_dotc_langevin.html
 fix_nve_eff.html
 fix_nve_limit.html
 fix_nve_line.html
@ -214,7 +218,6 @@ fix_nve_manifold_rattle.html
 fix_nve_noforce.html
 fix_nve_sphere.html
 fix_nve_tri.html
-fix_nvk.html
 fix_nvt_asphere.html
 fix_nvt_body.html
 fix_nvt_manifold_rattle.html
@ -455,6 +458,7 @@ pair_multi_lucy_rx.html
 pair_nb3b_harmonic.html
 pair_nm.html
 pair_none.html
+pair_oxdna_excv.html
 pair_peri.html
 pair_polymorphic.html
 pair_quip.html
@ -493,6 +497,7 @@ pair_zero.html
 bond_class2.html
 bond_fene.html
 bond_fene_expand.html
+bond_oxdna_fene.html
 bond_harmonic.html
 bond_harmonic_shift.html
 bond_harmonic_shift_cut.html
--- a/doc/src/pair_eam.txt
+++ b/doc/src/pair_eam.txt
@ -8,6 +8,7 @@

 pair_style eam command :h3
 pair_style eam/gpu command :h3
+pair_style eam/intel command :h3
 pair_style eam/kk command :h3
 pair_style eam/omp command :h3
 pair_style eam/opt command :h3
--- a/doc/src/pair_oxdna_excv.txt
+++ b/doc/src/pair_oxdna_excv.txt
@ -0,0 +1,80 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+pair_style oxdna_excv command :h3
+pair_style oxdna_stk command :h3
+pair_style oxdna_hbond command :h3
+pair_style oxdna_xstk command :h3
+pair_style oxdna_coaxstk command :h3
+
+[Syntax:]
+
+pair_style style :pre
+
+style = {hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk} :ul
+
+[Examples:]
+
+pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
+pair_coeff * * oxdna_excv    2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
+pair_coeff * * oxdna_stk     1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65
+pair_coeff * * oxdna_hbond   0.0   8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 1 4 oxdna_hbond   1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 2 3 oxdna_hbond   1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff * * oxdna_xstk    47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68
+pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65 :pre
+
+[Description:]
+
+The {oxdna} pair styles compute the pairwise-additive parts of the oxDNA force field 
+for coarse-grained modelling of DNA. The effective interaction between the nucleotides consists of potentials for the 
+excluded volume interaction {oxdna_excv}, the stacking {oxdna_stk}, cross-stacking {oxdna_xstk}
+and coaxial stacking interaction {oxdna_coaxstk} as well
+as the hydrogen-bonding interaction {oxdna_hbond} between complementary pairs of nucleotides on
+opposite strands.
+
+The exact functional form of the pair styles is rather complex, which manifests itself in the 144 coefficients 
+in the above example. The individual potentials consist of products of modulation factors, 
+which themselves are constructed from a number of more basic potentials 
+(Morse, Lennard-Jones, harmonic angle and distance) as well as quadratic smoothing and modulation terms. 
+We refer to "(Ouldridge-DPhil)"_#Ouldridge-DPhil and "(Ouldridge)"_#Ouldridge
+for a detailed description of the oxDNA force field.
+
+NOTE: These pair styles have to be used together with the related oxDNA bond style
+{oxdna_fene} for the connectivity of the phosphate backbone (see also documentation of
+"bond_style oxdna_fene"_bond_oxdna_fene.html). The coefficients
+in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model.
+
+Example input and data files can be found in /examples/USER/cgdna/examples/duplex1/ and /duplex2/.
+A simple python setup tool which creates single straight or helical DNA strands, 
+DNA duplexes or arrays of DNA duplexes can be found in /examples/USER/cgdna/util/.
+A technical report with more information on the model, the structure of the input file,
+the setup tool and the performance of the LAMMPS-implementation of oxDNA 
+can be found "here"_PDF/USER-CGDNA-overview.pdf.
+
+:line
+
+[Restrictions:]
+
+These pair styles can only be used if LAMMPS was built with the
+USER-CGDNA package and the MOLECULE and ASPHERE packages.  See the "Making
+LAMMPS"_Section_start.html#start_3 section for more info on packages.
+
+[Related commands:]
+
+"bond_style oxdna_fene"_bond_oxdna_fene.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "pair_coeff"_pair_coeff.html 
+
+[Default:] none
+
+:line
+
+:link(Ouldridge-DPhil)
+[(Ouldridge-DPhil)] T.E. Ouldridge, Coarse-grained modelling of DNA and DNA self-assembly, DPhil. University of Oxford (2011).
+
+:link(Ouldridge)
+[(Ouldridge)] T.E. Ouldridge, A.A. Louis, J.P.K. Doye, J. Chem. Phys. 134, 085101 (2011).
--- a/doc/src/pairs.txt
+++ b/doc/src/pairs.txt
@ -65,6 +65,7 @@ Pair Styles :h1
   pair_nb3b_harmonic
   pair_nm
   pair_none
+   pair_oxdna_excv
   pair_peri
   pair_polymorphic
   pair_quip
--- a/doc/src/read_dump.txt
+++ b/doc/src/read_dump.txt
@ -15,11 +15,12 @@ read_dump file Nstep field1 field2 ... keyword values ... :pre
 file = name of dump file to read :ulb,l
 Nstep = snapshot timestep to read from file :l
 one or more fields may be appended :l
-field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz}
+field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz} or {fx} or {fy} or {fz}
  {x},{y},{z} = atom coordinates
  {vx},{vy},{vz} = velocity components
  {q} = charge
-  {ix},{iy},{iz} = image flags in each dimension :pre
+  {ix},{iy},{iz} = image flags in each dimension
+  {fx},{fy},{fz} = force components :pre
 zero or more keyword/value pairs may be appended :l
 keyword = {box} or {replace} or {purge} or {trim} or {add} or {label} or {scaled} or {wrapped} or {format} :l
  {box} value = {yes} or {no} = replace simulation box with dump box
--- a/doc/src/temper_grem.txt
+++ b/doc/src/temper_grem.txt
@ -32,7 +32,7 @@ Run a parallel tempering or replica exchange simulation in LAMMPS
 partition mode using multiple generalized replicas (ensembles) of a
 system defined by "fix grem"_fix_grem.html, which stands for the
 generalized replica exchange method (gREM) originally developed by
-"(Kim)"_#Kim.  It uses non-Boltzmann ensembles to sample over first
+"(Kim)"_#KimStraub.  It uses non-Boltzmann ensembles to sample over first
 order phase transitions. This is done by defining replicas with an
 enthalpy dependent effective temperature

@ -105,5 +105,5 @@ This command must be used with "fix grem"_fix_grem.html.

 [Default:] none

-:link(Kim)
+:link(KimStraub)
 [(Kim)] Kim, Keyes, Straub, J Chem Phys, 132, 224107 (2010).
--- a/doc/src/timer.txt
+++ b/doc/src/timer.txt
@ -33,14 +33,14 @@ timer loop :pre
 Select the level of detail at which LAMMPS performs its CPU timings.
 Multiple keywords can be specified with the {timer} command.  For
 keywords that are mutually exclusive, the last one specified takes
-effect.
+precedence.

 During a simulation run LAMMPS collects information about how much
 time is spent in different sections of the code and thus can provide
 information for determining performance and load imbalance problems.
 This can be done at different levels of detail and accuracy.  For more
 information about the timing output, see this "discussion of screen
-output"_Section_start.html#start_8.
+output in Section 2.8"_Section_start.html#start_8.

 The {off} setting will turn all time measurements off. The {loop}
 setting will only measure the total time for a run and not collect any
@ -52,20 +52,22 @@ procsessors.  The {full} setting adds information about CPU
 utilization and thread utilization, when multi-threading is enabled.

 With the {sync} setting, all MPI tasks are synchronized at each timer
-call which meaures load imbalance more accuractly, though it can also
-slow down the simulation.  Using the {nosync} setting (which is the
-default) turns off this synchronization.
+call which measures load imbalance for each section more accurately,
+though it can also slow down the simulation by prohibiting overlapping
+independent computations on different MPI ranks.  Using the {nosync}
+setting (which is the default) turns this synchronization off.

-With the {timeout} keyword a walltime limit can be imposed that
+With the {timeout} keyword a walltime limit can be imposed, that
 affects the "run"_run.html and "minimize"_minimize.html commands.
-This can be convenient when runs have to confirm to time limits,
-e.g. when running under a batch system and you want to maximize
-the utilization of the batch time slot, especially when the time
-per timestep varies and is thus difficult to predict how many
-steps a simulation can perform, or for difficult to converge
-minimizations. The timeout {elapse} value should be somewhat smaller
-than the time requested from the batch system, as there is usually
-some overhead to launch jobs, and it may be advisable to write
+This can be convenient when calculations have to comply with execution
+time limits, e.g. when running under a batch system when you want to
+maximize the utilization of the batch time slot, especially for runs
+where the time per timestep varies much and thus it becomes difficult
+to predict how many steps a simulation can perform for a given walltime
+limit. This also applies for difficult to converge minimizations.
+The timeout {elapse} value should be somewhat smaller than the maximum
+wall time requested from the batch system, as there is usually
+some overhead to launch jobs, and it is advisable to write
 out a restart after terminating a run due to a timeout.

 The timeout timer starts when the command is issued. When the time
--- a/doc/src/tutorial_github.txt
+++ b/doc/src/tutorial_github.txt
@ -336,12 +336,15 @@ commit and push again:
 $ git commit -m "Merged Axel's suggestions and updated text"
 $ git push git@github.com:Pakketeretet2/lammps :pre

+This merge also shows up on the lammps Github page:
+
+:c,image(JPG/tutorial_reverse_pull_request7.png)

 :line

 [After a merge]

-When everything is fine, the feature branch is merged into the master branch.
+When everything is fine, the feature branch is merged into the master branch:

 :c,image(JPG/tutorial_merged.png)

--- a/examples/README
+++ b/examples/README
@ -82,6 +82,7 @@ meam:	  MEAM test for SiC and shear (same as shear examples)
 melt:	  rapid melt of 3d LJ system
 micelle:  self-assembly of small lipid-like molecules into 2d bilayers
 min:	  energy minimization of 2d LJ melt
+mscg:     parameterize a multi-scale coarse-graining (MSCG) model
 msst:	  MSST shock dynamics
 nb3b:     use of nonbonded 3-body harmonic pair style
 neb:	  nudged elastic band (NEB) calculation for barrier finding
--- a/examples/USER/cgdna/README
+++ b/examples/USER/cgdna/README
@ -0,0 +1,28 @@
+This directory contains example data and input files 
+and utility scripts for the oxDNA coarse-grained model 
+for DNA.
+
+/examples/duplex1:
+Input, data and log files for a DNA duplex (double-stranded DNA) 
+consisting of 5 base pairs. The duplex contains two strands with 
+complementary base pairs. The topology is
+
+A - A - A - A - A
+|   |   |   |   |
+T - T - T - T - T     
+
+/examples/duplex2:
+Input, data and log files for a nicked DNA duplex (double-stranded DNA) 
+consisting of 8 base pairs. The duplex contains strands with 
+complementary base pairs, but the backbone on one side is not continuous: 
+two individual strands on one side form a duplex with a longer single 
+strand on the other side. The topology is
+
+A - A - A - A - A - A - A - A
+|   |   |   |   |   |   |   |
+T - T - T   T - T - T - T - T
+
+/util:
+This directory contains a simple python setup tool which creates 
+single straight or helical DNA strands, DNA duplexes or arrays of DNA 
+duplexes.
--- a/examples/USER/cgdna/examples/duplex1/data.duplex1
+++ b/examples/USER/cgdna/examples/duplex1/data.duplex1
@ -0,0 +1,74 @@
+# LAMMPS data file
+10 atoms
+10 ellipsoids
+8 bonds
+
+4 atom types
+1 bond types
+
+# System size
+-20.000000 20.000000 xlo xhi
+-20.000000 20.000000 ylo yhi
+-20.000000 20.000000 zlo zhi
+
+# Atom masses for each atom type
+Masses
+
+1 3.1575
+2 3.1575
+3 3.1575
+4 3.1575
+
+# Atom-ID, type, position, molecule-ID, ellipsoid flag, density
+Atoms
+
+1 1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 1 1 1
+2 1 1.3274493266864451e-01 -4.2912827978022683e-01 3.7506163469402809e-01 1 1 1
+3 1 4.8460810659772807e-01 -7.0834970533509178e-01 7.5012326938805618e-01 1 1 1
+4 1 9.3267359196674593e-01 -7.4012419946742802e-01 1.1251849040820843e+00 1 1 1
+5 1 1.3204192238113461e+00 -5.1335201721887447e-01 1.5002465387761124e+00 1 1 1
+6 4 1.9958077618865377e-01 5.1335201721887447e-01 1.5002465387761124e+00 1 1 1
+7 4 5.8732640803325409e-01 7.4012419946742802e-01 1.1251849040820843e+00 1 1 1
+8 4 1.0353918934022719e+00 7.0834970533509178e-01 7.5012326938805618e-01 1 1 1
+9 4 1.3872550673313555e+00 4.2912827978022683e-01 3.7506163469402809e-01 1 1 1
+10 4 1.5200000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 1 1 1
+
+# Atom-ID, translational, rotational velocity
+Velocities
+
+1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+
+# Atom-ID, shape, quaternion
+Ellipsoids
+
+1 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 1.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
+2 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 9.5533648912560598e-01 0.0000000000000000e+00 0.0000000000000000e+00 2.9552020666133955e-01
+3 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 8.2533561490967822e-01 0.0000000000000000e+00 0.0000000000000000e+00 5.6464247339503526e-01
+4 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 6.2160996827066439e-01 0.0000000000000000e+00 0.0000000000000000e+00 7.8332690962748319e-01
+5 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 3.6235775447667351e-01 0.0000000000000000e+00 0.0000000000000000e+00 9.3203908596722607e-01
+6 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 9.3203908596722607e-01 -3.6235775447667351e-01 0.0000000000000000e+00
+7 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 7.8332690962748319e-01 -6.2160996827066439e-01 0.0000000000000000e+00
+8 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 5.6464247339503526e-01 -8.2533561490967822e-01 0.0000000000000000e+00
+9 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 2.9552020666133955e-01 -9.5533648912560598e-01 0.0000000000000000e+00
+10 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1.0000000000000000e+00 0.0000000000000000e+00
+
+# Bond topology
+Bonds
+
+1 1 1 2
+2 1 2 3
+3 1 3 4
+4 1 4 5
+5 1 6 7
+6 1 7 8
+7 1 8 9
+8 1 9 10
--- a/examples/USER/cgdna/examples/duplex1/input.duplex1
+++ b/examples/USER/cgdna/examples/duplex1/input.duplex1
@ -0,0 +1,75 @@
+variable number	equal 1
+variable ofreq	equal 1000
+variable efreq	equal 1000
+
+units lj
+
+dimension 3
+
+newton off
+
+boundary  p p p
+
+atom_style hybrid bond ellipsoid
+atom_modify sort 0 1.0
+
+# Pair interactions require lists of neighbours to be calculated
+neighbor 1.0 bin
+neigh_modify every 1 delay 0 check yes
+
+read_data data.duplex1
+
+set atom * mass 3.1575
+
+group all type 1 4
+
+# oxDNA bond interactions - FENE backbone
+bond_style oxdna_fene
+bond_coeff * 2.0 0.25 0.7525
+
+# oxDNA pair interactions
+pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
+pair_coeff * * oxdna_excv   2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
+pair_coeff * * oxdna_stk    1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65
+pair_coeff * * oxdna_hbond  0.0   8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 1 4 oxdna_hbond  1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 2 3 oxdna_hbond  1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff * * oxdna_xstk   47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68
+pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65
+
+# NVE ensemble
+#fix 1 all   nve/dotc/langevin 0.1 0.1 0.03 457145 angmom 10
+fix 1 all   nve/dot
+
+timestep 1e-5
+
+#comm_style tiled
+#fix 3 all balance 10000 1.1 rcb
+
+#compute mol all chunk/atom molecule
+#compute mychunk all vcm/chunk mol
+#fix 4 all ave/time 10000 1 10000 c_mychunk[1] c_mychunk[2] c_mychunk[3] file vcm.txt mode vector
+
+dump pos all xyz ${ofreq} traj.${number}.xyz
+
+compute quat all property/atom quatw quati quatj quatk
+dump quat all custom ${ofreq} quat.${number}.txt id c_quat[1] c_quat[2] c_quat[3] c_quat[4]
+dump_modify quat sort id
+dump_modify quat format line "%d  %13.6le  %13.6le  %13.6le  %13.6le"
+
+compute erot all erotate/asphere
+compute ekin all ke
+compute epot all pe
+variable erot equal c_erot
+variable ekin equal c_ekin
+variable epot equal c_epot
+variable etot equal c_erot+c_ekin+c_epot
+fix 5 all print ${efreq} "$(step)  ekin = ${ekin} |  erot = ${erot} | epot = ${epot} | etot = ${etot}" screen yes
+
+dump out all custom ${ofreq} out.${number}.txt id x y z vx vy vz fx fy fz tqx tqy tqz
+dump_modify out sort id
+dump_modify out format line "%d   %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le"
+
+run 1000000
+
+#write_restart config.${number}.*
--- a/examples/USER/cgdna/examples/duplex1/log.9Jan17.duplex1.g++.1
+++ b/examples/USER/cgdna/examples/duplex1/log.9Jan17.duplex1.g++.1
--- a/examples/USER/cgdna/examples/duplex1/log.9Jan17.duplex1.g++.4
+++ b/examples/USER/cgdna/examples/duplex1/log.9Jan17.duplex1.g++.4
--- a/examples/USER/cgdna/examples/duplex2/data.duplex2
+++ b/examples/USER/cgdna/examples/duplex2/data.duplex2
@ -0,0 +1,97 @@
+# LAMMPS data file
+16 atoms
+16 ellipsoids
+13 bonds
+
+4 atom types
+1 bond types
+
+# System size
+-20.0 20.0 xlo xhi
+-20.0 20.0 ylo yhi
+-20.0 20.0 zlo zhi
+
+# Atom masses for each atom type
+Masses
+
+1 3.1575
+2 3.1575
+3 3.1575
+4 3.1575
+
+# Atom-ID, type, position, molecule-ID, ellipsoid flag, density
+Atoms
+
+1  1  0.000000000000000e+00  0.000000000000000e+00  0.000000000000000e+00  1 1 1
+2  1  1.327449326686445e-01 -4.291282797802268e-01  3.750616346940281e-01  1 1 1
+3  1  4.846081065977281e-01 -7.083497053350921e-01  7.501232693880562e-01  1 1 1
+4  1  9.326735919667459e-01 -7.401241994674285e-01  1.125184904082084e+00  1 1 1
+5  1  1.320419223811347e+00 -5.133520172188747e-01  1.500246538776112e+00  1 1 1
+6  1  1.512394297416339e+00 -1.072512061254991e-01  1.875308173470140e+00  1 1 1
+7  1  1.441536396413952e+00  3.363155369040876e-01  2.250369808164169e+00  1 1 1
+8  1  1.132598224218932e+00  6.623975870343269e-01  2.625431442858197e+00  1 1 1
+9  4  5.873264080332541e-01  7.401241994674285e-01  1.125184904082084e+00  1 1 1
+10 4  1.035391893402272e+00  7.083497053350921e-01  7.501232693880562e-01  1 1 1
+11 4  1.387255067331356e+00  4.291282797802267e-01  3.750616346940281e-01  1 1 1
+12 4  1.520000000000000e+00  1.260981291332700e-33  0.000000000000000e+00  1 1 1
+13 4  3.874017757810680e-01 -6.623975870343268e-01  2.625431442858197e+00  1 1 1
+14 4  7.846360358604798e-02 -3.363155369040874e-01  2.250369808164169e+00  1 1 1
+15 4  7.605702583661333e-03  1.072512061254995e-01  1.875308173470140e+00  1 1 1
+16 4  1.995807761886533e-01  5.133520172188748e-01  1.500246538776112e+00  1 1 1
+
+# Atom-ID, translational velocity, angular momentum
+Velocities
+
+1  0.0  0.0  0.0  0.0  0.0  0.0 
+2  0.0  0.0  0.0  0.0  0.0  0.0 
+3  0.0  0.0  0.0  0.0  0.0  0.0 
+4  0.0  0.0  0.0  0.0  0.0  0.0 
+5  0.0  0.0  0.0  0.0  0.0  0.0 
+6  0.0  0.0  0.0  0.0  0.0  0.0 
+7  0.0  0.0  0.0  0.0  0.0  0.0 
+8  0.0  0.0  0.0  0.0  0.0  0.0 
+9  0.0  0.0  0.0  0.0  0.0  0.0 
+10 0.0  0.0  0.0  0.0  0.0  0.0 
+11 0.0  0.0  0.0  0.0  0.0  0.0 
+12 0.0  0.0  0.0  0.0  0.0  0.0 
+13 0.0  0.0  0.0  0.0  0.0  0.0 
+14 0.0  0.0  0.0  0.0  0.0  0.0 
+15 0.0  0.0  0.0  0.0  0.0  0.0 
+16 0.0  0.0  0.0  0.0  0.0  0.0 
+
+# Atom-ID, shape, quaternion
+Ellipsoids
+
+1   1.1739845031423408 1.1739845031423408 1.1739845031423408  1.000000000000000e+00  0.000000000000000e+00  0.000000000000000e+00  0.000000000000000e+00
+2   1.1739845031423408 1.1739845031423408 1.1739845031423408  9.553364891256060e-01  0.000000000000000e+00  0.000000000000000e+00  2.955202066613395e-01 
+3   1.1739845031423408 1.1739845031423408 1.1739845031423408  8.253356149096783e-01  0.000000000000000e+00  0.000000000000000e+00  5.646424733950354e-01 
+4   1.1739845031423408 1.1739845031423408 1.1739845031423408  6.216099682706646e-01  0.000000000000000e+00  0.000000000000000e+00  7.833269096274833e-01 
+5   1.1739845031423408 1.1739845031423408 1.1739845031423408  3.623577544766736e-01  0.000000000000000e+00  0.000000000000000e+00  9.320390859672263e-01 
+6   1.1739845031423408 1.1739845031423408 1.1739845031423408  7.073720166770291e-02  0.000000000000000e+00  0.000000000000000e+00  9.974949866040544e-01 
+7   1.1739845031423408 1.1739845031423408 1.1739845031423408 -2.272020946930869e-01 -0.000000000000000e+00  0.000000000000000e+00  9.738476308781953e-01 
+8   1.1739845031423408 1.1739845031423408 1.1739845031423408 -5.048461045998575e-01 -0.000000000000000e+00  0.000000000000000e+00  8.632093666488738e-01 
+9   1.1739845031423408 1.1739845031423408 1.1739845031423408  4.796493962806427e-17  7.833269096274833e-01 -6.216099682706646e-01  3.806263289803786e-17 
+10  1.1739845031423408 1.1739845031423408 1.1739845031423408  5.707093416549944e-17  5.646424733950354e-01 -8.253356149096784e-01  2.218801320830406e-17 
+11  1.1739845031423408 1.1739845031423408 1.1739845031423408  6.107895212550935e-17  2.955202066613394e-01 -9.553364891256061e-01  4.331404380149668e-18 
+12  1.1739845031423408 1.1739845031423408 1.1739845031423408  5.963096920061075e-17  0.000000000000000e+00 -1.000000000000000e+00 -1.391211590127312e-17 
+13  1.1739845031423408 1.1739845031423408 1.1739845031423408  5.285632939302787e-17  8.632093666488739e-01  5.048461045998572e-01 -3.091290830301125e-17 
+14  1.1739845031423408 1.1739845031423408 1.1739845031423408  4.136019110019290e-17  9.738476308781953e-01  2.272020946930868e-01 -4.515234267244800e-17 
+15  1.1739845031423408 1.1739845031423408 1.1739845031423408  2.616947011741696e-17  9.974949866040544e-01 -7.073720166770313e-02 -5.535845274597425e-17 
+16  1.1739845031423408 1.1739845031423408 1.1739845031423408  8.641108308308281e-18  9.320390859672264e-01 -3.623577544766736e-01 -6.061955710708163e-17 
+
+# Bond-ID, type, atom pairs
+Bonds
+
+1	1	1	2
+2	1	2	3
+3	1	3	4
+4	1	4	5
+5	1	5	6
+6	1	6	7
+7	1	7	8
+8	1	13	14
+9	1	14	15
+10	1	15	16
+11	1	9	10
+12	1	10	11
+13	1	11	12
--- a/examples/USER/cgdna/examples/duplex2/input.duplex2
+++ b/examples/USER/cgdna/examples/duplex2/input.duplex2
@ -0,0 +1,75 @@
+variable number	equal 2
+variable ofreq	equal 1000
+variable efreq	equal 1000
+
+units lj
+
+dimension 3
+
+newton off
+
+boundary  p p p
+
+atom_style hybrid bond ellipsoid
+atom_modify sort 0 1.0
+
+# Pair interactions require lists of neighbours to be calculated
+neighbor 1.0 bin
+neigh_modify every 1 delay 0 check yes
+
+read_data data.duplex2
+
+set atom * mass 3.1575
+
+group all type 1 4
+
+# oxDNA bond interactions - FENE backbone
+bond_style oxdna_fene
+bond_coeff * 2.0 0.25 0.7525
+
+# oxDNA pair interactions
+pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
+pair_coeff * * oxdna_excv   2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
+pair_coeff * * oxdna_stk    1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65   
+pair_coeff * * oxdna_hbond  0.0   8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 1 4 oxdna_hbond  1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 2 3 oxdna_hbond  1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff * * oxdna_xstk   47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68 
+pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65
+
+# Langevin dynamics (thermostatted, not NVE); use the commented-out nve/dot fix below for plain NVE
+fix 1 all   nve/dotc/langevin 0.1 0.1 0.03 457145 angmom 10
+#fix 1 all   nve/dot
+
+timestep 1e-5 
+
+#comm_style tiled
+#fix 3 all balance 10000 1.1 rcb
+
+#compute mol all chunk/atom molecule
+#compute mychunk all vcm/chunk mol
+#fix 4 all ave/time 10000 1 10000 c_mychunk[1] c_mychunk[2] c_mychunk[3] file vcm.txt mode vector
+
+dump pos all xyz ${ofreq} traj.${number}.xyz
+
+compute quat all property/atom quatw quati quatj quatk
+dump quat all custom ${ofreq} quat.${number}.txt id c_quat[1] c_quat[2] c_quat[3] c_quat[4]
+dump_modify quat sort id
+dump_modify quat format line "%d  %13.6le  %13.6le  %13.6le  %13.6le"
+
+compute erot all erotate/asphere
+compute ekin all ke
+compute epot all pe
+variable erot equal c_erot
+variable ekin equal c_ekin
+variable epot equal c_epot
+variable etot equal c_erot+c_ekin+c_epot
+fix 5 all print ${efreq} "$(step)  ekin = ${ekin} |  erot = ${erot} | epot = ${epot} | etot = ${etot}" screen yes
+
+dump out all custom ${ofreq} out.${number}.txt id x y z vx vy vz fx fy fz tqx tqy tqz
+dump_modify out sort id
+dump_modify out format line "%d   %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le"
+
+run 1000000
+
+#write_restart config.${number}.*
--- a/examples/USER/cgdna/examples/duplex2/log.9Jan17.duplex2.g++.1
+++ b/examples/USER/cgdna/examples/duplex2/log.9Jan17.duplex2.g++.1
--- a/examples/USER/cgdna/examples/duplex2/log.9Jan17.duplex2.g++.4
+++ b/examples/USER/cgdna/examples/duplex2/log.9Jan17.duplex2.g++.4
--- a/examples/USER/cgdna/util/generate_input.py
+++ b/examples/USER/cgdna/util/generate_input.py
@ -0,0 +1,388 @@
+# Setup tool for oxDNA input in LAMMPS format.
+
+import math,numpy as np,sys,os
+
+# system size
+lxmin = -115.0
+lxmax = +115.0
+lymin = -115.0
+lymax = +115.0
+lzmin = -115.0
+lzmax = +115.0
+
+# rise in z-direction
+r0 = 0.7
+
+# definition of single untwisted strand
+def single():
+  """Append one straight, untwisted strand to the global lists.
+
+  The strand specification is read from the global inp[1], which is
+  split on ':' into 'x,y,z', a second field that is not used here, and
+  the base sequence.  One record [type, position, velocity, shape,
+  quaternion] is appended to `nucleotide` per base, and one bond entry
+  [1, previous-ID, current-ID] is appended to `topology` for every base
+  after the first one of this strand.
+  """
+  fields = inp[1].split(':')
+  origin = fields[0].split(',')
+
+  x = float(origin[0])
+  y = float(origin[1])
+  z = float(origin[2])
+
+  # per-base displacement: the strand only advances along z
+  step_x = 0
+  step_y = 0
+  step_z = r0
+
+  first_id = len(nucleotide) + 1
+
+  for base in fields[2]:
+    # zero velocities and identity orientation for every base
+    record = [nt2num[base], [x, y, z], [0,0,0,0,0,0], shape, [1,0,0,0]]
+
+    x = x + step_x
+    y = y + step_y
+    z = z + step_z
+
+    # 1-based ID this base is about to receive
+    new_id = len(nucleotide) + 1
+    if new_id > first_id:
+      topology.append([1, new_id - 1, new_id])
+
+    nucleotide.append(record)
+
+# definition of single twisted strand
+def single_helix():
+  """Append one helically twisted single strand to the global lists.
+
+  The specification in the global inp[1] is split on ':' into 'x,y,z',
+  the twist angle per base and the base sequence.  Each base is rotated
+  about the z-axis by `twist` relative to its predecessor and placed at
+  distance dcomh from a helix axis parallel to z.  Records go to
+  `nucleotide`, bonds between consecutive bases to `topology`.
+  """
+  fields = inp[1].split(':')
+
+  origin = fields[0].split(',')
+  twist = float(fields[1])
+
+  x = float(origin[0])
+  y = float(origin[1])
+  z = float(origin[2])
+  dz = math.sqrt(r0**2-4.0*math.sin(0.5*twist)**2)
+
+  # the helix axis is offset from the first base by dcomh along x
+  dcomh = 0.76
+  axis_x = dcomh + x
+  axis_y = y
+
+  first_id = len(nucleotide) + 1
+
+  # current orientation quaternion (w, i, j, k), starting at identity
+  w, i, j, k = 1, 0, 0, 0
+
+  # rotation by `twist` about z applied between consecutive bases
+  rw = math.cos(0.5*twist)
+  ri = 0
+  rj = 0
+  rk = math.sin(0.5*twist)
+
+  for base in fields[2]:
+    record = [nt2num[base], [x, y, z], [0,0,0,0,0,0], shape, [w, i, j, k]]
+
+    # quaternion product; the right-hand side is evaluated with the old
+    # components before any of them is rebound
+    w, i, j, k = (w*rw - i*ri - j*rj - k*rk,
+                  w*ri + i*rw + j*rk - k*rj,
+                  w*rj + j*rw + k*ri - i*rk,
+                  w*rk + k*rw + i*rj + j*ri)
+
+    # position of the next base from the updated orientation
+    x = axis_x - dcomh*(w**2+i**2-j**2-k**2)
+    y = axis_y - dcomh*(2*(i*j+w*k))
+    z = z + dz
+
+    new_id = len(nucleotide) + 1
+    if new_id > first_id:
+      topology.append([1, new_id - 1, new_id])
+
+    nucleotide.append(record)
+
+# definition of twisted duplex  
+def duplex():
+  """Append a twisted duplex (strand plus complementary strand).
+
+  The specification in the global inp[1] is split on ':' into 'x,y,z',
+  the twist angle per base pair and the base sequence of the first
+  strand.  The first strand is appended to `nucleotide` in sequence
+  order; the complementary strand is collected separately and appended
+  afterwards in reverse order.  Bonds of both strands are appended to
+  `topology`, first strand first.
+  """
+  fields = inp[1].split(':')
+
+  origin = fields[0].split(',')
+  twist = float(fields[1])
+
+  partner_records = []
+  partner_bonds = []
+
+  xa = float(origin[0])
+  ya = float(origin[1])
+  za = float(origin[2])
+
+  dz = math.sqrt(r0**2-4.0*math.sin(0.5*twist)**2)
+
+  # helix axis is offset from the first base by dcomh along x;
+  # the partner base starts on the opposite side of the axis
+  dcomh = 0.76
+  axis_x = dcomh + xa
+  axis_y = ya
+
+  xb = axis_x + dcomh
+  yb = ya
+  zb = za
+
+  first_id = len(nucleotide) + 1
+
+  # orientation quaternions of the current base (a) and its partner (b)
+  qa = [1,0,0,0]
+  qb = [0,0,-1,0]
+
+  # rotation by `twist` about z applied between consecutive base pairs
+  rw = math.cos(0.5*twist)
+  ri = 0
+  rj = 0
+  rk = math.sin(0.5*twist)
+
+  sequence = fields[2]
+
+  for base in sequence:
+    # one velocity list is shared by both records of the pair
+    zero_vel = [0,0,0,0,0,0]
+    rec_a = [nt2num[base], [xa, ya, za], zero_vel, shape, qa]
+    rec_b = [compnt2num[base], [xb, yb, zb], zero_vel, shape, qb]
+
+    # advance the orientation of strand a
+    w, i, j, k = qa
+    qa = [w*rw - i*ri - j*rj - k*rk,
+          w*ri + i*rw + j*rk - k*rj,
+          w*rj + j*rw + k*ri - i*rk,
+          w*rk + k*rw + i*rj + j*ri]
+
+    # both next positions are derived from the updated strand-a quaternion
+    along_x = qa[0]**2+qa[1]**2-qa[2]**2-qa[3]**2
+    along_y = 2*(qa[1]*qa[2]+qa[0]*qa[3])
+
+    xa = axis_x - dcomh*along_x
+    ya = axis_y - dcomh*along_y
+    za = za + dz
+
+    # advance the orientation of strand b (differs from a in the signs of the rk terms)
+    w, i, j, k = qb
+    qb = [w*rw - i*ri - j*rj + k*rk,
+          w*ri + i*rw - j*rk - k*rj,
+          w*rj + j*rw + k*ri + i*rk,
+          -w*rk + k*rw + i*rj + j*ri]
+
+    xb = axis_x + dcomh*along_x
+    yb = axis_y + dcomh*along_y
+    zb = za
+
+    count = len(nucleotide)
+    if count + 1 > first_id:
+      topology.append([1, count, count + 1])
+      # partner IDs are shifted by the strand length
+      partner_bonds.append([1, count + len(sequence), count + len(sequence) + 1])
+
+    nucleotide.append(rec_a)
+    partner_records.append(rec_b)
+
+  # complementary strand is stored in reverse order
+  nucleotide.extend(reversed(partner_records))
+  topology.extend(partner_bonds)
+
+# definition of array of duplexes  
+def duplex_array():
+  """Append an nx-by-ny array of identical twisted duplexes.
+
+  The specification in the global inp[1] is split on ':' into 'nx,ny',
+  the starting z-coordinate, the twist angle per base pair and the base
+  sequence of the first strand.  The x and y box extents (lxmin..lxmax,
+  lymin..lymax) are divided into nx*ny cells and one duplex is built
+  with its helix axis at the centre of every cell.  For each duplex the
+  first strand is appended to `nucleotide` in sequence order, followed
+  by the complementary strand in reverse order; bonds go to `topology`.
+
+  Indentation uses spaces only: the previous mix of tabs and spaces in
+  the innermost loop is rejected by Python 3 with a TabError.
+  """
+  strand = inp[1].split(':')
+  number = strand[0].split(',')
+  posz1_0 = float(strand[1])
+  twist = float(strand[2])
+
+  nx = int(number[0])
+  ny = int(number[1])
+
+  # cell size of the duplex grid in x and y
+  dx = (lxmax-lxmin)/nx
+  dy = (lymax-lymin)/ny
+
+  risez = math.sqrt(r0**2-4.0*math.sin(0.5*twist)**2)
+  dcomh = 0.76
+
+  # rotation by `twist` about z applied between consecutive base pairs;
+  # it is the same for every duplex, so compute it once
+  qrot0 = math.cos(0.5*twist)
+  qrot1 = 0
+  qrot2 = 0
+  qrot3 = math.sin(0.5*twist)
+
+  for ix in range(nx):
+
+    axisx = lxmin + dx/2 + ix * dx
+
+    for iy in range(ny):
+
+      axisy = lymin + dy/2 + iy * dy
+
+      compstrand = []
+      comptopo = []
+
+      # the two strands start on opposite sides of the helix axis
+      posx1 = axisx - dcomh
+      posy1 = axisy
+      posz1 = posz1_0
+
+      posx2 = axisx + dcomh
+      posy2 = posy1
+      posz2 = posz1
+
+      strandstart = len(nucleotide)+1
+      quat1 = [1,0,0,0]
+      quat2 = [0,0,-1,0]
+
+      for letter in strand[3]:
+        temp1 = []
+        temp2 = []
+
+        temp1.append(nt2num[letter])
+        temp2.append(compnt2num[letter])
+
+        temp1.append([posx1,posy1,posz1])
+        temp2.append([posx2,posy2,posz2])
+
+        vel = [0,0,0,0,0,0]
+        temp1.append(vel)
+        temp2.append(vel)
+
+        temp1.append(shape)
+        temp2.append(shape)
+
+        temp1.append(quat1)
+        temp2.append(quat2)
+
+        # advance the orientation of the first strand
+        quat1_0 = quat1[0]*qrot0 - quat1[1]*qrot1 - quat1[2]*qrot2 - quat1[3]*qrot3
+        quat1_1 = quat1[0]*qrot1 + quat1[1]*qrot0 + quat1[2]*qrot3 - quat1[3]*qrot2
+        quat1_2 = quat1[0]*qrot2 + quat1[2]*qrot0 + quat1[3]*qrot1 - quat1[1]*qrot3
+        quat1_3 = quat1[0]*qrot3 + quat1[3]*qrot0 + quat1[1]*qrot2 + quat1[2]*qrot1
+
+        quat1 = [quat1_0,quat1_1,quat1_2,quat1_3]
+
+        posx1 = axisx - dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
+        posy1 = axisy - dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
+        posz1 = posz1+risez
+
+        # advance the orientation of the complementary strand
+        quat2_0 = quat2[0]*qrot0 - quat2[1]*qrot1 - quat2[2]*qrot2 + quat2[3]*qrot3
+        quat2_1 = quat2[0]*qrot1 + quat2[1]*qrot0 - quat2[2]*qrot3 - quat2[3]*qrot2
+        quat2_2 = quat2[0]*qrot2 + quat2[2]*qrot0 + quat2[3]*qrot1 + quat2[1]*qrot3
+        quat2_3 =-quat2[0]*qrot3 + quat2[3]*qrot0 + quat2[1]*qrot2 + quat2[2]*qrot1
+
+        quat2 = [quat2_0,quat2_1,quat2_2,quat2_3]
+
+        # partner position mirrors the first strand about the helix axis,
+        # hence it is computed from the updated quat1
+        posx2 = axisx + dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
+        posy2 = axisy + dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
+        posz2 = posz1
+
+        if (len(nucleotide)+1 > strandstart):
+          topology.append([1,len(nucleotide),len(nucleotide)+1])
+          # partner IDs are shifted by the strand length
+          comptopo.append([1,len(nucleotide)+len(strand[3]),len(nucleotide)+len(strand[3])+1])
+
+        nucleotide.append(temp1)
+        compstrand.append(temp2)
+
+      # complementary strand is stored in reverse order
+      for ib in range(len(compstrand)):
+        nucleotide.append(compstrand[len(compstrand)-1-ib])
+
+      for ib in range(len(comptopo)):
+        topology.append(comptopo[ib])
+
+  return
+
+# main part
+# base letter -> atom type, for a strand and for its complementary strand
+nt2num = {'A':1, 'C':2, 'G':3, 'T':4}
+compnt2num = {'T':1, 'G':2, 'C':3, 'A':4}
+# ellipsoid shape triple written for every nucleotide
+shape = [1.1739845031423408,1.1739845031423408,1.1739845031423408]
+
+# filled by the builder functions:
+#   nucleotide: one [type, position, velocity, shape, quaternion] record per atom
+#   topology:   one [bond type, atom-ID, atom-ID] entry per bond
+nucleotide=[]
+topology=[]
+
+# keyword at the start of a sequence-file line -> builder function;
+# lines with any other keyword are ignored
+builders = {
+  'single': single,
+  'single_helix': single_helix,
+  'duplex': duplex,
+  'duplex_array': duplex_array,
+}
+
+# process sequence file (first command-line argument) line by line;
+# the builders read the current line from the global `inp`
+with open(sys.argv[1],'r') as seqfile:
+  for line in seqfile:
+
+    inp = line.split()
+    # blank lines carry no keyword; skip them instead of failing on inp[0]
+    if not inp:
+      continue
+
+    build = builders.get(inp[0])
+    if build is not None:
+      build()
+
+# output atom data in LAMMPS format (second command-line argument)
+with open(sys.argv[2],'w') as out:
+
+  out.write('# LAMMPS data file\n')
+  out.write('%d atoms\n' % len(nucleotide))
+  out.write('%d ellipsoids\n' % len(nucleotide))
+  out.write('%d bonds\n' % len(topology))
+  out.write('\n')
+  out.write('4 atom types\n')
+  out.write('1 bond types\n')
+  out.write('\n')
+  out.write('# System size\n')
+  out.write('%f %f xlo xhi\n' % (lxmin,lxmax))
+  out.write('%f %f ylo yhi\n' % (lymin,lymax))
+  out.write('%f %f zlo zhi\n' % (lzmin,lzmax))
+  out.write('\n')
+  out.write('Masses\n')
+  out.write('\n')
+  out.write('1 3.1575\n')
+  out.write('2 3.1575\n')
+  out.write('3 3.1575\n')
+  out.write('4 3.1575\n')
+
+  out.write('\n')
+  out.write('# Atom-ID, type, position, molecule-ID, ellipsoid flag, density\n')
+  out.write('Atoms\n')
+  out.write('\n')
+  for ib in range(len(nucleotide)):
+    out.write("%d %d %22.16le %22.16le %22.16le 1 1 1\n" % (ib+1,nucleotide[ib][0],nucleotide[ib][1][0],nucleotide[ib][1][1],nucleotide[ib][1][2]))
+
+  out.write('\n')
+  out.write('# Atom-ID, translational, rotational velocity\n')
+  out.write('Velocities\n')
+  out.write('\n')
+  for ib in range(len(nucleotide)):
+    out.write("%d %22.16le %22.16le %22.16le %22.16le %22.16le %22.16le\n" % (ib+1,nucleotide[ib][2][0],nucleotide[ib][2][1],nucleotide[ib][2][2],nucleotide[ib][2][3],nucleotide[ib][2][4],nucleotide[ib][2][5]))
+
+  out.write('\n')
+  out.write('# Atom-ID, shape, quaternion\n')
+  out.write('Ellipsoids\n')
+  out.write('\n')
+  for ib in range(len(nucleotide)):
+    out.write("%d %22.16le %22.16le %22.16le %22.16le %22.16le %22.16le %22.16le\n" % (ib+1,nucleotide[ib][3][0],nucleotide[ib][3][1],nucleotide[ib][3][2],nucleotide[ib][4][0],nucleotide[ib][4][1],nucleotide[ib][4][2],nucleotide[ib][4][3]))
+
+  out.write('\n')
+  out.write('# Bond topology\n')
+  out.write('Bonds\n')
+  out.write('\n')
+  for ib in range(len(topology)):
+    out.write("%d %d %d %d\n" % (ib+1,topology[ib][0],topology[ib][1],topology[ib][2]))
+
+sys.exit(0)
+
+
--- a/examples/USER/cgdna/util/input.ref
+++ b/examples/USER/cgdna/util/input.ref
@ -0,0 +1,77 @@
+variable number	equal 8
+variable ofreq	equal 1000
+variable efreq	equal 1000
+
+units lj
+
+dimension 3
+
+newton off
+
+processors 1 1 1
+
+boundary  p p p
+
+atom_style hybrid bond ellipsoid
+atom_modify sort 0 1.0
+
+# Pair interactions require lists of neighbours to be calculated
+neighbor 1.0 bin
+neigh_modify every 1 delay 0 check yes
+
+read_data data.duplex2
+
+set atom * mass 3.1575
+
+group all type 1 4
+
+# oxDNA bond interactions - FENE backbone
+bond_style oxdna_fene
+bond_coeff * 2.0 0.25 0.7525
+
+# oxDNA pair interactions
+pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
+pair_coeff * * oxdna_excv   2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
+pair_coeff * * oxdna_stk    1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65   
+pair_coeff * * oxdna_hbond  0.0   8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 1 4 oxdna_hbond  1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff 2 3 oxdna_hbond  1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
+pair_coeff * * oxdna_xstk   47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68 
+pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65
+
+# NVE ensemble
+#fix 1 all   nve/dotc/langevin 0.1 0.1 0.03 457145 angmom 10
+fix 1 all   nve/dot
+
+timestep 1e-5 
+
+#comm_style tiled
+#fix 3 all balance 10000 1.1 rcb
+
+#compute mol all chunk/atom molecule
+#compute mychunk all vcm/chunk mol
+#fix 4 all ave/time 10000 1 10000 c_mychunk[1] c_mychunk[2] c_mychunk[3] file vcm.txt mode vector
+
+#dump pos all xyz ${ofreq} traj.${number}.xyz
+
+#compute quat all property/atom quatw quati quatj quatk
+#dump quat all custom ${ofreq} quat.${number}.txt id c_quat[1] c_quat[2] c_quat[3] c_quat[4]
+#dump_modify quat sort id
+#dump_modify quat format line "%d  %13.6le  %13.6le  %13.6le  %13.6le"
+
+compute erot all erotate/asphere
+compute ekin all ke
+compute epot all pe
+variable erot equal c_erot
+variable ekin equal c_ekin
+variable epot equal c_epot
+variable etot equal c_erot+c_ekin+c_epot
+fix 5 all print ${efreq} "$(step)  ekin = ${ekin} |  erot = ${erot} | epot = ${epot} | etot = ${etot}" screen yes
+
+dump out all custom ${ofreq} out.${number}.txt id x y z vx vy vz fx fy fz tqx tqy tqz
+dump_modify out sort id
+dump_modify out format line "%d   %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le  %13.6le %13.6le %13.6le"
+
+run 1000000
+
+#write_restart config.${number}.*
--- a/examples/USER/cgdna/util/sequence.txt
+++ b/examples/USER/cgdna/util/sequence.txt
@ -0,0 +1,4 @@
+single 0,0,0:0.6:AAAAA
+single_helix 0,0,0:0.6:AAAAA
+duplex 0,0,0:0.6:AAAAA
+duplex_array 10,10:-112.0:0.6:AAAAA
--- a/examples/USER/dpd/dpdh-shardlow/in.dpdh-shardlow
+++ b/examples/USER/dpd/dpdh-shardlow/in.dpdh-shardlow
@ -18,7 +18,7 @@ neigh_modify    every 1 delay 0 check no once no
 timestep        0.001

 compute         dpdU all dpd
-variable        totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[1]+press*vol
+variable        totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[2]+press*vol

 thermo          1
 thermo_style    custom step temp press vol pe ke v_totEnergy cella cellb cellc
--- a/examples/USER/dpd/dpdh-shardlow/log.dpdh-shardlow.reference
+++ b/examples/USER/dpd/dpdh-shardlow/log.dpdh-shardlow.reference
@ -22,7 +22,7 @@ neigh_modify    every 1 delay 0 check no once no
 timestep        0.001

 compute         dpdU all dpd
-variable        totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[1]+press*vol
+variable        totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[2]+press*vol

 thermo          1
 thermo_style    custom step temp press vol pe ke v_totEnergy cella cellb cellc
@ -34,129 +34,137 @@ fix             2 all eos/cv 0.0005

 run             100
 Neighbor list info ...
-  1 neighbor list requests
  update every 1 steps, delay 0 steps, check no
  max neighbors/atom: 2000, page size: 100000
  master list distance cutoff = 12
  ghost atom cutoff = 12
-  binsize = 6 -> bins = 22 22 22
-Memory usage per processor = 6.48143 Mbytes
+  binsize = 6, bins = 22 22 22
+  2 neighbor lists, perpetual/occasional/extra = 2 0 0
+  (1) pair dpd/fdt/energy, perpetual
+      pair build: half/bin/newton
+      stencil: half/bin/3d/newton
+      bin: standard
+  (2) fix shardlow, perpetual, ssa
+      pair build: half/bin/newton/ssa
+      stencil: half/bin/3d/newton/ssa
+      bin: ssa
+Memory usage per processor = 8.55503 Mbytes
 Step Temp Press Volume PotEng KinEng v_totEnergy Cella Cellb Cellc 
-       0  239.4274282976 2817.4421750949 2146689.0000000000 2639.8225470740  313.3218455755 6048176597.3066043854  129.0000000000  129.0000000000  129.0000000000 
-       1  239.4771405316 2817.4798146419 2146689.0000581890 2639.8304543632  313.3869004818 6048257397.9450111389  129.0000000012  129.0000000012  129.0000000012 
-       2  239.5643955010 2817.5423194969 2146689.0002327557 2639.8379071907  313.5010849268 6048391577.0431985855  129.0000000047  129.0000000047  129.0000000047 
-       3  239.6633839196 2817.6123662396 2146689.0005237064 2639.8445238058  313.6306241122 6048541946.5712032318  129.0000000105  129.0000000105  129.0000000105 
-       4  239.5371222027 2817.5355424336 2146689.0009310376 2639.8505035043  313.4653942786 6048377030.7404460907  129.0000000186  129.0000000186  129.0000000186 
-       5  239.6512678169 2817.6153097076 2146689.0014547524 2639.8561498340  313.6147686202 6048548267.9007377625  129.0000000291  129.0000000291  129.0000000291 
-       6  239.5617886781 2817.5624195435 2146689.0020948485 2639.8617493725  313.4976735610 6048434730.8592004776  129.0000000420  129.0000000420  129.0000000420 
-       7  239.5228587856 2817.5420009502 2146689.0028513218 2639.8666590407  313.4467287471 6048390900.5748577118  129.0000000571  129.0000000571  129.0000000571 
-       8  239.6066877934 2817.6008649264 2146689.0037241788 2639.8710757645  313.5564298772 6048517265.7987136841  129.0000000746  129.0000000746  129.0000000746 
-       9  239.5719861485 2817.5823530300 2146689.0047134170 2639.8752557893  313.5110182737 6048477529.2603597641  129.0000000944  129.0000000944  129.0000000944 
-      10  239.5800176776 2817.5915671176 2146689.0058190385 2639.8793778438  313.5215285712 6048497312.1706552505  129.0000001166  129.0000001166  129.0000001166 
-      11  239.6299830954 2817.6281223139 2146689.0070410441 2639.8829762049  313.5869148014 6048575788.3208351135  129.0000001410  129.0000001410  129.0000001410 
-      12  239.6011995911 2817.6132377273 2146689.0083794324 2639.8860704236  313.5492478526 6048543839.4788360596  129.0000001678  129.0000001678  129.0000001678 
-      13  239.6407681166 2817.6427924824 2146689.0098342048 2639.8889816934  313.6010284005 6048607288.5005025864  129.0000001970  129.0000001970  129.0000001970 
-      14  239.6981172055 2817.6844100046 2146689.0114053637 2639.8913405110  313.6760771219 6048696632.8825626373  129.0000002285  129.0000002285  129.0000002285 
-      15  239.8563971968 2817.7922519039 2146689.0130929090 2639.8934358481  313.8832070208 6048928140.8671455383  129.0000002623  129.0000002623  129.0000002623 
-      16  239.8561894618 2817.7971208197 2146689.0148968464 2639.8950496967  313.8829351726 6048938597.9994916916  129.0000002984  129.0000002984  129.0000002984 
-      17  239.8816520361 2817.8185621543 2146689.0168171758 2639.8961257823  313.9162562538 6048984631.3226108551  129.0000003369  129.0000003369  129.0000003369 
-      18  239.9099966096 2817.8417368960 2146689.0188538977 2639.8965743204  313.9533488047 6049034386.0627622604  129.0000003777  129.0000003777  129.0000003777 
-      19  240.0514024347 2817.9389205774 2146689.0210070144 2639.8966103811  314.1383966683 6049243015.4568052292  129.0000004208  129.0000004208  129.0000004208 
-      20  239.8802541140 2817.8327386176 2146689.0232765260 2639.8962085210  313.9144268914 6049015081.9802341461  129.0000004662  129.0000004662  129.0000004662 
-      21  239.8462621903 2817.8160306167 2146689.0256624296 2639.8953174755  313.8699440502 6048979221.7758703232  129.0000005140  129.0000005140  129.0000005140 
-      22  240.0487944678 2817.9533849157 2146689.0281647225 2639.8938590354  314.1349838054 6049274086.0571212769  129.0000005642  129.0000005642  129.0000005642 
-      23  240.0966314441 2817.9897873787 2146689.0307834130 2639.8918104774  314.1975846937 6049352238.2649183273  129.0000006166  129.0000006166  129.0000006166 
-      24  240.1765312516 2818.0463843765 2146689.0335185044 2639.8891292321  314.3021439554 6049473742.2287187576  129.0000006714  129.0000006714  129.0000006714 
-      25  240.1500705973 2818.0336048048 2146689.0363699966 2639.8858785483  314.2675167572 6049446316.4600162506  129.0000007285  129.0000007285  129.0000007285 
-      26  240.2681423500 2818.1151708195 2146689.0393378921 2639.8825176506  314.4220289603 6049621421.8445177078  129.0000007880  129.0000007880  129.0000007880 
-      27  240.4728815247 2818.2527327079 2146689.0424221945 2639.8784158747  314.6899567267 6049916733.3989181519  129.0000008498  129.0000008498  129.0000008498 
-      28  240.4793027032 2818.2613348477 2146689.0456229053 2639.8736089473  314.6983596717 6049935208.5421981812  129.0000009139  129.0000009139  129.0000009139 
-      29  240.5020619198 2818.2805472685 2146689.0489400285 2639.8681043704  314.7281430587 6049976461.0082206726  129.0000009803  129.0000009803  129.0000009803 
-      30  240.5513721776 2818.3167157263 2146689.0523735629 2639.8623484053  314.7926719270 6050054113.1760177612  129.0000010491  129.0000010491  129.0000010491 
-      31  240.7340393104 2818.4391703712 2146689.0559235099 2639.8563442170  315.0317155636 6050316995.4599781036  129.0000011202  129.0000011202  129.0000011202 
-      32  240.8254719483 2818.5014640740 2146689.0595898777 2639.8498122053  315.1513670299 6050450731.1168394089  129.0000011936  129.0000011936  129.0000011936 
-      33  240.9681573541 2818.5965480750 2146689.0633726656 2639.8425779528  315.3380893908 6050654857.7432861328  129.0000012694  129.0000012694  129.0000012694 
-      34  241.0039494187 2818.6217008564 2146689.0672718794 2639.8347174393  315.3849279499 6050708863.9733209610  129.0000013475  129.0000013475  129.0000013475 
-      35  241.0314566197 2818.6411150538 2146689.0712875174 2639.8262983643  315.4209246902 6050750551.5649127960  129.0000014279  129.0000014279  129.0000014279 
-      36  241.0829173424 2818.6763455617 2146689.0754195810 2639.8174397481  315.4882677207 6050826192.2165899277  129.0000015107  129.0000015107  129.0000015107 
-      37  241.2845682012 2818.8087982181 2146689.0796680767 2639.8080129872  315.7521540252 6051110539.1171846390  129.0000015958  129.0000015958  129.0000015958 
-      38  241.3214712920 2818.8336260248 2146689.0840330068 2639.7981963574  315.8004465062 6051163849.0412235260  129.0000016833  129.0000016833  129.0000016833 
-      39  241.3392127125 2818.8456991528 2146689.0885143690 2639.7879618658  315.8236634561 6051189778.9386901855  129.0000017730  129.0000017730  129.0000017730 
-      40  241.5383770555 2818.9753950055 2146689.0931121684 2639.7769824244  316.0842958321 6051468208.8210506439  129.0000018651  129.0000018651  129.0000018651 
-      41  241.5059730674 2818.9543817992 2146689.0978264087 2639.7656512498  316.0418910106 6051423113.2358427048  129.0000019595  129.0000019595  129.0000019595 
-      42  241.3907605672 2818.8793800508 2146689.1026570834 2639.7541331920  315.8911205101 6051262121.2551422119  129.0000020563  129.0000020563  129.0000020563 
-      43  241.5095917610 2818.9559595711 2146689.1076041958 2639.7424355740  316.0466265406 6051426527.7663059235  129.0000021554  129.0000021554  129.0000021554 
-      44  241.6271631762 2819.0312325531 2146689.1126677482 2639.7297705654  316.2004839873 6051588129.8722610474  129.0000022568  129.0000022568  129.0000022568 
-      45  241.5702411838 2818.9923790176 2146689.1178477411 2639.7163554760  316.1259941770 6051504737.9250564575  129.0000023606  129.0000023606  129.0000023606 
-      46  241.7029985068 2819.0771124986 2146689.1231441777 2639.7024246704  316.2997243538 6051686649.4576120377  129.0000024667  129.0000024667  129.0000024667 
-      47  241.7966144965 2819.1357830868 2146689.1285570571 2639.6882106593  316.4222330191 6051812612.3391046524  129.0000025751  129.0000025751  129.0000025751 
-      48  241.8573480255 2819.1726205120 2146689.1340863821 2639.6735287925  316.5017107195 6051891706.4921989441  129.0000026859  129.0000026859  129.0000026859 
-      49  241.9611147338 2819.2374095379 2146689.1397321564 2639.6583357477  316.6375029166 6052030804.4275226593  129.0000027990  129.0000027990  129.0000027990 
-      50  242.1023518806 2819.3259059811 2146689.1454943856 2639.6424863169  316.8223300428 6052220795.1955394745  129.0000029144  129.0000029144  129.0000029144 
-      51  242.1174105473 2819.3319633044 2146689.1513730693 2639.6264141131  316.8420362613 6052233814.9634265900  129.0000030321  129.0000030321  129.0000030321 
-      52  242.2534914901 2819.4164594322 2146689.1573682069 2639.6098392670  317.0201158259 6052415218.9485445023  129.0000031522  129.0000031522  129.0000031522 
-      53  242.3504633236 2819.4754119996 2146689.1634798055 2639.5930076506  317.1470160479 6052541789.1274013519  129.0000032746  129.0000032746  129.0000032746 
-      54  242.2982323323 2819.4368568264 2146689.1697078613 2639.5756353782  317.0786650211 6052459040.6286897659  129.0000033994  129.0000033994  129.0000033994 
-      55  242.3452896272 2819.4623310219 2146689.1760523771 2639.5575918586  317.1402455951 6052513743.7400159836  129.0000035265  129.0000035265  129.0000035265 
-      56  242.4181903333 2819.5048897011 2146689.1825133534 2639.5390347547  317.2356456249 6052605122.2894439697  129.0000036559  129.0000036559  129.0000036559 
-      57  242.5317091656 2819.5739975787 2146689.1890907930 2639.5199828249  317.3841997413 6052753494.0979280472  129.0000037876  129.0000037876  129.0000037876 
-      58  242.5478978740 2819.5796954935 2146689.1957846982 2639.5006137388  317.4053847660 6052765744.6257629395  129.0000039217  129.0000039217  129.0000039217 
-      59  242.6655316466 2819.6519225743 2146689.2025950695 2639.4808234811  317.5593238156 6052920813.0568208694  129.0000040582  129.0000040582  129.0000040582 
-      60  242.8126131177 2819.7431588157 2146689.2095219092 2639.4607996998  317.7517989980 6053116688.6155729294  129.0000041969  129.0000041969  129.0000041969 
-      61  242.7957124913 2819.7275989047 2146689.2165652174 2639.4406312730  317.7296823362 6053083306.1403274536  129.0000043380  129.0000043380  129.0000043380 
-      62  242.9276177041 2819.8088790098 2146689.2237249981 2639.4201279058  317.9022974164 6053257809.6067762375  129.0000044814  129.0000044814  129.0000044814 
-      63  243.0465445938 2819.8814758895 2146689.2310012528 2639.3991657500  318.0579286774 6053413673.1989650726  129.0000046272  129.0000046272  129.0000046272 
-      64  242.9890585501 2819.8387587817 2146689.2383939880 2639.3781767844  317.9827007328 6053321993.5937871933  129.0000047752  129.0000047752  129.0000047752 
-      65  242.9653746583 2819.8180104181 2146689.2459031967 2639.3568184374  317.9517072884 6053277474.4272727966  129.0000049256  129.0000049256  129.0000049256 
-      66  243.0259297024 2819.8514334947 2146689.2535288804 2639.3352568621  318.0309514181 6053349244.9473772049  129.0000050784  129.0000050784  129.0000050784 
-      67  242.9638979697 2819.8046112742 2146689.2612710390 2639.3134547096  317.9497748498 6053248753.9180717468  129.0000052335  129.0000052335  129.0000052335 
-      68  243.0283540775 2819.8395632725 2146689.2691296688 2639.2912303374  318.0341240273 6053323807.2197017670  129.0000053909  129.0000053909  129.0000053909 
-      69  243.2256418664 2819.9609646019 2146689.2771047787 2639.2684509205  318.2923006889 6053584440.8757400513  129.0000055506  129.0000055506  129.0000055506 
-      70  243.2507495334 2819.9706145524 2146689.2851963686 2639.2450126010  318.3251573278 6053605179.1483964920  129.0000057127  129.0000057127  129.0000057127 
-      71  243.4287155518 2820.0794853386 2146689.2934044413 2639.2213699915  318.5580489464 6053838914.2552747726  129.0000058771  129.0000058771  129.0000058771 
-      72  243.5097518574 2820.1249498194 2146689.3017290002 2639.1971212009  318.6640954635 6053936535.9274711609  129.0000060439  129.0000060439  129.0000060439 
-      73  243.5356790969 2820.1337977544 2146689.3101700447 2639.1723394661  318.6980246193 6053955553.5090074539  129.0000062130  129.0000062130  129.0000062130 
-      74  243.5479180498 2820.1331964183 2146689.3187275808 2639.1473868749  318.7140408766 6053954286.7515821457  129.0000063844  129.0000063844  129.0000063844 
-      75  243.7115573025 2820.2314361523 2146689.3274016059 2639.1220411207  318.9281840641 6054165201.5909118652  129.0000065581  129.0000065581  129.0000065581 
-      76  243.7457279618 2820.2454531429 2146689.3361921217 2639.0963868224  318.9729008040 6054195316.5254154205  129.0000067342  129.0000067342  129.0000067342 
-      77  243.8345031069 2820.2948644965 2146689.3450991292 2639.0700900389  319.0890745962 6054301412.5615310669  129.0000069126  129.0000069126  129.0000069126 
-      78  244.0193931195 2820.4067881628 2146689.3541226317 2639.0435094409  319.3310271594 6054541703.5689058304  129.0000070934  129.0000070934  129.0000070934 
-      79  243.9919100078 2820.3799166166 2146689.3632626338 2639.0164249037  319.2950619430 6054484044.4218587875  129.0000072765  129.0000072765  129.0000072765 
-      80  244.0965612207 2820.4387335935 2146689.3725191355 2638.9888176882  319.4320116291 6054610332.4174261093  129.0000074619  129.0000074619  129.0000074619 
-      81  244.1334315951 2820.4535208568 2146689.3818921377 2638.9608330195  319.4802612965 6054642102.5347270966  129.0000076496  129.0000076496  129.0000076496 
-      82  244.3029520408 2820.5543485196 2146689.3913816395 2638.9318525796  319.7021007878 6054858575.1664342880  129.0000078397  129.0000078397  129.0000078397 
-      83  244.3445761189 2820.5713690935 2146689.4009876498 2638.9021684795  319.7565712929 6054895140.1710596085  129.0000080321  129.0000080321  129.0000080321 
-      84  244.2696671559 2820.5125763350 2146689.4107101629 2638.8720941742  319.6585431986 6054768957.6739044189  129.0000082269  129.0000082269  129.0000082269 
-      85  244.5161919319 2820.6629431352 2146689.4205491822 2638.8415194387  319.9811528443 6055091776.5361995697  129.0000084240  129.0000084240  129.0000084240 
-      86  244.5641090282 2820.6838080201 2146689.4305047127 2638.8103612394  320.0438585800 6055136595.0767974854  129.0000086234  129.0000086234  129.0000086234 
-      87  244.5348240638 2820.6541129118 2146689.4405767513 2638.7789728309  320.0055354056 6055072877.2416200638  129.0000088251  129.0000088251  129.0000088251 
-      88  244.6939431427 2820.7468233396 2146689.4507653015 2638.7470269267  320.2137633592 6055271926.6536149979  129.0000090292  129.0000090292  129.0000090292 
-      89  244.8800201091 2820.8567117003 2146689.4610703662 2638.7147520097  320.4572692055 6055507852.1186332703  129.0000092356  129.0000092356  129.0000092356 
-      90  244.8804280382 2820.8451141876 2146689.4714919478 2638.6820441173  320.4578030336 6055482985.2258749008  129.0000094444  129.0000094444  129.0000094444 
-      91  244.9558851986 2820.8815975090 2146689.4820300462 2638.6491836104  320.5565485155 6055561333.3803453445  129.0000096555  129.0000096555  129.0000096555 
-      92  244.9965893140 2820.8949614294 2146689.4926846647 2638.6159817170  320.6098151301 6055590051.6433181763  129.0000098689  129.0000098689  129.0000098689 
-      93  245.1381056687 2820.9732811388 2146689.5034558061 2638.5824451870  320.7950076360 6055758210.2774200439  129.0000100846  129.0000100846  129.0000100846 
-      94  245.2954807041 2821.0619342131 2146689.5143434699 2638.5485198222  321.0009532826 6055948551.7882709503  129.0000103027  129.0000103027  129.0000103027 
-      95  245.3535822199 2821.0860553731 2146689.5253476589 2638.5144817512  321.0769866522 6056000363.5151576996  129.0000105232  129.0000105232  129.0000105232 
-      96  245.5013476026 2821.1682908185 2146689.5364683764 2638.4801107361  321.2703568219 6056176929.0169925690  129.0000107459  129.0000107459  129.0000107459 
-      97  245.4166531417 2821.0989038023 2146689.5477056229 2638.4453663061  321.1595231342 6056028008.1910057068  129.0000109710  129.0000109710  129.0000109710 
-      98  245.4121937790 2821.0817490953 2146689.5590593945 2638.4097762390  321.1536874797 6055991214.3494396210  129.0000111984  129.0000111984  129.0000111984 
-      99  245.4532592994 2821.0946353191 2146689.5705296928 2638.3738037546  321.2074270397 6056018909.4480972290  129.0000114282  129.0000114282  129.0000114282 
-     100  245.7500657390 2821.2735939427 2146689.5821165247 2638.3375549051  321.5958367642 6056403111.1006488800  129.0000116603  129.0000116603  129.0000116603 
-Loop time of 4.05006 on 1 procs for 100 steps with 10125 atoms
+       0  239.4274282976 2817.4421750949 2146689.0000000000 2639.8225470740  313.3218455755 6048176597.3066034317  129.0000000000  129.0000000000  129.0000000000 
+       1  239.4771405316 2817.4798146419 2146689.0000581890 2639.8304543632  313.3869004818 6048257397.8720483780  129.0000000012  129.0000000012  129.0000000012 
+       2  239.5643955010 2817.5423194969 2146689.0002327557 2639.8379071907  313.5010849268 6048391576.8485937119  129.0000000047  129.0000000047  129.0000000047 
+       3  239.6633839196 2817.6123662396 2146689.0005237064 2639.8445238058  313.6306241122 6048541946.2404479980  129.0000000105  129.0000000105  129.0000000105 
+       4  239.5371222027 2817.5355424336 2146689.0009310376 2639.8505035043  313.4653942786 6048377030.5689325333  129.0000000186  129.0000000186  129.0000000186 
+       5  239.6512678169 2817.6153097076 2146689.0014547524 2639.8561498340  313.6147686202 6048548267.5742130280  129.0000000291  129.0000000291  129.0000000291 
+       6  239.5617886781 2817.5624195435 2146689.0020948485 2639.8617493725  313.4976735610 6048434730.6441593170  129.0000000420  129.0000000420  129.0000000420 
+       7  239.5228587856 2817.5420009502 2146689.0028513218 2639.8666590407  313.4467287471 6048390900.4058599472  129.0000000571  129.0000000571  129.0000000571 
+       8  239.6066877934 2817.6008649264 2146689.0037241788 2639.8710757645  313.5564298772 6048517265.5155982971  129.0000000746  129.0000000746  129.0000000746 
+       9  239.5719861485 2817.5823530300 2146689.0047134170 2639.8752557893  313.5110182737 6048477529.0184717178  129.0000000944  129.0000000944  129.0000000944 
+      10  239.5800176776 2817.5915671176 2146689.0058190385 2639.8793778438  313.5215285712 6048497311.9141387939  129.0000001166  129.0000001166  129.0000001166 
+      11  239.6299830954 2817.6281223139 2146689.0070410441 2639.8829762049  313.5869148014 6048575787.9953098297  129.0000001410  129.0000001410  129.0000001410 
+      12  239.6011995911 2817.6132377273 2146689.0083794324 2639.8860704236  313.5492478526 6048543839.1878814697  129.0000001678  129.0000001678  129.0000001678 
+      13  239.6407681166 2817.6427924824 2146689.0098342048 2639.8889816934  313.6010284005 6048607288.1548709869  129.0000001970  129.0000001970  129.0000001970 
+      14  239.6981172055 2817.6844100046 2146689.0114053637 2639.8913405110  313.6760771219 6048696632.4595127106  129.0000002285  129.0000002285  129.0000002285 
+      15  239.8563971968 2817.7922519039 2146689.0130929090 2639.8934358481  313.8832070208 6048928140.2348766327  129.0000002623  129.0000002623  129.0000002623 
+      16  239.8561894618 2817.7971208196 2146689.0148968464 2639.8950496967  313.8829351726 6048938597.3658657074  129.0000002984  129.0000002984  129.0000002984 
+      17  239.8816520361 2817.8185621543 2146689.0168171758 2639.8961257823  313.9162562538 6048984630.6545839310  129.0000003369  129.0000003369  129.0000003369 
+      18  239.9099966096 2817.8417368960 2146689.0188538977 2639.8965743204  313.9533488047 6049034385.3571958542  129.0000003777  129.0000003777  129.0000003777 
+      19  240.0514024347 2817.9389205774 2146689.0210070144 2639.8966103811  314.1383966683 6049243014.5661621094  129.0000004208  129.0000004208  129.0000004208 
+      20  239.8802541140 2817.8327386176 2146689.0232765260 2639.8962085210  313.9144268914 6049015081.3139505386  129.0000004662  129.0000004662  129.0000004662 
+      21  239.8462621903 2817.8160306167 2146689.0256624296 2639.8953174755  313.8699440502 6048979221.1549577713  129.0000005140  129.0000005140  129.0000005140 
+      22  240.0487944678 2817.9533849157 2146689.0281647225 2639.8938590354  314.1349838054 6049274085.1726217270  129.0000005642  129.0000005642  129.0000005642 
+      23  240.0966314441 2817.9897873787 2146689.0307834130 2639.8918104774  314.1975846937 6049352237.3198652267  129.0000006166  129.0000006166  129.0000006166 
+      24  240.1765312516 2818.0463843765 2146689.0335185044 2639.8891292321  314.3021439554 6049473741.1817827225  129.0000006714  129.0000006714  129.0000006714 
+      25  240.1500705973 2818.0336048048 2146689.0363699966 2639.8858785483  314.2675167572 6049446315.4509468079  129.0000007285  129.0000007285  129.0000007285 
+      26  240.2681423500 2818.1151708195 2146689.0393378921 2639.8825176506  314.4220289603 6049621420.6842966080  129.0000007880  129.0000007880  129.0000007880 
+      27  240.4728815247 2818.2527327079 2146689.0424221945 2639.8784158747  314.6899567267 6049916731.9748563766  129.0000008498  129.0000008498  129.0000008498 
+      28  240.4793027032 2818.2613348477 2146689.0456229053 2639.8736089473  314.6983596717 6049935207.1145420074  129.0000009139  129.0000009139  129.0000009139 
+      29  240.5020619198 2818.2805472685 2146689.0489400285 2639.8681043704  314.7281430587 6049976459.5562763214  129.0000009803  129.0000009803  129.0000009803 
+      30  240.5513721776 2818.3167157263 2146689.0523735629 2639.8623484053  314.7926719270 6050054111.6652946472  129.0000010491  129.0000010491  129.0000010491 
+      31  240.7340393104 2818.4391703712 2146689.0559235099 2639.8563442170  315.0317155636 6050316993.7162160873  129.0000011202  129.0000011202  129.0000011202 
+      32  240.8254719483 2818.5014640740 2146689.0595898777 2639.8498122053  315.1513670299 6050450729.2599506378  129.0000011936  129.0000011936  129.0000011936 
+      33  240.9681573541 2818.5965480750 2146689.0633726656 2639.8425779528  315.3380893908 6050654855.7068986893  129.0000012694  129.0000012694  129.0000012694 
+      34  241.0039494187 2818.6217008564 2146689.0672718794 2639.8347174393  315.3849279499 6050708861.8979463577  129.0000013475  129.0000013475  129.0000013475 
+      35  241.0314566197 2818.6411150538 2146689.0712875174 2639.8262983643  315.4209246902 6050750549.4619541168  129.0000014279  129.0000014279  129.0000014279 
+      36  241.0829173424 2818.6763455617 2146689.0754195810 2639.8174397481  315.4882677207 6050826190.0551443100  129.0000015107  129.0000015107  129.0000015107 
+      37  241.2845682012 2818.8087982181 2146689.0796680767 2639.8080129872  315.7521540252 6051110536.7012710571  129.0000015958  129.0000015958  129.0000015958 
+      38  241.3214712920 2818.8336260248 2146689.0840330068 2639.7981963574  315.8004465062 6051163846.5868301392  129.0000016833  129.0000016833  129.0000016833 
+      39  241.3392127125 2818.8456991528 2146689.0885143690 2639.7879618658  315.8236634561 6051189776.4712991714  129.0000017730  129.0000017730  129.0000017730 
+      40  241.5383770555 2818.9753950055 2146689.0931121684 2639.7769824244  316.0842958321 6051468206.1039972305  129.0000018651  129.0000018651  129.0000018651 
+      41  241.5059730674 2818.9543817992 2146689.0978264087 2639.7656512498  316.0418910106 6051423110.5725250244  129.0000019595  129.0000019595  129.0000019595 
+      42  241.3907605672 2818.8793800508 2146689.1026570834 2639.7541331920  315.8911205101 6051262118.7541017532  129.0000020563  129.0000020563  129.0000020563 
+      43  241.5095917610 2818.9559595711 2146689.1076041958 2639.7424355740  316.0466265406 6051426525.1214485168  129.0000021554  129.0000021554  129.0000021554 
+      44  241.6271631762 2819.0312325531 2146689.1126677482 2639.7297705654  316.2004839873 6051588127.0861988068  129.0000022568  129.0000022568  129.0000022568 
+      45  241.5702411838 2818.9923790176 2146689.1178477411 2639.7163554760  316.1259941770 6051504735.2269029617  129.0000023606  129.0000023606  129.0000023606 
+      46  241.7029985068 2819.0771124986 2146689.1231441777 2639.7024246704  316.2997243538 6051686646.5996389389  129.0000024667  129.0000024667  129.0000024667 
+      47  241.7966144965 2819.1357830868 2146689.1285570571 2639.6882106593  316.4222330191 6051812609.3728218079  129.0000025751  129.0000025751  129.0000025751 
+      48  241.8573480255 2819.1726205120 2146689.1340863821 2639.6735287925  316.5017107195 6051891703.4611186981  129.0000026859  129.0000026859  129.0000026859 
+      49  241.9611147338 2819.2374095379 2146689.1397321564 2639.6583357477  316.6375029166 6052030801.2758235931  129.0000027990  129.0000027990  129.0000027990 
+      50  242.1023518806 2819.3259059811 2146689.1454943856 2639.6424863169  316.8223300428 6052220791.8748512268  129.0000029144  129.0000029144  129.0000029144 
+      51  242.1174105473 2819.3319633044 2146689.1513730693 2639.6264141131  316.8420362613 6052233811.6391019821  129.0000030321  129.0000030321  129.0000030321 
+      52  242.2534914901 2819.4164594322 2146689.1573682069 2639.6098392671  317.0201158259 6052415215.4627037048  129.0000031522  129.0000031522  129.0000031522 
+      53  242.3504633236 2819.4754119996 2146689.1634798055 2639.5930076506  317.1470160479 6052541785.5314817429  129.0000032746  129.0000032746  129.0000032746 
+      54  242.2982323323 2819.4368568264 2146689.1697078613 2639.5756353782  317.0786650211 6052459037.1184797287  129.0000033994  129.0000033994  129.0000033994 
+      55  242.3452896272 2819.4623310219 2146689.1760523771 2639.5575918586  317.1402455951 6052513740.1862611771  129.0000035265  129.0000035265  129.0000035265 
+      56  242.4181903333 2819.5048897011 2146689.1825133534 2639.5390347547  317.2356456249 6052605118.6588287354  129.0000036559  129.0000036559  129.0000036559 
+      57  242.5317091656 2819.5739975787 2146689.1890907930 2639.5199828249  317.3841997413 6052753490.3378009796  129.0000037876  129.0000037876  129.0000037876 
+      58  242.5478978740 2819.5796954935 2146689.1957846982 2639.5006137388  317.4053847660 6052765740.8638200760  129.0000039217  129.0000039217  129.0000039217 
+      59  242.6655316466 2819.6519225743 2146689.2025950695 2639.4808234811  317.5593238156 6052920809.1607065201  129.0000040582  129.0000040582  129.0000040582 
+      60  242.8126131177 2819.7431588157 2146689.2095219092 2639.4607996998  317.7517989980 6053116684.5470046997  129.0000041969  129.0000041969  129.0000041969 
+      61  242.7957124913 2819.7275989047 2146689.2165652174 2639.4406312730  317.7296823362 6053083302.1140241623  129.0000043380  129.0000043380  129.0000043380 
+      62  242.9276177041 2819.8088790098 2146689.2237249981 2639.4201279058  317.9022974164 6053257805.4283437729  129.0000044814  129.0000044814  129.0000044814 
+      63  243.0465445938 2819.8814758895 2146689.2310012528 2639.3991657500  318.0579286774 6053413668.8858547211  129.0000046272  129.0000046272  129.0000046272 
+      64  242.9890585501 2819.8387587817 2146689.2383939880 2639.3781767844  317.9827007328 6053321989.3768787384  129.0000047752  129.0000047752  129.0000047752 
+      65  242.9653746583 2819.8180104181 2146689.2459031967 2639.3568184374  317.9517072884 6053277470.2627182007  129.0000049256  129.0000049256  129.0000049256 
+      66  243.0259297024 2819.8514334947 2146689.2535288804 2639.3352568621  318.0309514181 6053349240.7251205444  129.0000050784  129.0000050784  129.0000050784 
+      67  242.9638979697 2819.8046112742 2146689.2612710390 2639.3134547096  317.9497748498 6053248749.7987766266  129.0000052335  129.0000052335  129.0000052335 
+      68  243.0283540775 2819.8395632725 2146689.2691296688 2639.2912303374  318.0341240273 6053323803.0382738113  129.0000053909  129.0000053909  129.0000053909 
+      69  243.2256418664 2819.9609646019 2146689.2771047787 2639.2684509205  318.2923006889 6053584436.4588871002  129.0000055506  129.0000055506  129.0000055506 
+      70  243.2507495334 2819.9706145524 2146689.2851963686 2639.2450126010  318.3251573278 6053605174.7221174240  129.0000057127  129.0000057127  129.0000057127 
+      71  243.4287155518 2820.0794853386 2146689.2934044413 2639.2213699915  318.5580489464 6053838909.6197280884  129.0000058771  129.0000058771  129.0000058771 
+      72  243.5097518574 2820.1249498194 2146689.3017290002 2639.1971212009  318.6640954635 6053936531.2101163864  129.0000060439  129.0000060439  129.0000060439 
+      73  243.5356790969 2820.1337977544 2146689.3101700447 2639.1723394661  318.6980246193 6053955548.7824945450  129.0000062130  129.0000062130  129.0000062130 
+      74  243.5479180498 2820.1331964183 2146689.3187275808 2639.1473868749  318.7140408766 6053954282.0339813232  129.0000063844  129.0000063844  129.0000063844 
+      75  243.7115573025 2820.2314361523 2146689.3274016059 2639.1220411207  318.9281840641 6054165196.6845111847  129.0000065581  129.0000065581  129.0000065581 
+      76  243.7457279618 2820.2454531429 2146689.3361921217 2639.0963868224  318.9729008040 6054195311.5999307632  129.0000067342  129.0000067342  129.0000067342 
+      77  243.8345031069 2820.2948644965 2146689.3450991292 2639.0700900389  319.0890745962 6054301407.5461502075  129.0000069126  129.0000069126  129.0000069126 
+      78  244.0193931195 2820.4067881628 2146689.3541226317 2639.0435094409  319.3310271594 6054541698.3381366730  129.0000070934  129.0000070934  129.0000070934 
+      79  243.9919100078 2820.3799166166 2146689.3632626338 2639.0164249037  319.2950619430 6054484039.2541246414  129.0000072765  129.0000072765  129.0000072765 
+      80  244.0965612207 2820.4387335935 2146689.3725191355 2638.9888176882  319.4320116291 6054610327.1403293610  129.0000074619  129.0000074619  129.0000074619 
+      81  244.1334315951 2820.4535208568 2146689.3818921377 2638.9608330195  319.4802612965 6054642097.2373485565  129.0000076496  129.0000076496  129.0000076496 
+      82  244.3029520408 2820.5543485196 2146689.3913816395 2638.9318525796  319.7021007878 6054858569.6761827469  129.0000078397  129.0000078397  129.0000078397 
+      83  244.3445761189 2820.5713690935 2146689.4009876498 2638.9021684795  319.7565712929 6054895134.6560049057  129.0000080321  129.0000080321  129.0000080321 
+      84  244.2696671559 2820.5125763350 2146689.4107101629 2638.8720941742  319.6585431986 6054768952.2869329453  129.0000082269  129.0000082269  129.0000082269 
+      85  244.5161919319 2820.6629431352 2146689.4205491822 2638.8415194387  319.9811528443 6055091770.8571672440  129.0000084240  129.0000084240  129.0000084240 
+      86  244.5641090282 2820.6838080201 2146689.4305047127 2638.8103612394  320.0438585800 6055136589.3662166595  129.0000086234  129.0000086234  129.0000086234 
+      87  244.5348240638 2820.6541129118 2146689.4405767513 2638.7789728309  320.0055354056 6055072871.6007261276  129.0000088251  129.0000088251  129.0000088251 
+      88  244.6939431427 2820.7468233396 2146689.4507653015 2638.7470269267  320.2137633592 6055271920.8364210129  129.0000090292  129.0000090292  129.0000090292 
+      89  244.8800201091 2820.8567117003 2146689.4610703662 2638.7147520097  320.4572692055 6055507846.0901927948  129.0000092356  129.0000092356  129.0000092356 
+      90  244.8804280382 2820.8451141876 2146689.4714919478 2638.6820441173  320.4578030336 6055482979.2295818329  129.0000094444  129.0000094444  129.0000094444 
+      91  244.9558851986 2820.8815975090 2146689.4820300462 2638.6491836104  320.5565485155 6055561327.3181543350  129.0000096555  129.0000096555  129.0000096555 
+      92  244.9965893140 2820.8949614294 2146689.4926846647 2638.6159817170  320.6098151301 6055590045.5610351562  129.0000098689  129.0000098689  129.0000098689 
+      93  245.1381056687 2820.9732811388 2146689.5034558061 2638.5824451870  320.7950076360 6055758204.0434722900  129.0000100846  129.0000100846  129.0000100846 
+      94  245.2954807041 2821.0619342131 2146689.5143434699 2638.5485198222  321.0009532826 6055948545.3822879791  129.0000103027  129.0000103027  129.0000103027 
+      95  245.3535822199 2821.0860553731 2146689.5253476589 2638.5144817512  321.0769866522 6056000357.0671482086  129.0000105232  129.0000105232  129.0000105232 
+      96  245.5013476026 2821.1682908185 2146689.5364683764 2638.4801107361  321.2703568219 6056176922.4099712372  129.0000107459  129.0000107459  129.0000107459 
+      97  245.4166531417 2821.0989038023 2146689.5477056229 2638.4453663061  321.1595231342 6056028001.7295455933  129.0000109710  129.0000109710  129.0000109710 
+      98  245.4121937790 2821.0817490953 2146689.5590593945 2638.4097762390  321.1536874797 6055991207.9293851852  129.0000111984  129.0000111984  129.0000111984 
+      99  245.4532592994 2821.0946353191 2146689.5705296928 2638.3738037546  321.2074270397 6056018903.0102539062  129.0000114282  129.0000114282  129.0000114282 
+     100  245.7500657390 2821.2735939427 2146689.5821165247 2638.3375549051  321.5958367642 6056403104.3106222153  129.0000116603  129.0000116603  129.0000116603 
+Loop time of 5.22601 on 1 procs for 100 steps with 10125 atoms

-Performance: 2.133 ns/day, 11.250 hours/ns, 24.691 timesteps/s
-99.8% CPU use with 1 MPI tasks x no OpenMP threads
+Performance: 1.653 ns/day, 14.517 hours/ns, 19.135 timesteps/s
+99.7% CPU use with 1 MPI tasks x no OpenMP threads

 MPI task timing breakdown:
 Section |  min time  |  avg time  |  max time  |%varavg| %total
 ---------------------------------------------------------------
-Pair    | 0.46587    | 0.46587    | 0.46587    |   0.0 | 11.50
-Neigh   | 1.4713     | 1.4713     | 1.4713     |   0.0 | 36.33
-Comm    | 0.05567    | 0.05567    | 0.05567    |   0.0 |  1.37
-Output  | 0.011364   | 0.011364   | 0.011364   |   0.0 |  0.28
-Modify  | 2.0158     | 2.0158     | 2.0158     |   0.0 | 49.77
-Other   |            | 0.03004    |            |       |  0.74
+Pair    | 0.44045    | 0.44045    | 0.44045    |   0.0 |  8.43
+Neigh   | 2.669      | 2.669      | 2.669      |   0.0 | 51.07
+Comm    | 0.056143   | 0.056143   | 0.056143   |   0.0 |  1.07
+Output  | 0.012469   | 0.012469   | 0.012469   |   0.0 |  0.24
+Modify  | 2.0163     | 2.0163     | 2.0163     |   0.0 | 38.58
+Other   |            | 0.03168    |            |       |  0.61

 Nlocal:    10125 ave 10125 max 10125 min
 Histogram: 1 0 0 0 0 0 0 0 0 0
@ -172,4 +180,4 @@ Dangerous builds not checked

 Please see the log.cite file for references relevant to this simulation

-Total wall time: 0:00:04
+Total wall time: 0:00:05
--- a/examples/USER/misc/basal/in.basal
+++ b/examples/USER/misc/basal/in.basal
@ -1,163 +1,163 @@
-############################################################################
-# Input file for investigating twinning nucleation under uniaxial loading with basal plane vector analysis
-# Christopher Barrett, March 2013
-# This script requires a Mg pair potential file to be in the same directory.
-
-# fname is the file name.  It is necessary for loops to work correctly. (See jump command)
-variable fname index in.basal
-
-######################################
-# POTENTIAL VARIABLES
-# lattice parameters and the minimum energy per atom which should be obtained with the current pair potential and homogeneous lattice
-variable lx equal 3.181269601 
-variable b equal sqrt(3)
-variable c equal sqrt(8/3)
-variable ly equal ${b}*${lx}
-variable lz equal ${c}*${lx}
-variable pairlocation index almg.liu
-variable pairstyle index eam/alloy/opt
-
-######################################
-# EQUILIBRATION/DEFORMATION VARIABLES
-# eqpress = 10 bar = 1 MPa
-# tstep (the timestep) is set to a default value of 0.001 (1 fs) 
-# seed randomizes the velocity
-# srate is the rate of strain in 1/s
-# Ndump is the number of timesteps in between each dump of the atom coordinates
-variable tstep equal 0.001
-variable seed equal 95812384
-variable srate equal 1e9
-
-######################################
-# INITIALIZATION
-units 		metal
-dimension		3
-boundary		s	s	s
-atom_style		atomic
-
-######################################
-# ATOM BUILD
-atom_modify map array
-
-# lattice custom scale a1 "coordinates of a1" a2 "coordinates of a2" a3 "coordinates of a3" basis "atom1 coordinates" basis "atom2 coordinates" basis "atom3 coordinates" basis "atom4 coordinates" orient x "crystallagraphic orientation of x axis" orient y "crystallagraphic orientation of y axis" z "crystallagraphic orientation of z axis"
-lattice custom 3.181269601 a1 1 0 0 a2 0 1.732050808 0 a3 0 0 1.632993162 basis 0.0 0.0 0.0 basis 0.5 0.5 0 basis 0 0.3333333 0.5 basis 0.5 0.833333 0.5 orient x 0 1 1 orient y 1 0 0 orient z 0 1 -1
-variable multiple equal 20
-variable mx equal "v_lx*v_multiple"
-variable my equal "v_ly*v_multiple"
-variable mz equal "v_lz*v_multiple"
-
-# the simulation region should be from 0 to a multiple of the periodic boundary in x, y and z.
-region		whole block 0 ${mz} 0 ${mx} 0 ${my} units box 
-create_box		2 whole
-create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1 
-
-region fixed1 block INF INF INF INF INF 10 units box
-region fixed2 block INF INF INF INF  100 INF units box
-group lower region fixed1
-group upper region fixed2
-group boundary union upper lower
-group mobile subtract all boundary
-
-variable natoms equal "count(all)"
-print "# of atoms are: ${natoms}"
-
-######################################
-# INTERATOMIC POTENTIAL
-pair_style	${pairstyle}
-pair_coeff	* * ${pairlocation} Mg Mg
-
-######################################
-# COMPUTES REQUIRED
-compute csym all centro/atom 12
-compute eng all pe/atom
-compute eatoms all reduce sum c_eng
-compute basal all basal/atom
-
-######################################
-# MINIMIZATION
-# Primarily adjusts the c/a ratio to value predicted by EAM potential
-reset_timestep	0
-thermo 1
-thermo_style custom step pe c_eatoms
-min_style cg
-minimize	1e-15 1e-15 1000 2000
-variable eminimum equal "c_eatoms / count(all)"
-print "%%e(it,1)=${eminimum}"
-
-######################################
-# EQUILIBRATION
-reset_timestep	0
-timestep ${tstep}
-# atoms are given a random velocity based on a temperature of 100K.
-velocity all create 100 ${seed} mom yes rot no
-
-# temperature and pressure are set to 100 and 0
-fix 1 all nve
-
-# Set thermo output
-thermo 100
-thermo_style custom step lx ly lz press pxx pyy pzz pe temp
-
-# Run for at least 2 picosecond (assuming 1 fs timestep)
-run 2000
-
-# Loop to run until pressure is below the variable eqpress (defined at beginning of file)
-label loopeq 
-variable eq loop 100 
-run 250
-variable converge equal press
-if "${converge} <= 0" then "variable converge equal -press" else "variable converge equal press" 
-if "${converge} <= 50" then "jump ${fname} breakeq" 
-next eq 
-jump ${fname} loopeq 
-label breakeq 
-
-# Store length for strain rate calculations
-variable tmp equal "lx"
-variable L0 equal ${tmp}
-print "Initial Length, L0: ${L0}"
-unfix 1
-
-######################################
-# DEFORMATION
-reset_timestep	0
-timestep ${tstep}
-
-# Impose constant strain rate 
-variable srate1 equal "v_srate / 1.0e10"
-velocity	upper set 0.0 NULL 0.0 units box
-velocity        lower set 0.0 NULL 0.0 units box
-
-fix 2 upper setforce 0.0 NULL 0.0
-fix 3 lower setforce 0.0 NULL 0.0
-fix 1 all nve
-
-# Output strain and stress info to file
-# for units metal, pressure is in [bars] = 100 [kPa] = 1/10000 [GPa]
-# p2 is in GPa
-variable strain equal "(lx - v_L0)/v_L0"
-variable p1 equal "v_strain"
-variable p2 equal "-pxz/10000"
-variable p3 equal "lx"
-variable p4 equal "temp"
-variable p5 equal "pe"
-variable p6 equal "ke"
-fix def1 all print 100 "${p1} ${p2} ${p3} ${p4} ${p5} ${p6}" file output.def1.txt screen no
-# Dump coordinates to file (for void size calculations)
-dump 		1 all custom 1000 output.dump.* id x y z c_basal[1] c_basal[2] c_basal[3]
-
-# Display thermo
-thermo_style	custom step v_strain pxz lx temp pe ke
-restart 50000 output.restart
-
-# run deformation for 100000 timesteps (10% strain assuming 1 fs timestep and 1e9/s strainrate)
-variable runtime equal 0
-label loop
-displace_atoms	all ramp x 0.0 ${srate1} z 10 100 units box
-run		100
-variable runtime equal ${runtime}+100
-if "${runtime} < 100000" then "jump ${fname} loop"
-
-######################################
-# SIMULATION DONE
-print "All done"
+############################################################################
+# Input file for investigating twinning nucleation under uniaxial loading with basal plane vector analysis
+# Christopher Barrett, March 2013
+# This script requires a Mg pair potential file to be in the same directory.
+
+# fname is the file name.  It is necessary for loops to work correctly. (See jump command)
+variable fname index in.basal
+
+######################################
+# POTENTIAL VARIABLES
+# lattice parameters and the minimum energy per atom which should be obtained with the current pair potential and homogeneous lattice
+variable lx equal 3.181269601 
+variable b equal sqrt(3)
+variable c equal sqrt(8/3)
+variable ly equal ${b}*${lx}
+variable lz equal ${c}*${lx}
+variable pairlocation index almg.liu
+variable pairstyle index eam/alloy/opt
+
+######################################
+# EQUILIBRATION/DEFORMATION VARIABLES
+# eqpress = 10 bar = 1 MPa
+# tstep (the timestep) is set to a default value of 0.001 (1 fs) 
+# seed randomizes the velocity
+# srate is the rate of strain in 1/s
+# Ndump is the number of timesteps in between each dump of the atom coordinates
+variable tstep equal 0.001
+variable seed equal 95812384
+variable srate equal 1e9
+
+######################################
+# INITIALIZATION
+units 		metal
+dimension		3
+boundary		s	s	s
+atom_style		atomic
+
+######################################
+# ATOM BUILD
+atom_modify map array
+
+# lattice custom scale a1 "coordinates of a1" a2 "coordinates of a2" a3 "coordinates of a3" basis "atom1 coordinates" basis "atom2 coordinates" basis "atom3 coordinates" basis "atom4 coordinates" orient x "crystallographic orientation of x axis" orient y "crystallographic orientation of y axis" orient z "crystallographic orientation of z axis"
+lattice custom 3.181269601 a1 1 0 0 a2 0 1.732050808 0 a3 0 0 1.632993162 basis 0.0 0.0 0.0 basis 0.5 0.5 0 basis 0 0.3333333 0.5 basis 0.5 0.833333 0.5 orient x 0 1 1 orient y 1 0 0 orient z 0 1 -1
+variable multiple equal 20
+variable mx equal "v_lx*v_multiple"
+variable my equal "v_ly*v_multiple"
+variable mz equal "v_lz*v_multiple"
+
+# the simulation region should be from 0 to a multiple of the periodic boundary in x, y and z.
+region		whole block 0 ${mz} 0 ${mx} 0 ${my} units box 
+create_box		2 whole
+create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1 
+
+region fixed1 block INF INF INF INF INF 10 units box
+region fixed2 block INF INF INF INF  100 INF units box
+group lower region fixed1
+group upper region fixed2
+group boundary union upper lower
+group mobile subtract all boundary
+
+variable natoms equal "count(all)"
+print "# of atoms are: ${natoms}"
+
+######################################
+# INTERATOMIC POTENTIAL
+pair_style	${pairstyle}
+pair_coeff	* * ${pairlocation} Mg Mg
+
+######################################
+# COMPUTES REQUIRED
+compute csym all centro/atom 12
+compute eng all pe/atom
+compute eatoms all reduce sum c_eng
+compute basal all basal/atom
+
+######################################
+# MINIMIZATION
+# Primarily adjusts the c/a ratio to value predicted by EAM potential
+reset_timestep	0
+thermo 1
+thermo_style custom step pe c_eatoms
+min_style cg
+minimize	1e-15 1e-15 1000 2000
+variable eminimum equal "c_eatoms / count(all)"
+print "%%e(it,1)=${eminimum}"
+
+######################################
+# EQUILIBRATION
+reset_timestep	0
+timestep ${tstep}
+# atoms are given a random velocity based on a temperature of 100K.
+velocity all create 100 ${seed} mom yes rot no
+
+# integrate with plain NVE (no thermostat or barostat); the 100 K starting temperature comes from the velocity command above
+fix 1 all nve
+
+# Set thermo output
+thermo 100
+thermo_style custom step lx ly lz press pxx pyy pzz pe temp
+
+# Run for at least 2 picoseconds (assuming 1 fs timestep)
+run 2000
+
+# Loop (at most 100 passes of 250 steps) until the absolute pressure is at or below 50 bar; the threshold is hard-coded below, no eqpress variable is defined in this file
+label loopeq 
+variable eq loop 100 
+run 250
+variable converge equal press
+if "${converge} <= 0" then "variable converge equal -press" else "variable converge equal press" 
+if "${converge} <= 50" then "jump ${fname} breakeq" 
+next eq 
+jump ${fname} loopeq 
+label breakeq 
+
+# Store length for strain rate calculations
+variable tmp equal "lx"
+variable L0 equal ${tmp}
+print "Initial Length, L0: ${L0}"
+unfix 1
+
+######################################
+# DEFORMATION
+reset_timestep	0
+timestep ${tstep}
+
+# Impose constant strain rate 
+variable srate1 equal "v_srate / 1.0e10"
+velocity	upper set 0.0 NULL 0.0 units box
+velocity        lower set 0.0 NULL 0.0 units box
+
+fix 2 upper setforce 0.0 NULL 0.0
+fix 3 lower setforce 0.0 NULL 0.0
+fix 1 all nve
+
+# Output strain and stress info to file
+# for units metal, pressure is in [bars] = 100 [kPa] = 1/10000 [GPa]
+# p2 is in GPa
+variable strain equal "(lx - v_L0)/v_L0"
+variable p1 equal "v_strain"
+variable p2 equal "-pxz/10000"
+variable p3 equal "lx"
+variable p4 equal "temp"
+variable p5 equal "pe"
+variable p6 equal "ke"
+fix def1 all print 100 "${p1} ${p2} ${p3} ${p4} ${p5} ${p6}" file output.def1.txt screen no
+# Dump coordinates to file (for void size calculations)
+dump 		1 all custom 1000 output.dump.* id x y z c_basal[1] c_basal[2] c_basal[3]
+
+# Display thermo
+thermo_style	custom step v_strain pxz lx temp pe ke
+restart 50000 output.restart
+
+# run deformation for 100000 timesteps (10% strain assuming 1 fs timestep and 1e9/s strainrate)
+variable runtime equal 0
+label loop
+displace_atoms	all ramp x 0.0 ${srate1} z 10 100 units box
+run		100
+variable runtime equal ${runtime}+100
+if "${runtime} < 100000" then "jump ${fname} loop"
+
+######################################
+# SIMULATION DONE
+print "All done"
--- a/examples/USER/misc/srp/in.srp
+++ b/examples/USER/misc/srp/in.srp
@ -15,6 +15,7 @@ bond_style      harmonic
 bond_coeff      * 225.0 0.85

 comm_modify vel yes
+comm_modify cutoff 3.6

 # must use pair hybrid, since srp bond particles
 # do not interact with other atoms types
--- a/examples/mscg/README
+++ b/examples/mscg/README
@ -0,0 +1,10 @@
+Running this example requires that LAMMPS be built with the MSCG
+package and its fix mscg command.  The fix uses the Multi-Scale
+Coarse-Graining (MS-CG) library, freely available at
+https://github.com/uchicago-voth/MSCG-release, to compute optimized
+coarse-grained force field parameters.  The MS-CG library was
+developed by Jacob Wagner in Greg Voth's group at the University of
+Chicago.
+
+See the lib/mscg/README file for instructions on how to download and
+install the MS-CG library for use with LAMMPS.
--- a/examples/mscg/control.in
+++ b/examples/mscg/control.in
@ -0,0 +1,12 @@
+block_size 1
+start_frame 1
+n_frames 19
+nonbonded_cutoff 10.0
+basis_type 0
+primary_output_style 0
+output_solution_flag 1
+output_spline_coeffs_flag 1
+pair_nonbonded_bspline_basis_order 6
+pair_nonbonded_basis_set_resolution 0.7
+pair_nonbonded_output_binwidth 0.1
+matrix_type 0
--- a/examples/mscg/data.meoh
+++ b/examples/mscg/data.meoh
--- a/examples/mscg/dump.meoh
+++ b/examples/mscg/dump.meoh
--- a/examples/mscg/in.mscg
+++ b/examples/mscg/in.mscg
@ -0,0 +1,22 @@
+units real
+atom_style full
+pair_style zero 10.0
+
+read_data data.meoh
+pair_coeff * *
+
+thermo 1
+thermo_style custom step
+
+# Test 1a: range finder functionality
+fix 1 all mscg 1 range on
+rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
+print "TEST_1a mscg range finder"
+unfix 1
+
+# Test 1b: force matching functionality
+fix 1 all mscg 1
+rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
+print "TEST_1b mscg force matching"
+
+print TEST_DONE
--- a/examples/mscg/output_9Jan17/1_1.dat
+++ b/examples/mscg/output_9Jan17/1_1.dat
@ -0,0 +1,77 @@
+2.500000 5.670970817963099e+02
+2.600000 2.404059283529051e+02
+2.700000 9.157060823529977e+01
+2.800000 3.428273061369140e+01
+2.900000 1.619868149395266e+01
+3.000000 1.039607214301755e+01
+3.100000 6.830187514267188e+00
+3.200000 3.861970842349535e+00
+3.300000 1.645948643278161e+00
+3.400000 2.395428971623918e-01
+3.500000 -4.276763637833773e-01
+3.600000 -5.132022977965877e-01
+3.700000 -2.208024961234051e-01
+3.800000 2.402697744243800e-01
+3.900000 6.956064296165573e-01
+4.000000 1.034070044257954e+00
+4.100000 1.205997975111669e+00
+4.200000 1.209501102128581e+00
+4.300000 1.076304670380924e+00
+4.400000 8.575891319958883e-01
+4.500000 6.098309880892070e-01
+4.600000 3.807992942746473e-01
+4.700000 1.995994191469442e-01
+4.800000 7.699059877424269e-02
+4.900000 9.750744163981299e-03
+5.000000 -1.480308769532222e-02
+5.100000 -1.429422279228416e-02
+5.200000 -6.765899050869768e-03
+5.300000 -6.214398421078919e-03
+5.400000 -1.951586041390797e-02
+5.500000 -4.689090237947263e-02
+5.600000 -8.376292122940529e-02
+5.700000 -1.226699982917263e-01
+5.800000 -1.551768041657136e-01
+5.900000 -1.737865035767736e-01
+6.000000 -1.738272491408507e-01
+6.100000 -1.546779867768825e-01
+6.200000 -1.193171291488982e-01
+6.300000 -7.321054075616322e-02
+6.400000 -2.317411193286228e-02
+6.500000 2.376366715221714e-02
+6.600000 6.149913249600215e-02
+6.700000 8.597538938112201e-02
+6.800000 9.590170060736655e-02
+6.900000 9.245100462148878e-02
+7.000000 7.855487875847664e-02
+7.100000 5.818301960249692e-02
+7.200000 3.562272334783877e-02
+7.300000 1.475836615985744e-02
+7.400000 -1.639617536128255e-03
+7.500000 -1.237881063914745e-02
+7.600000 -1.768202571195587e-02
+7.700000 -1.877757119362295e-02
+7.800000 -1.748001968416543e-02
+7.900000 -1.577097622918088e-02
+8.000000 -1.537984660448136e-02
+8.100000 -1.737044400054951e-02
+8.200000 -2.187939410237979e-02
+8.300000 -2.823987455760605e-02
+8.400000 -3.525715284001425e-02
+8.500000 -4.148996251287761e-02
+8.600000 -4.553187949229211e-02
+8.700000 -4.629269831051163e-02
+8.800000 -4.327548798226762e-02
+8.900000 -3.674131754868225e-02
+9.000000 -2.758883541814894e-02
+9.100000 -1.712151838480657e-02
+9.200000 -6.810600249997737e-03
+9.300000 1.941999556272785e-03
+9.400000 8.040747353879739e-03
+9.500000 1.092691524686838e-02
+9.600000 1.063606620723048e-02
+9.700000 7.416550438142138e-03
+9.800000 1.175066786686231e-03
+9.900000 -9.084427187675534e-03
+10.000000 -2.582180514463068e-02
+10.100000 -5.352186189454393e-02
--- a/examples/mscg/output_9Jan17/1_1.table
+++ b/examples/mscg/output_9Jan17/1_1.table
@ -0,0 +1,82 @@
+# Header information on force file
+
+1_1
+N 77 R 2.500000 10.100000
+
+1 2.500000 69.428523 567.097082
+2 2.600000 29.053372 240.405928
+3 2.700000 12.454545 91.570608
+4 2.800000 6.161878 34.282731
+5 2.900000 3.637808 16.198681
+6 3.000000 2.308070 10.396072
+7 3.100000 1.446757 6.830188
+8 3.200000 0.912149 3.861971
+9 3.300000 0.636753 1.645949
+10 3.400000 0.542478 0.239543
+11 3.500000 0.551885 -0.427676
+12 3.600000 0.598929 -0.513202
+13 3.700000 0.635629 -0.220802
+14 3.800000 0.634656 0.240270
+15 3.900000 0.587862 0.695606
+16 4.000000 0.501378 1.034070
+17 4.100000 0.389375 1.205998
+18 4.200000 0.268600 1.209501
+19 4.300000 0.154310 1.076305
+20 4.400000 0.057615 0.857589
+21 4.500000 -0.015756 0.609831
+22 4.600000 -0.065288 0.380799
+23 4.700000 -0.094307 0.199599
+24 4.800000 -0.108137 0.076991
+25 4.900000 -0.112474 0.009751
+26 5.000000 -0.112221 -0.014803
+27 5.100000 -0.110767 -0.014294
+28 5.200000 -0.109714 -0.006766
+29 5.300000 -0.109065 -0.006214
+30 5.400000 -0.107778 -0.019516
+31 5.500000 -0.104458 -0.046891
+32 5.600000 -0.097925 -0.083763
+33 5.700000 -0.087603 -0.122670
+34 5.800000 -0.073711 -0.155177
+35 5.900000 -0.057263 -0.173787
+36 6.000000 -0.039882 -0.173827
+37 6.100000 -0.023457 -0.154678
+38 6.200000 -0.009757 -0.119317
+39 6.300000 -0.000131 -0.073211
+40 6.400000 0.004688 -0.023174
+41 6.500000 0.004659 0.023764
+42 6.600000 0.000396 0.061499
+43 6.700000 -0.006978 0.085975
+44 6.800000 -0.016072 0.095902
+45 6.900000 -0.025489 0.092451
+46 7.000000 -0.034040 0.078555
+47 7.100000 -0.040877 0.058183
+48 7.200000 -0.045567 0.035623
+49 7.300000 -0.048086 0.014758
+50 7.400000 -0.048742 -0.001640
+51 7.500000 -0.048041 -0.012379
+52 7.600000 -0.046538 -0.017682
+53 7.700000 -0.044715 -0.018778
+54 7.800000 -0.042902 -0.017480
+55 7.900000 -0.041239 -0.015771
+56 8.000000 -0.039682 -0.015380
+57 8.100000 -0.038044 -0.017370
+58 8.200000 -0.036082 -0.021879
+59 8.300000 -0.033576 -0.028240
+60 8.400000 -0.030401 -0.035257
+61 8.500000 -0.026564 -0.041490
+62 8.600000 -0.022213 -0.045532
+63 8.700000 -0.017621 -0.046293
+64 8.800000 -0.013143 -0.043275
+65 8.900000 -0.009142 -0.036741
+66 9.000000 -0.005926 -0.027589
+67 9.100000 -0.003690 -0.017122
+68 9.200000 -0.002494 -0.006811
+69 9.300000 -0.002250 0.001942
+70 9.400000 -0.002749 0.008041
+71 9.500000 -0.003698 0.010927
+72 9.600000 -0.004776 0.010636
+73 9.700000 -0.005678 0.007417
+74 9.800000 -0.006108 0.001175
+75 9.900000 -0.005712 -0.009084
+76 10.000000 -0.003967 -0.025822
+77 10.100000 0.000000 -0.053522
--- a/examples/mscg/output_9Jan17/b-spline.out
+++ b/examples/mscg/output_9Jan17/b-spline.out
@ -0,0 +1,2 @@
+n: 1 1 6 12 2.400000000000002e+00 1.010000000000000e+01
+1.200460787805587e+03 2.169623423326193e+01 2.388396964379328e+01 -1.197754948555067e+01 6.472482422420378e+00 -1.483711824891365e+00 7.768139601662113e-01 -7.869494711740244e-01 4.830820182054661e-01 -1.892989444995645e-01 1.021275453070386e-01 -1.637649039972671e-01 5.570978712841167e-02 7.637188693695119e-03 -4.109175461195019e-03 -5.352186189455146e-02 
--- a/examples/mscg/output_9Jan17/rmin.in
+++ b/examples/mscg/output_9Jan17/rmin.in
@ -0,0 +1 @@
+1 1 2.852369 10.000000 fm
--- a/examples/mscg/output_9Jan17/rmin_b.in
+++ b/examples/mscg/output_9Jan17/rmin_b.in
--- a/examples/mscg/output_9Jan17/sol_info.out
+++ b/examples/mscg/output_9Jan17/sol_info.out
@ -0,0 +1,18 @@
+fm_matrix_rows:3000; fm_matrix_columns:16;
+Singular vector:
+2.442317e+00
+2.105009e+00
+1.433251e+00
+1.184602e+00
+9.739627e-01
+6.944898e-01
+5.376709e-01
+4.616070e-01
+3.257062e-01
+2.683729e-01
+1.530153e-01
+9.336288e-02
+5.042150e-02
+2.126912e-02
+1.446682e-02
+4.167763e-05
--- a/examples/mscg/output_9Jan17/x.out
+++ b/examples/mscg/output_9Jan17/x.out
@ -0,0 +1 @@
+<EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD>@47h<<3C>5@<40><><EFBFBD><EFBFBD>K<EFBFBD>7@<40>R<EFBFBD>]<5D><>'<27><><EFBFBD><EFBFBD>n<EFBFBD><6E>@݌I<DD8C>H<EFBFBD><48><EFBFBD><19>?<3F><><EFBFBD><EFBFBD>?r<>I<EFBFBD><49>.<2E><><11>^<5E><><EFBFBD><EFBFBD>?W<57><7F><EFBFBD>:ȿ(O<1D>%<25>?<3F>Ns<4E>?<3F>Ŀ<EFBFBD>:<3A>C<EFBFBD><43><EFBFBD>?<3F><><EFBFBD>:,H?<3F>}<7D>c<EFBFBD><63>p<EFBFBD><70><EFBFBD><EFBFBD><EFBFBD>7g<37><67>
--- a/examples/prd/in.prd
+++ b/examples/prd/in.prd
@ -78,7 +78,7 @@ run             100

 # only output atoms near vacancy

-compute coord all coord/atom $r
+compute coord all coord/atom cutoff $r

 #dump events all custom 1 dump.prd id type x y z
 #dump_modify events thresh c_coord != 4
--- a/examples/tad/in.tad
+++ b/examples/tad/in.tad
@ -80,7 +80,7 @@ velocity all zero linear

 # only output atoms near vacancy

-compute coord all coord/atom $r
+compute coord all coord/atom cutoff $r

 #dump events all custom 1 dump.prd id type x y z
 #dump_modify events thresh c_coord != 4
--- a/examples/voronoi/README
+++ b/examples/voronoi/README
@ -0,0 +1,10 @@
+Running this example requires that LAMMPS be built with the VORONOI
+package and its compute voronoi command.  The compute uses the Voro++
+library, freely available at http://math.lbl.gov/voro++, to compute
+the Voronoi tessellation locally on each processor.  Voro++ was
+developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
+Laboratory.
+
+See the lib/voronoi/README file for instructions on how to download
+and install the Voro++ library for use with LAMMPS.
+
--- a/lib/README
+++ b/lib/README
@ -39,6 +39,8 @@ meam	      modified embedded atom method (MEAM) potential, MEAM package
                from Greg Wagner (Sandia)
 molfile       hooks to VMD molfile plugins, used by the USER-MOLFILE package
                from Axel Kohlmeyer (Temple U) and the VMD development team
+mscg          hooks to the MSCG library, used by fix_mscg command
+                from Jacob Wagner and Greg Voth group (U Chicago)
 python        hooks to the system Python library, used by the PYTHON package
                from the LAMMPS development team
 qmmm	      quantum mechanics/molecular mechanics coupling interface
--- a/lib/kokkos/.gitignore
+++ b/lib/kokkos/.gitignore
@ -1,8 +0,0 @@
-# Standard ignores
-*~
-*.pyc
-\#*#
-.#*
-.*.swp
-.cproject
-.project
--- a/lib/kokkos/CHANGELOG.md
+++ b/lib/kokkos/CHANGELOG.md
@ -0,0 +1,284 @@
+# Change Log
+
+## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)
+
+**Implemented enhancements:**
+
+- Add CMake option to enable Cuda Lambda support [\#589](https://github.com/kokkos/kokkos/issues/589)
+- Add CMake option to enable Cuda RDC support [\#588](https://github.com/kokkos/kokkos/issues/588)
+- Add Initial Intel Sky Lake Xeon-HPC Compiler Support to Kokkos Make System [\#584](https://github.com/kokkos/kokkos/issues/584)
+- Building Tutorial Examples  [\#582](https://github.com/kokkos/kokkos/issues/582)
+- Internal way for using ThreadVectorRange without TeamHandle  [\#574](https://github.com/kokkos/kokkos/issues/574)
+- Testing: Add testing for uvm and rdc [\#571](https://github.com/kokkos/kokkos/issues/571)
+- Profiling: Add Memory Tracing and Region Markers [\#557](https://github.com/kokkos/kokkos/issues/557)
+- nvcc\_wrapper not installed with Kokkos built with CUDA through CMake [\#543](https://github.com/kokkos/kokkos/issues/543)
+- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
+- Benchmarks: Add Gather benchmark [\#536](https://github.com/kokkos/kokkos/issues/536)
+- Testing: add spot\_check option to test\_all\_sandia [\#535](https://github.com/kokkos/kokkos/issues/535)
+- Deprecate Kokkos::Impl::VerifyExecutionCanAccessMemorySpace [\#527](https://github.com/kokkos/kokkos/issues/527)
+- Add AtomicAdd support for 64bit float for Pascal [\#522](https://github.com/kokkos/kokkos/issues/522)
+- Add Restrict and Aligned memory trait [\#517](https://github.com/kokkos/kokkos/issues/517)
+- Kokkos Tests are Not Run using Compiler Optimization [\#501](https://github.com/kokkos/kokkos/issues/501)
+- Add support for clang 3.7 w/ openmp backend [\#393](https://github.com/kokkos/kokkos/issues/393)
+- Provide an error throw class [\#79](https://github.com/kokkos/kokkos/issues/79)
+
+**Fixed bugs:**
+
+- Cuda UVM Allocation test broken with UVM as default space [\#586](https://github.com/kokkos/kokkos/issues/586)
+- Bug \(develop branch only\): multiple tests are now failing when forcing uvm usage. [\#570](https://github.com/kokkos/kokkos/issues/570)
+- Error in generate\_makefile.sh for Kokkos when Compiler is Empty String/Fails [\#568](https://github.com/kokkos/kokkos/issues/568)
+- XL 13.1.4 incorrect C++11 flag [\#553](https://github.com/kokkos/kokkos/issues/553)
+- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
+- Installing Library on MAC broken due to cp -u [\#539](https://github.com/kokkos/kokkos/issues/539)
+- Intel Nightly Testing with Debug enabled fails [\#534](https://github.com/kokkos/kokkos/issues/534)
+
+## [2.02.01](https://github.com/kokkos/kokkos/tree/2.02.01) (2016-11-01)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.00...2.02.01)
+
+**Implemented enhancements:**
+
+- Add Changelog generation to our process. [\#506](https://github.com/kokkos/kokkos/issues/506)
+
+**Fixed bugs:**
+
+- Test scratch\_request fails in Serial with Debug enabled [\#520](https://github.com/kokkos/kokkos/issues/520)
+- Bug In BoundsCheck for DynRankView [\#516](https://github.com/kokkos/kokkos/issues/516)
+
+## [2.02.00](https://github.com/kokkos/kokkos/tree/2.02.00) (2016-10-30)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.10...2.02.00)
+
+**Implemented enhancements:**
+
+- Add PowerPC assembly for grabbing clock register in memory pool [\#511](https://github.com/kokkos/kokkos/issues/511)
+- Add GCC 6.x support [\#508](https://github.com/kokkos/kokkos/issues/508)
+- Test install and build against installed library [\#498](https://github.com/kokkos/kokkos/issues/498)
+- Makefile.kokkos adds expt-extended-lambda to cuda build with clang [\#490](https://github.com/kokkos/kokkos/issues/490)
+- Add top-level makefile option to just test kokkos-core unit-test [\#485](https://github.com/kokkos/kokkos/issues/485)
+- Split and harmonize Object Files of Core UnitTests to increase build parallelism [\#484](https://github.com/kokkos/kokkos/issues/484)
+- LayoutLeft to LayoutLeft subview for 3D and 4D views [\#473](https://github.com/kokkos/kokkos/issues/473)
+- Add official Cuda 8.0 support [\#468](https://github.com/kokkos/kokkos/issues/468)
+- Allow C++1Z Flag for Class Lambda capture [\#465](https://github.com/kokkos/kokkos/issues/465)
+- Add Clang 4.0+ compilation of Cuda code [\#455](https://github.com/kokkos/kokkos/issues/455)
+- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
+- Add name of view to "View bounds error" [\#432](https://github.com/kokkos/kokkos/issues/432)
+- Move Sort Binning Operators into Kokkos namespace [\#421](https://github.com/kokkos/kokkos/issues/421)
+- TaskPolicy - generate error when attempt to use uninitialized  [\#396](https://github.com/kokkos/kokkos/issues/396)
+- Import WithoutInitializing and AllowPadding into Kokkos namespace [\#325](https://github.com/kokkos/kokkos/issues/325)
+- TeamThreadRange requires begin, end to be the same type [\#305](https://github.com/kokkos/kokkos/issues/305)
+- CudaUVMSpace should track \# allocations, due to CUDA limit on \# UVM allocations [\#300](https://github.com/kokkos/kokkos/issues/300)
+- Remove old View and its infrastructure [\#259](https://github.com/kokkos/kokkos/issues/259)
+
+**Fixed bugs:**
+
+- Bug in TestCuda\_Other.cpp: most likely assembly inserted into Device code [\#515](https://github.com/kokkos/kokkos/issues/515)
+- Cuda Compute Capability check of GPU is outdated [\#509](https://github.com/kokkos/kokkos/issues/509)
+- multi\_scratch test with hwloc and pthreads seg-faults.  [\#504](https://github.com/kokkos/kokkos/issues/504)
+- generate\_makefile.bash: "make install" is broken [\#503](https://github.com/kokkos/kokkos/issues/503)
+- make clean in Out of Source Build/Tests Does Not Work Correctly [\#502](https://github.com/kokkos/kokkos/issues/502)
+- Makefiles for test and examples have issues in Cuda when CXX is not explicitly specified [\#497](https://github.com/kokkos/kokkos/issues/497)
+- Dispatch lambda test directly inside GTEST macro doesn't work with nvcc [\#491](https://github.com/kokkos/kokkos/issues/491)
+- UnitTests with HWLOC enabled fail if run with mpirun bound to a single core [\#489](https://github.com/kokkos/kokkos/issues/489)
+- Failing Reducer Test on Mac with Pthreads [\#479](https://github.com/kokkos/kokkos/issues/479)
+- make test Dumps Error with Clang Not Found [\#471](https://github.com/kokkos/kokkos/issues/471)
+- OpenMP TeamPolicy member broadcast not using correct volatile shared variable [\#424](https://github.com/kokkos/kokkos/issues/424)
+- TaskPolicy - generate error when attempt to use uninitialized  [\#396](https://github.com/kokkos/kokkos/issues/396)
+- New task policy implementation is pulling in old experimental code. [\#372](https://github.com/kokkos/kokkos/issues/372)
+- MemoryPool unit test hangs on Power8 with GCC 6.1.0 [\#298](https://github.com/kokkos/kokkos/issues/298)
+
+## [2.01.10](https://github.com/kokkos/kokkos/tree/2.01.10) (2016-09-27)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.06...2.01.10)
+
+**Implemented enhancements:**
+
+- Enable Profiling by default in Tribits build [\#438](https://github.com/kokkos/kokkos/issues/438)
+- parallel\_reduce\(0\), parallel\_scan\(0\) unit tests [\#436](https://github.com/kokkos/kokkos/issues/436)
+- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
+- Fix tutorials to track new Kokkos::View [\#323](https://github.com/kokkos/kokkos/issues/323)
+- Rename team policy set\_scratch\_size. [\#195](https://github.com/kokkos/kokkos/issues/195)
+
+**Fixed bugs:**
+
+- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
+- Makefile spits syntax error [\#435](https://github.com/kokkos/kokkos/issues/435)
+- Kokkos::sort fails for view with all the same values [\#422](https://github.com/kokkos/kokkos/issues/422)
+- Generic Reducers: can't accept inline constructed reducer [\#404](https://github.com/kokkos/kokkos/issues/404)
+- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
+- const subview of const view with compile time dimensions on Cuda backend [\#310](https://github.com/kokkos/kokkos/issues/310)
+- Kokkos \(in Trilinos\) Causes Internal Compiler Error on CUDA 8.0.21-EA on POWER8 [\#307](https://github.com/kokkos/kokkos/issues/307)
+- Core Oversubscription Detection Broken? [\#159](https://github.com/kokkos/kokkos/issues/159)
+
+
+## [2.01.06](https://github.com/kokkos/kokkos/tree/2.01.06) (2016-09-02)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.00...2.01.06)
+
+**Implemented enhancements:**
+
+- Add "standard" reducers for lambda-supportable customized reduce [\#411](https://github.com/kokkos/kokkos/issues/411)
+- TaskPolicy - single thread back-end execution [\#390](https://github.com/kokkos/kokkos/issues/390)
+- Kokkos master clone tag [\#387](https://github.com/kokkos/kokkos/issues/387)
+- Query memory requirements from task policy [\#378](https://github.com/kokkos/kokkos/issues/378)
+- Output order of test\_atomic.cpp is confusing [\#373](https://github.com/kokkos/kokkos/issues/373)
+- Missing testing for atomics [\#341](https://github.com/kokkos/kokkos/issues/341)
+- Feature request for Kokkos to provide Kokkos::atomic\_fetch\_max and atomic\_fetch\_min [\#336](https://github.com/kokkos/kokkos/issues/336)
+- TaskPolicy\<Cuda\> performance requires teams mapped to warps [\#218](https://github.com/kokkos/kokkos/issues/218)
+
+**Fixed bugs:**
+
+- Reduce with Teams broken for custom initialize [\#407](https://github.com/kokkos/kokkos/issues/407)
+- Failing Kokkos build on Debian [\#402](https://github.com/kokkos/kokkos/issues/402)
+- Failing Tests on NVIDIA Pascal GPUs [\#398](https://github.com/kokkos/kokkos/issues/398)
+- Algorithms: fill\_random assumes dimensions fit in unsigned int [\#389](https://github.com/kokkos/kokkos/issues/389)
+- Kokkos::subview with RandomAccess Memory Trait [\#385](https://github.com/kokkos/kokkos/issues/385)
+- Build warning \(signed / unsigned comparison\) in Cuda implementation [\#365](https://github.com/kokkos/kokkos/issues/365)
+- wrong results for a parallel\_reduce with CUDA8 / Maxwell50 [\#352](https://github.com/kokkos/kokkos/issues/352)
+- Hierarchical parallelism - 3 level unit test [\#344](https://github.com/kokkos/kokkos/issues/344)
+- Can I allocate a View w/ both WithoutInitializing & AllowPadding? [\#324](https://github.com/kokkos/kokkos/issues/324)
+- subview View layout determination [\#309](https://github.com/kokkos/kokkos/issues/309)
+- Unit tests with Cuda - Maxwell [\#196](https://github.com/kokkos/kokkos/issues/196)
+
+## [2.01.00](https://github.com/kokkos/kokkos/tree/2.01.00) (2016-07-21)
+[Full Changelog](https://github.com/kokkos/kokkos/compare/End_C++98...2.01.00)
+
+**Implemented enhancements:**
+
+- Edit ViewMapping so assigning Views with the same custom layout compiles when const casting [\#327](https://github.com/kokkos/kokkos/issues/327)
+- DynRankView: Performance improvement for operator\(\) [\#321](https://github.com/kokkos/kokkos/issues/321)
+- Interoperability between static and dynamic rank views [\#295](https://github.com/kokkos/kokkos/issues/295)
+- subview member function ? [\#280](https://github.com/kokkos/kokkos/issues/280)
+- Interoperability between View and DynRankView. [\#245](https://github.com/kokkos/kokkos/issues/245)
+- \(Trilinos\) build warning in atomic\_assign, with Kokkos::complex [\#177](https://github.com/kokkos/kokkos/issues/177)
+- View\<\>::shmem\_size should runtime check for number of arguments equal to rank [\#176](https://github.com/kokkos/kokkos/issues/176)
+- Custom reduction join via lambda argument [\#99](https://github.com/kokkos/kokkos/issues/99)
+- DynRankView with 0 dimensions passed in at construction [\#293](https://github.com/kokkos/kokkos/issues/293)
+- Inject view\_alloc and friends into Kokkos namespace [\#292](https://github.com/kokkos/kokkos/issues/292)
+- Less restrictive TeamPolicy reduction on Cuda [\#286](https://github.com/kokkos/kokkos/issues/286)
+- deep\_copy using remap with source execution space [\#267](https://github.com/kokkos/kokkos/issues/267)
+- Suggestion:  Enable opt-in L1 caching via nvcc-wrapper [\#261](https://github.com/kokkos/kokkos/issues/261)
+- More flexible create\_mirror functions [\#260](https://github.com/kokkos/kokkos/issues/260)
+- Rename View::memory\_span to View::required\_allocation\_size [\#256](https://github.com/kokkos/kokkos/issues/256)
+- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
+- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
+- Kokkos::Timer [\#234](https://github.com/kokkos/kokkos/issues/234)
+- Fence CudaUVMSpace allocations [\#230](https://github.com/kokkos/kokkos/issues/230)
+- View::operator\(\) accept std::is\_integral and std::is\_enum [\#227](https://github.com/kokkos/kokkos/issues/227)
+- Allocating zero size View [\#216](https://github.com/kokkos/kokkos/issues/216)
+- Thread scalable memory pool [\#212](https://github.com/kokkos/kokkos/issues/212)
+- Add a way to disable memory leak output [\#194](https://github.com/kokkos/kokkos/issues/194)
+- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
+- Runtime rank wrapper for View [\#189](https://github.com/kokkos/kokkos/issues/189)
+- Profiling Interface [\#158](https://github.com/kokkos/kokkos/issues/158)
+- Fix View assignment \(of managed to unmanaged\) [\#153](https://github.com/kokkos/kokkos/issues/153)
+- Add unit test for assignment of managed View to unmanaged View [\#152](https://github.com/kokkos/kokkos/issues/152)
+- Check for oversubscription of threads with MPI in Kokkos::initialize [\#149](https://github.com/kokkos/kokkos/issues/149)
+- Dynamic resizable 1-dimensional view [\#143](https://github.com/kokkos/kokkos/issues/143)
+- Develop TaskPolicy for CUDA [\#142](https://github.com/kokkos/kokkos/issues/142)
+- New View : Test Compilation Downstream [\#138](https://github.com/kokkos/kokkos/issues/138)
+- New View Implementation [\#135](https://github.com/kokkos/kokkos/issues/135)
+- Add variant of subview that lets users add traits [\#134](https://github.com/kokkos/kokkos/issues/134)
+- NVCC-WRAPPER: Add --host-only flag [\#121](https://github.com/kokkos/kokkos/issues/121)
+- Address gtest issue with TriBITS Kokkos build outside of Trilinos [\#117](https://github.com/kokkos/kokkos/issues/117)
+- Make tests pass with -expt-extended-lambda on CUDA [\#108](https://github.com/kokkos/kokkos/issues/108)
+- Dynamic scheduling for parallel\_for and parallel\_reduce [\#106](https://github.com/kokkos/kokkos/issues/106)
+- Runtime or compile time error when reduce functor's join is not properly specified as const member function or with volatile arguments [\#105](https://github.com/kokkos/kokkos/issues/105)
+- Error out when the number of threads is modified after kokkos is initialized [\#104](https://github.com/kokkos/kokkos/issues/104)
+- Porting to POWER and remove assumption of X86 default [\#103](https://github.com/kokkos/kokkos/issues/103)
+- Dynamic scheduling option for RangePolicy [\#100](https://github.com/kokkos/kokkos/issues/100)
+- SharedMemory Support for Lambdas [\#81](https://github.com/kokkos/kokkos/issues/81)
+- Recommended TeamSize for Lambdas [\#80](https://github.com/kokkos/kokkos/issues/80)
+- Add Aggressive Vectorization Compilation mode [\#72](https://github.com/kokkos/kokkos/issues/72)
+- Dynamic scheduling team execution policy [\#53](https://github.com/kokkos/kokkos/issues/53)
+- UVM allocations in multi-GPU systems [\#50](https://github.com/kokkos/kokkos/issues/50)
+- Synchronic in Kokkos::Impl [\#44](https://github.com/kokkos/kokkos/issues/44)
+- index and dimension types in for loops [\#28](https://github.com/kokkos/kokkos/issues/28)
+- Subview assign of 1D Strided with stride 1 to LayoutLeft/Right [\#1](https://github.com/kokkos/kokkos/issues/1)
+
+**Fixed bugs:**
+
+- misspelled variable name in Kokkos\_Atomic\_Fetch + missing unit tests [\#340](https://github.com/kokkos/kokkos/issues/340)
+- seg fault Kokkos::Impl::CudaInternal::print\_configuration [\#338](https://github.com/kokkos/kokkos/issues/338)
+- Clang compiler error with named parallel\_reduce, tags, and TeamPolicy. [\#335](https://github.com/kokkos/kokkos/issues/335)
+- Shared Memory Allocation Error at parallel\_reduce [\#311](https://github.com/kokkos/kokkos/issues/311)
+- DynRankView: Fix resize and realloc [\#303](https://github.com/kokkos/kokkos/issues/303)
+- Scratch memory and dynamic scheduling [\#279](https://github.com/kokkos/kokkos/issues/279)
+- MemoryPool infinite loop when out of memory [\#312](https://github.com/kokkos/kokkos/issues/312)
+- Kokkos DynRankView changes break Sacado and Panzer [\#299](https://github.com/kokkos/kokkos/issues/299)
+- MemoryPool fails to compile on non-cuda non-x86 [\#297](https://github.com/kokkos/kokkos/issues/297)
+- Random Number Generator Fix [\#296](https://github.com/kokkos/kokkos/issues/296)
+- View template parameter ordering Bug [\#282](https://github.com/kokkos/kokkos/issues/282)
+- Serial task policy broken. [\#281](https://github.com/kokkos/kokkos/issues/281)
+- deep\_copy with LayoutStride should not memcpy [\#262](https://github.com/kokkos/kokkos/issues/262)
+- DualView::need\_sync should be a const method [\#248](https://github.com/kokkos/kokkos/issues/248)
+- Arbitrary-sized atomics on GPUs broken; loop forever [\#238](https://github.com/kokkos/kokkos/issues/238)
+- boolean reduction value\_type changes answer [\#225](https://github.com/kokkos/kokkos/issues/225)
+- Custom init\(\) function for parallel\_reduce with array value\_type [\#210](https://github.com/kokkos/kokkos/issues/210)
+- unit\_test Makefile is Broken - Recursively Calls itself until Machine Apocalypse. [\#202](https://github.com/kokkos/kokkos/issues/202)
+- nvcc\_wrapper Does Not Support  -Xcompiler \<compiler option\> [\#198](https://github.com/kokkos/kokkos/issues/198)
+- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
+- Kokkos Threads Backend impl\_shared\_alloc Broken on Intel 16.1 \(Shepard Haswell\) [\#186](https://github.com/kokkos/kokkos/issues/186)
+- pthread back end hangs if used uninitialized [\#182](https://github.com/kokkos/kokkos/issues/182)
+- parallel\_reduce of size 0, not calling init/join [\#175](https://github.com/kokkos/kokkos/issues/175)
+- Bug in Threads with OpenMP enabled [\#173](https://github.com/kokkos/kokkos/issues/173)
+- KokkosExp\_SharedAlloc, m\_team\_work\_index inaccessible [\#166](https://github.com/kokkos/kokkos/issues/166)
+- 128-bit CAS without Assembly Broken? [\#161](https://github.com/kokkos/kokkos/issues/161)
+- fatal error: Cuda/Kokkos\_Cuda\_abort.hpp: No such file or directory [\#157](https://github.com/kokkos/kokkos/issues/157)
+- Power8: Fix OpenMP backend [\#139](https://github.com/kokkos/kokkos/issues/139)
+- Data race in Kokkos OpenMP initialization [\#131](https://github.com/kokkos/kokkos/issues/131)
+- parallel\_launch\_local\_memory and cuda 7.5 [\#125](https://github.com/kokkos/kokkos/issues/125)
+- Resize can fail with Cuda due to asynchronous dispatch [\#119](https://github.com/kokkos/kokkos/issues/119)
+- Qthread taskpolicy initialization bug. [\#92](https://github.com/kokkos/kokkos/issues/92)
+- Windows: sys/mman.h [\#89](https://github.com/kokkos/kokkos/issues/89)
+- Windows: atomic\_fetch\_sub\(\) [\#88](https://github.com/kokkos/kokkos/issues/88)
+- Windows: snprintf [\#87](https://github.com/kokkos/kokkos/issues/87)
+- Parallel\_Reduce with TeamPolicy and league size of 0 returns garbage [\#85](https://github.com/kokkos/kokkos/issues/85)
+- Throw with Cuda when using \(2D\) team\_policy parallel\_reduce with less than a warp size [\#76](https://github.com/kokkos/kokkos/issues/76)
+- Scalar views don't work with Kokkos::Atomic memory trait [\#69](https://github.com/kokkos/kokkos/issues/69)
+- Reduce the number of threads per team for Cuda [\#63](https://github.com/kokkos/kokkos/issues/63)
+- Named Kernels fail for reductions with CUDA [\#60](https://github.com/kokkos/kokkos/issues/60)
+- Kokkos View dimension\_\(\) for long returning unsigned int [\#20](https://github.com/kokkos/kokkos/issues/20)
+- atomic test hangs with LLVM [\#6](https://github.com/kokkos/kokkos/issues/6)
+- OpenMP Test should set omp\_set\_num\_threads to 1 [\#4](https://github.com/kokkos/kokkos/issues/4)
+
+**Closed issues:**
+
+- develop branch broken with CUDA 8 and --expt-extended-lambda  [\#354](https://github.com/kokkos/kokkos/issues/354)
+- --arch=KNL with Intel 2016 build failure [\#349](https://github.com/kokkos/kokkos/issues/349)
+- Error building with Cuda when passing -DKOKKOS\_CUDA\_USE\_LAMBDA to generate\_makefile.bash [\#343](https://github.com/kokkos/kokkos/issues/343)
+- Can I safely use int indices in a 2-D View with capacity \> 2B? [\#318](https://github.com/kokkos/kokkos/issues/318)
+- Kokkos::ViewAllocateWithoutInitializing is not working [\#317](https://github.com/kokkos/kokkos/issues/317)
+- Intel build on Mac OS X [\#277](https://github.com/kokkos/kokkos/issues/277)
+- deleted [\#271](https://github.com/kokkos/kokkos/issues/271)
+- Broken Mira build [\#268](https://github.com/kokkos/kokkos/issues/268)
+- 32-bit build [\#246](https://github.com/kokkos/kokkos/issues/246)
+- parallel\_reduce with RDC crashes linker [\#232](https://github.com/kokkos/kokkos/issues/232)
+- build of Kokkos\_Sparse\_MV\_impl\_spmv\_Serial.cpp.o fails if you use nvcc and have cuda disabled [\#209](https://github.com/kokkos/kokkos/issues/209)
+- Kokkos Serial execution space is not tested with TeamPolicy. [\#207](https://github.com/kokkos/kokkos/issues/207)
+- Unit test failure on Hansen  KokkosCore\_UnitTest\_Cuda\_MPI\_1 [\#200](https://github.com/kokkos/kokkos/issues/200)
+- nvcc compiler warning: calling a \_\_host\_\_ function from a \_\_host\_\_ \_\_device\_\_ function is not allowed [\#180](https://github.com/kokkos/kokkos/issues/180)
+- Intel 15 build error with defaulted "move" operators [\#171](https://github.com/kokkos/kokkos/issues/171)
+- missing libkokkos.a during Trilinos 12.4.2 build, yet other libkokkos\*.a libs are there [\#165](https://github.com/kokkos/kokkos/issues/165)
+- Tie atomic updates to execution space or even to thread team? \(speculation\) [\#144](https://github.com/kokkos/kokkos/issues/144)
+- New View: Compiletime/size Test [\#137](https://github.com/kokkos/kokkos/issues/137)
+- New View : Performance Test [\#136](https://github.com/kokkos/kokkos/issues/136)
+- Signed/unsigned  comparison warning in CUDA parallel [\#130](https://github.com/kokkos/kokkos/issues/130)
+- Kokkos::complex: Need op\* w/ std::complex & real [\#126](https://github.com/kokkos/kokkos/issues/126)
+- Use uintptr\_t for casting pointers [\#110](https://github.com/kokkos/kokkos/issues/110)
+- Default thread mapping behavior between P and Q threads. [\#91](https://github.com/kokkos/kokkos/issues/91)
+- Windows: Atomic\_Fetch\_Exchange\(\) return type [\#90](https://github.com/kokkos/kokkos/issues/90)
+- Synchronic unit test is way too long [\#84](https://github.com/kokkos/kokkos/issues/84)
+- nvcc\_wrapper -\> $\(NVCC\_WRAPPER\) [\#42](https://github.com/kokkos/kokkos/issues/42)
+- Check compiler version and print helpful message [\#39](https://github.com/kokkos/kokkos/issues/39)
+- Kokkos shared memory on Cuda uses a lot of registers [\#31](https://github.com/kokkos/kokkos/issues/31)
+- Can not pass unit test `cuda.space` without a GT 720 [\#25](https://github.com/kokkos/kokkos/issues/25)
+- Makefile.kokkos lacks bounds checking option that CMake has [\#24](https://github.com/kokkos/kokkos/issues/24)
+- Kokkos can not complete unit tests with CUDA UVM enabled [\#23](https://github.com/kokkos/kokkos/issues/23)
+- Simplify teams + shared memory histogram example to remove vectorization [\#21](https://github.com/kokkos/kokkos/issues/21)
+- Kokkos needs to rever to ${PROJECT\_NAME}\_ENABLE\_CXX11 not Trilinos\_ENABLE\_CXX11 [\#17](https://github.com/kokkos/kokkos/issues/17)
+- Kokkos Base Makefile adds AVX to KNC Build [\#16](https://github.com/kokkos/kokkos/issues/16)
+- MS Visual Studio 2013 Build Errors [\#9](https://github.com/kokkos/kokkos/issues/9)
+- subview\(X, ALL\(\), j\) for 2-D LayoutRight View X: should it view a column? [\#5](https://github.com/kokkos/kokkos/issues/5)
+
+## [End_C++98](https://github.com/kokkos/kokkos/tree/End_C++98) (2015-04-15)
+
+
+\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*
--- a/lib/kokkos/CMakeLists.txt
+++ b/lib/kokkos/CMakeLists.txt
@ -34,8 +34,8 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
 # for compatibility with Kokkos' Makefile build system.

 TRIBITS_ADD_OPTION_AND_DEFINE(
-  ${PACKAGE_NAME}_ENABLE_DEBUG
-  ${PACKAGE_NAME_UC}_HAVE_DEBUG
+  Kokkos_ENABLE_DEBUG
+  KOKKOS_HAVE_DEBUG
  "Enable run-time debug checks.  These checks may be expensive, so they are disabled by default in a release build."
  ${${PROJECT_NAME}_ENABLE_DEBUG}
 )
@ -57,7 +57,21 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
 TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_Cuda_UVM
  KOKKOS_USE_CUDA_UVM
-  "Enable CUDA Unified Virtual Memory support in Kokkos."
+  "Enable CUDA Unified Virtual Memory as the default in Kokkos."
+  OFF
+  )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+  Kokkos_ENABLE_Cuda_RDC
+  KOKKOS_HAVE_CUDA_RDC
+  "Enable CUDA Relocatable Device Code support in Kokkos."
+  OFF
+  )
+
+TRIBITS_ADD_OPTION_AND_DEFINE(
+  Kokkos_ENABLE_Cuda_Lambda
+  KOKKOS_HAVE_CUDA_LAMBDA
+  "Enable CUDA LAMBDA support in Kokkos."
  OFF
  )

@ -72,6 +86,9 @@ ASSERT_DEFINED(TPL_ENABLE_Pthread)
 IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
  MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF).  This is not allowed.  Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
 ENDIF ()
+IF (NOT TPL_ENABLE_Pthread)
+  ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0)
+ENDIF()

 TRIBITS_ADD_OPTION_AND_DEFINE(
  Kokkos_ENABLE_OpenMP
@ -162,13 +179,28 @@ TRIBITS_ADD_OPTION_AND_DEFINE(

 #------------------------------------------------------------------------------
 #
-# C) Process the subpackages for Kokkos
+# C) Install Kokkos' executable scripts
+#
+
+
+# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler.
+# Kokkos needs nvcc_wrapper in order to build.  Other libraries and
+# executables also need nvcc_wrapper.  Thus, we need to install it.
+# If the argument of DESTINATION is a relative path, CMake computes it
+# as relative to ${CMAKE_INSTALL_PREFIX}.
+
+INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin)
+
+
+#------------------------------------------------------------------------------
+#
+# D) Process the subpackages for Kokkos
 #

 TRIBITS_PROCESS_SUBPACKAGES()

 #
-# D) If Kokkos itself is enabled, process the Kokkos package
+# E) If Kokkos itself is enabled, process the Kokkos package
 #

 TRIBITS_PACKAGE_DEF()
--- a/lib/kokkos/Makefile.kokkos
+++ b/lib/kokkos/Makefile.kokkos
@ -7,25 +7,26 @@ CXXFLAGS=$(CCFLAGS)
 #Options: OpenMP,Serial,Pthreads,Cuda
 KOKKOS_DEVICES ?= "OpenMP"
 #KOKKOS_DEVICES ?= "Pthreads"
-#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW
+#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
 KOKKOS_ARCH ?= ""
 #Options: yes,no
 KOKKOS_DEBUG ?= "no"
 #Options: hwloc,librt,experimental_memkind
 KOKKOS_USE_TPLS ?= ""
-#Options: c++11
+#Options: c++11,c++1z
 KOKKOS_CXX_STANDARD ?= "c++11"
 #Options: aggressive_vectorization,disable_profiling
 KOKKOS_OPTIONS ?= ""

 #Default settings specific options
 #Options: force_uvm,use_ldg,rdc,enable_lambda
-KOKKOS_CUDA_OPTIONS ?= ""
+KOKKOS_CUDA_OPTIONS ?= "enable_lambda"

 # Check for general settings

 KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
 KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
+KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))

 # Check for external libraries
 KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
@ -53,23 +54,71 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
 endif
 endif

+# Check for other Execution Spaces
+
+KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
+
+ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+  KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
+  CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
+  KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
+endif
+
+# Check OS
+
+KOKKOS_OS                      := $(shell uname -s)
+KOKKOS_INTERNAL_OS_CYGWIN      := $(shell uname -s | grep CYGWIN | wc -l)
+KOKKOS_INTERNAL_OS_LINUX       := $(shell uname -s | grep Linux  | wc -l)
+KOKKOS_INTERNAL_OS_DARWIN      := $(shell uname -s | grep Darwin | wc -l)
+
+# Check compiler
+
 KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version        2>&1 | grep "Intel Corporation" | wc -l)
 KOKKOS_INTERNAL_COMPILER_PGI   := $(shell $(CXX) --version        2>&1 | grep PGI   | wc -l)
 KOKKOS_INTERNAL_COMPILER_XL    := $(shell $(CXX) -qversion        2>&1 | grep XL    | wc -l)
 KOKKOS_INTERNAL_COMPILER_CRAY  := $(shell $(CXX) -craype-verbose  2>&1 | grep "CC-" | wc -l)
-KOKKOS_INTERNAL_OS_CYGWIN      := $(shell uname | grep CYGWIN | wc -l)
+KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(CXX) --version        2>&1 | grep "nvcc" | wc -l)
+ifneq ($(OMPI_CXX),)
+  KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(OMPI_CXX) --version   2>&1 | grep "nvcc" | wc -l)
+endif
+ifneq ($(MPICH_CXX),)
+  KOKKOS_INTERNAL_COMPILER_NVCC  := $(shell $(MPICH_CXX) --version  2>&1 | grep "nvcc" | wc -l)
+endif
+KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version        2>&1 | grep "clang" | wc -l)
+
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
+  KOKKOS_INTERNAL_COMPILER_CLANG = 1
+endif
+ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2)
+  KOKKOS_INTERNAL_COMPILER_XL = 1
+endif
+
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+  # Read the version from the compiler that is actually used for the build
+  # (CXX), the same one the Clang detection above queried.  A bare "clang"
+  # on PATH may be missing or be a different release than CXX.
+  # Field 3 of the "version" line is "X.Y.Z"; stripping the dots yields XYZ.
+  KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell $(CXX) --version | grep version | cut -d ' ' -f3 | tr -d '.')
+  ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+    # Abort early when the Clang release is older than 4.0.0 (i.e. < 400).
+    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
+      $(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
+    endif
+    # Force the Cuda-lambda setting on for Clang + Cuda builds.
+    KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
+  endif
+endif
+

 ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
  KOKKOS_INTERNAL_OPENMP_FLAG := -mp 
 else
-  ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
-    KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
  else
-    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
-      # OpenMP is turned on by default in Cray compiler environment
-      KOKKOS_INTERNAL_OPENMP_FLAG :=
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
+      KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
    else
-      KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
+      ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
+        # OpenMP is turned on by default in Cray compiler environment
+        KOKKOS_INTERNAL_OPENMP_FLAG :=
+      else
+        KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
+      endif
    endif
  endif
 endif
@ -84,13 +133,11 @@ else
      KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
    else
      KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
+      KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
    endif
  endif
 endif

-# Check for other Execution Spaces
-KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
-
 # Check for Kokkos Architecture settings

 #Intel based
@ -98,6 +145,7 @@ KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC |
 KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))

 #NVIDIA based
@ -110,11 +158,13 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep
 KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
 KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
+                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
@ -127,13 +177,16 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61)  \
+                                                      + $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60)  \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
                                                      + $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
 endif

 #ARM based
-KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
+KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))

 #IBM based
 KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
@ -145,17 +198,18 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
 KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))

 #Any AVX?
-KOKKOS_INTERNAL_USE_ARCH_AVX       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX2      := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
-KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX        := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX2       := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX512MIC  := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))

 # Decide what ISA level we are able to support
-KOKKOS_INTERNAL_USE_ISA_X86_64     := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
+KOKKOS_INTERNAL_USE_ISA_X86_64     := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
 KOKKOS_INTERNAL_USE_ISA_KNC        := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
 KOKKOS_INTERNAL_USE_ISA_POWERPCLE  := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))

 #Incompatible flags?
-KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
+KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
 KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -207,15 +261,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
+	tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
  	tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
+	tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
  	tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
+	tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
  	tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
@ -230,9 +290,15 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
 	tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
 endif

+ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1) # C++1z build: add the 1z flag and define both KOKKOS_HAVE_CXX11 and KOKKOS_HAVE_CXX1Z
+        KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
+        tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
+        tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
+endif
+
 ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	KOKKOS_CXXFLAGS += -G
+ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+	KOKKOS_CXXFLAGS += -lineinfo
 endif
 	KOKKOS_CXXFLAGS += -g 
 	KOKKOS_LDFLAGS += -g -ldl
@ -273,13 +339,14 @@ endif

 tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)

+ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
 	tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
 endif

 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
 	tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
-  tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
+	tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
 endif

 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
@ -289,27 +356,101 @@ ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
 endif

 ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
-  tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
-  KOKKOS_CXXFLAGS += -expt-extended-lambda
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+    ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
+	tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
+	KOKKOS_CXXFLAGS += -expt-extended-lambda
+    else
+      $(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
+    endif
+  endif
+  ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+    tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
+  endif
+endif
 endif

 #Add Architecture flags

-ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
-    tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
 	KOKKOS_CXXFLAGS +=
 	KOKKOS_LDFLAGS +=
-    else	
-	KOKKOS_CXXFLAGS += -mavx
-	KOKKOS_LDFLAGS += -mavx
+    else
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
+		KOKKOS_CXXFLAGS +=
+		KOKKOS_LDFLAGS +=
+	else
+		KOKKOS_CXXFLAGS += -march=armv8-a
+		KOKKOS_LDFLAGS += -march=armv8-a
+	endif
    endif
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1) # ARMv8.1 host target: emit KOKKOS_ARCH_ARMV81, then pick per-compiler flags
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) # Cray: nothing is appended
+	KOKKOS_CXXFLAGS +=
+	KOKKOS_LDFLAGS +=
+    else
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) # PGI: nothing is appended
+		KOKKOS_CXXFLAGS +=
+		KOKKOS_LDFLAGS +=
+	else # any other compiler: same -march value for compile and link
+		KOKKOS_CXXFLAGS += -march=armv8.1-a
+		KOKKOS_LDFLAGS += -march=armv8.1-a
+	endif
+    endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1) # ThunderX host target: emits KOKKOS_ARCH_ARMV80 as well as the ThunderX define
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
+    ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) # Cray: nothing is appended
+	KOKKOS_CXXFLAGS +=
+	KOKKOS_LDFLAGS +=
+    else
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) # PGI: nothing is appended
+		KOKKOS_CXXFLAGS +=
+		KOKKOS_LDFLAGS +=
+	else # any other compiler: ARMv8-A code generation tuned for ThunderX
+		KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
+		KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
+	endif
+    endif
+endif
+
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1) # AVX host target: emit KOKKOS_ARCH_AVX, then pick per-compiler flags
+    tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+		KOKKOS_CXXFLAGS += -mavx
+		KOKKOS_LDFLAGS  += -mavx
+	else
+		ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) # Cray: no AVX flag is added
+
+		else
+			ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) # PGI selects the target processor with -tp
+				KOKKOS_CXXFLAGS += -tp=sandybridge
+				KOKKOS_LDFLAGS  += -tp=sandybridge
+			else
+				# Assume that this is really a GNU compiler
+				KOKKOS_CXXFLAGS += -mavx
+				KOKKOS_LDFLAGS  += -mavx
+			endif
+		endif
+	endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
-	KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) 
+
+	else
+		# Assume that this is a really a GNU compiler or it could be XL on P8
+		KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
+		KOKKOS_LDFLAGS  += -mcpu=power8 -mtune=power8
+	endif
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
@ -322,7 +463,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)

 		else
 			ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) 
-
+				KOKKOS_CXXFLAGS += -tp=haswell
+				KOKKOS_LDFLAGS  += -tp=haswell
 			else
 				# Assume that this is a really a GNU compiler
 				KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
@ -352,52 +494,85 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
 	endif
 endif

+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1) # AVX-512 Xeon (SKX) target: emit KOKKOS_ARCH_AVX512XEON, then pick per-compiler flags
+    tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
+		KOKKOS_CXXFLAGS += -xCORE-AVX512
+		KOKKOS_LDFLAGS  += -xCORE-AVX512
+	else
+		ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1) # Cray: no flag is added
+
+		else
+			ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1) # PGI: no flag is added
+
+			else
+				# Any other compiler (assumed GNU-compatible): target skylake-avx512
+				KOKKOS_CXXFLAGS += -march=skylake-avx512
+				KOKKOS_LDFLAGS  += -march=skylake-avx512
+			endif
+		endif
+	endif
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
 	KOKKOS_CXXFLAGS += -mmic
 	KOKKOS_LDFLAGS += -mmic
 endif

+#Figure out the architecture flag for Cuda
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
+  KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch
+endif
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
+  KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch
+endif
+
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_30
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_32
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_35
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_37
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_50
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_52
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
-	KOKKOS_CXXFLAGS += -arch=sm_53
+	KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
 endif
 ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
-        KOKKOS_CXXFLAGS += -arch=sm_61
+        KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
+endif
+ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
+    tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
+        KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
 endif
 endif
 
@ -424,6 +599,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
 ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
 	KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
 	KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
+	KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
 	KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64 
 	KOKKOS_LIBS += -lcudart -lcuda
 endif
@ -443,7 +619,7 @@ endif
 ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
 	KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
-	ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
+	ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
 		KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
 	else
 		KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
@ -451,6 +627,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
 	KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
 endif

+#Explicitly set the GCC Toolchain for Clang (g++ is located once, at parse time; ':=' avoids re-running the shell on every expansion of the flags)
+ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1) # toolchain root = path of g++ with the trailing /bin/g++ removed
+    KOKKOS_INTERNAL_GCC_PATH := $(shell which g++)
+    KOKKOS_INTERNAL_GCC_TOOLCHAIN := $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
+    KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC
+    KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
+endif
+
 #With Cygwin functions such as fdopen and fileno are not defined 
 #when strict ansi is enabled. strict ansi gets enabled with --std=c++11
 #though. So we hard undefine it here. Not sure if that has any bad side effects
@ -471,7 +655,7 @@ KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
 include $(KOKKOS_PATH)/Makefile.targets

 kokkos-clean:
-	-rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
+	rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a

 libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
 	ar cr libkokkos.a $(KOKKOS_OBJ_LINK)
--- a/lib/kokkos/Makefile.targets
+++ b/lib/kokkos/Makefile.targets
@ -14,20 +14,16 @@ Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
 Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
-Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
-Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
 Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
-Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
+Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
 Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
 Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
-KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
+Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
 Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp

@ -38,8 +34,6 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
 Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
-Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
@ -47,8 +41,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
 Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
-Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
 endif

 ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
@ -67,6 +59,4 @@ endif

 Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
 	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
-Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
-	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp

--- a/lib/kokkos/README
+++ b/lib/kokkos/README
@ -45,31 +45,32 @@ Primary tested compilers on X86 are:
  Intel 14.0.4
  Intel 15.0.2
  Intel 16.0.1
+  Intel 17.0.098
  Clang 3.5.2
  Clang 3.6.1
+  Clang 3.9.0

 Primary tested compilers on Power 8 are:
-  IBM XL 13.1.3 (OpenMP,Serial)
-  GCC 4.9.2 (OpenMP,Serial)
-  GCC 5.3.0 (OpenMP,Serial)
+  GCC 5.4.0 (OpenMP,Serial)
+  IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
+
+Primary tested compilers on Intel KNL are:
+   Intel 16.2.181 (with gcc 4.7.2)
+   Intel 17.0.098 (with gcc 4.7.2)

 Secondary tested compilers are:
-  CUDA 6.5 (with gcc 4.7.2)
  CUDA 7.0 (with gcc 4.7.2)
-  CUDA 7.5 (with gcc 4.8.4)
+  CUDA 7.5 (with gcc 4.7.2)
+  CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8)
+  CUDA/Clang 8.0 using Clang/Trunk compiler

 Other compilers working:
  X86:
-   Intel 17.0.042 (the FENL example causes internal compiler error)
   PGI 15.4
   Cygwin 2.1.0 64bit with gcc 4.9.3
-  KNL:
-   Intel 16.2.181 (the FENL example causes internal compiler error)
-   Intel 17.0.042 (the FENL example causes internal compiler error)

 Known non-working combinations:
  Power8:
-   GCC 6.1.0
   Pthreads backend


@ -92,9 +93,10 @@ master branch, without -Werror and only for a select set of backends.

 In the 'example/tutorial' directory you will find step by step tutorial
 examples which explain many of the features of Kokkos. They work with
-simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
+simple Makefiles. To build with g++ and OpenMP simply type 'make'
 in the 'example/tutorial' directory. This will build all examples in the
-subfolders.
+subfolders. To change the build options, refer to the compilation
+section of the Programming Guide.

 ============================================================================
 ====Running Unit Tests======================================================
--- a/lib/kokkos/algorithms/src/Kokkos_Random.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Random.hpp
@ -476,54 +476,54 @@ namespace Kokkos {
  };

  template<class Generator>
-  struct rand<Generator, ::Kokkos::complex<float> > {
+  struct rand<Generator, Kokkos::complex<float> > {
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> max () {
-      return ::Kokkos::complex<float> (1.0, 1.0);
+    static Kokkos::complex<float> max () {
+      return Kokkos::complex<float> (1.0, 1.0);
    }
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> draw (Generator& gen) {
+    static Kokkos::complex<float> draw (Generator& gen) {
      const float re = gen.frand ();
      const float im = gen.frand ();
-      return ::Kokkos::complex<float> (re, im);
+      return Kokkos::complex<float> (re, im);
    }
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) {
+    static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& range) {
      const float re = gen.frand (real (range));
      const float im = gen.frand (imag (range));
-      return ::Kokkos::complex<float> (re, im);
+      return Kokkos::complex<float> (re, im);
    }
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) {
+    static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& start, const Kokkos::complex<float>& end) {
      const float re = gen.frand (real (start), real (end));
      const float im = gen.frand (imag (start), imag (end));
-      return ::Kokkos::complex<float> (re, im);
+      return Kokkos::complex<float> (re, im);
    }
  };

  template<class Generator>
-  struct rand<Generator, ::Kokkos::complex<double> > {
+  struct rand<Generator, Kokkos::complex<double> > {
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> max () {
-      return ::Kokkos::complex<double> (1.0, 1.0);
+    static Kokkos::complex<double> max () {
+      return Kokkos::complex<double> (1.0, 1.0);
    }
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> draw (Generator& gen) {
+    static Kokkos::complex<double> draw (Generator& gen) {
      const double re = gen.drand ();
      const double im = gen.drand ();
-      return ::Kokkos::complex<double> (re, im);
+      return Kokkos::complex<double> (re, im);
    }
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) {
+    static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& range) {
      const double re = gen.drand (real (range));
      const double im = gen.drand (imag (range));
-      return ::Kokkos::complex<double> (re, im);
+      return Kokkos::complex<double> (re, im);
    }
    KOKKOS_INLINE_FUNCTION
-    static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) {
+    static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& start, const Kokkos::complex<double>& end) {
      const double re = gen.drand (real (start), real (end));
      const double im = gen.drand (imag (start), imag (end));
-      return ::Kokkos::complex<double> (re, im);
+      return Kokkos::complex<double> (re, im);
    }
  };

@ -670,8 +670,8 @@ namespace Kokkos {
      double S = 2.0;
      double U;
      while(S>=1.0) {
-        U = drand();
-        const double V = drand();
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
        S = U*U+V*V;
      }
      return U*sqrt(-2.0*log(S)/S);
@ -910,8 +910,8 @@ namespace Kokkos {
      double S = 2.0;
      double U;
      while(S>=1.0) {
-        U = drand();
-        const double V = drand();
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
        S = U*U+V*V;
      }
      return U*sqrt(-2.0*log(S)/S);
@ -1163,8 +1163,8 @@ namespace Kokkos {
      double S = 2.0;
      double U;
      while(S>=1.0) {
-        U = drand();
-        const double V = drand();
+        U = 2.0*drand() - 1.0;
+        const double V = 2.0*drand() - 1.0;
        S = U*U+V*V;
      }
      return U*sqrt(-2.0*log(S)/S);
--- a/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
+++ b/lib/kokkos/algorithms/src/Kokkos_Sort.hpp
@ -51,7 +51,7 @@

 namespace Kokkos {

-  namespace SortImpl {
+  namespace Impl {

  template<class ValuesViewType, int Rank=ValuesViewType::Rank>
  struct CopyOp;
@ -199,7 +199,7 @@ public:

    parallel_for(values.dimension_0(),
        bin_sort_sort_functor<ValuesViewType, offset_type,
-                              SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
+                              Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));

    deep_copy(values,sorted_values);
  }
@ -262,17 +262,15 @@ public:
  }
 };

-namespace SortImpl {
-
 template<class KeyViewType>
-struct DefaultBinOp1D {
+struct BinOp1D {
  const int max_bins_;
  const double mul_;
  typename KeyViewType::const_value_type range_;
  typename KeyViewType::const_value_type min_;

  //Construct BinOp with number of bins, minimum value and maxuimum value
-  DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
+  BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
                               typename KeyViewType::const_value_type max )
     :max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {}

@ -298,13 +296,13 @@ struct DefaultBinOp1D {
 };

 template<class KeyViewType>
-struct DefaultBinOp3D {
+struct BinOp3D {
  int max_bins_[3];
  double mul_[3];
  typename KeyViewType::non_const_value_type range_[3];
  typename KeyViewType::non_const_value_type min_[3];

-  DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
+  BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
                               typename KeyViewType::const_value_type max[] )
  {
    max_bins_[0] = max_bins__[0]+1;
@ -348,109 +346,11 @@ struct DefaultBinOp3D {
  }
 };

-template<typename Scalar>
-struct min_max {
-  Scalar min;
-  Scalar max;
-  bool init;
-
-  KOKKOS_INLINE_FUNCTION
-  min_max() {
-    min = 0;
-    max = 0;
-    init = 0;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  min_max (const min_max& val) {
-    min = val.min;
-    max = val.max;
-    init = val.init;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  min_max operator = (const min_max& val) {
-    min = val.min;
-    max = val.max;
-    init = val.init;
-    return *this;
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (const Scalar& val) {
-    if(init) {
-      min = min<val?min:val;
-      max = max>val?max:val;
-    } else {
-      min = val;
-      max = val;
-      init = 1;
-    }
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (const min_max& val) {
-    if(init && val.init) {
-      min = min<val.min?min:val.min;
-      max = max>val.max?max:val.max;
-    } else {
-      if(val.init) {
-        min = val.min;
-        max = val.max;
-        init = 1;
-      }
-    }
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (volatile const Scalar& val) volatile {
-    if(init) {
-      min = min<val?min:val;
-      max = max>val?max:val;
-    } else {
-      min = val;
-      max = val;
-      init = 1;
-    }
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator+= (volatile const min_max& val) volatile {
-    if(init && val.init) {
-      min = min<val.min?min:val.min;
-      max = max>val.max?max:val.max;
-    } else {
-      if(val.init) {
-        min = val.min;
-        max = val.max;
-        init = 1;
-      }
-    }
-  }
-};
-
-
-template<class ViewType>
-struct min_max_functor {
-  typedef typename ViewType::execution_space execution_space;
-  ViewType view;
-  typedef min_max<typename ViewType::non_const_value_type> value_type;
-  min_max_functor (const ViewType view_):view(view_) {
-  }
-
-  KOKKOS_INLINE_FUNCTION
-  void operator()(const size_t& i, value_type& val) const {
-    val += view(i);
-  }
-};
+namespace Impl {

 template<class ViewType>
 bool try_std_sort(ViewType view) {
  bool possible = true;
-#if ! KOKKOS_USING_EXP_VIEW
-  size_t stride[8];
-  view.stride(stride);
-#else
  size_t stride[8] = { view.stride_0()
                     , view.stride_1()
                     , view.stride_2()
@ -460,8 +360,7 @@ bool try_std_sort(ViewType view) {
                     , view.stride_6()
                     , view.stride_7()
                     };
-#endif
-  possible  = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
+  possible  = possible && std::is_same<typename ViewType::memory_space, HostSpace>::value;
  possible  = possible && (ViewType::Rank == 1);
  possible  = possible && (stride[0] == 1);
  if(possible)  {
@ -470,27 +369,39 @@ bool try_std_sort(ViewType view) {
  return possible;
 }

+template<class ViewType>
+struct min_max_functor { // reduction functor: tracks the smallest and largest entry of `view`
+  typedef Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar;
+  // View whose entries are scanned; the functor keeps its own copy of the handle.
+  ViewType view;
+  min_max_functor(const ViewType& view_):view(view_) {}
+  // Fold entry i into the running extrema carried by `minmax`.
+  KOKKOS_INLINE_FUNCTION
+  void operator() (const size_t& i, minmax_scalar& minmax) const {
+    if(view(i) < minmax.min_val) minmax.min_val = view(i);
+    if(view(i) > minmax.max_val) minmax.max_val = view(i);
+  }
+};
+
 }

 template<class ViewType>
 void sort(ViewType view, bool always_use_kokkos_sort = false) {
  if(!always_use_kokkos_sort) {
-    if(SortImpl::try_std_sort(view)) return;
+    if(Impl::try_std_sort(view)) return;
  }
+  typedef BinOp1D<ViewType> CompType;

-  typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
-  SortImpl::min_max<typename ViewType::non_const_value_type> val;
-  parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
-  BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
+  Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
+  Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
+  parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()),
+                  Impl::min_max_functor<ViewType>(view),reducer);
+  if(result.min_val == result.max_val) return;
+  BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true);
  bin_sort.create_permute_vector();
  bin_sort.sort(view);
 }

-/*template<class ViewType, class Comparator>
-void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
-
-}*/
-
 }

 #endif
--- a/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
+++ b/lib/kokkos/algorithms/unit_tests/CMakeLists.txt
@ -1,6 +1,6 @@

 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
-INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
+INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )

 SET(SOURCES
--- a/lib/kokkos/algorithms/unit_tests/Makefile
+++ b/lib/kokkos/algorithms/unit_tests/Makefile
@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
 default: build_all
 	echo "End Build"

+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+  CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
+else
+  CXX = g++
+endif
+
+CXXFLAGS = -O3
+LINK ?= $(CXX)
+LDFLAGS ?= -lpthread

 include $(KOKKOS_PATH)/Makefile.kokkos

-ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
-	CXX = $(NVCC_WRAPPER)
-	CXXFLAGS ?= -O3
-	LINK = $(CXX)
-	LDFLAGS ?= -lpthread
-else
-	CXX ?= g++
-	CXXFLAGS ?= -O3
-	LINK ?= $(CXX)
-	LDFLAGS ?= -lpthread
-endif
-
 KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests

 TEST_TARGETS = 
--- a/lib/kokkos/algorithms/unit_tests/TestSort.hpp
+++ b/lib/kokkos/algorithms/unit_tests/TestSort.hpp
@ -131,6 +131,10 @@ void test_1D_sort(unsigned int n,bool force_kokkos) {
  typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
  KeyViewType keys("Keys",n);

+  // Test sorting array with all numbers equal
+  Kokkos::deep_copy(keys,KeyType(1));
+  Kokkos::sort(keys,force_kokkos);
+
  Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
  Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);

@ -174,7 +178,7 @@ void test_3D_sort(unsigned int n) {
  typename KeyViewType::value_type min[3] = {0,0,0};
  typename KeyViewType::value_type max[3] = {100,100,100};

-  typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
+  typedef Kokkos::BinOp3D< KeyViewType > BinOp;
  BinOp bin_op(bin_max,min,max);
  Kokkos::BinSort< KeyViewType , BinOp >
    Sorter(keys,bin_op,false);
--- a/lib/kokkos/benchmarks/bytes_and_flops/Makefile
+++ b/lib/kokkos/benchmarks/bytes_and_flops/Makefile
@ -0,0 +1,43 @@
+KOKKOS_PATH = ${HOME}/kokkos
+SRC = $(wildcard *.cpp)
+KOKKOS_DEVICES=Cuda
+KOKKOS_CUDA_OPTIONS=enable_lambda
+.PHONY: default build clean
+default: build
+	echo "End Build"
+# Pick compiler, executable name, devices and architectures from KOKKOS_DEVICES.
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
+EXE = bytes_and_flops.cuda
+KOKKOS_DEVICES = "Cuda,OpenMP"
+KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+EXE = bytes_and_flops.host
+KOKKOS_DEVICES = "OpenMP"
+KOKKOS_ARCH = "SNB"
+endif
+
+CXXFLAGS = -O3 -g
+
+DEPFLAGS = -M
+LINK = ${CXX}
+LINKFLAGS =  
+
+OBJ = $(SRC:.cpp=.o)
+LIB =
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean: kokkos-clean 
+	rm -f *.o *.cuda *.host
+
+# Compilation rules: every object also depends on the benchmark headers.
+
+%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
--- a/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp
+++ b/lib/kokkos/benchmarks/bytes_and_flops/bench.hpp
@ -0,0 +1,99 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<Kokkos_Core.hpp>
+#include<impl/Kokkos_Timer.hpp>
+
+template<class Scalar, int Unroll,int Stride>
+struct Run { // declaration only; bench_unroll_stride.hpp specializes it per UNROLL/STRIDE pair
+static void run(int N, int K, int R, int F, int T, int S);
+};
+
+template<class Scalar, int Stride>
+struct RunStride { // declaration only; bench_stride.hpp specializes it for each STRIDE value
+static void run_1(int N, int K, int R, int F, int T, int S); // run_1..run_8: one entry point per unroll factor
+static void run_2(int N, int K, int R, int F, int T, int S);
+static void run_3(int N, int K, int R, int F, int T, int S);
+static void run_4(int N, int K, int R, int F, int T, int S);
+static void run_5(int N, int K, int R, int F, int T, int S);
+static void run_6(int N, int K, int R, int F, int T, int S);
+static void run_7(int N, int K, int R, int F, int T, int S);
+static void run_8(int N, int K, int R, int F, int T, int S);
+static void run(int N, int K, int R, int U, int F, int T, int S); // takes the unroll factor U at runtime
+};
+
+#define STRIDE 1
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 2
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 4
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 8
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 16
+#include<bench_stride.hpp>
+#undef STRIDE
+#define STRIDE 32
+#include<bench_stride.hpp>
+#undef STRIDE
+
+// Forward to the RunStride instantiation whose compile-time stride equals the
+// runtime value D.
+template<class Scalar>
+void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) {
+ switch(D) {
+   case 1:  RunStride<Scalar,1>::run(N,K,R,U,F,T,S);  break;
+   case 2:  RunStride<Scalar,2>::run(N,K,R,U,F,T,S);  break;
+   case 4:  RunStride<Scalar,4>::run(N,K,R,U,F,T,S);  break;
+   case 8:  RunStride<Scalar,8>::run(N,K,R,U,F,T,S);  break;
+   case 16: RunStride<Scalar,16>::run(N,K,R,U,F,T,S); break;
+   case 32: RunStride<Scalar,32>::run(N,K,R,U,F,T,S); break;
+   // Any other stride has no instantiation here and results in no work.
+   default: break;
+ }
+}
+
--- a/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp
+++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_stride.hpp
@ -0,0 +1,124 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+
+#define UNROLL 1
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 2
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 3
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 4
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 5
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 6
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 7
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+#define UNROLL 8
+#include<bench_unroll_stride.hpp>
+#undef UNROLL
+
+// RunStride for the STRIDE value currently #defined by the including header.
+// run_1..run_8 each forward to the Run kernel with that compile-time unroll
+// factor; run() picks one of them from the runtime unroll factor U.
+template<class Scalar>
+struct RunStride<Scalar,STRIDE> {
+// Fixed-unroll entry points.
+static void run_1(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,1,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_2(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,2,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_3(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,3,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_4(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,4,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_5(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,5,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_6(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,6,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_7(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,7,STRIDE>::run(N,K,R,F,T,S);
+}
+static void run_8(int N, int K, int R, int F, int T, int S) {
+  Run<Scalar,8,STRIDE>::run(N,K,R,F,T,S);
+}
+
+// Runtime dispatch on U.
+// A value of U outside 1..8 matches no case, so nothing is run.
+static void run(int N, int K, int R, int U, int F, int T, int S) {
+  switch(U) {
+    case 1:
+      run_1(N,K,R,F,T,S); break;
+    case 2:
+      run_2(N,K,R,F,T,S); break;
+    case 3:
+      run_3(N,K,R,F,T,S); break;
+    case 4:
+      run_4(N,K,R,F,T,S); break;
+    case 5:
+      run_5(N,K,R,F,T,S); break;
+    case 6:
+      run_6(N,K,R,F,T,S); break;
+    case 7:
+      run_7(N,K,R,F,T,S); break;
+    case 8:
+      run_8(N,K,R,F,T,S); break;
+  }
+}
+};
+
--- a/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp
+++ b/lib/kokkos/benchmarks/bytes_and_flops/bench_unroll_stride.hpp
@ -0,0 +1,148 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+template<class Scalar>
+struct Run<Scalar,UNROLL,STRIDE> { // kernel for the UNROLL/STRIDE pair #defined by the including header
+static void run(int N, int K, int R, int F, int T, int S) {
+  Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> A("A",N,K);
+  Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> B("B",N,K);
+  Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> C("C",N,K);
+  // Fill all three arrays with constants so every run starts from the same data.
+  Kokkos::deep_copy(A,Scalar(1.5));
+  Kokkos::deep_copy(B,Scalar(2.5));
+  Kokkos::deep_copy(C,Scalar(3.5));
+  // Timed region: one launch over N teams of size T, with S passed as the per-team scratch size.
+  Kokkos::Timer timer;
+  Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)),
+    KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) {
+    const int n = team.league_rank();
+    for(int r=0; r<R; r++) {
+      Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,K), [&] (const int& i) {
+        Scalar a1 = A(n,i,0); 
+        const Scalar b = B(n,i,0);
+#if(UNROLL>1)
+        Scalar a2 = a1*1.3;
+#endif
+#if(UNROLL>2)
+        Scalar a3 = a2*1.1;
+#endif
+#if(UNROLL>3)
+        Scalar a4 = a3*1.1;
+#endif
+#if(UNROLL>4)
+        Scalar a5 = a4*1.3;
+#endif
+#if(UNROLL>5)
+        Scalar a6 = a5*1.1;
+#endif
+#if(UNROLL>6)
+        Scalar a7 = a6*1.1;
+#endif
+#if(UNROLL>7)
+        Scalar a8 = a7*1.1;
+#endif
+        // F rounds of a += b*a on each of the UNROLL accumulators compiled in above.
+
+        for(int f = 0; f<F; f++) {
+          a1 += b*a1;
+#if(UNROLL>1)
+          a2 += b*a2;
+#endif
+#if(UNROLL>2)
+          a3 += b*a3;
+#endif
+#if(UNROLL>3)
+          a4 += b*a4;
+#endif
+#if(UNROLL>4)
+          a5 += b*a5;
+#endif
+#if(UNROLL>5)
+          a6 += b*a6;
+#endif
+#if(UNROLL>6)
+          a7 += b*a7;
+#endif
+#if(UNROLL>7)
+          a8 += b*a8;
+#endif
+
+
+        }
+#if(UNROLL==1)
+        C(n,i,0) = a1; 
+#endif
+#if(UNROLL==2)
+        C(n,i,0) = a1+a2; 
+#endif
+#if(UNROLL==3)
+        C(n,i,0) = a1+a2+a3; 
+#endif
+#if(UNROLL==4)
+        C(n,i,0) = a1+a2+a3+a4; 
+#endif
+#if(UNROLL==5)
+        C(n,i,0) = a1+a2+a3+a4+a5;
+#endif
+#if(UNROLL==6)
+        C(n,i,0) = a1+a2+a3+a4+a5+a6;
+#endif
+#if(UNROLL==7)
+        C(n,i,0) = a1+a2+a3+a4+a5+a6+a7;
+#endif
+#if(UNROLL==8)
+        C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8;
+#endif
+        // Only index 0 of the trailing STRIDE-wide dimension is read (A,B) or written (C).
+      });
+    }
+  });
+  Kokkos::fence(); 
+  double seconds = timer.seconds();
+  // Cost model: 3 Scalars moved and 2*F*UNROLL + 2*(UNROLL-1) flops per (n,i) element per repetition.
+  double bytes = 1.0*N*K*R*3*sizeof(Scalar);
+  double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
+  printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds);
+}
+};
+
--- a/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
+++ b/lib/kokkos/benchmarks/bytes_and_flops/main.cpp
@ -0,0 +1,96 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<Kokkos_Core.hpp>
+#include<impl/Kokkos_Timer.hpp>
+#include<bench.hpp>
+
+int main(int argc, char* argv[]) {
+  Kokkos::initialize();
+
+  // Nine positional arguments are required; without them print usage and exit.
+  if(argc<10) {
+    printf("Arguments: P N K R D U F T S\n");
+    printf("  P:   Precision (1==float, 2==double)\n");
+    printf("  N,K: dimensions of the 2D array to allocate\n");
+    printf("  R:   how often to loop through the K dimension with each team\n");
+    printf("  D:   distance between loaded elements (stride)\n");
+    printf("  U:   how many independent flops to do per load\n");
+    printf("  F:   how many times to repeat the U unrolled operations before reading next element\n");
+    printf("  T:   team size\n");
+    printf("  S:   shared memory per team (used to control occupancy on GPUs)\n");
+    printf("Example Input GPU:\n");
+    printf("  Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n");
+    printf("  Cache Bound     : 2 100000 1024 64 1 1 1 512 20000\n");
+    printf("  Compute Bound   : 2 100000 1024 1 1 8 64 256 6000\n");
+    printf("  Load Slots Used : 2 20000 256 32 16 1 1 256 6000\n");
+    printf("  Inefficient Load: 2 20000 256 32 2 1 1 256 20000\n");
+    Kokkos::finalize();
+    return 0;
+  }
+
+  // Positional arguments, in the order listed by the usage text above.
+  int P = atoi(argv[1]);
+  int N = atoi(argv[2]);
+  int K = atoi(argv[3]);
+  int R = atoi(argv[4]);
+  int D = atoi(argv[5]);
+  int U = atoi(argv[6]);
+  int F = atoi(argv[7]);
+  int T = atoi(argv[8]);
+  int S = atoi(argv[9]);
+  // Reject values that have no compiled kernel; Kokkos is finalized on every exit path.
+  if(U<1 || U>8) {printf("U must be 1-8\n"); Kokkos::finalize(); return 0;}
+  if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); Kokkos::finalize(); return 0;}
+  if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); Kokkos::finalize(); return 0;}
+
+  if(P==1) {
+    run_stride_unroll<float>(N,K,R,D,U,F,T,S);
+  }
+  if(P==2) {
+    run_stride_unroll<double>(N,K,R,D,U,F,T,S);
+  }
+
+  Kokkos::finalize();
+}
+
--- a/lib/kokkos/benchmarks/gather/Makefile
+++ b/lib/kokkos/benchmarks/gather/Makefile
@ -0,0 +1,44 @@
+KOKKOS_PATH = ${HOME}/kokkos
+SRC = $(wildcard *.cpp)
+KOKKOS_DEVICES=Cuda
+KOKKOS_CUDA_OPTIONS=enable_lambda
+
+default: build
+	echo "End Build"
+# Pick compiler, executable name, devices and architectures from KOKKOS_DEVICES.
+ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
+CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
+EXE = gather.cuda
+KOKKOS_DEVICES = "Cuda,OpenMP"
+KOKKOS_ARCH = "SNB,Kepler35"
+else
+CXX = g++
+EXE = gather.host
+KOKKOS_DEVICES = "OpenMP"
+KOKKOS_ARCH = "SNB"
+endif
+
+CXXFLAGS = -O3 -g
+
+DEPFLAGS = -M
+LINK = ${CXX}
+LINKFLAGS =  
+
+OBJ = $(SRC:.cpp=.o)
+LIB =
+
+include $(KOKKOS_PATH)/Makefile.kokkos
+
+.PHONY: default build clean
+build: $(EXE)
+
+$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
+	$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
+
+clean: kokkos-clean 
+	rm -f *.o *.cuda *.host
+
+# Compilation rules: every object also depends on the benchmark headers.
+
+%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp 
+	$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<
--- a/lib/kokkos/benchmarks/gather/gather.hpp
+++ b/lib/kokkos/benchmarks/gather/gather.hpp
@ -0,0 +1,92 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+template<class Scalar, int UNROLL>
+struct RunGather { // declaration only; gather_unroll.hpp specializes it for each UNROLL value
+  static void run(int N, int K, int D, int R, int F);
+};
+
+#define UNROLL 1
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 2
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 3
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 4
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 5
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 6
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 7
+#include<gather_unroll.hpp>
+#undef UNROLL
+#define UNROLL 8
+#include<gather_unroll.hpp>
+#undef UNROLL
+
+// Forward to the RunGather instantiation whose compile-time unroll factor
+// equals the runtime value U.
+template<class Scalar>
+void run_gather_test(int N, int K, int D, int R, int U, int F) {
+ switch(U) {
+   case 1: RunGather<Scalar,1>::run(N,K,D,R,F); break;
+   case 2: RunGather<Scalar,2>::run(N,K,D,R,F); break;
+   case 3: RunGather<Scalar,3>::run(N,K,D,R,F); break;
+   case 4: RunGather<Scalar,4>::run(N,K,D,R,F); break;
+   case 5: RunGather<Scalar,5>::run(N,K,D,R,F); break;
+   case 6: RunGather<Scalar,6>::run(N,K,D,R,F); break;
+   case 7: RunGather<Scalar,7>::run(N,K,D,R,F); break;
+   case 8: RunGather<Scalar,8>::run(N,K,D,R,F); break;
+   // Unroll factors outside 1..8 have no instantiation here and result in
+   // no work.
+   default:
+     break;
+ }
+}
--- a/lib/kokkos/benchmarks/gather/gather_unroll.hpp
+++ b/lib/kokkos/benchmarks/gather/gather_unroll.hpp
@ -0,0 +1,169 @@
+/*
+//@HEADER
+// ************************************************************************
+//
+//                        Kokkos v. 2.0
+//              Copyright (2014) Sandia Corporation
+//
+// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
+// the U.S. Government retains certain rights in this software.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// 1. Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//
+// 2. Redistributions in binary form must reproduce the above copyright
+// notice, this list of conditions and the following disclaimer in the
+// documentation and/or other materials provided with the distribution.
+//
+// 3. Neither the name of the Corporation nor the names of the
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
+//
+// ************************************************************************
+//@HEADER
+*/
+
+#include<Kokkos_Core.hpp>
+#include<Kokkos_Random.hpp>
+
+template<class Scalar>
+struct RunGather<Scalar,UNROLL> { // gather kernel for the UNROLL value #defined by the including header
+static void run(int N, int K, int D, int R, int F) {
+  Kokkos::View<int**> connectivity("Connectivity",N,K);
+  Kokkos::View<Scalar*> A_in("Input",N);
+  Kokkos::View<Scalar*> B_in("Input",N);
+  Kokkos::View<Scalar*> C("Output",N);
+  // Pool of random-number generator states, seeded with a fixed value.
+  Kokkos::Random_XorShift64_Pool<> rand_pool(12313);
+
+  Kokkos::deep_copy(A_in,1.5);
+  Kokkos::deep_copy(B_in,2.0);
+  // Const, RandomAccess-tagged views of the inputs; the timed kernel reads through these.
+  Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > A(A_in);
+  Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > B(B_in);
+  // Each entity i gets K gather indices computed as (rand(D) + i - D/2 + N) % N.
+  Kokkos::parallel_for("InitKernel",N,
+      KOKKOS_LAMBDA (const int& i) {
+    auto rand_gen = rand_pool.get_state();
+    for( int jj=0; jj<K; jj++) {
+      connectivity(i,jj) = (rand_gen.rand(D) + i - D/2 + N)%N;
+    }
+    rand_pool.free_state(rand_gen);
+  });
+  Kokkos::fence();
+
+  // Timed region: R launches of the gather kernel, each followed by a fence.
+  Kokkos::Timer timer;
+  for(int r = 0; r<R; r++) {
+  Kokkos::parallel_for("BenchmarkKernel",N,
+      KOKKOS_LAMBDA (const int& i) {
+      Scalar c = Scalar(0.0);
+      for( int jj=0; jj<K; jj++) {
+        const int j = connectivity(i,jj);
+        Scalar a1 = A(j);
+        const Scalar b = B(j);
+#if(UNROLL>1)
+        Scalar a2 = a1*Scalar(1.3);
+#endif
+#if(UNROLL>2)
+        Scalar a3 = a2*Scalar(1.1);
+#endif
+#if(UNROLL>3)
+        Scalar a4 = a3*Scalar(1.1);
+#endif
+#if(UNROLL>4)
+        Scalar a5 = a4*Scalar(1.3);
+#endif
+#if(UNROLL>5)
+        Scalar a6 = a5*Scalar(1.1);
+#endif
+#if(UNROLL>6)
+        Scalar a7 = a6*Scalar(1.1);
+#endif
+#if(UNROLL>7)
+        Scalar a8 = a7*Scalar(1.1);
+#endif
+        // F rounds of a += b*a on each of the UNROLL accumulators compiled in above.
+
+        for(int f = 0; f<F; f++) {
+          a1 += b*a1;
+#if(UNROLL>1)
+          a2 += b*a2;
+#endif
+#if(UNROLL>2)
+          a3 += b*a3;
+#endif
+#if(UNROLL>3)
+          a4 += b*a4;
+#endif
+#if(UNROLL>4)
+          a5 += b*a5;
+#endif
+#if(UNROLL>5)
+          a6 += b*a6;
+#endif
+#if(UNROLL>6)
+          a7 += b*a7;
+#endif
+#if(UNROLL>7)
+          a8 += b*a8;
+#endif
+
+
+        }
+#if(UNROLL==1)
+        c += a1;
+#endif
+#if(UNROLL==2)
+        c += a1+a2;
+#endif
+#if(UNROLL==3)
+        c += a1+a2+a3;
+#endif
+#if(UNROLL==4)
+        c += a1+a2+a3+a4;
+#endif
+#if(UNROLL==5)
+        c += a1+a2+a3+a4+a5;
+#endif
+#if(UNROLL==6)
+        c += a1+a2+a3+a4+a5+a6;
+#endif
+#if(UNROLL==7)
+        c += a1+a2+a3+a4+a5+a6+a7;
+#endif
+#if(UNROLL==8)
+        c += a1+a2+a3+a4+a5+a6+a7+a8;
+#endif
+
+      }
+      C(i) = c ;
+  });
+  Kokkos::fence();
+  }
+  double seconds = timer.seconds();
+  // Cost model: per gathered index 2 Scalars and 1 int are read; one Scalar is written per entity.
+  double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar);
+  double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
+  double gather_ops = 1.0*N*K*R*2;
+  printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",int(sizeof(Scalar)/4),N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds); // cast: sizeof yields size_t, %i expects int
+}
+};
--- a/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
+++ b/lib/kokkos/core/src/impl/Kokkos_HBWAllocators.cpp
@ -1,13 +1,13 @@
 /*
 //@HEADER
 // ************************************************************************
-// 
+//
 //                        Kokkos v. 2.0
 //              Copyright (2014) Sandia Corporation
-// 
+//
 // Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
 // the U.S. Government retains certain rights in this software.
-// 
+//
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions are
 // met:
@ -36,73 +36,58 @@
 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 //
 // Questions? Contact  H. Carter Edwards (hcedwar@sandia.gov)
-// 
+//
 // ************************************************************************
 //@HEADER
 */

-#include <Kokkos_HostSpace.hpp>
+#include<Kokkos_Core.hpp>
+#include<impl/Kokkos_Timer.hpp>
+#include<gather.hpp>

-#include <impl/Kokkos_HBWAllocators.hpp>
-#include <impl/Kokkos_Error.hpp>
+int main(int argc, char* argv[]) {
+  Kokkos::initialize(argc,argv);


-#include <stdint.h>    // uintptr_t
-#include <cstdlib>     // for malloc, realloc, and free
-#include <cstring>     // for memcpy
-
-#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
-#include <sys/mman.h>  // for mmap, munmap, MAP_ANON, etc
-#include <unistd.h>    // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
-#endif
-
-#include <sstream>
-#include <iostream>
-
-#ifdef KOKKOS_HAVE_HBWSPACE
-#include <memkind.h>
-
-namespace Kokkos {
-namespace Experimental {
-namespace Impl {
-#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
-/*--------------------------------------------------------------------------*/
-
-void* HBWMallocAllocator::allocate( size_t size )
-{
-  std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;
-  void * ptr = NULL;
-  if (size) {
-    ptr = memkind_malloc(MEMKIND_TYPE,size);
-
-    if (!ptr)
-    {
-      std::ostringstream msg ;
-      msg << name() << ": allocate(" << size << ") FAILED";
-      Kokkos::Impl::throw_runtime_exception( msg.str() );
-    }
+  if(argc<8) {  // all seven positional parameters are required; otherwise print usage and exit
+    printf("Arguments: S N K D R U F\n");
+    printf("  S:   Scalar Type Size (1==float, 2==double, 4==complex<double>)\n");
+    printf("  N:   Number of entities\n");
+    printf("  K:   Number of things to gather per entity\n");
+    printf("  D:   Max distance of gathered things of an entity\n");
+    printf("  R:   how often to loop through the K dimension with each team\n");
+    printf("  U:   how many independent flops to do per load\n");
+    printf("  F:   how many times to repeat the U unrolled operations before reading next element\n");
+    printf("Example Input GPU:\n");
+    printf("  Bandwidth Bound : 2 10000000 1 1 10 1 1\n");
+    printf("  Cache Bound     : 2 10000000 64 1 10 1 1\n");
+    printf("  Cache Gather    : 2 10000000 64 256 10 1 1\n");
+    printf("  Global Gather   : 2 100000000 16 100000000 1 1 1\n");
+    printf("  Typical MD      : 2 100000 32 512 1000 8 2\n");
+    Kokkos::finalize();  // initialize() already ran above, so shut the runtime down before leaving
+    return 0;
  }
-  return ptr;
+
+  // Positional parameters, read in the order the usage text describes them.
+  int S = atoi(argv[1]);
+  int N = atoi(argv[2]);
+  int K = atoi(argv[3]);
+  int D = atoi(argv[4]);
+  int R = atoi(argv[5]);
+  int U = atoi(argv[6]);
+  int F = atoi(argv[7]);
+
+  if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); Kokkos::finalize(); return 0;}
+  if( N<D ) {printf("N must be larger or equal to D\n"); Kokkos::finalize(); return 0; }
+  if(S==1) {  // S selects the Scalar type run_gather_test is instantiated with
+    run_gather_test<float>(N,K,D,R,U,F);
+  }
+  if(S==2) {
+    run_gather_test<double>(N,K,D,R,U,F);
+  }
+  if(S==4) {
+    run_gather_test<Kokkos::complex<double> >(N,K,D,R,U,F);
+  }
+  Kokkos::finalize();
 }

-void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
-{
-  if (ptr) {
-    memkind_free(MEMKIND_TYPE,ptr);
-  }
-}
-
-void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
-{
-  void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size);
-
-  if (new_size > 0u && ptr == NULL) {
-    Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
-  }
-  return ptr;
-}
-
-} // namespace Impl
-} // namespace Experimental
-} // namespace Kokkos
-#endif
--- a/lib/kokkos/bin/nvcc_wrapper
+++ b/lib/kokkos/bin/nvcc_wrapper
@ -0,0 +1,284 @@
+#!/bin/bash
+#
+# This shell script (nvcc_wrapper) wraps both the host compiler and
+# NVCC, if you are building legacy C or C++ code with CUDA enabled.
+# The script remedies some differences between the interface of NVCC
+# and that of the host compiler, in particular for linking.
+# It also means that a legacy code doesn't need separate .cu files;
+# it can just use .cpp files.
+#
+# Default settings: change those according to your machine.  For
+# example, you may have two different wrappers with either icpc
+# or g++ as their back-end compiler.  The defaults can be overwritten
+# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
+
+default_arch="sm_35"
+#default_arch="sm_50"
+
+#
+# The default C++ compiler.
+#
+host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
+#host_compiler="icpc"
+#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
+#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
+
+#
+# Internal variables
+#
+
+# C++ files
+cpp_files=""
+
+# Host compiler arguments
+xcompiler_args=""
+
+# Cuda (NVCC) only arguments
+cuda_args=""
+
+# Arguments for both NVCC and Host compiler
+shared_args=""
+
+# Linker arguments
+xlinker_args=""
+
+# Object files passable to NVCC
+object_files=""
+
+# Link objects for the host linker only
+object_files_xlinker=""
+
+# Shared libraries with version numbers are not handled correctly by NVCC
+shared_versioned_libraries_host=""
+shared_versioned_libraries=""
+
+# Does the User set the architecture 
+arch_set=0
+
+# Does the user overwrite the host compiler
+ccbin_set=0
+
+#Error code of compilation
+error_code=0
+
+# Do a dry run without actually compiling
+dry_run=0
+
+# Skip NVCC compilation and use host compiler directly
+host_only=0
+
+# Enable workaround for CUDA 6.5 for pragma ident 
+replace_pragma_ident=0
+
+# Mark first host compiler argument
+first_xcompiler_arg=1
+
+temp_dir=${TMPDIR:-/tmp}
+
+# Check if we have an optimization argument already
+optimization_applied=0
+
+#echo "Arguments: $# $@"
+
+while [ $# -gt 0 ]  # sort every command-line argument into one of the variables initialised above
+do
+  case $1 in
+  #show the executed command
+  --show|--nvcc-wrapper-show)
+    dry_run=1
+    ;;
+  #run host compilation only
+  --host-only)
+    host_only=1
+    ;;
+  #replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
+  --replace-pragma-ident)
+    replace_pragma_ident=1
+    ;;
+  #handle source files to be compiled as cuda files
+  *.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
+    cpp_files="$cpp_files $1"
+    ;;
+   # Ensure we only have one optimization flag because NVCC doesn't allow multiple
+  -O*)
+    if [ $optimization_applied -eq 1 ]; then
+       echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
+    else
+       shared_args="$shared_args $1"
+       optimization_applied=1
+    fi
+    ;;
+  #Handle shared args (valid for both nvcc and the host compiler)
+  -D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
+    shared_args="$shared_args $1"
+    ;;
+  #Handle shared args that have an argument (the extra shift consumes that argument)
+  -o|-MT)
+    shared_args="$shared_args $1 $2"
+    shift
+    ;;
+  #Handle known nvcc args
+  -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
+    cuda_args="$cuda_args $1"
+    ;;
+  #Handle more known nvcc args
+  --expt-extended-lambda|--expt-relaxed-constexpr)
+    cuda_args="$cuda_args $1"
+    ;;
+  #Handle known nvcc args that have an argument
+  -rdc|-maxrregcount|--default-stream)
+    cuda_args="$cuda_args $1 $2"
+    shift
+    ;;
+  #Handle c++11 setting
+  --std=c++11|-std=c++11)
+    shared_args="$shared_args $1"
+    ;;
+  #strip off -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
+  -std=c++98|--std=c++98)
+    ;;
+  #strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
+  -pedantic|-Wpedantic|-ansi)
+    ;;
+  #strip -Xcompiler because we add it; its value joins the comma-separated host argument list
+  -Xcompiler)
+    if [ $first_xcompiler_arg -eq 1 ]; then
+      xcompiler_args="$2"
+      first_xcompiler_arg=0
+    else
+      xcompiler_args="$xcompiler_args,$2"
+    fi
+    shift
+    ;;
+  #strip off "-x cu" because we add that; any other -x language is forwarded to the host compiler
+  -x)
+    if [[ $2 != "cu" ]]; then
+      if [ $first_xcompiler_arg -eq 1 ]; then
+        xcompiler_args="-x,$2"
+        first_xcompiler_arg=0
+      else
+        xcompiler_args="$xcompiler_args,-x,$2"
+      fi
+    fi
+    shift
+    ;;
+  #Handle -ccbin (if it's not set we can set it to a default value)
+  -ccbin)
+    cuda_args="$cuda_args $1 $2"
+    ccbin_set=1
+    host_compiler=$2
+    shift
+    ;;
+  #Handle -arch argument (if it's not set use a default)
+  -arch*)
+    cuda_args="$cuda_args $1"
+    arch_set=1
+    ;;
+  #Handle -Xcudafe argument
+  -Xcudafe)
+    cuda_args="$cuda_args -Xcudafe $2"
+    shift
+    ;;
+  #Handle args that should be sent to the linker; match "-Wl," exactly so warning flags such as -Wlogical-op are not mistaken for linker args
+  -Wl,*)
+    xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
+    host_linker_args="$host_linker_args $1"
+    ;;
+  #Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
+  *.a|*.so|*.o|*.obj)
+    object_files="$object_files $1"
+    object_files_xlinker="$object_files_xlinker -Xlinker $1"
+    ;;
+  #Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
+  *.dylib)
+    object_files="$object_files -Xlinker $1"
+    object_files_xlinker="$object_files_xlinker -Xlinker $1"
+    ;;
+  #Handle shared libraries with *.so.* names which nvcc can't do.
+  *.so.*)
+    shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
+    shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
+  ;;
+  #All other args are sent to the host compiler
+  *)
+    if [ $first_xcompiler_arg -eq 1 ]; then
+      xcompiler_args=$1
+      first_xcompiler_arg=0
+    else
+      xcompiler_args="$xcompiler_args,$1"
+    fi
+    ;;
+  esac
+
+  shift
+done
+
+#Add default host compiler if the caller did not pass -ccbin
+if [ $ccbin_set -ne 1 ]; then
+  cuda_args="$cuda_args -ccbin $host_compiler"
+fi
+
+#Add default architecture if the caller did not pass -arch
+if [ $arch_set -ne 1 ]; then
+  cuda_args="$cuda_args -arch=$default_arch"
+fi
+
+#Compose compilation command (-Xcompiler is only added when host arguments were collected)
+nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
+if [ $first_xcompiler_arg -eq 0 ]; then
+  nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
+fi
+
+#Compose host only command; xcompiler_args is comma-joined for -Xcompiler, so split it back into separate words
+host_command="$host_compiler $shared_args ${xcompiler_args//,/ } $host_linker_args $shared_versioned_libraries_host"
+
+#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'; affected sources are compiled from a patched copy
+if [ $replace_pragma_ident -eq 1 ]; then
+  cpp_files2=""
+  for file in $cpp_files
+  do
+    var=`grep pragma ${file} | grep ident | grep "#"`
+    if [ "${#var}" -gt 0 ]
+    then  # '/' in the source path is flattened to '_' so the copy lands directly in $temp_dir
+      sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_${file//\//_}
+      cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_${file//\//_}"
+    else
+      cpp_files2="$cpp_files2 $file"
+    fi
+  done
+  cpp_files=$cpp_files2
+  #echo $cpp_files
+fi
+
+if [ -n "$cpp_files" ]; then  # sources present: objects go through -Xlinker because -x cu applies to all inputs
+  nvcc_command="${nvcc_command} ${object_files_xlinker} -x cu ${cpp_files}"
+else
+  nvcc_command="${nvcc_command} ${object_files}"
+fi
+
+if [ -n "$cpp_files" ]; then
+  host_command="${host_command} ${object_files} ${cpp_files}"
+else
+  host_command="${host_command} ${object_files}"
+fi
+
+#Dry run: print the command that would be executed, then stop
+if [ "$dry_run" -eq 1 ]; then
+  if [ "$host_only" -eq 1 ]; then
+    echo $host_command
+  else
+    echo $nvcc_command
+  fi
+  exit 0
+fi
+
+#Invoke either the host compiler or nvcc
+if [ "$host_only" -eq 1 ]; then
+  $host_command
+else
+  $nvcc_command
+fi
+error_code=$?
+
+#Exit with the status of the command that was run
+exit $error_code
--- a/lib/kokkos/cmake/deps/CUSPARSE.cmake
+++ b/lib/kokkos/cmake/deps/CUSPARSE.cmake
@ -53,12 +53,12 @@
 # ************************************************************************
 # @HEADER

-include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
+#include(${TRIBITS_DEPS_DIR}/CUDA.cmake)

-IF (TPL_ENABLE_CUDA)
-  GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
-  GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
-  GLOBAL_SET(TPL_CUSPARSE_LIBRARIES    ${CUDA_cusparse_LIBRARY})
-  TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
-ENDIF()
+#IF (TPL_ENABLE_CUDA)
+#  GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
+#  GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
+#  GLOBAL_SET(TPL_CUSPARSE_LIBRARIES    ${CUDA_cusparse_LIBRARY})
+#  TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
+#ENDIF()

--- a/lib/kokkos/cmake/tribits.cmake
+++ b/lib/kokkos/cmake/tribits.cmake
@ -1,6 +1,16 @@
 INCLUDE(CMakeParseArguments)
 INCLUDE(CTest)

+cmake_policy(SET CMP0054 NEW)
+
+IF(NOT DEFINED ${PROJECT_NAME})
+  project(Kokkos) 
+ENDIF()
+
+IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_DEBUG)  # test the variable's name (not its expanded value) so a caller-provided setting is kept
+  SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
+ENDIF()
+
 FUNCTION(ASSERT_DEFINED VARS)
  FOREACH(VAR ${VARS})
    IF(NOT DEFINED ${VAR})
@ -75,6 +85,13 @@ MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)

 ENDMACRO()

+
+function(INCLUDE_DIRECTORIES)  # wrapper that accepts the extra option REQUIRED_DURING_INSTALLATION_TESTING
+  cmake_parse_arguments(INCLUDE_DIRECTORIES "REQUIRED_DURING_INSTALLATION_TESTING" "" "" ${ARGN})  # the option is parsed out and not used further
+  _INCLUDE_DIRECTORIES(${INCLUDE_DIRECTORIES_UNPARSED_ARGUMENTS})  # forward everything else; presumably _INCLUDE_DIRECTORIES is the overridden built-in command - confirm
+endfunction()
+
+
 MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT)
  SET(PROP_VALUES)
  FOREACH(TARGET_X ${ARGN})
@ -271,6 +288,11 @@ ENDFUNCTION()

 ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})

+FUNCTION(TRIBITS_ADD_TEST)  # no-op stub: the body is empty, so calls with any arguments do nothing
+ENDFUNCTION()
+FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE)  # no-op stub: the body is empty, so calls with any arguments do nothing
+ENDFUNCTION()
+
 FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME)

  SET(options STANDARD_PASS_OUTPUT WILL_FAIL)
--- a/lib/kokkos/config/configure_compton_cpu.sh
+++ b/lib/kokkos/config/configure_compton_cpu.sh
--- a/lib/kokkos/config/configure_compton_mic.sh
+++ b/lib/kokkos/config/configure_compton_mic.sh
--- a/lib/kokkos/config/configure_kokkos.sh
+++ b/lib/kokkos/config/configure_kokkos.sh
--- a/lib/kokkos/config/configure_kokkos_nvidia.sh
+++ b/lib/kokkos/config/configure_kokkos_nvidia.sh
--- a/lib/kokkos/config/configure_shannon.sh
+++ b/lib/kokkos/config/configure_shannon.sh
--- a/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt
+++ b/lib/kokkos/config/kokkos-trilinos-integration-procedure.txt
@ -91,9 +91,20 @@ Step 3:

 // -------------------------------------------------------------------------------- //

-Step 4:
-  4.1. Once all Trilinos tests pass promote Kokkos develop branch to master on Github
+Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on Github
+  4.1. Generate Changelog (You need a github API token)
+    
+       Close all Open issues with "InDevelop" tag on github
+   
+       (Not from kokkos directory)
+       github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
+       
+       (Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md)
+       (Make desired changes to CHANGELOG.md to enhance clarity)
+       (Commit and push the CHANGELOG to develop)

+  4.2. Merge develop into master
+              
       - DO NOT fast-forward the merge!!!!

       (From kokkos directory):
@ -103,7 +114,7 @@ Step 4:
       git reset --hard origin/master
       git merge --no-ff origin/develop

-  4.2. Update the tag in kokkos/config/master_history.txt
+  4.3. Update the tag in kokkos/config/master_history.txt
       Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
       Tag format: #.#.##

--- a/lib/kokkos/config/master_history.txt
+++ b/lib/kokkos/config/master_history.txt
@ -1,3 +1,6 @@
 tag:  2.01.00    date: 07:21:2016    master: xxxxxxxx    develop: fa6dfcc4
 tag:  2.01.06    date: 09:02:2016    master: 9afaa87f    develop: 555f1a3a
-
+tag:  2.01.10    date: 09:27:2016    master: e4119325    develop: e6cda11e
+tag:  2.02.00    date: 10:30:2016    master: 6c90a581    develop: ca3dd56e
+tag:  2.02.01    date: 11:01:2016    master: 9c698c86    develop: b0072304
+tag:  2.02.07    date: 12:16:2016    master: 4b4cc4ba    develop: 382c0966
--- a/lib/kokkos/config/nvcc_wrapper
+++ b/lib/kokkos/config/nvcc_wrapper
@ -121,6 +121,10 @@ do
  -gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
    cuda_args="$cuda_args $1"
    ;;
+  #Handle more known nvcc args
+  --expt-extended-lambda|--expt-relaxed-constexpr)
+    cuda_args="$cuda_args $1"
+    ;;
  #Handle known nvcc args that have an argument
  -rdc|-maxrregcount|--default-stream)
    cuda_args="$cuda_args $1 $2"
--- a/lib/kokkos/config/test_all_sandia
+++ b/lib/kokkos/config/test_all_sandia
@ -16,6 +16,8 @@ elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
    MACHINE=bowman
 elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
    MACHINE=shepard
+elif [[ "$HOSTNAME" =~ apollo ]]; then
+    MACHINE=apollo
 elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
    MACHINE=sems
 else
@ -28,6 +30,7 @@ IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
 INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
 CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
 CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
+CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"

 GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
 IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
@ -44,102 +47,12 @@ BUILD_ONLY=False
 declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
 TEST_SCRIPT=False
 SKIP_HWLOC=False
+SPOT_CHECK=False

-ARCH_FLAG=""
+PRINT_HELP=False
+OPT_FLAG=""
+KOKKOS_OPTIONS=""

-#
-# Machine specific config
-#
-
-if [ "$MACHINE" = "sems" ]; then
-    source /projects/modulefiles/utils/sems-modules-init.sh
-    source /projects/modulefiles/utils/kokkos-modules-init.sh
-
-    BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
-    CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"
-
-    # Format: (compiler module-list build-list exe-name warning-flag)
-    COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
-               "cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
-               "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
-               "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
-    )
-
-elif [ "$MACHINE" = "white" ]; then
-    source /etc/profile.d/modules.sh
-    SKIP_HWLOC=True
-    export SLURM_TASKS_PER_NODE=32
-
-    BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
-    IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
-    CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2"
-
-    # Don't do pthread on white
-    GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
-
-    # Format: (compiler module-list build-list exe-name warning-flag)
-    COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
-               "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
-    )
-
-    ARCH_FLAG="--arch=Power8"
-    NUM_JOBS_TO_RUN_IN_PARALLEL=8
-
-elif [ "$MACHINE" = "bowman" ]; then
-    source /etc/profile.d/modules.sh
-    SKIP_HWLOC=True
-    export SLURM_TASKS_PER_NODE=32
-
-    BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
-
-    OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
-
-    # Format: (compiler module-list build-list exe-name warning-flag)
-    COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-    )
-
-    ARCH_FLAG="--arch=KNL"
-    NUM_JOBS_TO_RUN_IN_PARALLEL=8
-
-elif [ "$MACHINE" = "shepard" ]; then
-    source /etc/profile.d/modules.sh
-    SKIP_HWLOC=True
-    export SLURM_TASKS_PER_NODE=32
-
-    BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
-
-    OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
-
-    # Format: (compiler module-list build-list exe-name warning-flag)
-    COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-               "intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
-    )
-
-    ARCH_FLAG="--arch=HSW"
-    NUM_JOBS_TO_RUN_IN_PARALLEL=8
-
-else
-    echo "Unhandled machine $MACHINE" >&2
-    exit 1
-fi
-
-export OMP_NUM_THREADS=4
-
-declare -i NUM_RESULTS_TO_KEEP=7
-
-RESULT_ROOT_PREFIX=TestAll
-
-SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )

 #
 # Handle arguments
@ -173,7 +86,211 @@ NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
 --dry-run*)
 DRYRUN=True
 ;;
--help)
+--spot-check*)
+SPOT_CHECK=True
+;;
+--arch*)
+ARCH_FLAG="--arch=${key#*=}"
+;;
+--opt-flag*)
+OPT_FLAG="${key#*=}"
+;;
+--with-cuda-options*)
+KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
+;;
+--help*)
+PRINT_HELP=True
+;;
+*)
+# args, just append
+ARGS="$ARGS $1"
+;;
+esac
+shift
+done
+
+SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
+
+# set kokkos path (falls back to the repository root derived from this script's location)
+if [ -z "$KOKKOS_PATH" ]; then
+    KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
+else
+    # Ensure KOKKOS_PATH is abs path; quoted so a path containing spaces survives word splitting
+    KOKKOS_PATH=$( cd "$KOKKOS_PATH" && pwd )
+fi
+
+#
+# Machine specific config
+#
+
+if [ "$MACHINE" = "sems" ]; then
+    source /projects/sems/modulefiles/utils/sems-modules-init.sh
+
+    BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
+    CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
+    CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
+
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG=""
+    fi 
+
+  if [ "$SPOT_CHECK" = "True" ]; then
+    # Layout of each entry: compiler module-list build-list exe-name warning-flag
+    COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST OpenMP,Pthread g++ $GCC_WARNING_FLAGS"
+               "gcc/5.1.0 $BASE_MODULE_LIST Serial g++ $GCC_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST OpenMP icpc $INTEL_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST Pthread_Serial clang++ $CLANG_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA8_MODULE_LIST Cuda_OpenMP $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  else
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  fi
+
+elif [ "$MACHINE" = "white" ]; then
+    source /etc/profile.d/modules.sh
+    SKIP_HWLOC=True
+    export SLURM_TASKS_PER_NODE=32
+
+    BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
+    IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
+    CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
+
+    # Don't do pthread on white
+    GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
+
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=Power8,Kepler37"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
+
+elif [ "$MACHINE" = "bowman" ]; then
+    source /etc/profile.d/modules.sh
+    SKIP_HWLOC=True
+    export SLURM_TASKS_PER_NODE=32
+
+    BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
+
+    OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
+
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+    )
+
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=KNL"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
+
+elif [ "$MACHINE" = "shepard" ]; then
+    source /etc/profile.d/modules.sh
+    SKIP_HWLOC=True
+    export SLURM_TASKS_PER_NODE=32
+
+    BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
+
+    OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
+
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+    )
+
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=HSW"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
+
+elif [ "$MACHINE" = "apollo" ]; then
+    source /projects/sems/modulefiles/utils/sems-modules-init.sh
+    module use /home/projects/modulefiles/local/x86-64
+    module load kokkos-env
+
+    module load sems-git
+    module load sems-tex
+    module load sems-cmake/3.5.2
+    module load sems-gdb
+
+    SKIP_HWLOC=True
+
+    BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
+    CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
+    CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
+
+    CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44"
+    NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
+
+    BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
+    BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
+    BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
+
+  if [ "$SPOT_CHECK" = "True" ]; then
+    # Layout of each entry: compiler module-list build-list exe-name warning-flag
+    COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST OpenMP,Pthread g++ $GCC_WARNING_FLAGS"
+               "gcc/5.1.0 $BASE_MODULE_LIST Serial g++ $GCC_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST OpenMP icpc $INTEL_WARNING_FLAGS"
+               "clang/3.9.0 $BASE_MODULE_LIST Pthread_Serial clang++ $CLANG_WARNING_FLAGS"
+               "clang/head $CLANG_MODULE_LIST Cuda_Pthread clang++ $CUDA_WARNING_FLAGS"
+               "cuda/8.0.44 $CUDA_MODULE_LIST Cuda_OpenMP $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS" # NOTE(review): the sems spot-check and apollo full lists pair cuda/8.0.44 with $CUDA8_MODULE_LIST - confirm this one is intended
+    )
+  else
+    # Format: (compiler module-list build-list exe-name warning-flag)
+    COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
+               "clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
+               "gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
+               "intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
+               "clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
+               "cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+               "cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
+    )
+  fi
+
+    if [ -z "$ARCH_FLAG" ]; then
+      ARCH_FLAG="--arch=SNB,Kepler35"
+    fi
+    NUM_JOBS_TO_RUN_IN_PARALLEL=2
+else
+    echo "Unhandled machine $MACHINE" >&2
+    exit 1
+fi
+
+
+
+export OMP_NUM_THREADS=4
+
+declare -i NUM_RESULTS_TO_KEEP=7
+
+RESULT_ROOT_PREFIX=TestAll
+
+if [ "$PRINT_HELP" = "True" ]; then
 echo "test_all_sandia <ARGS> <OPTIONS>:"
 echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
 echo "    Defaults to root repo containing this script"
@ -183,6 +300,9 @@ echo "--skip-hwloc: Do not do hwloc tests"
 echo "--num=N: Number of jobs to run in parallel "
 echo "--dry-run: Just print what would be executed"
 echo "--build-only: Just do builds, don't run anything"
+echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
+echo "--arch=ARCHITECTURE: overwrite architecture flags"
+echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS"
 echo "--build-list=BUILD,BUILD,BUILD..."
 echo "    Provide a comma-separated list of builds instead of running all builds"
 echo "    Valid items:"
@ -220,21 +340,6 @@ echo "  hit ctrl-z"
 echo "  % kill -9 %1"
 echo
 exit 0
-;;
-*)
-# args, just append
-ARGS="$ARGS $1"
-;;
-esac
-shift
-done
-
-# set kokkos path
-if [ -z "$KOKKOS_PATH" ]; then
-    KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
-else
-    # Ensure KOKKOS_PATH is abs path
-    KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
 fi

 # set build type
@ -381,11 +486,15 @@ single_build_and_test() {
        local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
    fi

+    if [[ "$OPT_FLAG" = "" ]]; then
+      OPT_FLAG="-O3"
+    fi
+
    if [[ "$build_type" = *debug* ]]; then
        local extra_args="$extra_args --debug"
        local cxxflags="-g $compiler_warning_flags"
    else
-        local cxxflags="-O3 $compiler_warning_flags"
+        local cxxflags="$OPT_FLAG $compiler_warning_flags"
    fi

    if [[ "$compiler" == cuda* ]]; then
@ -393,7 +502,9 @@ single_build_and_test() {
        export TMPDIR=$(pwd)
    fi

-    # cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags"
+    if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
+        local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
+    fi

    echo "  Starting job $desc"

@ -440,13 +551,14 @@ run_in_background() {
    local compiler=$1

    local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
-    if [[ "$BUILD_ONLY" == True ]]; then
-        num_jobs=8
-    else
+    # don't override command line input
+    # if [[ "$BUILD_ONLY" == True ]]; then
+        # num_jobs=8
+    # else
        if [[ "$compiler" == cuda* ]]; then
            num_jobs=1
        fi
-    fi
+    # fi
    wait_for_jobs $num_jobs

    single_build_and_test $* &
--- a/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh
+++ b/lib/kokkos/config/trilinos-integration/prepare_trilinos_repos.sh
@ -0,0 +1,50 @@
+#!/bin/bash -le
+
+export TRILINOS_UPDATED_PATH=${PWD}/trilinos-update
+export TRILINOS_PRISTINE_PATH=${PWD}/trilinos-pristine
+
+#rm -rf ${KOKKOS_PATH}
+#rm -rf ${TRILINOS_UPDATED_PATH}
+#rm -rf ${TRILINOS_PRISTINE_PATH}
+
+#Already done:
+if [ ! -d "${TRILINOS_UPDATED_PATH}" ]; then
+  git clone https://github.com/trilinos/trilinos ${TRILINOS_UPDATED_PATH}
+fi
+if [ ! -d "${TRILINOS_PRISTINE_PATH}" ]; then
+  git clone https://github.com/trilinos/trilinos ${TRILINOS_PRISTINE_PATH}
+fi
+
+cd ${TRILINOS_UPDATED_PATH}
+git checkout develop
+git reset --hard origin/develop
+git pull
+cd ..
+
+python kokkos/config/snapshot.py ${KOKKOS_PATH} ${TRILINOS_UPDATED_PATH}/packages
+
+cd ${TRILINOS_UPDATED_PATH}
+echo ""
+echo ""
+echo "Trilinos State:"
+git log --pretty=oneline --since=2.days
+SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'`
+cd ..
+
+cd ${TRILINOS_PRISTINE_PATH}
+git status
+git log --pretty=oneline --since=2.days
+echo "Checkout develop"
+git checkout develop
+echo "Pull"
+git pull
+echo "Checkout SHA"
+git checkout ${SHA}
+cd ..
+
+cd ${TRILINOS_PRISTINE_PATH}
+echo ""
+echo ""
+echo "Trilinos Pristine State:"
+git log --pretty=oneline --since=2.days
+cd ..
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Steve Plimpton	f8506fee23	sync GHub with SVN	2017-01-26 14:06:43 -07:00
sjplimp	18e5584311	Merge pull request #354 from stanmoore1/kokkos_bugfixes Kokkos bugfixes	2017-01-26 13:51:47 -07:00
sjplimp	851f80464f	Merge pull request #361 from akohlmey/user-omp-fix-per-atom-data fix USER-OMP bug on per-atom data with hybrid styles	2017-01-26 13:50:13 -07:00
sjplimp	5971d4c994	Merge pull request #358 from ibaned/warnings warning fixes (Kokkos+CUDA)	2017-01-26 13:49:56 -07:00
sjplimp	868d95f0a5	Merge pull request #352 from akohlmey/fix-skip-with-ghost-issue Fix skip with ghost issue	2017-01-26 13:47:12 -07:00
sjplimp	a5ff35435a	Merge pull request #351 from timattox/USER-DPD_pair_exp6_bugfix USER-DPD: Possible uninitialized variable in pair_exp6_rx.cpp bugfix.	2017-01-26 13:45:45 -07:00
Axel Kohlmeyer	8b7bd9d88e	fix bug where per atom data for USER-OMP was reducing the wrong arrays with hybrid styles	2017-01-26 14:59:10 -05:00
Dan Ibanez	672bbbe494	add more missing KOKKOS_INLINE_FUNCTION attributes	2017-01-25 16:03:11 -07:00
Dan Ibanez	03c9c46533	add missing KOKKOS_INLINE_FUNCTION attributes	2017-01-25 15:49:05 -07:00
Dan Ibanez	e992bfe510	remove unused variable	2017-01-25 15:40:52 -07:00
Dan Ibanez	053ee54a27	remove unused variable	2017-01-25 15:38:41 -07:00
Dan Ibanez	1074c6734b	add missing return keywords	2017-01-25 15:37:27 -07:00
Dan Ibanez	60b48c9d66	add missing KOKKOS_INLINE_FUNCTION attributes this structure gets put inside a DualView, so these members need to be able to execute on the GPU	2017-01-25 15:36:24 -07:00
Dan Ibanez	3d40b51708	remove unused variable	2017-01-25 15:24:52 -07:00
Dan Ibanez	effbe18c46	fix domain boundary indexing the compiler pointed out that boundary[2][2] doesn't exist. If I understand this correctly, those checks should be against boundary[*][0].	2017-01-25 15:24:01 -07:00
Dan Ibanez	6328beb7d7	fix double-return warning this #ifdef adds a return statement for little endian machines, but leaves the old one, which the compiler complains is unreachable. this commit combines the conditionals so we can use #else	2017-01-25 15:22:42 -07:00
Stan Moore	26c8d3d98f	Fixing GPU memory issue in fix_langevin_kokkos	2017-01-25 12:53:55 -07:00
Stan Moore	73177d650d	Fixing GPU memory issue in domain_kokkos	2017-01-25 11:18:03 -07:00
Axel Kohlmeyer	b5cb74bd33	skip list build is compatible with NP_GHOST	2017-01-23 19:21:48 -05:00
Axel Kohlmeyer	31976d1dee	skip list definition was missing NP_HALFFULL flag	2017-01-23 19:20:05 -05:00
Christopher Stone	c8260af37c	Possible uninitialized variable in USER-DPD/pair_exp6_rx.cpp bugfix. Added explicit initialization (to zero) for several variables inside the inner j-loop to avoid using them uninitialized or from prior iterations within rmOldij_12 == 0.	2017-01-23 13:34:51 -05:00
Steve Plimpton	caea8973a3	add neighbor list kind output to screen	2017-01-20 13:24:09 -07:00
sjplimp	aa0ad9b483	Merge pull request #349 from akohlmey/collected-small-fixes collected fixes and improvements	2017-01-20 13:19:43 -07:00
sjplimp	5d0e4e1ba9	Merge pull request #346 from stanmoore1/kokkos_fixes Kokkos fixes	2017-01-20 13:15:16 -07:00
sjplimp	f8d3c4c740	Merge pull request #345 from timattox/USER-DPD_another_zero_compute USER-DPD another zero compute optimization	2017-01-20 13:14:59 -07:00
Axel Kohlmeyer	e6996121d1	remove dead code	2017-01-20 14:30:46 -05:00
Axel Kohlmeyer	fbfb1df5eb	fix typo causing wrong neighbor list copy selections	2017-01-19 20:47:10 -05:00
Steve Plimpton	9a299875da	simplified neighbor list copying to avoid possible same-timestep re-build issues	2017-01-19 17:01:15 -07:00
Stan Moore	fc94f1bd18	Fixing GPU memory issues in Kokkos	2017-01-19 12:14:25 -07:00
Stan Moore	5ce8e2fbae	Fixing GPU memory issue in modify_kokkos, need to cherry pick back to Master	2017-01-19 12:13:48 -07:00
Tim Mattox	f6cd98636b	USER-DPD: Also apply "check if a0 is zero" optimization to pair_dpd_fdt This relates to commit `4eb08a5822` that was applied to pair_dpd_fdt_energy	2017-01-18 16:17:11 -05:00
Tim Mattox	05cafb716f	USER-DPD: cleanup initialization of splitFDT_flag in pair_dpd_fdt.cpp	2017-01-18 15:51:50 -05:00
sjplimp	3af4b3c28c	Merge pull request #337 from ohenrich/user-cgdna Added source code and documentation for USER-CGDNA	2017-01-18 11:31:35 -07:00
sjplimp	7fc0970587	Merge pull request #344 from timattox/USER-DPD_zero_compute USER-DPD: Skip a0*stuff computations, if a0 was set to zero in pair_coeff	2017-01-18 11:31:14 -07:00
sjplimp	93262b52b4	Merge pull request #343 from timattox/USER-DPD_bugfix_molecule USER-DPD: bugfix for a segfault when using MOLECULE and DPD together.	2017-01-18 11:30:58 -07:00
Tim Mattox	4eb08a5822	USER-DPD: Skip a0*stuff computations, if a0 was set to zero in pair_coeff. This saves around 10% of the runtime for many of our tests using SSA.	2017-01-17 15:55:39 -05:00
Tim Mattox	01609f55e2	USER-DPD: bugfix for a segfault when using MOLECULE and DPD together.	2017-01-17 12:47:59 -05:00
Steve Plimpton	d2fc88a626	patch 17Jan17	2017-01-17 10:14:53 -07:00
sjplimp	c52a26382f	Merge pull request #339 from akohlmey/fixes-for-srp-example Fixes for srp example	2017-01-17 09:36:28 -07:00
sjplimp	ad4d299975	Merge pull request #335 from stanmoore1/neighbor_fixes Neighbor fixes	2017-01-17 09:33:25 -07:00
sjplimp	83408b195f	Merge pull request #342 from epfl-cosmo/ipi-multiinit-bug Bugfix in the fix_ipi initialization - prevents multiple open_socket calls	2017-01-17 09:14:03 -07:00
sjplimp	cd7bdf9251	Merge pull request #341 from stanmoore1/qeq_kk_neighlist Make fix_qeq_reax_kokkos request its own neighbor list	2017-01-17 09:13:47 -07:00
sjplimp	8c5b108900	Merge pull request #340 from stanmoore1/fix_rx_neighborlist Make fix_rx request its own neighbor list	2017-01-17 09:13:27 -07:00
sjplimp	c19d2011bb	Merge pull request #334 from sstrong99/flow_gauss_changeRef Updated the reference for the flow/gauss method	2017-01-17 09:12:22 -07:00
sjplimp	973bef4d45	Merge pull request #332 from akohlmey/coord-atom-orientorder-atom-enhancements Coord atom orientorder atom enhancements	2017-01-17 09:11:45 -07:00
sjplimp	1b9e50c8cb	Merge pull request #331 from timattox/USER-DPD_fix_example_typo USER-DPD: fix a typo in the DPD-H example input; update reference output.	2017-01-17 09:08:14 -07:00
sjplimp	252e07e083	Merge pull request #330 from akohlmey/collected-small-bugfixes Collected small bugfixes	2017-01-17 09:08:00 -07:00
sjplimp	74a661ae26	Merge pull request #328 from akohlmey/print-last-command-on-error print the last input line, when error->all() is called	2017-01-17 09:05:19 -07:00
sjplimp	d8bc590aaf	Merge pull request #327 from stanmoore1/kokkos_lib_update Updating Kokkos lib	2017-01-17 09:04:12 -07:00
sjplimp	c9bea60710	Merge pull request #326 from Pakketeretet2/github-tutorial-update Updated images of successful merge.	2017-01-17 09:03:46 -07:00
Steve Plimpton	5cd856c97f	fix spring doc page update	2017-01-17 09:02:56 -07:00
Axel Kohlmeyer	2f13365cf5	avoid spurious error message, when no storage fix is active/used	2017-01-16 17:08:00 -05:00
Axel Kohlmeyer	0a2b78acb8	rather than adjusting the communication cutoff, we just print out the minimum value needed and error out i suspect, this communication cutoff adjustment was included into the code before it was possible to separately set it via comm_modify. stopping with an error message printing the needed/current value is cleaner, in keeping with other modules in LAMMPS and much less problematic.	2017-01-16 15:47:02 -05:00
Axel Kohlmeyer	3f46b6d782	fix bugs from incorrect code synchronization	2017-01-16 11:15:54 -05:00
Axel Kohlmeyer	5abd6e5122	reordering operations in Pair::init_style() to avoid segfaults w/o a kspace style	2017-01-16 11:08:48 -05:00
Michele Ceriotti	f3a82f454e	Included a flag to prevent multiple open_socket calls if run is included multiple times in the LAMMPS input	2017-01-16 08:42:23 +01:00
Axel Kohlmeyer	473a3ebeef	fix for bug with compute rdf with pair reax/c. we must not copy a neighbor list, if newton settings are not compatible an alternate route to address this issue would be to allow an "ANY" setting for neighbor list requests and then query the neighbor list for newton setting instead of the force class.	2017-01-15 12:05:19 -05:00
Stan Gerald Moore (stamoor)	b220850377	Removing neighbor list hack in fix_qeq_reax_kokkos	2017-01-14 16:16:02 -07:00
Stan Gerald Moore (stamoor)	fa00e0593f	Make fix_rx request its own neighbor list	2017-01-14 15:39:37 -07:00
Axel Kohlmeyer	4a09399dc6	during setup, checking timestep doesn't seem to be sufficient. comparing bins and stencil point, too. in addition, relevant pointers were not properly initialized to NULL	2017-01-14 17:13:22 -05:00
Axel Kohlmeyer	5821fe8dd5	correct out-of-bounds accesses	2017-01-14 17:06:23 -05:00
Axel Kohlmeyer	8360e70f4e	update USER-CGDNA examples to follow LAMMPS style	2017-01-13 18:56:45 -05:00
Axel Kohlmeyer	b988b29413	remove dead code	2017-01-13 18:43:35 -05:00
Axel Kohlmeyer	5d48bfdcab	USER-CGDNA whitespace cleanup: expand tabs and remove trailing whitespace	2017-01-13 18:40:34 -05:00
Axel Kohlmeyer	fe8caa8a56	apply some LAMMPS formatting style conventions for include files	2017-01-13 18:33:32 -05:00
Axel Kohlmeyer	afaacc6173	add USER-CGDNA package with dependencies into the build system	2017-01-13 18:32:32 -05:00
Axel Kohlmeyer	98ceb6feb1	add missing html files to lammps.book	2017-01-13 18:11:23 -05:00
Axel Kohlmeyer	374abea0f0	some minor documentation integration tweaks for USER-CGDNA package	2017-01-13 18:09:45 -05:00
Axel Kohlmeyer	61cff85435	avoid not only division by zero, but also computing variance for short runs with insufficient resolution	2017-01-13 14:35:35 -05:00
Axel Kohlmeyer	aa0b327f7e	Merge branch 'bugfix_dividebyzero' of https://github.com/timattox/lammps_USER-DPD into collected-small-bugfixes	2017-01-13 14:26:10 -05:00
Stan Moore	04fe071968	Merge pull request #6 from ibaned/cuda-lj-ctor-warning fix a CUDA constructor warning	2017-01-13 12:13:43 -07:00
Tim Mattox	78498715b4	Protect from divide by zero in mpi_timings() when printing results. e.g. If neighbor list(s) are never rebuilt, the Neigh time will be zero.	2017-01-13 13:32:15 -05:00
Oliver Henrich	96259ea2d2	Added source code and documentation for USER-CGDNA	2017-01-13 13:36:54 +00:00
Axel Kohlmeyer	b2f67fea30	Merge branch 'collected-small-bugfixes' of github.com:akohlmey/lammps into collected-small-bugfixes	2017-01-13 08:12:10 -05:00
Axel Kohlmeyer	c59bcf31d1	change $MKLROOT to $(MKLROOT) as reported by @WeiLiPenguin This closes #336	2017-01-13 08:10:51 -05:00
Axel Kohlmeyer	2540fc281c	Merge branch 'flow_gauss_changeRef' of github.com:sstrong99/lammps into pull-334	2017-01-12 23:54:52 -05:00
Axel Kohlmeyer	e8e03dd440	Updated the reference for the flow/gauss method, the new reference is much more comprehensive	2017-01-12 23:44:33 -05:00
Stan Moore	daf766d4f8	Fixing Kokkos neighbor bug	2017-01-12 16:22:38 -07:00
Stan Moore	630783c8e8	Fixing neighbor bug	2017-01-12 16:22:24 -07:00
Dan Ibanez	c94030d966	put pair_lj_coul in kokkos_type.h also rename pair_lj_coul_gromacs so it doesn't conflict with the one now in kokkos_type.h	2017-01-12 13:37:53 -07:00
Steven Strong	1229f6f60b	Updated the reference for the flow/gauss method, the new reference is much more comprehensive	2017-01-12 10:15:18 -07:00
Axel Kohlmeyer	0b081b0086	whitespace cleanup	2017-01-11 21:05:32 -05:00
Axel Kohlmeyer	8e1cf6643c	apply bugfix to fix wall/gran by eric_lyster@agilent.com on lammps-users	2017-01-11 20:59:40 -05:00
Axel Kohlmeyer	6950a99162	Revert "remove obsolete warning about fix rigid image flag restrictions" This reverts commit `51e52b477a`.	2017-01-11 19:49:58 -05:00
Dan Ibanez	9f4e5e0661	fix a CUDA constructor warning The class params_lj_coul was copy-pasted into many different pair styles, and only one of them had the proper KOKKOS_INLINE_FUNCTION annotations for CUDA. created a header file for this class that most of the pair styles now include. One pair style did add extra members, so it keeps a local copy of the class.	2017-01-11 09:11:35 -07:00
Axel Kohlmeyer	34cb4027df	make formatting comment consistent	2017-01-11 07:46:07 -05:00
Axel Kohlmeyer	1d0e600ab7	formatting improvements and small corrections for timer settings and output discussions	2017-01-10 23:47:14 -05:00
Stan Moore	7162cafdf5	Squelching output from Makefile	2017-01-10 14:46:30 -07:00
Stan Moore	ee9e7cfbd5	Fixing Kokkos CUDA Makefile issue	2017-01-10 13:22:36 -07:00
Stan Moore	7839c335da	Fixing compile error with Kokkos CUDA Makefiles	2017-01-10 13:05:00 -07:00
Axel Kohlmeyer	622d926849	adapt example inputs for TAD and PRD to the change in compute coord/atom	2017-01-10 13:41:35 -05:00
Axel Kohlmeyer	92d15d4a89	replace string compare with enums, fix memory leak, formatting cleanup	2017-01-10 12:52:37 -05:00
Axel Kohlmeyer	95706ac846	import contributed code for computes coord/atom and orientorder/atom	2017-01-10 12:29:22 -05:00
Tim Mattox	d06688bb91	USER-DPD: fix a typo in the DPD-H example input; update reference output.	2017-01-10 12:11:20 -05:00
Axel Kohlmeyer	d014e00e53	ignore some newly added styles from packages.	2017-01-09 17:51:38 -05:00
Axel Kohlmeyer	0db2a07993	another workaround for duplicate labels (which sphinx does not like)	2017-01-09 17:51:19 -05:00
Axel Kohlmeyer	33412c76ed	correct some formatting issues with USER-NC-DUMP	2017-01-09 17:50:49 -05:00
Axel Kohlmeyer	e5ac49d1de	Merge branch 'master' into collected-small-bugfixes	2017-01-09 17:13:46 -05:00
Axel Kohlmeyer	1a81da0f73	print the last input line, when error->all() is called this should help tracking down input file errors for many common cases without having to repeat the run with -echo screen and avoid having to explain how to use that feature all the time	2017-01-09 17:03:06 -05:00
Steve Plimpton	c31f1e9f22	add fix mscg command, example, lib	2017-01-09 13:36:40 -07:00
Stan Moore	ebd25cc078	Updating docs for Kokkos package	2017-01-09 12:40:33 -07:00
Stan Moore	9250a55923	Adding enable_lambda to KOKKOS_CUDA_OPTIONS	2017-01-09 12:24:30 -07:00
Stan Moore	a9f0b7d523	Updating Kokkos lib	2017-01-09 10:39:46 -07:00
Stefan Paquay	20f8a8c219	Merge branch 'master' into github-tutorial-update	2017-01-09 14:38:09 +01:00
Axel Kohlmeyer	09af780aa8	remove misleading comments	2017-01-06 21:31:39 -05:00
Axel Kohlmeyer	51e52b477a	remove obsolete warning about fix rigid image flag restrictions	2017-01-06 21:30:33 -05:00
Axel Kohlmeyer	20a4e365b7	reduce warning when processing manual with sphinx	2017-01-06 21:30:13 -05:00
Stefan Paquay	ccd09e3967	Updated images of successful merge.	2017-01-06 19:04:26 +01:00
				`@ -0,0 +1 @@`
				`<EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD>@47h<<3C>5@<40><><EFBFBD><EFBFBD>K<EFBFBD>7@<40>R<EFBFBD>]<5D><>'<27><><EFBFBD><EFBFBD>n<EFBFBD><6E>@݌I<DD8C>H<EFBFBD><48><EFBFBD><19>?<3F><><EFBFBD><EFBFBD>?r<>I<EFBFBD><49>.<2E><><11>^<5E><><EFBFBD><EFBFBD>?W<57><7F><EFBFBD>:ȿ(O<1D>%<25>?<3F>Ns<4E>?<3F>Ŀ<EFBFBD>:<3A>C<EFBFBD><43><EFBFBD>?<3F><><EFBFBD>:,H?<3F>}<7D>c<EFBFBD><63>p<EFBFBD><70><EFBFBD><EFBFBD><EFBFBD>7g<37><67>`