Merge remote-tracking branch 'lammps-ro/master' into lammps-icms

2016-06-29 07:16:27 -04:00
parent ee0d6221fc df720a4565
commit e1d589bc8d
35 changed files with 2476 additions and 366 deletions
--- a/doc/html/Manual.html
+++ b/doc/html/Manual.html
@ -135,7 +135,7 @@
  <H1></H1><div class="section" id="lammps-icms-documentation">
 <h1>LAMMPS-ICMS Documentation</h1>
 <div class="section" id="jun-2016-version">
-<h2>27 Jun 2016 version</h2>
+<h2>28 Jun 2016 version</h2>
 </div>
 <div class="section" id="version-info">
 <h2>Version info:</h2>
--- a/doc/html/_sources/Manual.txt
+++ b/doc/html/_sources/Manual.txt
@ -5,7 +5,7 @@
 LAMMPS Documentation
 ====================

-27 Jun 2016 version
+28 Jun 2016 version
 -------------------

 Version info:
--- a/doc/html/_sources/package.txt
+++ b/doc/html/_sources/package.txt
@ -40,13 +40,16 @@ Syntax
     *intel* args = NPhi keyword value ...
       Nphi = # of coprocessors per node
       zero or more keyword/value pairs may be appended 
-       keywords = *omp* or *mode* or *balance* or *ghost* or *tpc* or *tptask* or *no_affinity*
-         *omp* value = Nthreads
-           Nthreads = number of OpenMP threads to use on CPU (default = 0)
+       keywords = *mode* or *omp* or *lrt* or *balance* or *ghost* or *tpc* or *tptask* or *no_affinity*
         *mode* value = *single* or *mixed* or *double*
           single = perform force calculations in single precision
           mixed = perform force calculations in mixed precision
           double = perform force calculations in double precision
+         *omp* value = Nthreads
+           Nthreads = number of OpenMP threads to use on CPU (default = 0)
+         *lrt* value = *yes* or *no*
+           yes = use additional thread dedicated for some PPPM calculations
+           no = do not dedicate an extra thread for some PPPM calculations
         *balance* value = split
           split = fraction of work to offload to coprocessor, -1 for dynamic
         *ghost* value = *yes* or *no*
@ -330,6 +333,23 @@ precision, including storage of forces, torques, energies, and virial
 quantities.  *Double* means double precision is used for the entire
 force calculation.

+The *lrt* keyword can be used to enable "Long Range Thread (LRT)" 
+mode. It can take a value of *yes* to enable and *no* to disable. 
+LRT mode generates an extra thread (in addition to any OpenMP threads
+specified with the OMP_NUM_THREADS environment variable or the *omp* 
+keyword). The extra thread is dedicated for performing part of the 
+:doc:`PPPM solver <kspace_style>` computations and communications. This
+can improve parallel performance on processors supporting 
+Simultaneous Multithreading (SMT) such as Hyperthreading on Intel 
+processors. In this mode, one additional thread is generated per MPI 
+process. LAMMPS will generate a warning in the case that more threads 
+are used than available in SMT hardware on a node. If the PPPM solver 
+from the USER-INTEL package is not used, then the LRT setting is 
+ignored and no extra threads are generated. Enabling LRT will replace
+the :doc:`run_style <run_style>` with the *verlet/lrt/intel* style that
+is identical to the default *verlet* style aside from supporting the
+LRT feature.
+
 The *balance* keyword sets the fraction of :doc:`pair style <pair_style>` work offloaded to the coprocessor for split
 values between 0.0 and 1.0 inclusive.  While this fraction of work is
 running on the coprocessor, other calculations will run on the host,
@ -568,15 +588,15 @@ must invoke the package gpu command in your input script or via the
 "-pk gpu" :ref:`command-line switch <start_7>`.

 For the USER-INTEL package, the default is Nphi = 1 and the option
-defaults are omp = 0, mode = mixed, balance = -1, tpc = 4, tptask =
-240.  The default ghost option is determined by the pair style being
-used.  This value is output to the screen in the offload report at the
-end of each run.  Note that all of these settings, except "omp" and
-"mode", are ignored if LAMMPS was not built with Xeon Phi coprocessor
-support.  These settings are made automatically if the "-sf intel"
-:ref:`command-line switch <start_7>` is used.  If it is
-not used, you must invoke the package intel command in your input
-script or or via the "-pk intel" :ref:`command-line switch <start_7>`.
+defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4, 
+tptask = 240.  The default ghost option is determined by the pair 
+style being used.  This value is output to the screen in the offload 
+report at the end of each run.  Note that all of these settings, 
+except "omp" and "mode", are ignored if LAMMPS was not built with 
+Xeon Phi coprocessor support.  These settings are made automatically 
+if the "-sf intel" :ref:`command-line switch <start_7>` 
+is used.  If it is not used, you must invoke the package intel 
+command in your input script or or via the "-pk intel" :ref:`command-line switch <start_7>`.

 For the KOKKOS package, the option defaults neigh = full, newton =
 off, binsize = 0.0, and comm = device.  These settings are made
--- a/doc/html/package.html
+++ b/doc/html/package.html
@ -162,13 +162,16 @@
 <em>intel</em> args = NPhi keyword value ...
  Nphi = # of coprocessors per node
  zero or more keyword/value pairs may be appended
-  keywords = <em>omp</em> or <em>mode</em> or <em>balance</em> or <em>ghost</em> or <em>tpc</em> or <em>tptask</em> or <em>no_affinity</em>
-    <em>omp</em> value = Nthreads
-      Nthreads = number of OpenMP threads to use on CPU (default = 0)
+  keywords = <em>mode</em> or <em>omp</em> or <em>lrt</em> or <em>balance</em> or <em>ghost</em> or <em>tpc</em> or <em>tptask</em> or <em>no_affinity</em>
    <em>mode</em> value = <em>single</em> or <em>mixed</em> or <em>double</em>
      single = perform force calculations in single precision
      mixed = perform force calculations in mixed precision
      double = perform force calculations in double precision
+    <em>omp</em> value = Nthreads
+      Nthreads = number of OpenMP threads to use on CPU (default = 0)
+    <em>lrt</em> value = <em>yes</em> or <em>no</em>
+      yes = use additional thread dedicated for some PPPM calculations
+      no = do not dedicate an extra thread for some PPPM calculations
    <em>balance</em> value = split
      split = fraction of work to offload to coprocessor, -1 for dynamic
    <em>ghost</em> value = <em>yes</em> or <em>no</em>
@ -415,6 +418,22 @@ computed in single precision, but accumulated and stored in double
 precision, including storage of forces, torques, energies, and virial
 quantities.  <em>Double</em> means double precision is used for the entire
 force calculation.</p>
+<p>The <em>lrt</em> keyword can be used to enable &#8220;Long Range Thread (LRT)&#8221;
+mode. It can take a value of <em>yes</em> to enable and <em>no</em> to disable.
+LRT mode generates an extra thread (in addition to any OpenMP threads
+specified with the OMP_NUM_THREADS environment variable or the <em>omp</em>
+keyword). The extra thread is dedicated for performing part of the
+<a class="reference internal" href="kspace_style.html"><span class="doc">PPPM solver</span></a> computations and communications. This
+can improve parallel performance on processors supporting
+Simultaneous Multithreading (SMT) such as Hyperthreading on Intel
+processors. In this mode, one additional thread is generated per MPI
+process. LAMMPS will generate a warning in the case that more threads
+are used than available in SMT hardware on a node. If the PPPM solver
+from the USER-INTEL package is not used, then the LRT setting is
+ignored and no extra threads are generated. Enabling LRT will replace
+the <a class="reference internal" href="run_style.html"><span class="doc">run_style</span></a> with the <em>verlet/lrt/intel</em> style that
+is identical to the default <em>verlet</em> style aside from supporting the
+LRT feature.</p>
 <p>The <em>balance</em> keyword sets the fraction of <a class="reference internal" href="pair_style.html"><span class="doc">pair style</span></a> work offloaded to the coprocessor for split
 values between 0.0 and 1.0 inclusive.  While this fraction of work is
 running on the coprocessor, other calculations will run on the host,
@ -608,15 +627,15 @@ automatically if the &#8220;-sf gpu&#8221; <a class="reference internal" href="S
 must invoke the package gpu command in your input script or via the
 &#8220;-pk gpu&#8221; <a class="reference internal" href="Section_start.html#start-7"><span class="std std-ref">command-line switch</span></a>.</p>
 <p>For the USER-INTEL package, the default is Nphi = 1 and the option
-defaults are omp = 0, mode = mixed, balance = -1, tpc = 4, tptask =
-240.  The default ghost option is determined by the pair style being
-used.  This value is output to the screen in the offload report at the
-end of each run.  Note that all of these settings, except &#8220;omp&#8221; and
-&#8220;mode&#8221;, are ignored if LAMMPS was not built with Xeon Phi coprocessor
-support.  These settings are made automatically if the &#8220;-sf intel&#8221;
-<a class="reference internal" href="Section_start.html#start-7"><span class="std std-ref">command-line switch</span></a> is used.  If it is
-not used, you must invoke the package intel command in your input
-script or or via the &#8220;-pk intel&#8221; <a class="reference internal" href="Section_start.html#start-7"><span class="std std-ref">command-line switch</span></a>.</p>
+defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4,
+tptask = 240.  The default ghost option is determined by the pair
+style being used.  This value is output to the screen in the offload
+report at the end of each run.  Note that all of these settings,
+except &#8220;omp&#8221; and &#8220;mode&#8221;, are ignored if LAMMPS was not built with
+Xeon Phi coprocessor support.  These settings are made automatically
+if the &#8220;-sf intel&#8221; <a class="reference internal" href="Section_start.html#start-7"><span class="std std-ref">command-line switch</span></a>
+is used.  If it is not used, you must invoke the package intel
+command in your input script or or via the &#8220;-pk intel&#8221; <a class="reference internal" href="Section_start.html#start-7"><span class="std std-ref">command-line switch</span></a>.</p>
 <p>For the KOKKOS package, the option defaults neigh = full, newton =
 off, binsize = 0.0, and comm = device.  These settings are made
 automatically by the required &#8220;-k on&#8221; <a class="reference internal" href="Section_start.html#start-7"><span class="std std-ref">command-line switch</span></a>.  You can change them bu using the
--- a/doc/html/searchindex.js
+++ b/doc/html/searchindex.js
--- a/doc/src/Manual.txt
+++ b/doc/src/Manual.txt
@ -1,7 +1,7 @@
 <!-- HTML_ONLY -->
 <HEAD>
 <TITLE>LAMMPS-ICMS Users Manual</TITLE>
-<META NAME="docnumber" CONTENT="27 Jun 2016 version">
+<META NAME="docnumber" CONTENT="28 Jun 2016 version">
 <META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
 <META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation.  This software and manual is distributed under the GNU General Public License.">
 </HEAD>
@ -21,7 +21,7 @@
 <H1></H1>

 LAMMPS-ICMS Documentation :c,h3
-27 Jun 2016 version :c,h4
+28 Jun 2016 version :c,h4

 Version info: :h4

--- a/doc/src/fix_rx.txt
+++ b/doc/src/fix_rx.txt
@ -10,20 +10,27 @@ fix rx command :h3

 [Syntax:]

-fix ID group-ID rx file localTemp solver ... :pre
+fix ID group-ID rx file localTemp matrix solver minSteps ... :pre

 ID, group-ID are documented in "fix"_fix.html command
 rx = style name of this fix command
 file = filename containing the reaction kinetic equations and Arrhenius parameters 
 localTemp = {none,lucy} = no local temperature averaging or local temperature defined through Lucy weighting function
-solver = {lammps_rk4} = Explicit 4th order Runge-Kutta method
-minSteps = # of steps for rk4 solver :ul
+matrix = {sparse, dense} format for the stoichiometric matrix
+solver = {lammps_rk4,rkf45} = rk4 is an explicit 4th order Runge-Kutta method; rkf45 is an adaptive 4th-order Runge-Kutta-Fehlberg method
+minSteps = # of steps for rk4 solver or minimum # of steps for rkf45 (rk4 or rkf45)
+maxSteps = maximum number of steps for the rkf45 solver (rkf45 only)
+relTol = relative tolerance for the rkf45 solver (rkf45 only)
+absTol = absolute tolernace for the rkf45 solver (rkf45 only)
+diag   = Diagnostics frequency for the rkf45 solver (optional, rkf45 only) :ul

 [Examples:]

-fix 1 all rx kinetics.rx none lammps_rk4 
-fix 1 all rx kinetics.rx none lammps_rk4 1
-fix 1 all rx kinetics.rx lucy lammps_rk4 10 :pre
+fix 1 all rx kinetics.rx none dense lammps_rk4 
+fix 1 all rx kinetics.rx none sparse lammps_rk4 1
+fix 1 all rx kinetics.rx lucy sparse lammps_rk4 10 
+fix 1 all rx kinetics.rx none dense rkf45 1 100 1e-6 1e-8
+fix 1 all rx kinetics.rx none dense rkf45 1 100 1e-6 1e-8 -1 :pre

 [Description:]

@ -45,13 +52,47 @@ of {m} ordinary differential equations (ODEs) that describe the change
 in concentration of a given species as a function of time are then
 constructed based on the {n} reaction rate equations.

-The ODE systems are solved over the full DPD timestep {dt} using a 4th
+The ODE systems are solved over the full DPD timestep {dt} using either a 4th
 order Runge-Kutta {rk4} method with a fixed step-size {h}, specified
-by the {lammps_rk4} keyword. The number of ODE steps per DPD timestep
+by the {lammps_rk4} keyword, or a 4th order Runge-Kutta-Fehlberg (rkf45) method 
+with an adaptive step-size for {h}. The number of ODE steps per DPD timestep
 for the rk4 method is optionally specified immediately after the rk4
 keyword. The ODE step-size is set as {dt/num_steps}. Smaller
 step-sizes tend to yield more accurate results but there is not
-control on the error.
+control on the error. For error control, use the rkf45 ODE solver.
+
+The rkf45 method adjusts the step-size so that the local truncation error is held 
+within the specified absolute and relative tolerances. The initial step-size {h0} 
+can be specified by the user or estimated internally. It is recommeded that the user
+specify {h0} since this will generally reduced the number of ODE integration steps 
+required. {h0} is defined as {dt / min_steps} if min_steps >= 1. If min_steps == 0, 
+{h0} is estimated such that an explicit Euler method would likely produce
+an acceptable solution. This is generally overly conservative for the 4th-order
+method and users are advised to specify {h0} as some fraction of the DPD timestep.
+For small DPD timesteps, only one step may be necessary depending upon the tolerances.
+Note that more than min_steps ODE steps may be taken depending upon the ODE stiffness
+but no more than max_steps will be taken. If max_steps is reached, an error warning
+is printed and the simulation is stopped.
+
+After each ODE step, the solution error {e} is tested and weighted using the absTol
+and relTol values. The error vector is weighted as {e} / (relTol * |{u}| + absTol) 
+where {u} is the solution vector. If the norm of the error is <= 1, the solution is 
+accepted, {h} is increased by a proportional amount, and the next ODE step is begun.
+Otherwise, {h} is shrunk and the ODE step is repeated.
+
+Run-time diagnostics are available for the rkf45 ODE solver. The frequency 
+(in time-steps) that diagnostics are reported is controlled by the last (optional) 
+12th argument. A negative frequency means that diagnostics are reported once at the 
+end of each run. A positive value N means that the diagnostics are reported once 
+per N time-steps.
+
+The diagnostics report the average # of integrator steps and RHS function evaluations 
+and run-time per ODE as well as the the average/RMS/min/max per process. If the 
+reporting frequency is 1, the RMS/min/max per ODE are also reported. The per ODE 
+statistics can be used to adjust the tolerance and min/max step parameters. The 
+statistics per MPI process can be useful to examine any load imbalance caused by the 
+adaptive ODE solver. (Some DPD particles can take longer to solve than others. This 
+can lead to an imbalance across the MPI processes.)

 :line

@ -91,6 +132,17 @@ where the Lucy function is expressed as:

 The self-particle interaction is included in the above equation.

+The stoichiometric coefficients for the reaction mechanism are stored
+in either a sparse or dense matrix format. The dense matrix should only be
+used for small reaction mechanisms. The sparse matrix should be used when there
+are many reactions (e.g., more than 5). This allows the number of reactions and
+species to grow while keeping the computational cost tractable. The matrix
+format can be specified as using either the {sparse} or {dense} keywords.
+If all stoichiometric coefficients for a reaction are small integers (whole
+numbers <= 3), a fast exponential function is used. This can save significant
+computational time so users are encouraged to use integer coefficients
+where possible.
+
 :line

 The format of a tabulated file is as follows (without the
--- a/doc/src/package.txt
+++ b/doc/src/package.txt
@ -40,13 +40,16 @@ args = arguments specific to the style :l
  {intel} args = NPhi keyword value ...
    Nphi = # of coprocessors per node
    zero or more keyword/value pairs may be appended 
-    keywords = {omp} or {mode} or {balance} or {ghost} or {tpc} or {tptask} or {no_affinity}
-      {omp} value = Nthreads
-        Nthreads = number of OpenMP threads to use on CPU (default = 0)
+    keywords = {mode} or {omp} or {lrt} or {balance} or {ghost} or {tpc} or {tptask} or {no_affinity}
      {mode} value = {single} or {mixed} or {double}
        single = perform force calculations in single precision
        mixed = perform force calculations in mixed precision
        double = perform force calculations in double precision
+      {omp} value = Nthreads
+        Nthreads = number of OpenMP threads to use on CPU (default = 0)
+      {lrt} value = {yes} or {no}
+        yes = use additional thread dedicated for some PPPM calculations
+        no = do not dedicate an extra thread for some PPPM calculations
      {balance} value = split
        split = fraction of work to offload to coprocessor, -1 for dynamic
      {ghost} value = {yes} or {no}
@ -316,6 +319,23 @@ precision, including storage of forces, torques, energies, and virial
 quantities.  {Double} means double precision is used for the entire
 force calculation.

+The {lrt} keyword can be used to enable "Long Range Thread (LRT)" 
+mode. It can take a value of {yes} to enable and {no} to disable. 
+LRT mode generates an extra thread (in addition to any OpenMP threads
+specified with the OMP_NUM_THREADS environment variable or the {omp} 
+keyword). The extra thread is dedicated for performing part of the 
+"PPPM solver"_kspace_style.html computations and communications. This
+can improve parallel performance on processors supporting 
+Simultaneous Multithreading (SMT) such as Hyperthreading on Intel 
+processors. In this mode, one additional thread is generated per MPI 
+process. LAMMPS will generate a warning in the case that more threads 
+are used than available in SMT hardware on a node. If the PPPM solver 
+from the USER-INTEL package is not used, then the LRT setting is 
+ignored and no extra threads are generated. Enabling LRT will replace
+the "run_style"_run_style.html with the {verlet/lrt/intel} style that
+is identical to the default {verlet} style aside from supporting the
+LRT feature.  
+
 The {balance} keyword sets the fraction of "pair
 style"_pair_style.html work offloaded to the coprocessor for split
 values between 0.0 and 1.0 inclusive.  While this fraction of work is
@ -551,15 +571,15 @@ must invoke the package gpu command in your input script or via the
 "-pk gpu" "command-line switch"_Section_start.html#start_7.

 For the USER-INTEL package, the default is Nphi = 1 and the option
-defaults are omp = 0, mode = mixed, balance = -1, tpc = 4, tptask =
-240.  The default ghost option is determined by the pair style being
-used.  This value is output to the screen in the offload report at the
-end of each run.  Note that all of these settings, except "omp" and
-"mode", are ignored if LAMMPS was not built with Xeon Phi coprocessor
-support.  These settings are made automatically if the "-sf intel"
-"command-line switch"_Section_start.html#start_7 is used.  If it is
-not used, you must invoke the package intel command in your input
-script or or via the "-pk intel" "command-line
+defaults are omp = 0, mode = mixed, lrt = no, balance = -1, tpc = 4, 
+tptask = 240.  The default ghost option is determined by the pair 
+style being used.  This value is output to the screen in the offload 
+report at the end of each run.  Note that all of these settings, 
+except "omp" and "mode", are ignored if LAMMPS was not built with 
+Xeon Phi coprocessor support.  These settings are made automatically 
+if the "-sf intel" "command-line switch"_Section_start.html#start_7 
+is used.  If it is not used, you must invoke the package intel 
+command in your input script or or via the "-pk intel" "command-line
 switch"_Section_start.html#start_7.

 For the KOKKOS package, the option defaults neigh = full, newton =
--- a/examples/USER/dpd/dpdrx-shardlow/in.dpdrx
+++ b/examples/USER/dpd/dpdrx-shardlow/in.dpdrx
@ -5,7 +5,7 @@ boundary        p p p
 units           metal  # ev, ps
 atom_style      dpd
 atom_modify     map array
-fix             4 all rx kinetics.dpdrx none lammps_rk4 1
+fix             4 all rx kinetics.dpdrx none dense lammps_rk4 1

 lattice         hcp 6.6520898 origin 0.0833333333333 0.25 0.25 orient z 1 0 0 orient x 0 1 0 orient y 0 0 1
 region          box block 0 6.0 0 6.0 0.0 6.0 units lattice
@ -17,22 +17,22 @@ comm_modify     mode single vel yes
 mass            * 222.12

 #Set concentrations
-set atom * d_rdx 1.00
-set atom * d_h2 0.0
-set atom * d_no2 0.0
-set atom * d_n2 0.0
-set atom * d_hcn 0.0
-set atom * d_no 0.0
-set atom * d_h2o 0.0
-set atom * d_co 0.0
-set atom * d_co2 0.0
-
-#Set the internal temperature of the particles
-set             atom * dpd/theta 2065.00
+set             atom * d_rdx 1.00
+set             atom * d_h2 0.0
+set             atom * d_no2 0.0
+set             atom * d_n2 0.0
+set             atom * d_hcn 0.0
+set             atom * d_no 0.0
+set             atom * d_h2o 0.0
+set             atom * d_co 0.0
+set             atom * d_co2 0.0

 #Set the kinetic temperature of the particles
 velocity	all create 2065.0 875661 dist gaussian

+#Set the internal temperature of the particles
+set             atom * dpd/theta 2065.00
+
 timestep        0.001

 pair_style      hybrid/overlay dpd/fdt/energy 16.00 234324 exp6/rx 16.00
@ -42,7 +42,6 @@ pair_coeff      * * exp6/rx params.exp6 1fluid 1fluid 1.0 1.0 16.00
 fix             1 all shardlow
 fix             2 all nve
 fix             3 all eos/table/rx linear table.eos 4001 KEYWORD thermo.dpdrx
-#fix             4 all rx lammps_rk4 kinetics.dpdrx

 compute         dpdU all dpd
 compute         dpdUatom all dpd/atom
@ -57,8 +56,4 @@ thermo_modify   format float %15.8f flush yes
 dump            2 all custom 1 dump.dpdrx id x y z vx vy vz c_dpdUatom[1] c_dpdUatom[2] c_dpdUatom[3] c_dpdUatom[4] c_crdx
 dump_modify     2 sort id

-restart         10 restart.dpdrx
-
-write_data      data.dpdrx.initial
-run 10
-write_data      data.dpdrx.final
+run             10
--- a/src/USER-DPD/fix_eos_table_rx.cpp
+++ b/src/USER-DPD/fix_eos_table_rx.cpp
@ -677,14 +677,19 @@ void FixEOStableRX::energy_lookup(int id, double thetai, double &ui)

 void FixEOStableRX::temperature_lookup(int id, double ui, double &thetai)
 {
+  int itable;
+  Table *tb = &tables[0];
+
  int it;
  double t1,t2,u1,u2,f1,f2;
  double maxit = 100;
  double temp;
-
-
+  double delta = 0.001;
+  
  // Store the current thetai in t1
-  t1 = thetai;
+  t1 = MAX(thetai,tb->lo);
+  t1 = MIN(thetai,tb->hi);
+  if(t1==tb->hi) delta = -delta;

  // Compute u1 at thetai
  energy_lookup(id,t1,u1);
@ -693,7 +698,7 @@ void FixEOStableRX::temperature_lookup(int id, double ui, double &thetai)
  f1 = u1 - ui;

  // Compute guess of t2
-  t2 = (1.0 + 0.001)*t1;
+  t2 = (1.0 + delta)*t1;

  // Compute u2 at t2
  energy_lookup(id,t2,u2);
@ -703,9 +708,16 @@ void FixEOStableRX::temperature_lookup(int id, double ui, double &thetai)

  // Apply the Secant Method
  for(it=0; it<maxit; it++){
-    if(fabs(f2-f1)<1e-15)
-      error->one(FLERR,"Divide by zero in secant solver.");
-
+    if(fabs(f2-f1)<1e-15){
+      if(isnan(f1) || isnan(f2)) error->one(FLERR,"NaN detected in secant solver.");
+      temp = t1;
+      temp = MAX(temp,tb->lo);
+      temp = MIN(temp,tb->hi);
+      char str[256];
+      sprintf(str,"Secant solver did not converge because table bounds were exceeded:  it=%d id=%d ui=%lf thetai=%lf t1=%lf t2=%lf f1=%lf f2=%lf dpdTheta=%lf\n",it,id,ui,thetai,t1,t2,f1,f2,temp);
+      error->warning(FLERR,str);
+      break;
+    }
    temp = t2 - f2*(t2-t1)/(f2-f1);
    if(fabs(temp-t2) < 1e-6) break;
    f1 = f2;
@ -715,8 +727,11 @@ void FixEOStableRX::temperature_lookup(int id, double ui, double &thetai)
    f2 = u2 - ui;
  }
  if(it==maxit){
-    printf("id=%d ui=%lf t1=%lf t2=%lf f1=%lf f2=%lf\n",id,ui,t1,t2,f1,f2);
-    error->one(FLERR,"Maxit exceeded in secant solver");
+    char str[256];
+    sprintf(str,"Maxit exceeded in secant solver:  id=%d ui=%lf thetai=%lf t1=%lf t2=%lf f1=%lf f2=%lf\n",id,ui,thetai,t1,t2,f1,f2);
+    if(isnan(f1) || isnan(f2) || isnan(ui) || isnan(thetai) || isnan(t1) || isnan(t2)) 
+      error->one(FLERR,"NaN detected in secant solver.");
+    error->one(FLERR,str);
  }
  thetai = temp;
 }
--- a/src/USER-DPD/fix_eos_table_rx.h
+++ b/src/USER-DPD/fix_eos_table_rx.h
@ -142,9 +142,13 @@ E:  fix eos/table/rx parameters did not set N

 The number of table entries was not set in the eos/table/rx file

-E:  Divide by zero in secant solver.
+W:  Secant solver did not converge because table bounds were exceeded

-The secant solver failed to find a solution.
+The secant solver failed to converge, resulting in the lower or upper table bound temperature to be returned
+
+E: NaN detected in secant solver.
+
+Self-explanatory.

 E: Maxit exceeded in secant solver

--- a/src/USER-DPD/fix_rx.cpp
+++ b/src/USER-DPD/fix_rx.cpp
--- a/src/USER-DPD/fix_rx.h
+++ b/src/USER-DPD/fix_rx.h
@ -1,4 +1,4 @@
-/* -*- c++ -*- ----------------------------------------------------------
+/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
@ -25,7 +25,7 @@ FixStyle(rx,FixRX)
 typedef int (*fnptr)(double, const double *, double *, void *);
 namespace LAMMPS_NS {

-enum { ODE_LAMMPS_RK4 };
+enum { ODE_LAMMPS_RK4, ODE_LAMMPS_RKF45 };

 class FixRX : public Fix {
 public:
@ -61,7 +61,21 @@ class FixRX : public Fix {
  double *Arr, *nArr, *Ea, *tempExp;
  double **stoich, **stoichReactants, **stoichProducts;
  double *kR;
-  void rk4(int);
+
+  //!< Classic Runge-Kutta 4th-order stepper.
+  void rk4(int,double*);
+
+  //!< Runge-Kutta-Fehlberg ODE Solver.
+  void rkf45(int,double*);
+
+  //!< Runge-Kutta-Fehlberg ODE stepper function.
+  void rkf45_step (const int neq, const double h, double y[], double y_out[],
+                   double rwk[], void* v_param);
+
+  //!< Initial step size estimation for the Runge-Kutta-Fehlberg ODE solver.
+  int rkf45_h0 (const int neq, const double t, const double t_stop,
+                     const double hmin, const double hmax,
+                     double& h0, double y[], double rwk[], void* v_params);

  class PairDPDfdtEnergy *pairDPDE;
  double *dpdThetaLocal;
@ -71,15 +85,54 @@ class FixRX : public Fix {
  double sigFactor;

  int rhs(double, const double *, double *, void *);
+  int rhs_dense (double, const double *, double *, void *);
+
+  // Sparse stoichiometric matrix storage format and methods.
+  bool useSparseKinetics;
+  //SparseKinetics sparseKinetics;
+  int initSparse(void);
+  int rhs_sparse(double, const double *, double *, void *) const;
+
+  int sparseKinetics_maxReactants; //<! Max # of reactants species in any reaction
+  int sparseKinetics_maxProducts; //<! Max # of product species in any reaction
+  int sparseKinetics_maxSpecies; //<! Max # of species (maxReactants + maxProducts) in any reaction
+
+  //! Objects to hold the stoichiometric coefficients using a sparse matrix
+  //! format. Enables a sparse formulation for the reaction rates:
+  //! \f${\omega}_i = \Pi_{j=1}^{NS_i} K^{f}_i [x_j]^{\nu^{'}_{ij}} -
+  //!                                  K^{r}_i x_j^{\nu^{''}_{ij}}\f$.
+  double **sparseKinetics_nu; //<! Stoichiometric matrix with FLT values.
+  int    **sparseKinetics_nuk; //<! Index (base-0) of species ... this is the column sparse matrix.
+  int    **sparseKinetics_inu; //<! Stoichiometric matrix with integral values.
+
+  bool *sparseKinetics_isIntegralReaction; //<! Flag indicating if a reaction has integer stoichiometric values.
+
+  // ODE Parameters
+  int minSteps; //!< Minimum # of steps for the ODE solver(s).
+  int maxIters; //!< Maximum # of iterations for the ODE solver(s).
+  double relTol, absTol; //!< Relative and absolute tolerances for the ODE solver(s).
+
+  // ODE Diagnostics
+  int nSteps; //!< # of accepted steps taken over all atoms.
+  int nIters; //!< # of attemped steps for all atoms.
+  int nFuncs; //!< # of RHS evaluations for all atoms.
+  int nFails; //!< # of ODE systems that failed (for some reason).
+
+  int diagnosticFrequency; //!< Frequency (LMP steps) that run-time diagnostics will be printed to the log.
+  enum { numDiagnosticCounters = 5 };
+  enum { StepSum=0, FuncSum, TimeSum, AtomSum, CountSum };
+  double diagnosticCounter[ numDiagnosticCounters ];
+
+  int *diagnosticCounterPerODE[ numDiagnosticCounters ];
+
+  //!< ODE Solver diagnostics.
+  void odeDiagnostics(void);

 private:
  char *kineticsFile;
  char *id_fix_species,*id_fix_species_old;
  class FixPropertyAtom *fix_species,*fix_species_old;

-  // ODE Parameters
-  int minSteps; //!< Minimum # of steps for the ODE solver(s).
-
 };

 }
@ -119,6 +172,10 @@ E:  fix rx requires fix eos/table/rx to be specified.

 Self-explanatory

+W:  in FixRX::pre_force, ODE solver failed for %d atoms.
+
+Self-explanatory
+
 E:  Missing parameters in reaction kinetic equation.

 Self-explanatory
@ -127,7 +184,7 @@ E:  Potential file has duplicate entry.

 Self-explanatory

-E:  Computed concentration in RK4 solver is < -1.0e-10.
+E:  Computed concentration in RK4 (RKF45) solver is < -1.0e-10.

 Self-explanatory:  Adjust settings for the RK4 solver.

--- a/src/USER-DPD/pair_exp6_rx.cpp
+++ b/src/USER-DPD/pair_exp6_rx.cpp
@ -33,6 +33,32 @@ using namespace MathConst;
 #define MAXLINE 1024
 #define DELTA 4

+#define oneFluidApproxParameter (-1)
+#define isOneFluidApprox(_site) ( (_site) == oneFluidApproxParameter )
+
+#define exp6PotentialType (1)
+#define isExp6PotentialType(_type) ( (_type) == exp6PotentialType )
+
+// Create a structure to hold the parameter data for all
+// local and neighbor particles. Pack inside this struct
+// to avoid any name clashes.
+struct PairExp6ParamDataType
+{
+   int n;
+   double *epsilon1, *alpha1, *rm1, *fraction1,
+          *epsilon2, *alpha2, *rm2, *fraction2,
+          *epsilonOld1, *alphaOld1, *rmOld1, *fractionOld1,
+          *epsilonOld2, *alphaOld2, *rmOld2, *fractionOld2;
+
+   // Default constructor -- nullify everything.
+   PairExp6ParamDataType(void)
+      : n(0), epsilon1(NULL), alpha1(NULL), rm1(NULL), fraction1(NULL),
+              epsilon2(NULL), alpha2(NULL), rm2(NULL), fraction2(NULL),
+              epsilonOld1(NULL), alphaOld1(NULL), rmOld1(NULL), fractionOld1(NULL),
+              epsilonOld2(NULL), alphaOld2(NULL), rmOld2(NULL), fractionOld2(NULL)
+   {}
+};
+
 /* ---------------------------------------------------------------------- */

 PairExp6rx::PairExp6rx(LAMMPS *lmp) : Pair(lmp)
@ -43,17 +69,12 @@ PairExp6rx::PairExp6rx(LAMMPS *lmp) : Pair(lmp)
  nparams = maxparam = 0;
  params = NULL;
  mol2param = NULL;
-  site1 = site2 = NULL;
 }

 /* ---------------------------------------------------------------------- */

 PairExp6rx::~PairExp6rx()
 {
-  for (int i=0; i < nparams; ++i) {
-    delete[] params[i].name;
-    delete[] params[i].potential;
-  }
  memory->destroy(params);
  memory->destroy(mol2param);

@ -63,8 +84,6 @@ PairExp6rx::~PairExp6rx()

    memory->destroy(cut);
  }
-  delete[] site1;
-  delete[] site2;
 }

 /* ---------------------------------------------------------------------- */
@ -73,7 +92,7 @@ void PairExp6rx::compute(int eflag, int vflag)
 {
  int i,j,ii,jj,inum,jnum,itype,jtype;
  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair;
-  double rsq,r2inv,r6inv,forceExp6,factor_lj;
+  double rsq,rinv,r2inv,r6inv,forceExp6,factor_lj;
  double rCut,rCutInv,rCut2inv,rCut6inv,rCutExp,urc,durc;
  double rm2ij,rm6ij;
  double r,rexp;
@ -117,6 +136,53 @@ void PairExp6rx::compute(int eflag, int vflag)
  const double shift = 1.05;
  double rin1, aRep, uin1, win1, uin1rep, rin1exp, rin6, rin6inv;

+  // Initialize the Exp6 parameter data for both the local
+  // and ghost atoms. Make the parameter data persistent
+  // and exchange like any other atom property later.
+
+  PairExp6ParamDataType PairExp6ParamData;
+
+  {
+     const int np_total = nlocal + atom->nghost;
+
+     memory->create( PairExp6ParamData.epsilon1     , np_total, "PairExp6ParamData.epsilon1");
+     memory->create( PairExp6ParamData.alpha1       , np_total, "PairExp6ParamData.alpha1");
+     memory->create( PairExp6ParamData.rm1          , np_total, "PairExp6ParamData.rm1");
+     memory->create( PairExp6ParamData.fraction1    , np_total, "PairExp6ParamData.fraction1");
+     memory->create( PairExp6ParamData.epsilon2     , np_total, "PairExp6ParamData.epsilon2");
+     memory->create( PairExp6ParamData.alpha2       , np_total, "PairExp6ParamData.alpha2");
+     memory->create( PairExp6ParamData.rm2          , np_total, "PairExp6ParamData.rm2");
+     memory->create( PairExp6ParamData.fraction2    , np_total, "PairExp6ParamData.fraction2");
+     memory->create( PairExp6ParamData.epsilonOld1  , np_total, "PairExp6ParamData.epsilonOld1");
+     memory->create( PairExp6ParamData.alphaOld1    , np_total, "PairExp6ParamData.alphaOld1");
+     memory->create( PairExp6ParamData.rmOld1       , np_total, "PairExp6ParamData.rmOld1");
+     memory->create( PairExp6ParamData.fractionOld1 , np_total, "PairExp6ParamData.fractionOld1");
+     memory->create( PairExp6ParamData.epsilonOld2  , np_total, "PairExp6ParamData.epsilonOld2");
+     memory->create( PairExp6ParamData.alphaOld2    , np_total, "PairExp6ParamData.alphaOld2");
+     memory->create( PairExp6ParamData.rmOld2       , np_total, "PairExp6ParamData.rmOld2");
+     memory->create( PairExp6ParamData.fractionOld2 , np_total, "PairExp6ParamData.fractionOld2");
+
+     for (i = 0; i < np_total; ++i)
+     {
+        getParamsEXP6 (i, PairExp6ParamData.epsilon1[i],
+                          PairExp6ParamData.alpha1[i],
+                          PairExp6ParamData.rm1[i],
+                          PairExp6ParamData.fraction1[i],
+                          PairExp6ParamData.epsilon2[i],
+                          PairExp6ParamData.alpha2[i],
+                          PairExp6ParamData.rm2[i],
+                          PairExp6ParamData.fraction2[i],
+                          PairExp6ParamData.epsilonOld1[i],
+                          PairExp6ParamData.alphaOld1[i],
+                          PairExp6ParamData.rmOld1[i],
+                          PairExp6ParamData.fractionOld1[i],
+                          PairExp6ParamData.epsilonOld2[i],
+                          PairExp6ParamData.alphaOld2[i],
+                          PairExp6ParamData.rmOld2[i],
+                          PairExp6ParamData.fractionOld2[i]);
+     }
+  }
+
  inum = list->inum;
  ilist = list->ilist;
  numneigh = list->numneigh;
@ -133,7 +199,24 @@ void PairExp6rx::compute(int eflag, int vflag)
    jlist = firstneigh[i];
    jnum = numneigh[i];

-    getParamsEXP6(i,epsilon1_i,alpha1_i,rm1_i,fraction1_i,epsilon2_i,alpha2_i,rm2_i,fraction2_i,epsilonOld1_i,alphaOld1_i,rmOld1_i,fractionOld1_i,epsilonOld2_i,alphaOld2_i,rmOld2_i,fractionOld2_i);
+    {
+       epsilon1_i     = PairExp6ParamData.epsilon1[i];
+       alpha1_i       = PairExp6ParamData.alpha1[i];
+       rm1_i          = PairExp6ParamData.rm1[i];
+       fraction1_i    = PairExp6ParamData.fraction1[i];
+       epsilon2_i     = PairExp6ParamData.epsilon2[i];
+       alpha2_i       = PairExp6ParamData.alpha2[i];
+       rm2_i          = PairExp6ParamData.rm2[i];
+       fraction2_i    = PairExp6ParamData.fraction2[i];
+       epsilonOld1_i  = PairExp6ParamData.epsilonOld1[i];
+       alphaOld1_i    = PairExp6ParamData.alphaOld1[i];
+       rmOld1_i       = PairExp6ParamData.rmOld1[i];
+       fractionOld1_i = PairExp6ParamData.fractionOld1[i];
+       epsilonOld2_i  = PairExp6ParamData.epsilonOld2[i];
+       alphaOld2_i    = PairExp6ParamData.alphaOld2[i];
+       rmOld2_i       = PairExp6ParamData.rmOld2[i];
+       fractionOld2_i = PairExp6ParamData.fractionOld2[i];
+    }

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
@ -152,6 +235,7 @@ void PairExp6rx::compute(int eflag, int vflag)
        r6inv = r2inv*r2inv*r2inv;

        r = sqrt(rsq);
+        rinv = 1.0/r;

        rCut2inv = 1.0/cutsq[itype][jtype];
        rCut6inv = rCut2inv*rCut2inv*rCut2inv;
@ -163,7 +247,25 @@ void PairExp6rx::compute(int eflag, int vflag)
        //

        // A1.  Get alpha, epsilon and rm for particle j
-        getParamsEXP6(j,epsilon1_j,alpha1_j,rm1_j,fraction1_j,epsilon2_j,alpha2_j,rm2_j,fraction2_j,epsilonOld1_j,alphaOld1_j,rmOld1_j,fractionOld1_j,epsilonOld2_j,alphaOld2_j,rmOld2_j,fractionOld2_j);
+
+        {
+           epsilon1_j     = PairExp6ParamData.epsilon1[j];
+           alpha1_j       = PairExp6ParamData.alpha1[j];
+           rm1_j          = PairExp6ParamData.rm1[j];
+           fraction1_j    = PairExp6ParamData.fraction1[j];
+           epsilon2_j     = PairExp6ParamData.epsilon2[j];
+           alpha2_j       = PairExp6ParamData.alpha2[j];
+           rm2_j          = PairExp6ParamData.rm2[j];
+           fraction2_j    = PairExp6ParamData.fraction2[j];
+           epsilonOld1_j  = PairExp6ParamData.epsilonOld1[j];
+           alphaOld1_j    = PairExp6ParamData.alphaOld1[j];
+           rmOld1_j       = PairExp6ParamData.rmOld1[j];
+           fractionOld1_j = PairExp6ParamData.fractionOld1[j];
+           epsilonOld2_j  = PairExp6ParamData.epsilonOld2[j];
+           alphaOld2_j    = PairExp6ParamData.alphaOld2[j];
+           rmOld2_j       = PairExp6ParamData.rmOld2[j];
+           fractionOld2_j = PairExp6ParamData.fractionOld2[j];
+        }

        // A2.  Apply Lorentz-Berthelot mixing rules for the i-j pair
        alphaOld12_ij = sqrt(alphaOld1_i*alphaOld2_j);
@ -252,7 +354,7 @@ void PairExp6rx::compute(int eflag, int vflag)
            evdwlOldEXP6_21 = buck1*(6.0*rexp - alphaOld21_ij*rm6ij*r6inv) - urc - durc*(r-rCut);
          }

-          if (strcmp(site1,site2) == 0)
+          if (isite1 == isite2)
            evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwlOldEXP6_12;
          else
            evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwlOldEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*evdwlOldEXP6_21;
@ -260,7 +362,8 @@ void PairExp6rx::compute(int eflag, int vflag)
          evdwlOld *= factor_lj;

          uCG[i] += 0.5*evdwlOld;
-          uCG[j] += 0.5*evdwlOld;
+          if (newton_pair || j < nlocal)
+            uCG[j] += 0.5*evdwlOld;
        }

        if(rm12_ij!=0.0 && rm21_ij!=0.0){
@ -351,7 +454,7 @@ void PairExp6rx::compute(int eflag, int vflag)
          //
          // Apply Mixing Rule to get the overall force for the CG pair
          //
-          if (strcmp(site1,site2) == 0) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairEXP6_12;
+          if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairEXP6_12; 
          else fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpairEXP6_12 + sqrt(fractionOld2_i*fractionOld1_j)*fpairEXP6_21;

          f[i][0] += delx*fpair;
@ -363,14 +466,13 @@ void PairExp6rx::compute(int eflag, int vflag)
            f[j][2] -= delz*fpair;
          }

-          if (strcmp(site1,site2) == 0)
-            evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12;
-          else
-            evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12 + sqrt(fraction2_i*fraction1_j)*evdwlEXP6_21;
+          if (isite1 == isite2) evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12;
+          else evdwl = sqrt(fraction1_i*fraction2_j)*evdwlEXP6_12 + sqrt(fraction2_i*fraction1_j)*evdwlEXP6_21;
          evdwl *= factor_lj;

-          uCGnew[i] += 0.5*evdwl;
-          uCGnew[j] += 0.5*evdwl;
+          uCGnew[i]   += 0.5*evdwl;
+          if (newton_pair || j < nlocal)
+            uCGnew[j] += 0.5*evdwl;
          evdwl = evdwlOld;
          if (evflag) ev_tally(i,j,nlocal,newton_pair,
                               evdwl,0.0,fpair,delx,dely,delz);
@ -379,6 +481,27 @@ void PairExp6rx::compute(int eflag, int vflag)
    }
  }
  if (vflag_fdotr) virial_fdotr_compute();
+
+  // Release the local parameter data.
+  {
+     if (PairExp6ParamData.epsilon1    ) memory->destroy(PairExp6ParamData.epsilon1);
+     if (PairExp6ParamData.alpha1      ) memory->destroy(PairExp6ParamData.alpha1);
+     if (PairExp6ParamData.rm1         ) memory->destroy(PairExp6ParamData.rm1);
+     if (PairExp6ParamData.fraction1   ) memory->destroy(PairExp6ParamData.fraction1);
+     if (PairExp6ParamData.epsilon2    ) memory->destroy(PairExp6ParamData.epsilon2);
+     if (PairExp6ParamData.alpha2      ) memory->destroy(PairExp6ParamData.alpha2);
+     if (PairExp6ParamData.rm2         ) memory->destroy(PairExp6ParamData.rm2);
+     if (PairExp6ParamData.fraction2   ) memory->destroy(PairExp6ParamData.fraction2);
+     if (PairExp6ParamData.epsilonOld1 ) memory->destroy(PairExp6ParamData.epsilonOld1);
+     if (PairExp6ParamData.alphaOld1   ) memory->destroy(PairExp6ParamData.alphaOld1);
+     if (PairExp6ParamData.rmOld1      ) memory->destroy(PairExp6ParamData.rmOld1);
+     if (PairExp6ParamData.fractionOld1) memory->destroy(PairExp6ParamData.fractionOld1);
+     if (PairExp6ParamData.epsilonOld2 ) memory->destroy(PairExp6ParamData.epsilonOld2);
+     if (PairExp6ParamData.alphaOld2   ) memory->destroy(PairExp6ParamData.alphaOld2);
+     if (PairExp6ParamData.rmOld2      ) memory->destroy(PairExp6ParamData.rmOld2);
+     if (PairExp6ParamData.fractionOld2) memory->destroy(PairExp6ParamData.fractionOld2);
+  }
+
 }

 /* ----------------------------------------------------------------------
@ -466,6 +589,49 @@ void PairExp6rx::coeff(int narg, char **arg)
  if (ispecies == nspecies && strcmp(site2,"1fluid") != 0)
    error->all(FLERR,"Site2 name not recognized in pair coefficients");

+  {
+    // Set isite1 and isite2 parameters based on site1 and site2 strings.
+    
+    if (strcmp(site1,"1fluid") == 0)
+      isite1 = oneFluidApproxParameter;
+    else
+      {
+        int isp;
+        for (isp = 0; isp < nspecies; isp++)
+          if (strcmp(site1, &atom->dname[isp][0]) == 0) break;
+
+        if (isp == nspecies)
+          error->all(FLERR,"Site1 name not recognized in pair coefficients");
+        else
+          isite1 = isp;
+      }
+    
+    if (strcmp(site2,"1fluid") == 0)
+      isite2 = oneFluidApproxParameter;
+    else
+      {
+        int isp;
+        for (isp = 0; isp < nspecies; isp++)
+        if (strcmp(site2, &atom->dname[isp][0]) == 0) break;
+
+        if (isp == nspecies)
+          error->all(FLERR,"Site2 name not recognized in pair coefficients");
+        else
+          isite2 = isp;
+      }
+    
+    // Set the interaction potential type to the enumerated type.
+    for (int iparam = 0; iparam < nparams; ++iparam)
+      {
+        if (strcmp( params[iparam].potential, "exp6") == 0)
+          params[iparam].potentialType = exp6PotentialType;
+        else
+          error->all(FLERR,"params[].potential type unknown");
+
+        //printf("params[%d].name= %s ispecies= %d potential= %s potentialType= %d\n", iparam, params[iparam].name, params[iparam].ispecies, params[iparam].potential, params[iparam].potentialType);
+      }
+  }
+
  fuchslinR = force->numeric(FLERR,arg[5]);
  fuchslinEpsilon = force->numeric(FLERR,arg[6]);

@ -711,14 +877,14 @@ void PairExp6rx::read_restart_settings(FILE *fp)

 /* ---------------------------------------------------------------------- */

-void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm1, double &fraction1,double &epsilon2,double &alpha2,double &rm2,double &fraction2,double &epsilon1_old,double &alpha1_old,double &rm1_old, double &fraction1_old,double &epsilon2_old,double &alpha2_old,double &rm2_old,double &fraction2_old)
+void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm1, double &fraction1,double &epsilon2,double &alpha2,double &rm2,double &fraction2,double &epsilon1_old,double &alpha1_old,double &rm1_old, double &fraction1_old,double &epsilon2_old,double &alpha2_old,double &rm2_old,double &fraction2_old) const
 {
  int iparam, jparam;
  double rmi, rmj, rmij, rm3ij;
  double epsiloni, epsilonj, epsilonij;
  double alphai, alphaj, alphaij;
  double epsilon_old, rm3_old, alpha_old;
-  double epsilon, rm3, alpha;
+  double epsilon, rm3, alpha, fraction;
  double fractionOFA, fractionOFA_old;
  double nTotalOFA, nTotalOFA_old;
  double nTotal, nTotal_old;
@ -730,6 +896,7 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
  epsilon_old = 0.0;
  rm3_old = 0.0;
  alpha_old = 0.0;
+  fraction = 0.0;
  fractionOFA = 0.0;
  fractionOFA_old = 0.0;
  nTotalOFA = 0.0;
@ -743,9 +910,10 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    nTotal_old += atom->dvector[ispecies+nspecies][id];

    iparam = mol2param[ispecies];
-    if (iparam < 0 || strcmp(params[iparam].potential,"exp6") != 0) continue;
-    if (strcmp(site1,"1fluid") == 0 || strcmp(site2,"1fluid") == 0) {
-      if (strcmp(site1,params[iparam].name) == 0 || strcmp(site2,params[iparam].name) == 0) continue;
+
+    if (iparam < 0 || params[iparam].potentialType != exp6PotentialType ) continue;
+    if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) {
+      if (isite1 == params[iparam].ispecies || isite2 == params[iparam].ispecies) continue;
      nTotalOFA_old += atom->dvector[ispecies+nspecies][id];
      nTotalOFA += atom->dvector[ispecies][id];
    }
@ -759,10 +927,10 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm

  for (int ispecies = 0; ispecies < nspecies; ispecies++) {
    iparam = mol2param[ispecies];
-    if (iparam < 0 || strcmp(params[iparam].potential,"exp6") != 0) continue;
+    if (iparam < 0 || params[iparam].potentialType != exp6PotentialType ) continue;

    // If Site1 matches a pure species, then grab the parameters
-    if (strcmp(site1,params[iparam].name) == 0){
+    if (isite1 == params[iparam].ispecies){
      rm1_old = params[iparam].rm;
      rm1 = params[iparam].rm;
      epsilon1_old = params[iparam].epsilon;
@ -776,7 +944,7 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    }

    // If Site2 matches a pure species, then grab the parameters
-    if (strcmp(site2,params[iparam].name) == 0){
+    if (isite2 == params[iparam].ispecies){
      rm2_old = params[iparam].rm;
      rm2 = params[iparam].rm;
      epsilon2_old = params[iparam].epsilon;
@ -790,8 +958,8 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    }

    // If Site1 or Site2 matches is a fluid, then compute the paramters
-    if (strcmp(site1,"1fluid") == 0 || strcmp(site2,"1fluid") == 0) {
-      if (strcmp(site1,params[iparam].name) == 0 || strcmp(site2,params[iparam].name) == 0) continue;
+    if (isOneFluidApprox(isite1) || isOneFluidApprox(isite2)) {
+      if (isite1 == params[iparam].ispecies || isite2 == params[iparam].ispecies) continue;
      rmi = params[iparam].rm;
      epsiloni = params[iparam].epsilon;
      alphai = params[iparam].alpha;
@ -800,8 +968,8 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm

      for (int jspecies = 0; jspecies < nspecies; jspecies++) {
        jparam = mol2param[jspecies];
-        if (jparam < 0 || strcmp(params[jparam].potential,"exp6") != 0) continue;
-        if (strcmp(site1,params[jparam].name) == 0 || strcmp(site2,params[jparam].name) == 0) continue;
+        if (jparam < 0 || params[jparam].potentialType != exp6PotentialType ) continue;
+        if (isite1 == params[jparam].ispecies || isite2 == params[jparam].ispecies) continue;
        rmj = params[jparam].rm;
        epsilonj = params[jparam].epsilon;
        alphaj = params[jparam].alpha;
@ -827,7 +995,7 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    }
  }

-  if(strcmp(site1,"1fluid") == 0){
+  if (isOneFluidApprox(isite1)){
    rm1 = cbrt(rm3);
    if(rm1 < 1e-16) {
      rm1 = 0.0;
@ -854,12 +1022,11 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    // Fuchslin-Like Exp-6 Scaling
    double powfuch = 0.0;
    if(fuchslinEpsilon < 0.0){
-      fuchslinEpsilon = -1.0*fuchslinEpsilon;
-      powfuch = pow(nTotalOFA,fuchslinEpsilon);
+      powfuch = pow(nTotalOFA,-fuchslinEpsilon);
      if(powfuch<1e-15) epsilon1 = 0.0;
      else epsilon1 *= 1.0/powfuch;

-      powfuch = pow(nTotalOFA_old,fuchslinEpsilon);
+      powfuch = pow(nTotalOFA_old,-fuchslinEpsilon);
      if(powfuch<1e-15) epsilon1_old = 0.0;
      else epsilon1_old *= 1.0/powfuch;

@ -869,12 +1036,11 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    }

    if(fuchslinR < 0.0){
-      fuchslinR = -1.0*fuchslinR;
-      powfuch = pow(nTotalOFA,fuchslinR);
+      powfuch = pow(nTotalOFA,-fuchslinR);
      if(powfuch<1e-15) rm1 = 0.0;
      else rm1 *= 1.0/powfuch;

-      powfuch = pow(nTotalOFA_old,fuchslinR);
+      powfuch = pow(nTotalOFA_old,-fuchslinR);
      if(powfuch<1e-15) rm1_old = 0.0;
      else rm1_old *= 1.0/powfuch;

@ -884,7 +1050,7 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    }
  }

-  if(strcmp(site2,"1fluid") == 0){
+  if (isOneFluidApprox(isite2)){
    rm2 = cbrt(rm3);
    if(rm2 < 1e-16) {
      rm2 = 0.0;
@ -910,12 +1076,11 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    // Fuchslin-Like Exp-6 Scaling
    double powfuch = 0.0;
    if(fuchslinEpsilon < 0.0){
-      fuchslinEpsilon = -1.0*fuchslinEpsilon;
-      powfuch = pow(nTotalOFA,fuchslinEpsilon);
+      powfuch = pow(nTotalOFA,-fuchslinEpsilon);
      if(powfuch<1e-15) epsilon2 = 0.0;
      else epsilon2 *= 1.0/powfuch;

-      powfuch = pow(nTotalOFA_old,fuchslinEpsilon);
+      powfuch = pow(nTotalOFA_old,-fuchslinEpsilon);
      if(powfuch<1e-15) epsilon2_old = 0.0;
      else epsilon2_old *= 1.0/powfuch;

@ -925,12 +1090,11 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm
    }

    if(fuchslinR < 0.0){
-      fuchslinR = -1.0*fuchslinR;
-      powfuch = pow(nTotalOFA,fuchslinR);
+      powfuch = pow(nTotalOFA,-fuchslinR);
      if(powfuch<1e-15) rm2 = 0.0;
      else rm2 *= 1.0/powfuch;

-      powfuch = pow(nTotalOFA_old,fuchslinR);
+      powfuch = pow(nTotalOFA_old,-fuchslinR);
      if(powfuch<1e-15) rm2_old = 0.0;
      else rm2_old *= 1.0/powfuch;

@ -943,7 +1107,7 @@ void PairExp6rx::getParamsEXP6(int id,double &epsilon1,double &alpha1,double &rm

 /* ---------------------------------------------------------------------- */

-double PairExp6rx::func_rin(double &alpha)
+inline double PairExp6rx::func_rin(const double &alpha) const
 {
  double function;

@ -958,7 +1122,7 @@ double PairExp6rx::func_rin(double &alpha)

 /* ---------------------------------------------------------------------- */

-double PairExp6rx::expValue(double value)
+inline double PairExp6rx::expValue(double value) const
 {
  double returnValue;
  if(value < DBL_MIN_EXP) returnValue = 0.0;
--- a/src/USER-DPD/pair_exp6_rx.h
+++ b/src/USER-DPD/pair_exp6_rx.h
@ -1,4 +1,4 @@
-/* -*- c++ -*- ----------------------------------------------------------
+/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
@ -53,6 +53,7 @@ class PairExp6rx : public Pair {
    int ispecies;
    char *name, *potential;      // names of unique molecules and interaction type
    char *tablename;             // name of interaction table
+   int potentialType;              // enumerated interaction potential type.
  };
  Param *params;                // parameter set for an I-J-K interaction

@ -60,13 +61,13 @@ class PairExp6rx : public Pair {
  void read_file(char *);
  void setup();

+  int isite1, isite2;
  char *site1, *site2;
  double fuchslinR, fuchslinEpsilon;
-  void getParamsEXP6(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &);
-  double func_rin(double &);
-  double expValue(double);
+  void getParamsEXP6(int, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &, double &) const;

-/*   class FixRX *fixRX; */
+  inline double func_rin(const double &) const;
+  inline double expValue(const double) const;
 };

 }
@ -78,7 +79,7 @@ class PairExp6rx : public Pair {

 E:  alpha_ij is 6.0 in pair exp6

-Self-explanator
+Self-explanatory

 E: Illegal ... command

--- a/src/USER-DPD/pair_multi_lucy_rx.cpp
+++ b/src/USER-DPD/pair_multi_lucy_rx.cpp
@ -43,6 +43,9 @@ enum{NONE,RLINEAR,RSQ};

 #define MAXLINE 1024

+#define oneFluidParameter (-1)
+#define isOneFluid(_site) ( (_site) == oneFluidParameter )
+
 static const char cite_pair_multi_lucy_rx[] =
  "pair_style multi/lucy/rx command:\n\n"
  "@Article{Moore16,\n"
@ -90,7 +93,7 @@ void PairMultiLucyRX::compute(int eflag, int vflag)
 {
  int i,j,ii,jj,inum,jnum,itype,jtype,itable;
  double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,evdwlOld,fpair;
-  double rsq;
+  double rsq,factor_lj;
  int *ilist,*jlist,*numneigh,**firstneigh;
  Table *tb;

@ -105,6 +108,8 @@ void PairMultiLucyRX::compute(int eflag, int vflag)
  double **f = atom->f;
  int *type = atom->type;
  int nlocal = atom->nlocal;
+  int nghost = atom->nghost;
+  double *special_lj = force->special_lj;
  int newton_pair = force->newton_pair;

  double fractionOld1_i,fractionOld1_j;
@ -120,6 +125,22 @@ void PairMultiLucyRX::compute(int eflag, int vflag)
  int jtable;
  double *rho = atom->rho;

+  double *fractionOld1 = NULL;
+  double *fractionOld2 = NULL;
+  double *fraction1 = NULL;
+  double *fraction2 = NULL;
+
+  {
+    const int ntotal = nlocal + nghost;
+    memory->create(fractionOld1, ntotal, "PairMultiLucyRX::fractionOld1");
+    memory->create(fractionOld2, ntotal, "PairMultiLucyRX::fractionOld2");
+    memory->create(fraction1, ntotal, "PairMultiLucyRX::fraction1");
+    memory->create(fraction2, ntotal, "PairMultiLucyRX::fraction2");
+
+    for (int i = 0; i < ntotal; ++i)
+       getParams(i, fractionOld1[i], fractionOld2[i], fraction1[i], fraction2[i]);
+  }
+
  inum = list->inum;
  ilist = list->ilist;
  numneigh = list->numneigh;
@ -138,10 +159,18 @@ void PairMultiLucyRX::compute(int eflag, int vflag)
    jlist = firstneigh[i];
    jnum = numneigh[i];

-    getParams(i,fractionOld1_i,fractionOld2_i,fraction1_i,fraction2_i);
+    double fx_i = 0.0;
+    double fy_i = 0.0;
+    double fz_i = 0.0;
+
+    fractionOld1_i = fractionOld1[i];
+    fractionOld2_i = fractionOld2[i];
+    fraction1_i = fraction1[i];
+    fraction2_i = fraction2[i];

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
+      factor_lj = special_lj[sbmask(j)];
      j &= NEIGHMASK;

      delx = xtmp - x[j][0];
@ -152,7 +181,11 @@ void PairMultiLucyRX::compute(int eflag, int vflag)

      if (rsq < cutsq[itype][jtype]) {
        fpair = 0.0;
-        getParams(j,fractionOld1_j,fractionOld2_j,fraction1_j,fraction2_j);
+
+        fractionOld1_j = fractionOld1[j];
+        fractionOld2_j = fractionOld2[j];
+        fraction1_j = fraction1[j];
+        fraction2_j = fraction2[j];

        tb = &tables[tabindex[itype][jtype]];
        if (rho[i]*rho[i] < tb->innersq || rho[j]*rho[j] < tb->innersq){
@ -205,23 +238,25 @@ void PairMultiLucyRX::compute(int eflag, int vflag)

        } else error->one(FLERR,"Only LOOKUP and LINEAR table styles have been implemented for pair multi/lucy/rx");

-
-        if (strcmp(site1,site2) == 0) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair;
+        if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair; 
        else fpair = (sqrt(fractionOld1_i*fractionOld2_j) + sqrt(fractionOld2_i*fractionOld1_j))*fpair;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        fx_i += delx*fpair;
+        fy_i += dely*fpair;
+        fz_i += delz*fpair;
        if (newton_pair || j < nlocal) {
          f[j][0] -= delx*fpair;
          f[j][1] -= dely*fpair;
          f[j][2] -= delz*fpair;
        }
-        if (evflag) ev_tally(i,j,nlocal,newton_pair,
-        		     0.0,0.0,fpair,delx,dely,delz);
+        if (evflag) ev_tally(i,j,nlocal,newton_pair,0.0,0.0,fpair,delx,dely,delz);
      }
    }

+    f[i][0] += fx_i;
+    f[i][1] += fy_i;
+    f[i][2] += fz_i;
+
    tb = &tables[tabindex[itype][itype]];
    itable = static_cast<int> (((rho[i]*rho[i]) - tb->innersq) * tb->invdelta);
    if (tabstyle == LOOKUP) evdwl = tb->e[itable];
@ -244,11 +279,15 @@ void PairMultiLucyRX::compute(int eflag, int vflag)

    evdwl = evdwlOld;

-    if (evflag) ev_tally(0,0,nlocal,newton_pair,
-        		 evdwl,0.0,0.0,0.0,0.0,0.0);
+    if (evflag) ev_tally(0,0,nlocal,newton_pair,evdwl,0.0,0.0,0.0,0.0,0.0);
  }

  if (vflag_fdotr) virial_fdotr_compute();
+
+  memory->destroy(fractionOld1);
+  memory->destroy(fractionOld2);
+  memory->destroy(fraction1);
+  memory->destroy(fraction2);
 }

 /* ----------------------------------------------------------------------
@ -350,11 +389,13 @@ void PairMultiLucyRX::coeff(int narg, char **arg)
  // insure cutoff is within table

  if (tb->ninput <= 1) error->one(FLERR,"Invalid pair table length");
-  double rlo;
+  double rlo,rhi;
  if (tb->rflag == 0) {
    rlo = tb->rfile[0];
+    rhi = tb->rfile[tb->ninput-1];
  } else {
    rlo = tb->rlo;
+    rhi = tb->rhi;
  }
  rho_0 = rlo;

@ -380,6 +421,37 @@ void PairMultiLucyRX::coeff(int narg, char **arg)

  if (count == 0) error->all(FLERR,"Illegal pair_coeff command");
  ntables++;
+
+  // Match site* to isite values.
+
+  if (strcmp(site1, "1fluid") == 0)
+     isite1 = oneFluidParameter;
+  else {
+     isite1 = nspecies;
+     for (int ispecies = 0; ispecies < nspecies; ++ispecies)
+        if (strcmp(site1, atom->dname[ispecies]) == 0){
+           isite1 = ispecies;
+           break;
+        }
+
+     if (isite1 == nspecies)
+        error->all(FLERR,"Pair_multi_lucy_rx site1 is invalid.");
+  }
+
+  if (strcmp(site2, "1fluid") == 0)
+     isite2 = oneFluidParameter;
+  else {
+     isite2 = nspecies;
+     for (int ispecies = 0; ispecies < nspecies; ++ispecies)
+        if (strcmp(site2, atom->dname[ispecies]) == 0){
+           isite2 = ispecies;
+           break;
+        }
+
+     if (isite2 == nspecies)
+        error->all(FLERR,"Pair_multi_lucy_rx site2 is invalid.");
+  }
+
 }

 /* ----------------------------------------------------------------------
@ -778,64 +850,83 @@ void PairMultiLucyRX::read_restart_settings(FILE *fp)

 void PairMultiLucyRX::computeLocalDensity()
 {
-  int i,j,m,ii,jj,inum,jnum,itype,jtype;
-  double xtmp,ytmp,ztmp,delx,dely,delz;
-  double rsq;
-  int *ilist,*jlist,*numneigh,**firstneigh;
-
  double **x = atom->x;
-  int *type = atom->type;
-  int nlocal = atom->nlocal;
-  int newton_pair = force->newton_pair;
-  double factor;
+  const int *type = atom->type;
+  const int nlocal = atom->nlocal;

-  inum = list->inum;
-  ilist = list->ilist;
-  numneigh = list->numneigh;
-  firstneigh = list->firstneigh;
+  const int inum = list->inum;
+  const int *ilist = list->ilist;
+  const int *numneigh = list->numneigh;
+        int **firstneigh = list->firstneigh;
+
+  const double pi = MathConst::MY_PI;
+
+  const bool newton_pair = force->newton_pair;
+  const bool one_type = (atom->ntypes == 1);
+
+  // Special cut-off values for when there's only one type.
+  const double cutsq_type11 = cutsq[1][1];
+  const double rcut_type11 = sqrt(cutsq_type11);
+  const double factor_type11 = 84.0/(5.0*pi*rcut_type11*rcut_type11*rcut_type11);

-  double pi = MathConst::MY_PI;
  double *rho = atom->rho;

- // zero out density
-
+  // zero out density
  if (newton_pair) {
-    m = nlocal + atom->nghost;
-    for (i = 0; i < m; i++) rho[i] = 0.0;
-  } else for (i = 0; i < nlocal; i++) rho[i] = 0.0;
+    const int m = nlocal + atom->nghost;
+    for (int i = 0; i < m; i++) rho[i] = 0.0;
+  }
+  else
+    for (int i = 0; i < nlocal; i++) rho[i] = 0.0;

 // rho = density at each atom
 // loop over neighbors of my atoms
-  for (ii = 0; ii < inum; ii++) {
-    i = ilist[ii];
-    xtmp = x[i][0];
-    ytmp = x[i][1];
-    ztmp = x[i][2];
-    itype = type[i];
-    jlist = firstneigh[i];
-    jnum = numneigh[i];
+  for (int ii = 0; ii < inum; ii++){
+    const int i = ilist[ii];

-    for (jj = 0; jj < jnum; jj++) {
-      j = jlist[jj];
-      j &= NEIGHMASK;
-      jtype = type[j];
+    const double xtmp = x[i][0];
+    const double ytmp = x[i][1];
+    const double ztmp = x[i][2];

-      delx = xtmp - x[j][0];
-      dely = ytmp - x[j][1];
-      delz = ztmp - x[j][2];
-      rsq = delx*delx + dely*dely + delz*delz;
+    double rho_i = rho[i];

-      if (rsq < cutsq[itype][jtype]) {
-        double rcut = sqrt(cutsq[itype][jtype]);
-        double tmpFactor = 1.0-sqrt(rsq)/rcut;
-        double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor;
-        factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4;
-        rho[i] += factor;
-        if (newton_pair || j < nlocal) {
-          rho[j] += factor;
+    const int itype = type[i];
+    const int *jlist = firstneigh[i];
+    const int jnum = numneigh[i];
+
+    for (int jj = 0; jj < jnum; jj++){
+      const int j = (jlist[jj] & NEIGHMASK);
+      const int jtype = type[j];
+
+      const double delx = xtmp - x[j][0];
+      const double dely = ytmp - x[j][1];
+      const double delz = ztmp - x[j][2];
+      const double rsq = delx*delx + dely*dely + delz*delz;
+
+      if (one_type)
+        if (rsq < cutsq_type11){
+          const double rcut = rcut_type11;
+          const double r_over_rcut = sqrt(rsq) / rcut;
+          const double tmpFactor = 1.0 - r_over_rcut;
+          const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor;
+          const double factor = factor_type11*(1.0 + 1.5*r_over_rcut)*tmpFactor4;
+          rho_i += factor;
+          if (newton_pair || j < nlocal)
+            rho[j] += factor;
+        }
+      else
+        if (rsq < cutsq[itype][jtype]){
+          const double rcut = sqrt(cutsq[itype][jtype]);
+          const double tmpFactor = 1.0-sqrt(rsq)/rcut;
+          const double tmpFactor4 = tmpFactor*tmpFactor*tmpFactor*tmpFactor;
+          const double factor = (84.0/(5.0*pi*rcut*rcut*rcut))*(1.0+3.0*sqrt(rsq)/(2.0*rcut))*tmpFactor4;
+          rho_i += factor;
+          if (newton_pair || j < nlocal)
+            rho[j] += factor;
        }
-      }
    }
+
+    rho[i] = rho_i;
  }
  if (newton_pair) comm->reverse_comm_pair(this);

@ -850,54 +941,39 @@ void PairMultiLucyRX::getParams(int id, double &fractionOld1, double &fractionOl
  double fractionOld, fraction;
  double nTotal, nTotalOld;

-  fractionOld  = 0.0;
-  fraction  = 0.0;
-  fractionOld1 = 0.0;
-  fractionOld2 = 0.0;
-  fraction1 = 0.0;
-  fraction2 = 0.0;
-
  nTotal = 0.0;
  nTotalOld = 0.0;
-  for(int ispecies=0;ispecies<nspecies;ispecies++){
-    nTotal += atom->dvector[ispecies][id];
+  for (int ispecies = 0; ispecies < nspecies; ispecies++){
+    nTotal += atom->dvector[ispecies][id]; 
    nTotalOld += atom->dvector[ispecies+nspecies][id];
  }

-  for (int ispecies = 0; ispecies < nspecies; ispecies++) {
-    if (strcmp(site1,&atom->dname[ispecies][0]) == 0){
-      fractionOld1 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction1 = atom->dvector[ispecies][id]/nTotal;
-    }
-    if (strcmp(site2,&atom->dname[ispecies][0]) == 0){
-      fractionOld2 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction2 = atom->dvector[ispecies][id]/nTotal;
-    }
+  if (isOneFluid(isite1) == false){
+    fractionOld1 = atom->dvector[isite1+nspecies][id]/nTotalOld;
+    fraction1 = atom->dvector[isite1][id]/nTotal;
+  }
+  if (isOneFluid(isite2) == false){
+    fractionOld2 = atom->dvector[isite2+nspecies][id]/nTotalOld;
+    fraction2 = atom->dvector[isite2][id]/nTotal;
  }

-  for (int ispecies = 0; ispecies < nspecies; ispecies++) {
-    if (strcmp(site1,&atom->dname[ispecies][0]) == 0){
-      fractionOld1 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction1 = atom->dvector[ispecies][id]/nTotal;
-    }
-    if (strcmp(site2,&atom->dname[ispecies][0]) == 0){
-      fractionOld2 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction2 = atom->dvector[ispecies][id]/nTotal;
-    }
+  if (isOneFluid(isite1) || isOneFluid(isite2)){
+    fractionOld  = 0.0;
+    fraction  = 0.0;

-    if (strcmp(site1,"all") == 0 || strcmp(site2,"all") == 0) {
-      if (strcmp(site1,&atom->dname[ispecies][0]) == 0 || strcmp(site2,&atom->dname[ispecies][0]) == 0) continue;
-      fractionOld += atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction += atom->dvector[ispecies][id]/nTotal;
+    for (int ispecies = 0; ispecies < nspecies; ispecies++){
+      if (isite1 == ispecies || isite2 == ispecies) continue;
+      fractionOld += atom->dvector[ispecies+nspecies][id] / nTotalOld;
+      fraction += atom->dvector[ispecies][id] / nTotal;
+    }
+    if (isOneFluid(isite1)){
+      fractionOld1 = fractionOld;
+      fraction1 = fraction;
+    }
+    if (isOneFluid(isite2)){
+      fractionOld2 = fractionOld;
+      fraction2 = fraction;
    }
-  }
-  if(strcmp(site1,"all") == 0){
-    fractionOld1 = fractionOld;
-    fraction1 = fraction;
-  }
-  if(strcmp(site2,"all") == 0){
-    fractionOld2 = fractionOld;
-    fraction2 = fraction;
  }
 }

--- a/src/USER-DPD/pair_multi_lucy_rx.h
+++ b/src/USER-DPD/pair_multi_lucy_rx.h
@ -1,4 +1,4 @@
-/* -*- c++ -*- ----------------------------------------------------------
+/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
@ -77,9 +77,9 @@ class PairMultiLucyRX : public Pair {

  int nspecies;
  char *site1, *site2;
+  int isite1, isite2;
  void getParams(int, double &, double &, double &, double &);

-/*   class FixRX *fixRX; */
 };

 }
--- a/src/USER-DPD/pair_table_rx.cpp
+++ b/src/USER-DPD/pair_table_rx.cpp
@ -35,6 +35,9 @@ enum{NONE,RLINEAR,RSQ,BMP};

 #define MAXLINE 1024

+#define OneFluidValue (-1)
+#define isOneFluid(_site_) ( (_site_) == OneFluidValue )
+
 /* ---------------------------------------------------------------------- */

 PairTableRX::PairTableRX(LAMMPS *lmp) : Pair(lmp)
@ -79,6 +82,21 @@ void PairTableRX::compute(int eflag, int vflag)
  if (eflag || vflag) ev_setup(eflag,vflag);
  else evflag = vflag_fdotr = 0;

+  double *fractionOld1, *fractionOld2;
+  double *fraction1, *fraction2;
+
+  {
+    const int ntotal = atom->nlocal + atom->nghost;
+
+    memory->create(fractionOld1, ntotal, "PairTableRx::compute::fractionOld1");
+    memory->create(fractionOld2, ntotal, "PairTableRx::compute::fractionOld2");
+    memory->create(fraction1, ntotal, "PairTableRx::compute::fraction1");
+    memory->create(fraction2, ntotal, "PairTableRx::compute::fraction2");
+
+    for (int i = 0; i < ntotal; ++i)
+      getParams(i, fractionOld1[i], fractionOld2[i], fraction1[i], fraction2[i]);
+  }
+
  double **x = atom->x;
  double **f = atom->f;
  int *type = atom->type;
@ -108,7 +126,14 @@ void PairTableRX::compute(int eflag, int vflag)
    jlist = firstneigh[i];
    jnum = numneigh[i];

-    getParams(i,fractionOld1_i,fractionOld2_i,fraction1_i,fraction2_i);
+    double uCG_i = 0.0;
+    double uCGnew_i = 0.0;
+    double fx_i = 0.0, fy_i = 0.0, fz_i = 0.0;
+
+    fractionOld1_i = fractionOld1[i];
+    fractionOld2_i = fractionOld2[i];
+    fraction1_i = fraction1[i];
+    fraction2_i = fraction2[i];

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
@ -122,7 +147,10 @@ void PairTableRX::compute(int eflag, int vflag)
      jtype = type[j];

      if (rsq < cutsq[itype][jtype]) {
-        getParams(j,fractionOld1_j,fractionOld2_j,fraction1_j,fraction2_j);
+        fractionOld1_j = fractionOld1[j];
+        fractionOld2_j = fractionOld2[j];
+        fraction1_j = fraction1[j];
+        fraction2_j = fraction2[j];

        tb = &tables[tabindex[itype][jtype]];
        if (rsq < tb->innersq)
@ -158,12 +186,12 @@ void PairTableRX::compute(int eflag, int vflag)
          value = tb->f[itable] + fraction*tb->df[itable];
          fpair = factor_lj * value;
        }
-        if (strcmp(site1,site2) == 0) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair;
+        if (isite1 == isite2) fpair = sqrt(fractionOld1_i*fractionOld2_j)*fpair; 
        else fpair = (sqrt(fractionOld1_i*fractionOld2_j) + sqrt(fractionOld2_i*fractionOld1_j))*fpair;

-        f[i][0] += delx*fpair;
-        f[i][1] += dely*fpair;
-        f[i][2] += delz*fpair;
+        fx_i += delx*fpair;
+        fy_i += dely*fpair;
+        fz_i += delz*fpair;
        if (newton_pair || j < nlocal) {
          f[j][0] -= delx*fpair;
          f[j][1] -= dely*fpair;
@ -179,7 +207,7 @@ void PairTableRX::compute(int eflag, int vflag)
          evdwl = a * tb->e[itable] + b * tb->e[itable+1] +
            ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) *
            tb->deltasq6;
-        if (strcmp(site1,site2) == 0){
+        if (isite1 == isite2){
          evdwlOld = sqrt(fractionOld1_i*fractionOld2_j)*evdwl;
          evdwl = sqrt(fraction1_i*fraction2_j)*evdwl;
        } else {
@ -188,9 +216,11 @@ void PairTableRX::compute(int eflag, int vflag)
        }
        evdwlOld *= factor_lj;
        evdwl *= factor_lj;
-        uCG[i] += 0.5*evdwlOld;
+
+        uCG_i += 0.5*evdwlOld;
        uCG[j] += 0.5*evdwlOld;
-        uCGnew[i] += 0.5*evdwl;
+
+        uCGnew_i += 0.5*evdwl;
        uCGnew[j] += 0.5*evdwl;
        evdwl = evdwlOld;

@ -198,8 +228,20 @@ void PairTableRX::compute(int eflag, int vflag)
                             evdwl,0.0,fpair,delx,dely,delz);
      }
    }
+
+    uCG[i] += uCG_i;
+    uCGnew[i] += uCGnew_i;
+
+    f[i][0] += fx_i;
+    f[i][1] += fy_i;
+    f[i][2] += fz_i;
  }
  if (vflag_fdotr) virial_fdotr_compute();
+
+  memory->destroy(fractionOld1);
+  memory->destroy(fractionOld2);
+  memory->destroy(fraction1);
+  memory->destroy(fraction2);
 }

 /* ----------------------------------------------------------------------
@ -374,6 +416,40 @@ void PairTableRX::coeff(int narg, char **arg)

  if (count == 0) error->all(FLERR,"Illegal pair_coeff command");
  ntables++;
+
+  {
+     if ( strcmp(site1,"1fluid") == 0 )
+       isite1 = OneFluidValue;
+     else {
+       isite1 = nspecies;
+
+       for (int k = 0; k < nspecies; k++){
+         if (strcmp(site1, atom->dname[k]) == 0){
+           isite1 = k;
+           break;
+         }
+       }
+
+       if (isite1 == nspecies) error->all(FLERR,"isite1 == nspecies");
+     }
+
+     if ( strcmp(site2,"1fluid") == 0 )
+       isite2 = OneFluidValue;
+     else {
+       isite2 = nspecies;
+
+       for (int k = 0; k < nspecies; k++){
+         if (strcmp(site2, atom->dname[k]) == 0){
+           isite2 = ispecies;
+           break;
+         }
+       }
+
+       if (isite2 == nspecies)
+         error->all(FLERR,"isite2 == nspecies");
+     }
+  }
+
 }

 /* ----------------------------------------------------------------------
@ -1026,7 +1102,7 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq,
    fforce = factor_lj * value;
  }

-  if (strcmp(site1,site2) == 0) fforce = sqrt(fraction1_i*fraction2_j)*fforce;
+  if (isite1 == isite2) fforce = sqrt(fraction1_i*fraction2_j)*fforce; 
  else fforce = (sqrt(fraction1_i*fraction2_j) + sqrt(fraction2_i*fraction1_j))*fforce;

  if (tabstyle == LOOKUP)
@ -1037,7 +1113,7 @@ double PairTableRX::single(int i, int j, int itype, int jtype, double rsq,
    phi = a * tb->e[itable] + b * tb->e[itable+1] +
      ((a*a*a-a)*tb->e2[itable] + (b*b*b-b)*tb->e2[itable+1]) * tb->deltasq6;

-  if (strcmp(site1,site2) == 0) phi = sqrt(fraction1_i*fraction2_j)*phi;
+  if (isite1 == isite2) phi = sqrt(fraction1_i*fraction2_j)*phi;
  else phi = (sqrt(fraction1_i*fraction2_j) + sqrt(fraction2_i*fraction1_j))*phi;

  return factor_lj*phi;
@ -1058,8 +1134,7 @@ void *PairTableRX::extract(const char *str, int &dim)
  double cut_coul = tables[0].cut;
  for (int m = 1; m < ntables; m++)
    if (tables[m].cut != cut_coul)
-      error->all(FLERR,
-                 "Pair table cutoffs must all be equal to use with KSpace");
+      error->all(FLERR,"Pair table cutoffs must all be equal to use with KSpace");
  dim = 0;
  return &tables[0].cut;
 }
@ -1068,59 +1143,44 @@ void *PairTableRX::extract(const char *str, int &dim)

 void PairTableRX::getParams(int id, double &fractionOld1, double &fractionOld2, double &fraction1, double &fraction2)
 {
-  double fractionOld, fraction;
-  double nTotal, nTotalOld;
-
-  fractionOld  = 0.0;
-  fraction  = 0.0;
-  fractionOld1 = 0.0;
-  fractionOld2 = 0.0;
-  fraction1 = 0.0;
-  fraction2 = 0.0;
-
-  nTotal = 0.0;
-  nTotalOld = 0.0;
-  for(int ispecies=0;ispecies<nspecies;ispecies++){
-    nTotal += atom->dvector[ispecies][id];
+  double nTotal = 0.0;
+  double nTotalOld = 0.0;
+  for (int ispecies = 0; ispecies < nspecies; ++ispecies){
+    nTotal += atom->dvector[ispecies][id]; 
    nTotalOld += atom->dvector[ispecies+nspecies][id];
  }
  if(nTotal < 1e-8 || nTotalOld < 1e-8)
    error->all(FLERR,"The number of molecules in CG particle is less than 1e-8.");

-  for (int ispecies = 0; ispecies < nspecies; ispecies++) {
-    if (strcmp(site1,&atom->dname[ispecies][0]) == 0){
-      fractionOld1 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction1 = atom->dvector[ispecies][id]/nTotal;
-    }
-    if (strcmp(site2,&atom->dname[ispecies][0]) == 0){
-      fractionOld2 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction2 = atom->dvector[ispecies][id]/nTotal;
-    }
+  if (isOneFluid(isite1) == false){ 
+    fractionOld1 = atom->dvector[isite1+nspecies][id]/nTotalOld;
+    fraction1 = atom->dvector[isite1][id]/nTotal;
+  }
+  if (isOneFluid(isite2) == false){ 
+    fractionOld2 = atom->dvector[isite2+nspecies][id]/nTotalOld;
+    fraction2 = atom->dvector[isite2][id]/nTotal;
  }

-  for (int ispecies = 0; ispecies < nspecies; ispecies++) {
-    if (strcmp(site1,&atom->dname[ispecies][0]) == 0){
-      fractionOld1 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction1 = atom->dvector[ispecies][id]/nTotal;
-    }
-    if (strcmp(site2,&atom->dname[ispecies][0]) == 0){
-      fractionOld2 = atom->dvector[ispecies+nspecies][id]/nTotalOld;
-      fraction2 = atom->dvector[ispecies][id]/nTotal;
-    }
+  if (isOneFluid(isite1) || isOneFluid(isite2)){
+    double fractionOld  = 0.0;
+    double fraction  = 0.0;
+
+    for (int ispecies = 0; ispecies < nspecies; ispecies++){
+      if (isite1 == ispecies || isite2 == ispecies) continue;

-    if (strcmp(site1,"1fluid") == 0 || strcmp(site2,"1fluid") == 0) {
-      if (strcmp(site1,&atom->dname[ispecies][0]) == 0 || strcmp(site2,&atom->dname[ispecies][0]) == 0) continue;
      fractionOld += atom->dvector[ispecies+nspecies][id]/nTotalOld;
      fraction += atom->dvector[ispecies][id]/nTotal;
    }
-  }
-  if(strcmp(site1,"1fluid") == 0){
-    fractionOld1 = fractionOld;
-    fraction1 = fraction;
-  }
-  if(strcmp(site2,"1fluid") == 0){
-    fractionOld2 = fractionOld;
-    fraction2 = fraction;
+
+    if(isOneFluid(isite1)){
+      fractionOld1 = fractionOld;
+      fraction1 = fraction;
+    }
+
+    if(isOneFluid(isite2)){
+      fractionOld2 = fractionOld;
+      fraction2 = fraction;
+    }
  }
 }

--- a/src/USER-DPD/pair_table_rx.h
+++ b/src/USER-DPD/pair_table_rx.h
@ -1,4 +1,4 @@
-/* -*- c++ -*- ----------------------------------------------------------
+/* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
@ -71,9 +71,9 @@ class PairTableRX : public Pair {

  int nspecies;
  char *site1, *site2;
+  int isite1, isite2;
  void getParams(int, double &, double &, double &, double &);

-/*   class FixRX *fixRX; */
 };

 }
--- a/src/USER-INTEL/Install.sh
+++ b/src/USER-INTEL/Install.sh
@ -42,6 +42,8 @@ action intel_buffers.cpp
 action math_extra_intel.h
 action intel_simd.h pair_sw_intel.cpp
 action intel_intrinsics.h pair_tersoff_intel.cpp
+action verlet_lrt_intel.h pppm.cpp
+action verlet_lrt_intel.cpp pppm.cpp

 # step 2: handle cases and tasks not handled in step 1.

--- a/src/USER-INTEL/README
+++ b/src/USER-INTEL/README
@ -26,16 +26,18 @@ This package is based on the USER-OMP package and provides LAMMPS styles that:
 When using the suffix command with "intel", intel styles will be used if they
 exist. If the suffix command is used with "hybrid intel omp" and the USER-OMP 
 USER-OMP styles will be used whenever USER-INTEL styles are not available. This
-allow for running most styles in LAMMPS with threading. For example, in the
-latter case with the USER-OMP package installed,
+allow for running most styles in LAMMPS with threading.

-      kspace_style     pppm 1e-4
+-----------------------------------------------------------------------------

-is equivalent to:
-
-      kspace_style     pppm/omp   1e-4
-
-because no pppm style has been implemented for the Intel package.
+The Long-Range Thread mode (LRT) in the Intel package currently uses
+pthreads by default. If pthreads are not supported in the build environment,
+the compile flag "-DLMP_INTEL_NOLRT" will disable the feature to allow for 
+builds without pthreads. Alternatively, "-DLMP_INTEL_LRT11" can be used to
+build with compilers that support threads using the C++11 standard. When using
+LRT mode, you might need to disable OpenMP affinity settings (e.g.
+export KMP_AFFINITY=none). LAMMPS will generate a warning if the settings
+need to be changed.

 -----------------------------------------------------------------------------

--- a/src/USER-INTEL/fix_intel.cpp
+++ b/src/USER-INTEL/fix_intel.cpp
@ -94,6 +94,7 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
  int nomp = 0, no_affinity = 0;
  _allow_separate_buffers = 1;
  _offload_ghost = -1;
+  _lrt = 0;

  int iarg = 4;
  while (iarg < narg) {
@ -132,6 +133,12 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
    } else if (strcmp(arg[iarg],"no_affinity") == 0) {
      no_affinity = 1;
      iarg++;
+    } else if (strcmp(arg[iarg], "lrt") == 0) {
+      if (iarg+2 > narg) error->all(FLERR,"Illegal package intel command");
+      if (strcmp(arg[iarg+1],"yes") == 0) _lrt = 1;
+      else if (strcmp(arg[iarg+1],"no") == 0) _lrt = 0;
+      else error->all(FLERR,"Illegal package intel command");
+      iarg += 2;
    }

    // undocumented options
@ -152,6 +159,13 @@ FixIntel::FixIntel(LAMMPS *lmp, int narg, char **arg) :  Fix(lmp, narg, arg)
    _offload_balance = 0.0;
  }

+  // if using LRT mode, create the integrate style
+  if (_lrt) {
+    char *str;
+    str = (char *) "verlet/lrt/intel";
+    update->create_integrate(1,&str,0);
+  }
+
  // error check

  if (_offload_balance > 1.0 || _offload_threads < 0 ||
--- a/src/USER-INTEL/fix_intel.h
+++ b/src/USER-INTEL/fix_intel.h
@ -74,6 +74,10 @@ class FixIntel : public Fix {
    return 0;
  }
  inline void set_reduce_flag() { _need_reduce = 1; }
+  inline int lrt() {
+    if (force->kspace_match("pppm/intel", 0)) return _lrt;
+    else return 0;
+  }

 protected:
  IntelBuffers<float,float> *_single_buffers;
@ -152,7 +156,7 @@ class FixIntel : public Fix {
 protected:
  int _overflow_flag[5];
  _alignvar(int _off_overflow_flag[5],64);
-  int _allow_separate_buffers, _offload_ghost;
+  int _allow_separate_buffers, _offload_ghost, _lrt;

  IntelBuffers<float,float>::vec3_acc_t *_force_array_s;
  IntelBuffers<float,double>::vec3_acc_t *_force_array_m;
--- a/src/USER-INTEL/pair_buck_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_buck_coul_long_intel.cpp
@ -83,7 +83,7 @@ void PairBuckCoulLongIntel::compute(int eflag, int vflag,
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

-  if (ago != 0 && fix->separate_buffers() == 0) {
+  if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
@ -476,6 +476,8 @@ void PairBuckCoulLongIntel::init_style()
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
+
+  _lrt = fix->lrt();
 }

 template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_buck_coul_long_intel.h
+++ b/src/USER-INTEL/pair_buck_coul_long_intel.h
@ -40,7 +40,7 @@ class PairBuckCoulLongIntel : public PairBuckCoulLong {

 private:
  FixIntel *fix;
-  int _cop;
+  int _cop, _lrt;

  template <class flt_t> class ForceConst;

--- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.cpp
@ -80,7 +80,7 @@ void PairLJCharmmCoulLongIntel::compute(int eflag, int vflag,
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

-  if (ago != 0 && fix->separate_buffers() == 0) {
+  if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
@ -504,6 +504,8 @@ void PairLJCharmmCoulLongIntel::init_style()
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
+
+  _lrt = fix->lrt();
 }

 template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
+++ b/src/USER-INTEL/pair_lj_charmm_coul_long_intel.h
@ -42,7 +42,7 @@ class PairLJCharmmCoulLongIntel : public PairLJCharmmCoulLong {

 private:
  FixIntel *fix;
-  int _cop;
+  int _cop, _lrt;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
+++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.cpp
@ -81,7 +81,7 @@ void PairLJCutCoulLongIntel::compute(int eflag, int vflag,
  const int offload_end = fix->offload_end_pair();
  const int ago = neighbor->ago;

-  if (ago != 0 && fix->separate_buffers() == 0) {
+  if (_lrt == 0 && ago != 0 && fix->separate_buffers() == 0) {
    fix->start_watch(TIME_PACK);
    #if defined(_OPENMP)
    #pragma omp parallel default(none) shared(eflag,vflag,buffers,fc)
@ -472,6 +472,8 @@ void PairLJCutCoulLongIntel::init_style()
    pack_force_const(force_const_double, fix->get_double_buffers());
  else
    pack_force_const(force_const_single, fix->get_single_buffers());
+
+  _lrt = fix->lrt();
 }

 template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pair_lj_cut_coul_long_intel.h
+++ b/src/USER-INTEL/pair_lj_cut_coul_long_intel.h
@ -42,7 +42,7 @@ class PairLJCutCoulLongIntel : public PairLJCutCoulLong {

 private:
  FixIntel *fix;
-  int _cop;
+  int _cop, _lrt;

  template <class flt_t> class ForceConst;
  template <class flt_t, class acc_t>
--- a/src/USER-INTEL/pppm_intel.cpp
+++ b/src/USER-INTEL/pppm_intel.cpp
@ -12,7 +12,8 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Rodrigo Canales (RWTH Aachen University)
+   Contributing authors: Rodrigo Canales (RWTH Aachen University)
+                         W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */

 #include <mpi.h>
@ -107,7 +108,14 @@ void PPPMIntel::compute(int eflag, int vflag)
    return;
  }
  #endif
+  compute_first(eflag,vflag);
+  compute_second(eflag,vflag);
+}

+/* ---------------------------------------------------------------------- */
+
+void PPPMIntel::compute_first(int eflag, int vflag)
+{
  int i,j;

  // set energy/virial flags
@ -192,6 +200,13 @@ void PPPMIntel::compute(int eflag, int vflag)
    else if (differentiation_flag == 0)
      cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+void PPPMIntel::compute_second(int eflag, int vflag)
+{
+  int i,j;

  // calculate the force on my particles

@ -617,3 +632,38 @@ void PPPMIntel::fieldforce_ad(IntelBuffers<flt_t,acc_t> *buffers)
    if (slabflag != 2) f[i].z += qfactor * ekz - fqqrd2es * sf;
  }
 }
+
+/* ----------------------------------------------------------------------
+  Pack data into intel package buffers if using LRT mode
+------------------------------------------------------------------------- */
+
+void PPPMIntel::pack_buffers()
+{
+  fix->start_watch(TIME_PACK);
+  #if defined(_OPENMP)
+  #pragma omp parallel default(none)
+  #endif
+  {
+    int ifrom, ito, tid;
+    IP_PRE_omp_range_id_align(ifrom, ito, tid, atom->nlocal+atom->nghost,
+                              comm->nthreads, 
+                              sizeof(IntelBuffers<float,double>::atom_t));
+    if (fix->precision() == FixIntel::PREC_MODE_MIXED)
+      fix->get_mixed_buffers()->thr_pack(ifrom,ito,1);
+    else if (fix->precision() == FixIntel::PREC_MODE_DOUBLE)
+      fix->get_double_buffers()->thr_pack(ifrom,ito,1);
+    else
+      fix->get_single_buffers()->thr_pack(ifrom,ito,1);
+  }
+  fix->stop_watch(TIME_PACK);
+}
+
+/* ----------------------------------------------------------------------
+   Returns 0 if Intel optimizations for PPPM ignored due to offload
+------------------------------------------------------------------------- */
+
+#ifdef _LMP_INTEL_OFFLOAD
+int PPPMIntel::use_base() {
+  return _use_base;
+}
+#endif
--- a/src/USER-INTEL/pppm_intel.h
+++ b/src/USER-INTEL/pppm_intel.h
@ -12,7 +12,8 @@
 ------------------------------------------------------------------------- */

 /* ----------------------------------------------------------------------
-   Contributing author: Rodrigo Canales (RWTH Aachen University)
+   Contributing authors: Rodrigo Canales (RWTH Aachen University)
+                         W. Michael Brown (Intel)
 ------------------------------------------------------------------------- */

 #ifdef KSPACE_CLASS
@ -35,6 +36,13 @@ class PPPMIntel : public PPPM {
  virtual ~PPPMIntel();
  virtual void init();
  virtual void compute(int, int);
+  void compute_first(int, int);
+  void compute_second(int, int);
+  void pack_buffers();
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  int use_base();
+  #endif

 protected:
  FixIntel *fix;
--- a/src/USER-INTEL/verlet_lrt_intel.cpp
+++ b/src/USER-INTEL/verlet_lrt_intel.cpp
@ -0,0 +1,420 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include <string.h>
+#include "verlet_lrt_intel.h"
+#include "neighbor.h"
+#include "domain.h"
+#include "comm.h"
+#include "atom.h"
+#include "atom_vec.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "dihedral.h"
+#include "improper.h"
+#include "kspace.h"
+#include "output.h"
+#include "update.h"
+#include "modify.h"
+#include "compute.h"
+#include "fix.h"
+#include "timer.h"
+#include "memory.h"
+#include "error.h"
+
+#if defined(_OPENMP)
+#include <omp.h>
+#endif
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+VerletLRTIntel::VerletLRTIntel(LAMMPS *lmp, int narg, char **arg) :
+  Verlet(lmp, narg, arg) {}
+
+/* ----------------------------------------------------------------------
+   initialization before run
+------------------------------------------------------------------------- */
+
+void VerletLRTIntel::init()
+{
+  Verlet::init();
+
+  _intel_kspace = (PPPMIntel*)(force->kspace_match("pppm/intel", 0));
+  
+  #ifdef LMP_INTEL_NOLRT
+  error->all(FLERR, 
+	     "LRT otion for Intel package disabled at compile time");
+  #endif
+}
+
+/* ----------------------------------------------------------------------
+   setup before run
+------------------------------------------------------------------------- */
+
+void VerletLRTIntel::setup()
+{
+  if (_intel_kspace == 0) {
+    Verlet::setup();
+    return;
+  } 
+
+  #ifdef _LMP_INTEL_OFFLOAD
+  if (_intel_kspace->use_base()) {
+    _intel_kspace = 0;
+    Verlet::setup();
+    return;
+  }
+  #endif
+
+  if (comm->me == 0 && screen) {
+    fprintf(screen,"Setting up VerletLRTIntel run ...\n");
+    fprintf(screen,"  Unit style    : %s\n", update->unit_style);
+    fprintf(screen,"  Current step  : " BIGINT_FORMAT "\n", update->ntimestep);
+    fprintf(screen,"  Time step     : %g\n", update->dt);
+    timer->print_timeout(screen);
+  }
+
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  if (comm->me == 0) {
+    cpu_set_t cpuset;
+    sched_getaffinity(0, sizeof(cpuset), &cpuset);
+    int my_cpu_count = CPU_COUNT(&cpuset);
+    if (my_cpu_count < comm->nthreads + 1) {
+      char str[128];
+      sprintf(str,"Using %d threads per MPI, but only %d core(s) allocated"
+                  " per MPI",
+              comm->nthreads + 1, my_cpu_count);
+      error->warning(FLERR, str);
+    }
+  }
+
+  _kspace_ready = 0;
+  _kspace_done = 0;
+  pthread_cond_init(&_kcond, NULL);
+  pthread_attr_init(&_kspace_attr);
+  pthread_attr_setdetachstate(&_kspace_attr, PTHREAD_CREATE_JOINABLE);
+  #endif
+
+  update->setupflag = 1;
+
+  // setup domain, communication and neighboring
+  // acquire ghosts
+  // build neighbor lists
+
+  atom->setup();
+  modify->setup_pre_exchange();
+  if (triclinic) domain->x2lamda(atom->nlocal);
+  domain->pbc();
+  domain->reset_box();
+  comm->setup();
+  if (neighbor->style) neighbor->setup_bins();
+  comm->exchange();
+  if (atom->sortfreq > 0) atom->sort();
+  comm->borders();
+  if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+  domain->image_check();
+  domain->box_too_small_check();
+  modify->setup_pre_neighbor();
+  neighbor->build();
+  neighbor->ncalls = 0;
+
+  // compute all forces
+
+  force->setup();
+  ev_set(update->ntimestep);
+  force_clear();
+  modify->setup_pre_force(vflag);
+  _intel_kspace->setup();
+
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  pthread_create(&_kspace_thread, &_kspace_attr, 
+		 &VerletLRTIntel::k_launch_loop, this);
+  #elif defined(_LMP_INTEL_LRT_11)
+  std::thread kspace_thread;
+  if (kspace_compute_flag) 
+    _kspace_thread=std::thread([=]{ _intel_kspace->compute_first(eflag, 
+                                                                 vflag); });
+  else 
+    _kspace_thread=std::thread([=]{ _intel_kspace->compute_dummy(eflag, 
+                                                                 vflag); });
+  #endif
+
+  if (pair_compute_flag) force->pair->compute(eflag,vflag);
+  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
+
+  if (atom->molecular) {
+    if (force->bond) force->bond->compute(eflag,vflag);
+    if (force->angle) force->angle->compute(eflag,vflag);
+    if (force->dihedral) force->dihedral->compute(eflag,vflag);
+    if (force->improper) force->improper->compute(eflag,vflag);
+  }
+
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  pthread_mutex_lock(&_kmutex);
+  while (!_kspace_done)
+    pthread_cond_wait(&_kcond, &_kmutex);
+  _kspace_done = 0;
+  pthread_mutex_unlock(&_kmutex);
+  #elif defined(_LMP_INTEL_LRT_11)
+  kspace_thread.join();
+  #endif
+
+  if (kspace_compute_flag) _intel_kspace->compute_second(eflag,vflag);
+
+  modify->pre_reverse(eflag,vflag);
+  if (force->newton) comm->reverse_comm();
+
+  modify->setup(vflag);
+  output->setup();
+  update->setupflag = 0;
+}
+
+/* ----------------------------------------------------------------------
+   run for N steps
+------------------------------------------------------------------------- */
+
+void VerletLRTIntel::run(int n)
+{
+  if (_intel_kspace == 0) {
+    Verlet::run(n);
+    return;
+  }
+
+  bigint ntimestep;
+  int nflag,sortflag;
+
+  int n_post_integrate = modify->n_post_integrate;
+  int n_pre_exchange = modify->n_pre_exchange;
+  int n_pre_neighbor = modify->n_pre_neighbor;
+  int n_pre_force = modify->n_pre_force;
+  int n_pre_reverse = modify->n_pre_reverse;
+  int n_post_force = modify->n_post_force;
+  int n_end_of_step = modify->n_end_of_step;
+
+  if (atom->sortfreq > 0) sortflag = 1;
+  else sortflag = 0;
+
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  _krun_n = n;
+  #endif
+
+  int run_cancelled = 0;
+
+  for (int i = 0; i < n; i++) {
+    if (timer->check_timeout(i)) {
+      update->nsteps = i;
+      run_cancelled = 1;
+      break;
+    }
+
+    ntimestep = ++update->ntimestep;
+    ev_set(ntimestep);
+
+    // initial time integration
+
+    timer->stamp();
+    modify->initial_integrate(vflag);
+    if (n_post_integrate) modify->post_integrate();
+    timer->stamp(Timer::MODIFY);
+
+    // regular communication vs neighbor list rebuild
+
+    nflag = neighbor->decide();
+
+    if (nflag == 0) {
+      timer->stamp();
+      comm->forward_comm();
+      timer->stamp(Timer::COMM);
+      _intel_kspace->pack_buffers();
+    } else {
+      if (n_pre_exchange) {
+        timer->stamp();
+        modify->pre_exchange();
+        timer->stamp(Timer::MODIFY);
+      }
+      if (triclinic) domain->x2lamda(atom->nlocal);
+      domain->pbc();
+      if (domain->box_change) {
+        domain->reset_box();
+        comm->setup();
+        if (neighbor->style) neighbor->setup_bins();
+      }
+      timer->stamp();
+      comm->exchange();
+      if (sortflag && ntimestep >= atom->nextsort) atom->sort();
+      comm->borders();
+      if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+      timer->stamp(Timer::COMM);
+      if (n_pre_neighbor) {
+        modify->pre_neighbor();
+        timer->stamp(Timer::MODIFY);
+      }
+      neighbor->build();
+      timer->stamp(Timer::NEIGH);
+    }
+
+    // force computations
+    // important for pair to come before bonded contributions
+    // since some bonded potentials tally pairwise energy/virial
+    // and Pair:ev_tally() needs to be called before any tallying
+
+    force_clear();
+
+    timer->stamp();
+
+    #if defined(_LMP_INTEL_LRT_PTHREAD)
+    pthread_mutex_lock(&_kmutex);
+    _kspace_ready = 1;
+    _kspace_done = 0;
+    pthread_cond_signal(&_kcond);
+    pthread_mutex_unlock(&_kmutex);
+    #elif defined(_LMP_INTEL_LRT_11)
+    std::thread kspace_thread;
+    if (kspace_compute_flag) 
+      kspace_thread=std::thread([=] { 
+        _intel_kspace->compute_first(eflag, vflag);
+        timer->stamp(Timer::KSPACE);
+      } );
+    #endif
+
+    if (n_pre_force) {
+      modify->pre_force(vflag);
+      timer->stamp(Timer::MODIFY);
+    }
+
+    if (pair_compute_flag) {
+      force->pair->compute(eflag,vflag);
+      timer->stamp(Timer::PAIR);
+    }
+
+    if (atom->molecular) {
+      if (force->bond) force->bond->compute(eflag,vflag);
+      if (force->angle) force->angle->compute(eflag,vflag);
+      if (force->dihedral) force->dihedral->compute(eflag,vflag);
+      if (force->improper) force->improper->compute(eflag,vflag);
+      timer->stamp(Timer::BOND);
+    }
+
+    #if defined(_LMP_INTEL_LRT_PTHREAD)
+    pthread_mutex_lock(&_kmutex);
+    while (!_kspace_done)
+      pthread_cond_wait(&_kcond, &_kmutex);
+    _kspace_done = 0;
+    pthread_mutex_unlock(&_kmutex);
+    #elif defined(_LMP_INTEL_LRT_11)
+    if (kspace_compute_flag) 
+      kspace_thread.join();
+    #endif
+
+    if (kspace_compute_flag) {
+      _intel_kspace->compute_second(eflag,vflag);
+      timer->stamp(Timer::KSPACE);
+    }
+
+    if (n_pre_reverse) {
+      modify->pre_reverse(eflag,vflag);
+      timer->stamp(Timer::MODIFY);
+    }
+
+    // reverse communication of forces
+
+    if (force->newton) {
+      comm->reverse_comm();
+      timer->stamp(Timer::COMM);
+    }
+
+    // force modifications, final time integration, diagnostics
+
+    if (n_post_force) modify->post_force(vflag);
+    modify->final_integrate();
+    if (n_end_of_step) modify->end_of_step();
+    timer->stamp(Timer::MODIFY);
+
+    // all output
+
+    if (ntimestep == output->next) {
+      timer->stamp();
+      output->write(ntimestep);
+      timer->stamp(Timer::OUTPUT);
+    }
+  }
+
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  if (run_cancelled) 
+    pthread_cancel(_kspace_thread);
+  else {
+    pthread_mutex_lock(&_kmutex);
+    _kspace_ready = 1;
+    _kspace_done = 0;
+    pthread_cond_signal(&_kcond);
+    pthread_mutex_unlock(&_kmutex);
+    pthread_join(_kspace_thread, NULL);
+    pthread_attr_destroy(&_kspace_attr);
+  }
+  #endif
+}
+
+#if defined(_LMP_INTEL_LRT_PTHREAD)
+
+/* ----------------------------------------------------------------------
+   PTHREAD Loop for long-range
+------------------------------------------------------------------------- */
+void * VerletLRTIntel::k_launch_loop(void *context)
+{
+  VerletLRTIntel * const c = (VerletLRTIntel *)context;
+
+  if (c->kspace_compute_flag) 
+    c->_intel_kspace->compute_first(c->eflag, c->vflag);
+  else 
+    c->_intel_kspace->compute_dummy(c->eflag, c->vflag);
+
+  pthread_mutex_lock(&(c->_kmutex));
+  c->_kspace_done = 1;
+  pthread_cond_signal(&(c->_kcond));
+  pthread_mutex_unlock(&(c->_kmutex));
+
+  pthread_mutex_lock(&(c->_kmutex));
+  while (!(c->_kspace_ready))
+    pthread_cond_wait(&(c->_kcond), &(c->_kmutex));
+  c->_kspace_ready = 0;
+  const int n = c->_krun_n;
+  pthread_mutex_unlock(&(c->_kmutex));
+
+  for (int i = 0; i < n; i++) {
+    
+    if (c->kspace_compute_flag) {
+      c->_intel_kspace->compute_first(c->eflag, c->vflag);
+      c->timer->stamp(Timer::KSPACE);
+    }
+
+    pthread_mutex_lock(&(c->_kmutex));
+    c->_kspace_done = 1;
+    pthread_cond_signal(&(c->_kcond));
+    pthread_mutex_unlock(&(c->_kmutex));
+
+    pthread_mutex_lock(&(c->_kmutex));
+    while (!(c->_kspace_ready))
+      pthread_cond_wait(&(c->_kcond), &(c->_kmutex));
+    c->_kspace_ready = 0;
+    pthread_mutex_unlock(&(c->_kmutex));
+  }
+
+  pthread_exit(NULL);
+  return NULL;
+}
+
+#endif
--- a/src/USER-INTEL/verlet_lrt_intel.h
+++ b/src/USER-INTEL/verlet_lrt_intel.h
@ -0,0 +1,78 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef INTEGRATE_CLASS
+
+IntegrateStyle(verlet/lrt/intel,VerletLRTIntel)
+
+#else
+
+#ifndef LMP_VERLET_LRT_INTEL_H
+#define LMP_VERLET_LRT_INTEL_H
+
+#include "verlet.h"
+#include "pppm_intel.h"
+
+#ifndef LMP_INTEL_NOLRT
+
+#ifdef LMP_INTEL_LRT11
+#define _LMP_INTEL_LRT_11
+#include <thread>
+#else
+#define _LMP_INTEL_LRT_PTHREAD
+#include <pthread.h>
+#endif
+
+#endif
+
+namespace LAMMPS_NS {
+
+class VerletLRTIntel : public Verlet {
+ public:
+  VerletLRTIntel(class LAMMPS *, int, char **);
+  virtual ~VerletLRTIntel() {}
+  virtual void init();
+  virtual void setup();
+  virtual void run(int);
+
+ protected:
+  PPPMIntel *_intel_kspace;
+
+  #if defined(_LMP_INTEL_LRT_PTHREAD)
+  static void *k_launch_loop(void *context);
+  pthread_t _kspace_thread;
+  pthread_attr_t _kspace_attr;
+  pthread_mutex_t _kmutex;
+  pthread_cond_t _kcond;
+  int _kspace_ready, _kspace_done, _krun_n;
+  #endif
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+E: LRT otion for Intel package disabled at compile time
+
+This option cannot be used with the Intel package because LAMMPS was not built
+with support for it.
+
+W: No fixes defined, atoms won't move
+
+If you are not using a fix like nve, nvt, npt then atom velocities and
+coordinates will not be updated during timestepping.
+
+*/
--- a/src/fix_store.cpp
+++ b/src/fix_store.cpp
@ -34,7 +34,7 @@ FixStore::FixStore(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
  // 4th arg determines GLOBAL vs PERATOM values
  // syntax: id group style global nrow ncol
  //   Nrow by Ncol array of global values
-  //   Ncol = 1 is vector, Nrow > 1 is array
+  //   Ncol = 1 is vector, Ncol > 1 is array
  // syntax: id group style peratom 0/1 nvalues
  //   0/1 flag = not-store or store peratom values in restart file
  //   nvalues = # of peratom values, N = 1 is vector, N > 1 is array
--- a/src/version.h
+++ b/src/version.h
@ -1 +1 @@
-#define LAMMPS_VERSION "27 Jun 2016"
+#define LAMMPS_VERSION "28 Jun 2016"