diff --git a/doc/src/Tools.txt b/doc/src/Tools.txt
index eb7b6d81b8..90f598890f 100644
--- a/doc/src/Tools.txt
+++ b/doc/src/Tools.txt
@@ -76,9 +76,10 @@ Post-processing tools :h3
 "pymol_asphere"_#pymol,
 "python"_#pythontools,
 "reax"_#reax_tool,
+"replica"_#replica,
 "smd"_#smd,
 "spin"_#spin,
-"xmgrace"_#xmgrace :tb(c=6,ea=c,a=l)
+"xmgrace"_#xmgrace :tb(c=6,ea=c,a=l) 
 
 Miscellaneous tools :h3
 
@@ -485,6 +486,21 @@ README for more info on Pizza.py and how to use these scripts.
 
 :line
 
+replica tool :h4,link(replica)
+
+The tools/replica directory contains the reorder_remd_traj python script which
+can be used to reorder the replica trajectories (resulting from the use of the 
+temper command) according to temperature. This will produce discontinuous
+trajectories with all frames at the same temperature in each trajectory.
+Additional options can be used to calculate the canonical configurational
+log-weight for each frame at each temperature using the pymbar package. See
+the README.md file for further details. Try out the peptide example provided.
+
+This tool was written by (and is maintained by) Tanmoy Sanyal, 
+while at the Shell lab at UC Santa Barbara. (tanmoy dot 7989 at gmail.com) 
+
+:line
+
 reax tool :h4,link(reax_tool)
 
 The reax sub-directory contains stand-alone codes that can
@@ -549,3 +565,4 @@ simulation.
 See the README file for details.
 
 These files were provided by Vikas Varshney (vv0210 at gmail.com)
+
diff --git a/doc/src/temper.txt b/doc/src/temper.txt
index edd578fbc9..6a61dfa6dd 100644
--- a/doc/src/temper.txt
+++ b/doc/src/temper.txt
@@ -110,7 +110,13 @@ the information from the log.lammps file.  E.g. you could produce one
 dump file with snapshots at 300K (from all replicas), another with
 snapshots at 310K, etc.  Note that these new dump files will not
 contain "continuous trajectories" for individual atoms, because two
-successive snapshots (in time) may be from different replicas.
+successive snapshots (in time) may be from different replicas. The 
+reorder_remd_traj python script can do the reordering for you 
+(and additionally also calculated configurational log-weights of 
+trajectory snapshots in the canonical ensemble). The script can be found
+in the tools/replica directory while instructions on how to use it is
+available in doc/Tools (in brief) and as a README file in tools/replica
+(in detail).
 
 The last argument {index} in the temper command is optional and is
 used when restarting a tempering run from a set of restart files (one
diff --git a/doc/utils/sphinx-config/false_positives.txt b/doc/utils/sphinx-config/false_positives.txt
index 3ce7c4d975..4c7dc756cc 100644
--- a/doc/utils/sphinx-config/false_positives.txt
+++ b/doc/utils/sphinx-config/false_positives.txt
@@ -2238,6 +2238,7 @@ Py
 pydir
 pylammps
 PyLammps
+pymbar
 pymodule
 pymol
 pypar
@@ -2324,6 +2325,7 @@ reinit
 relink
 relTol
 remappings
+remd
 Ren
 Rendon
 reneighbor
@@ -2455,6 +2457,7 @@ Sandia
 sandybrown
 Sanitizer
 sanitizers
+Sanyal
 sc
 scafacos
 SCAFACOS
@@ -2688,6 +2691,8 @@ Tajkhorshid
 Tamaskovics
 Tanaka
 tanh
+tanmoy
+Tanmoy
 Tartakovsky
 taskset
 taubi
diff --git a/tools/README b/tools/README
index 54f8d86898..b20e82c53e 100644
--- a/tools/README
+++ b/tools/README
@@ -38,6 +38,7 @@ polybond               Python tool for programmable polymer bonding
 pymol_asphere	       convert LAMMPS output of ellipsoids to PyMol format
 python		       Python scripts for post-processing LAMMPS output
 reax	       	       Tools for analyzing output of ReaxFF simulations
+replica        tool to reorder LAMMPS replica trajectories according to temperature
 smd                    convert Smooth Mach Dynamics triangles to VTK
 spin                   perform a cubic polynomial interpolation of a GNEB MEP
 vim		       add-ons to VIM editor for editing LAMMPS input scripts
diff --git a/tools/replica/README.md b/tools/replica/README.md
new file mode 100644
index 0000000000..9205d962b2
--- /dev/null
+++ b/tools/replica/README.md
@@ -0,0 +1,86 @@
+## reorder_remd_traj
+
+LAMMPS Replica Exchange Molecular Dynamics (REMD) trajectories (implemented using the temper command) are arranged by replica, i.e., each trajectory is a continuous replica that records all the ups and downs in temperature. However, often the requirement is  that trajectories be continuous in temperature. This requires the LAMMPS REMD trajectories to be re-ordered, which LAMMPS does not do automatically. (see the discussion [here](https://lammps.sandia.gov/threads/msg60440.html)). The reorderLAMMPSREMD tool does exactly this in parallel (using MPI)
+
+(Protein folding trajectories in [Sanyal, Mittal and Shell, JPC, 2019, 151(4), 044111](https://aip.scitation.org/doi/abs/10.1063/1.5108761) were ordered in temperature space using this tool)
+
+#### Author
+
+Tanmoy Sanyal, Shell lab, UC Santa Barbara
+
+(currently at UC San Francisco)
+
+email: tanmoy dot 7989 at gmail.com
+
+#### Features
+
+- reorder LAMMPS REMD trajectories by temperature keeping only desired frames.
+  Note: this only handles LAMMPS format trajectories (i.e., lammpstrj format)
+  Trajectories can be gzipped or bz2-compressed. The trajectories are assumed to
+  be named as \<prefix>\.%d.lammpstrj[.gz or .bz2]
+
+- (optionally) calculate configurational weights for each frame at each
+  temperature if potential energies are supplied (only implemented for the canonical (NVT) ensemble)
+
+#### Dependencies
+
+[`mpi4py`](https://mpi4py.readthedocs.io/en/stable/)  
+[`pymbar`](https://pymbar.readthedocs.io/en/master/) (for getting configurational weights)  
+[`tqdm`](https://github.com/tqdm/tqdm) (for printing pretty progress bars)  
+[`StringIO`](https://docs.python.org/2/library/stringio.html) (or [`io`](https://docs.python.org/3/library/io.html) if in Python 3.x)
+
+#### Example
+
+###### REMD Simulation specs 
+Suppose you ran a REMD simulation for the peptide example using the CHARMM forcefield (see lammps/examples/peptide) in Lammps with the following settings:
+
+- number of replicas = 16
+- temperatures used (in K): 200 209 219 230 241 252 264 276 289 303 317 332 348 365 382 400 (i.e., exponentially distributed in the range 270-400 K)
+- timestep = 2 fs
+- total number of timesteps simulated using temper = 2000 (i.e. 4 ps)
+- swap frequency = temperatures swapped after every this many steps = `ns` = 10 (i.e. 20 fs)
+- write frequency = trajectory frame written to disk after this many steps (using the dump command) = `nw` = 20 (i.e. 40 fs)
+
+###### LAMMPS output
+So, when the dust settles,
+
+- You'll have 16 replica trajectories. For this tool to work, each replica traj must be named: `<prefix>.<n>.lammpstrj[.gz or .bz2]`, where,
+  - `prefix` = some common prefix for all your trajectories and (say it is called "peptide")` 
+  - `n` = replica number (0-15 in this case). Note: trajectories **must be in default LAMMPS format **(so stuff like dcd won't work)
+
+- You will also have a master LAMMPS log file (`logfn`) that contains the swap history of all the replicas
+  (for more details see [here](https://lammps.sandia.gov/doc/temper.html). Assume that this is called `log.peptide`
+
+- Further you must have a txt file that numpy can read which stores all the temperature values (say this is called `temps.txt`)
+
+######  Your desired output
+- The total number of timesteps you want consider as production (i.e. after equilbration)  = 1000 (i.e. last 2 ps)
+
+- Reordered trajectories at temperatures 200 K, 276 K, 400 K
+
+- Configurational log-weight calculation (using [`pymbar`](https://github.com/choderalab/pymbar)). Here, this is limited to the canonical (NVT) ensemble **and without biasing restraints** in your simulation. To do this you'd need to have a file (say called `ene.dat`) that stores a 2D  (K X N) array of total potential energies, where,
+
+  - K = total number of replicas = 16, and N = total number of frames in each replica trajectory (= 1000 / 20 = 50 in this case) 
+
+  - `ene[k,n]` = energy from n-th frame of k-th replica.
+
+###### Using the tool (description of the workflow)
+Assume you have 16 processors at your disposal. When you run the following:
+
+```bash
+mpirun -np 16 python reorder_remd_traj.py peptide -logfn log.peptide -tfn temps.txt -ns 10 -nw 20 -np 1000 -ot 200 276 400 -logw -e ene.peptide -od ./output
+```
+
+1. First the temperature swap history file (`log.peptide` in this case) is read. This is done on one processor since it is usually fast.
+2. Then the (compressed or otherwise) LAMMPS replica trajectories are read in parallel. So if you have less processors than replicas at this stage, it'll be slower.
+3. Then using the frame ordering generated in (1), trajectory frames read in (2) are re-ordered and written to disk in parallel. Each processor writes one trajectory. So, If you request reordered trajectories for less temperatures (3 in this case) than the total number of temperatures (16), then 16-3 = 13 processors will be retired.
+4. If you have further requested configurational log-weight calculation, then they will be done on a single processor since pymbar is pretty fast.
+5. Finally you will have 3 LAMMPS trajectories of the form ``peptide.<temp>.lammpstrj.gz`` each with 1000 / 20 = 50 frames,  where `<temp>` = 200, 276, 400. If you request reordering at a temperature like say 280 K which is not present in the supplied temp schedule (as written in `temps.txt`), the closest temperature (276 K) will be chosen.
+
+For more details, use the help menu generated by the tool by using:
+python reorder_remd_traj.py -h
+
+###### Caveats
+- This tool crawls through the replica trajectories and creates index files that are hidden. These are called .byteind_`<replicanum>'.gz files. You may delete these if you want, but subsequent replica reads will be slow in that case.
+
+- When writing trajectories to disk, the trajectories are first written to a buffer in memory, and then finally dumped all-at-once to the disk. While this makes the tool very fast, it can cause out-of-memory errors for very large trajectories. A useful feature might be to write to the buffer in batches and emptying to disk when some (predefined) max-buffer-size is exceeded.
diff --git a/tools/replica/example/data.peptide b/tools/replica/example/data.peptide
new file mode 120000
index 0000000000..f523fc92c1
--- /dev/null
+++ b/tools/replica/example/data.peptide
@@ -0,0 +1 @@
+../../../examples/peptide/data.peptide
\ No newline at end of file
diff --git a/tools/replica/example/in.peptide b/tools/replica/example/in.peptide
new file mode 100644
index 0000000000..5d321e34c3
--- /dev/null
+++ b/tools/replica/example/in.peptide
@@ -0,0 +1,50 @@
+# Solvated 5-mer peptide
+
+#---------------------------------- 
+# Taken as is from examples/peptide 
+
+units		    real
+atom_style	    full
+boundary        p p p 
+
+pair_style	    lj/charmm/coul/long 8.0 10.0 10.0
+bond_style      harmonic
+angle_style     charmm
+dihedral_style  charmm
+improper_style  harmonic
+kspace_style	pppm 0.0001
+
+read_data	    data.peptide
+
+neighbor	    2.0 bin
+neigh_modify	delay 5
+
+timestep	    2.0
+#----------------------------------
+
+
+# temperature schedule for REMD
+variable        idx world 0     1     2     3     4     5     6     7     8     9     10    11    12    13    14    15
+variable        t   world 200.0 209.0 219.0 230.0 241.0 252.0 264.0 276.0 289.0 303.0 317.0 332.0 348.0 365.0 382.0 400.0
+
+# thermostat
+fix             thermostat all langevin $t $t 1000 772530
+
+# log-file output before minimization
+thermo_style	custom step temp ke pe
+thermo		    20
+
+# minimization
+minimize        1e-4 0.0 1000 1000
+
+# change logfile output after minimization
+thermo_style	custom step temp pe
+thermo		    20
+
+# trajectory style
+dump            myDump all atom 20 peptide.${idx}.lammpstrj.gz
+dump_modify     myDump sort id scale no
+
+# run REMD (for realistic results run for 100000000 steps with 10000 frequency) 
+reset_timestep  0
+temper          2000 10 $t thermostat 3847 5382
diff --git a/tools/replica/example/parse_ene.py b/tools/replica/example/parse_ene.py
new file mode 100644
index 0000000000..00cee08d68
--- /dev/null
+++ b/tools/replica/example/parse_ene.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+
+import os, sys, numpy as np
+
+tempfn = os.path.abspath(sys.argv[1])
+logfnprefix = os.path.abspath(sys.argv[2])
+
+temps = np.loadtxt(tempfn)
+ntemps = len(temps)
+u_kn = []
+start_token = 'Step Temp PotEng'
+end_token = 'Loop time'
+
+for i in range(ntemps):
+    logfn = '%s.%d' % (logfnprefix, i)
+    with open(logfn, 'r') as of:
+        lines = of.readlines()
+
+    # extract relevant lines from logfile
+    start = [lines.index(line) for line in lines if line.startswith(start_token)][0]
+    lines = lines[(start+1) : ]
+    stop = [lines.index(line) for line in lines if line.startswith(end_token)][0]
+    lines = lines[:stop]
+
+    # store the potential energies
+    pe = [float(line.strip().split()[-1]) for line in lines]
+    u_kn.append(pe)
+
+u_kn = np.array(u_kn)
+np.savetxt('ene.peptide', u_kn, fmt = '%5.5f')
diff --git a/tools/replica/example/run.sh b/tools/replica/example/run.sh
new file mode 100755
index 0000000000..f190bf5dfa
--- /dev/null
+++ b/tools/replica/example/run.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+
+## run REMD using LAMMPS
+mpirun -np 16 ~/mysoftware/lammps/src/lmp_mpi -partition 16x1 -in in.peptide -log log.peptide
+
+## collect all energies from different replica logs
+echo ; echo
+echo "Parsing energies from replica logs"
+python parse_ene.py temps.txt log.peptide
+
+## run the reordering tool to get reordered trajectories @ 200 K, 276 K, 400 K
+echo ; echo
+mpirun -np 16 python ../reorder_remd_traj.py peptide -logfn log.peptide -tfn temps.txt -ns 10 -nw 20 -np 1000 -ot 200 276 400 -logw -e ene.peptide -od ./output
diff --git a/tools/replica/example/runlog.05Sep19 b/tools/replica/example/runlog.05Sep19
new file mode 100644
index 0000000000..b4eeafdc9e
--- /dev/null
+++ b/tools/replica/example/runlog.05Sep19
@@ -0,0 +1,249 @@
+LAMMPS (7 Aug 2019)
+Running on 16 partitions of processors
+Setting up tempering ...
+Step T0 T1 T2 T3 T4 T5 T6 T7 T8 T9 T10 T11 T12 T13 T14 T15
+0 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+10 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+20 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+30 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+40 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+50 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+60 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+70 4 6 2 8 0 10 1 12 3 14 5 15 7 13 9 11
+80 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+90 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+100 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+110 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+120 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+130 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+140 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+150 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+160 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+170 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+180 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+190 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+200 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+210 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+220 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+230 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+240 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+250 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+260 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+270 4 6 2 8 0 10 1 12 3 14 5 15 7 13 9 11
+280 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+290 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+300 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+310 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+320 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+330 4 6 2 8 0 10 1 12 3 14 5 15 7 13 9 11
+340 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+350 4 6 2 8 0 10 1 12 3 14 5 15 7 13 9 11
+360 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+370 4 6 2 8 0 10 1 12 3 14 5 15 7 13 9 11
+380 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+390 4 6 2 8 0 10 1 12 3 14 5 15 7 13 9 11
+400 5 7 3 9 1 11 0 13 2 15 4 14 6 12 8 10
+410 4 6 2 8 0 10 1 12 3 14 5 15 7 13 9 11
+420 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+430 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+440 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+450 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+460 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+470 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+480 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+490 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+500 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+510 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+520 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+530 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+540 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+550 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+560 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+570 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+580 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+590 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+600 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+610 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+620 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+630 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+640 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+650 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+660 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+670 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+680 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+690 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+700 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+710 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+720 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+730 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+740 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+750 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+760 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+770 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+780 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+790 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+800 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+810 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+820 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+830 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+840 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+850 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+860 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+870 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+880 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+890 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+900 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+910 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+920 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+930 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+940 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+950 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+960 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+970 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+980 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+990 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1000 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1010 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1020 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1030 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+1040 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+1050 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+1060 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+1070 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+1080 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+1090 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+1100 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+1110 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+1120 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1130 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1140 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1150 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1160 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1170 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+1180 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+1190 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+1200 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1210 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1220 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1230 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+1240 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1250 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1260 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1270 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1280 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1290 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1300 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1310 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1320 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1330 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1340 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1350 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1360 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1370 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1380 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1390 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1400 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1410 7 5 9 3 11 1 13 0 15 2 14 4 12 6 10 8
+1420 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1430 7 5 9 3 11 1 13 0 15 2 14 4 12 6 10 8
+1440 8 6 10 4 12 2 14 0 15 1 13 3 11 5 9 7
+1450 7 5 9 3 11 1 13 0 15 2 14 4 12 6 10 8
+1460 8 6 10 4 12 2 14 0 15 1 13 3 11 5 9 7
+1470 7 5 9 3 11 1 13 0 15 2 14 4 12 6 10 8
+1480 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1490 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1500 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1510 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1520 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1530 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1540 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1550 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1560 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1570 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1580 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1590 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1600 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1610 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1620 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1630 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1640 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1650 7 5 9 3 11 1 13 0 15 2 14 4 12 6 10 8
+1660 8 6 10 4 12 2 14 0 15 1 13 3 11 5 9 7
+1670 7 5 9 3 11 1 13 0 15 2 14 4 12 6 10 8
+1680 6 4 8 2 10 0 12 1 14 3 15 5 13 7 11 9
+1690 5 3 7 1 9 0 11 2 13 4 15 6 14 8 12 10
+1700 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1710 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1720 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1730 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1740 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1750 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1760 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1770 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1780 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1790 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1800 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1810 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1820 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1830 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1840 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1850 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1860 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1870 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1880 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1890 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1900 4 2 6 0 8 1 10 3 12 5 14 7 15 9 13 11
+1910 3 1 5 0 7 2 9 4 11 6 13 8 15 10 14 12
+1920 2 0 4 1 6 3 8 5 10 7 12 9 14 11 15 13
+1930 1 0 3 2 5 4 7 6 9 8 11 10 13 12 15 14
+1940 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
+1950 0 2 1 4 3 6 5 8 7 10 9 12 11 14 13 15
+1960 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+1970 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+1980 3 5 1 7 0 9 2 11 4 13 6 15 8 14 10 12
+1990 2 4 0 6 1 8 3 10 5 12 7 14 9 15 11 13
+2000 1 3 0 5 2 7 4 9 6 11 8 13 10 15 12 14
+
+
+Parsing energies from replica logs
+
+
+Getting frames from all replicas at temperature:
+200.00 K
+209.00 K
+219.00 K
+230.00 K
+241.00 K
+252.00 K
+264.00 K
+276.00 K
+289.00 K
+303.00 K
+317.00 K
+332.00 K
+348.00 K
+365.00 K
+382.00 K
+400.00 K
+
+Releasing 13 excess procs
+Writing buffer to file
+
+Running pymbar...
+K (total states) = 16, total samples = 800
+N_k = 
+[50 50 50 50 50 50 50 50 50 50 50 50 50 50 50 50]
+There are 16 states with samples.
+Initializing free energies to zero.
+Initial dimensionless free energies with method zeros
+f_k = 
+[0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
+Final dimensionless free energies
+f_k = 
+[    0.           889.75988663  1792.61111609  2695.06980154
+  3515.14636632  4263.62894529  5009.01862104  5689.59180324
+  6363.10021193  7023.7847743   7626.11234062  8215.09228661
+  8787.37318432  9340.44739436  9844.29510404 10331.10090589]
+MBAR initialization complete.
+
+Extracting log-weights...
diff --git a/tools/replica/example/temps.txt b/tools/replica/example/temps.txt
new file mode 100644
index 0000000000..5931ff06c8
--- /dev/null
+++ b/tools/replica/example/temps.txt
@@ -0,0 +1 @@
+200.0 209.0 219.0 230.0 241.0 252.0 264.0 276.0 289.0 303.0 317.0 332.0 348.0 365.0 382.0 400.0
diff --git a/tools/replica/reorder_remd_traj.py b/tools/replica/reorder_remd_traj.py
new file mode 100644
index 0000000000..5f4f316b14
--- /dev/null
+++ b/tools/replica/reorder_remd_traj.py
@@ -0,0 +1,573 @@
+#!/usr/bin/env python
+
+"""
+LAMMPS Replica Exchange Molecular Dynamics (REMD) trajectories are arranged by
+replica, i.e., each trajectory is a continuous replica that records all the
+ups and downs in temperature. However, often the requirement is trajectories
+that are continuous in temperature, which is achieved by this tool.
+
+Author:
+Tanmoy Sanyal, Shell lab, Chemical Engineering, UC Santa Barbara
+Email: tanmoy dot 7989 at gmail dot com
+
+Usage
+-----
+To get detailed information about the arguments, flags, etc use:
+python reorder_remd_traj.py -h or
+python reorder_remd_traj.py --help
+
+Features of this script
+-----------------------
+a) reorder LAMMPS REMD trajectories by temperature keeping only desired frames.
+Note: this only handles LAMMPS format trajectories (i.e. .lammpstrj format)
+Trajectories can be gzipped or bz2-compressed. The trajectories are assumed to
+be named as <prefix>.%d.lammpstrj[.gz or .bz2]
+
+b) (optionally) calculate configurational weights for each frame at each
+temperature if potential energies are supplied. But this if for the canonical
+(NVT) ensemble only.
+
+Dependencies
+------------
+mpi4py
+pymbar (for getting configurational weights)
+tqdm (for printing pretty progress bars)
+StringIO (or io if in Python 3.x)
+
+"""
+
+
+
+import os, numpy as np, argparse, time, pickle
+from scipy.special import logsumexp
+from mpi4py import MPI
+
+from tqdm import tqdm
+import gzip, bz2
+try:
+    # python-2
+    from StringIO import StringIO as IOBuffer
+except ImportError:
+    # python-3
+    from io import BytesIO as IOBuffer
+
+
+
+#### INITIALISE MPI ####
+# (note that all output on screen will be printed only on the ROOT proc)
+ROOT = 0
+comm = MPI.COMM_WORLD
+me = comm.rank # my proc id
+nproc = comm.size
+
+
+#### HELPER FUNCTIONS ####
+def _get_nearest_temp(temps, query_temp):
+    """
+    Helper function to get the nearest temp in a list
+    from a given query_temp
+
+    :param temps: list of temps.
+
+    :param query_temp: query temp
+
+    Returns:
+    idx: index of nearest temp in the list
+
+    out_temp: nearest temp from the list
+    """
+
+    if isinstance(temps, list): temps = np.array(temps)
+    return temps[np.argmin(np.abs(temps-query_temp))]
+
+
+def readwrite(trajfn, mode):
+    """
+    Helper function for input/output LAMMPS traj files.
+    Trajectories may be plain text, .gz or .bz2 compressed.
+
+    :param trajfn: name of LAMMPS traj
+
+    :param mode: "r" ("w") and "rb" ("wb") depending on read or write
+
+    Returns: file pointer
+    """
+
+    if trajfn.endswith(".gz"):
+        of = gzip.open(trajfn, mode)
+        #return gzip.GzipFile(trajfn, mode)
+    elif trajfn.endswith(".bz2"):
+        of = bz2.open(trajfn, mode)
+        #return bz2.BZ2File(trajfn, mode)
+    else:
+        of = open(trajfn, mode)
+    return of
+
+
+def get_replica_frames(logfn, temps, nswap, writefreq):
+    """
+    Get a list of frames from each replica that is
+    at a particular temp. Do this for all temps.
+
+    :param logfn: master LAMMPS log file that contains the temp
+                  swap history of all replicas
+
+    :param temps: list of all temps used in the REMD simulation.
+
+    :param nswap: swap frequency of the REMD simulation
+
+    :param writefreq: traj dump frequency in LAMMPS
+
+    Returns: master_frametuple_dict:
+             dict containing a tuple (replica #, frame #) for each temp.
+    """
+
+    n_rep = len(temps)
+    swap_history = np.loadtxt(logfn, skiprows = 3)
+    master_frametuple_dict = dict( (n, []) for n in range(n_rep) )
+
+    # walk through the replicas
+    print("Getting frames from all replicas at temperature:")
+    for n in range(n_rep):
+        print("%3.2f K" % temps[n])
+        rep_inds = [np.where(x[1:] == n)[0][0] for x in swap_history]
+
+        # case-1: when frames are dumped faster than temp. swaps
+        if writefreq <= nswap:
+            for ii, i in enumerate(rep_inds[:-1]):
+                start = int(ii * nswap / writefreq)
+                stop = int( (ii+1) * nswap / writefreq)
+                [master_frametuple_dict[n].append( (i,x) ) \
+                                        for x in range(start, stop)]
+
+        # case-2: when temps. are swapped faster than dumping frames
+        else:
+            nskip = int(writefreq / nswap)
+            [master_frametuple_dict[n].append( (i,ii) ) \
+            for ii, i in enumerate(rep_inds[0::nskip])]
+
+    return master_frametuple_dict
+
+
+def get_byte_index(rep_inds, byteindfns, intrajfns):
+    """
+    Get byte indices from (un-ordered) trajectories.
+
+    :param rep_inds: indices of replicas to process on this proc
+
+    :param byteindsfns: list of filenames that will contain the byte indices
+
+    :param intrajfns: list of (unordered) input traj filenames
+    """
+    for n in rep_inds:
+        # check if the byte indices for this traj has aleady been computed
+        if os.path.isfile(byteindfns[n]): continue
+
+        # extract bytes
+        fobj = readwrite(intrajfns[n], "rb")
+        byteinds = [ [0,0] ]
+
+        # place file pointer at first line
+        nframe = 0
+        first_line = fobj.readline()
+        cur_pos = fobj.tell()
+
+        # status printed only for replica read on root proc
+        # this assumes that each proc takes roughly the same time
+        if me == ROOT:
+            pb = tqdm(desc = "Reading replicas", leave = True,
+                  position = ROOT + 2*me,
+                  unit = "B/replica", unit_scale = True,
+                  unit_divisor = 1024)
+
+        # start crawling through the bytes
+        while True:
+            next_line = fobj.readline()
+            if len(next_line) == 0: break
+            # this will only work with lammpstrj traj format.
+            # this condition essentially checks periodic recurrences
+            # of the token TIMESTEP. Each time it is found,
+            # we have crawled through a frame (snapshot)
+            if next_line == first_line:
+                nframe += 1
+                byteinds.append( [nframe, cur_pos] )
+                if me == ROOT: pb.update()
+            cur_pos = fobj.tell()
+            if me == ROOT: pb.update(0)
+        if me == ROOT: pb.close()
+
+        # take care of the EOF
+        cur_pos = fobj.tell()
+        byteinds.append( [nframe+1, cur_pos] ) # dummy index for the EOF
+
+        # write to file
+        np.savetxt(byteindfns[n], np.array(byteinds), fmt = "%d")
+
+        # close the trajfile object
+        fobj.close()
+
+        return
+
+
+def write_reordered_traj(temp_inds, byte_inds, outtemps, temps,
+                         frametuple_dict, nprod, writefreq,
+                         outtrajfns, infobjs):
+    """
+    Reorders trajectories by temp. and writes them to disk
+
+    :param temp_inds: list index of temps (in the list of all temps) for which
+                      reordered trajs will be produced on this proc.
+
+    :param byte_inds: dict containing the (previously stored) byte indices
+                      for each replica file (key = replica number)
+
+    :param outtemps: list of all temps for which to produce reordered trajs.
+
+    :param temps: list of all temps used in the REMD simulation.
+
+    :param outtrajfns: list of filenames for output (ordered) trajs.
+
+    :param frametuple_dict: dict containing a tuple (replica #, frame #)
+                            for each temp.
+
+    :param nprod: number of production timesteps.
+                  Last (nprod / writefreq) frames
+                  from the end will be written to disk.
+
+    :param writefreq: traj dump frequency in LAMMPS
+
+    :param infobjs: list of file pointers to input (unordered) trajs.
+    """
+
+    nframes = int(nprod / writefreq)
+
+    for n in temp_inds:
+        # open string-buffer and file
+        buf = IOBuffer()
+        of = readwrite(outtrajfns[n], "wb")
+
+        # get frames
+        abs_temp_ind = np.argmin( abs(temps - outtemps[n]) )
+        frametuple = frametuple_dict[abs_temp_ind][-nframes:]
+
+        # write frames to buffer
+        if me == ROOT:
+            pb = tqdm(frametuple,
+                  desc = ("Buffering trajectories for writing"),
+                  leave = True, position = ROOT + 2*me,
+                  unit = 'frame/replica', unit_scale = True)
+
+            iterable = pb
+        else:
+            iterable = frametuple
+
+        for i, (rep, frame) in enumerate(iterable):
+            infobj = infobjs[rep]
+            start_ptr = int(byte_inds[rep][frame,1])
+            stop_ptr = int(byte_inds[rep][frame+1,1])
+            byte_len = stop_ptr - start_ptr
+            infobj.seek(start_ptr)
+            buf.write(infobj.read(byte_len))
+        if me == ROOT: pb.close()
+
+        # write buffer to disk
+        if me == ROOT: print("Writing buffer to file")
+        of.write(buf.getvalue())
+        of.close()
+        buf.close()
+
+    for i in infobjs: i.close()
+
+    return
+
+
+def get_canonical_logw(enefn, frametuple_dict, temps, nprod, writefreq,
+                       kB):
+    """
+    Gets configurational log-weights (logw) for each frame and at each temp.
+    from the REMD simulation. ONLY WRITTEN FOR THE CANONICAL (NVT) ensemble.
+
+    This weights can be used to calculate the
+    ensemble averaged value of any simulation observable X at a given temp. T :
+    <X> (T) = \sum_{k=1, ntemps} \sum_{n=1, nframes} w[idx][k,n] X[k,n]
+    where nframes is the number of frames to use from each *reordered* traj
+
+    :param enefn: ascii file (readable by numpy.loadtxt) containing an array
+                  u[r,n] of *total* potential energy for the n-th frame for
+                  the r-th replica.
+
+    :param frametuple_dict: dict containing a tuple (replica #, frame #)
+                            for each temp.
+
+    :param temps: array of temps. used in the REMD simulation
+
+    :param nprod: number of production timesteps. Last (nprod / writefreq)
+                  frames from the end will be written to disk.
+
+    :param writefreq: traj dump frequency in LAMMPS
+
+    :param kB : Boltzmann constant to set the energy scale.
+                Default is in kcal/mol
+
+    Returns: logw: dict, logw[l][k,n] gives the log weights from the
+                   n-th frame of the k-th temp. *ordered* trajectory
+                   to reweight to the l-th temp.
+
+    """
+
+    try:
+        import pymbar
+    except ImportError:
+        print("""
+              Configurational log-weight calculation requires pymbar.
+              Here are some options to install it:
+              conda install -c omnia pymbar
+              pip install --user pymbar
+              sudo pip install pymbar
+
+              To install the dev. version directly from github, use:
+              pip install pip install git+https://github.com/choderalab/pymbar.git
+              """)
+
+    u_rn = np.loadtxt(enefn)
+    ntemps = u_rn.shape[0] # number of temps.
+    nframes = int(nprod / writefreq) # number of frames at each temp.
+
+    # reorder the temps
+    u_kn = np.zeros([ntemps, nframes], float)
+    for k in range(ntemps):
+        frame_tuple = frametuple_dict[k][-nframes:]
+        for i, (rep, frame) in enumerate(frame_tuple):
+            u_kn[k, i] = u_rn[rep, frame]
+
+    # prep input for pymbar
+    #1) array of frames at each temp.
+    nframes_k = nframes * np.ones(ntemps, np.uint8)
+
+    #2) inverse temps. for chosen energy scale
+    beta_k = 1.0 / (kB * temps)
+
+    #3) get reduced energies (*ONLY FOR THE CANONICAL ENSEMBLE*)
+    u_kln = np.zeros([ntemps, ntemps, nframes], float)
+    for k in range(ntemps):
+        u_kln[k] = np.outer(beta_k, u_kn[k])
+
+    # run pymbar and extract the free energies
+    print("\nRunning pymbar...")
+    mbar = pymbar.mbar.MBAR(u_kln, nframes_k, verbose = True)
+    f_k = mbar.f_k # (1 x k array)
+
+    # calculate the log-weights
+    print("\nExtracting log-weights...")
+    log_nframes = np.log(nframes)
+    logw = dict( (k, np.zeros([ntemps, nframes], float)) for k in range(ntemps) )
+    # get log-weights to reweight to this temp.
+    for k in range(ntemps):
+        for n in range(nframes):
+            num = -beta_k[k] * u_kn[k,n]
+            denom = f_k - beta_k[k] * u_kn[k,n]
+            for l in range(ntemps):
+                logw[l][k,n] = num - logsumexp(denom) - log_nframes
+
+    return logw
+
+
+
+#### MAIN WORKFLOW ####
+if __name__ == "__main__":
+    # accept user inputs
+    parser = argparse.ArgumentParser(description = __doc__,
+             formatter_class = argparse.RawDescriptionHelpFormatter)
+
+    parser.add_argument("prefix",
+                        help = "Prefix of REMD LAMMPS trajectories.\
+                        Supply full path. Trajectories assumed to be named as \
+                        <prefix>.%%d.lammpstrj. \
+                        Can be in compressed (.gz or .bz2) format. \
+                        This is a required argument")
+
+    parser.add_argument("-logfn", "--logfn", default = "log.lammps",
+                        help = "LAMMPS log file that contains swap history \
+                        of temperatures among replicas. \
+                        Default = 'lammps.log'")
+
+    parser.add_argument("-tfn", "--tempfn", default = "temps.txt",
+                        help = "ascii file (readable by numpy.loadtxt) with \
+                        the temperatures used in the REMD simulation.")
+
+    parser.add_argument("-ns", "--nswap", type = int,
+                        help = "Swap frequency used in LAMMPS temper command")
+
+    parser.add_argument("-nw", "--nwrite", type = int, default = 1,
+                        help = "Trajectory writing frequency used \
+                        in LAMMPS dump command")
+
+    parser.add_argument("-np", "--nprod", type = int, default = 0,
+                        help = "Number of timesteps to save in the reordered\
+                        trajectories.\
+                        This should be in units of the LAMMPS timestep")
+
+    parser.add_argument("-logw", "--logw", action = 'store_true',
+                        help = "Supplying this flag \
+                        calculates *canonical* (NVT ensemble) log weights")
+
+    parser.add_argument("-e", "--enefn",
+                        help = "File that has n_replica x n_frames array\
+                        of total potential energies")
+
+    parser.add_argument("-kB", "--boltzmann_const",
+                        type = float, default = 0.001987,
+                        help = "Boltzmann constant in appropriate units. \
+                        Default is kcal/mol")
+
+    parser.add_argument("-ot", "--out_temps", nargs = '+', type = np.float64,
+                        help = "Reorder trajectories at these temperatures.\n \
+                        Default is all temperatures used in the simulation")
+
+    parser.add_argument("-od", "--outdir", default = ".",
+                        help = "All output will be saved to this directory")
+
+    # parse inputs
+    args = parser.parse_args()
+    traj_prefix = os.path.abspath(args.prefix)
+    logfn = os.path.abspath(args.logfn)
+    tempfn = os.path.abspath(args.tempfn)
+
+    nswap = args.nswap
+    writefreq = args.nwrite
+    nprod = args.nprod
+
+    enefn = args.enefn
+    if not enefn is None: enefn = os.path.abspath(enefn)
+    get_logw = args.logw
+    kB = args.boltzmann_const
+
+    out_temps = args.out_temps
+    outdir = os.path.abspath(args.outdir)
+    if not os.path.isdir(outdir):
+        if me == ROOT: os.mkdir(outdir)
+
+    # check that all input files are present (only on the ROOT proc)
+    if me == ROOT:
+        if not os.path.isfile(tempfn):
+            raise IOError("Temperature file %s not found." % tempfn)
+        elif not os.path.isfile(logfn):
+            raise IOError("LAMMPS log file %s not found." % logfn)
+        elif get_logw and not os.path.isfile(enefn):
+            raise IOError("Canonical log-weight calculation requested but\
+                          energy file %s not found" % enefn)
+
+    # get (unordered) trajectories
+    temps = np.loadtxt(tempfn)
+    ntemps = len(temps)
+    intrajfns = ["%s.%d.lammpstrj" % (traj_prefix, k) for k in range(ntemps)]
+    # check if the trajs. (or their zipped versions are present)
+    for i in range(ntemps):
+        this_intrajfn = intrajfns[i]
+        x = this_intrajfn + ".gz"
+        if os.path.isfile(this_intrajfn): continue
+        elif os.path.isfile(this_intrajfn + ".gz"):
+            intrajfns[i] = this_intrajfn + ".gz"
+        elif os.path.isfile(this_intrajfn + ".bz2"):
+            intrajfns[i] = this_intrajfn + ".bz2"
+        else:
+            if me == ROOT:
+                raise IOError("Trajectory for replica # %d missing" % i)
+
+    # set output filenames
+    outprefix = os.path.join(outdir, traj_prefix.split('/')[-1])
+    outtrajfns = ["%s.%3.2f.lammpstrj.gz" % \
+                 (outprefix, _get_nearest_temp(temps, t)) \
+                  for t in out_temps]
+    byteindfns = [os.path.join(outdir, ".byteind_%d.gz" % k) \
+                  for k in range(ntemps)]
+    frametuplefn = outprefix + '.frametuple.pickle'
+    if get_logw:
+        logwfn = outprefix + ".logw.pickle"
+
+
+    # get a list of all frames at a particular temp visited by each replica
+    # this is fast so run only on ROOT proc.
+    master_frametuple_dict = {}
+    if me == ROOT:
+        master_frametuple_dict = get_replica_frames(logfn = logfn,
+                                                    temps = temps,
+                                                    nswap = nswap,
+                                                    writefreq = writefreq)
+        # save to a pickle from the ROOT proc
+        with open(frametuplefn, 'wb') as of:
+            pickle.dump(master_frametuple_dict, of)
+
+    # broadcast to all procs
+    master_frametuple_dict = comm.bcast(master_frametuple_dict, root = ROOT)
+
+    # define a chunk of replicas  to process on each proc
+    CHUNKSIZE_1 = int(ntemps/nproc)
+    if me < nproc - 1:
+        my_rep_inds = range( (me*CHUNKSIZE_1), (me+1)*CHUNKSIZE_1 )
+    else:
+        my_rep_inds = range( (me*CHUNKSIZE_1), ntemps )
+
+    # get byte indices from replica (un-ordered) trajs. in parallel
+    get_byte_index(rep_inds = my_rep_inds,
+                   byteindfns = byteindfns,
+                   intrajfns = intrajfns)
+
+    # block until all procs have finished
+    comm.barrier()
+
+    # open all replica files for reading
+    infobjs = [readwrite(i, "rb") for i in intrajfns]
+
+    # open all byteindex files
+    byte_inds = dict( (i, np.loadtxt(fn)) for i, fn in enumerate(byteindfns) )
+
+    # define a chunk of output trajs. to process for each proc.
+    # # of reordered trajs. to write may be less than the total # of replicas
+    # which is usually equal to the requested nproc. If that is indeed the case,
+    # retire excess procs
+    n_out_temps = len(out_temps)
+    CHUNKSIZE_2 = int(n_out_temps / nproc)
+    if CHUNKSIZE_2 == 0:
+        nproc_active = n_out_temps
+        CHUNKSIZE_2 = 1
+        if me == ROOT:
+            print("\nReleasing %d excess procs" % (nproc - nproc_active))
+    else:
+        nproc_active = nproc
+    if me < nproc_active-1:
+        my_temp_inds = range( (me*CHUNKSIZE_2), (me+1)*CHUNKSIZE_1 )
+    else:
+        my_temp_inds = range( (me*CHUNKSIZE_2), n_out_temps)
+
+    # retire the excess procs
+    # dont' forget to close any open file objects
+    if me >= nproc_active:
+        for fobj in infobjs: fobj.close()
+        exit()
+
+    # write reordered trajectories to disk from active procs in parallel
+    write_reordered_traj(temp_inds = my_temp_inds,
+                         byte_inds = byte_inds,
+                         outtemps = out_temps, temps = temps,
+                         frametuple_dict = master_frametuple_dict,
+                         nprod = nprod, writefreq = writefreq,
+                         outtrajfns = outtrajfns,
+                         infobjs = infobjs)
+
+    # calculate canonical log-weights if requested
+    # usually this is very fast so retire all but the ROOT proc
+    if not get_logw: exit()
+    if not me == ROOT: exit()
+
+    logw = get_canonical_logw(enefn = enefn, temps = temps,
+                              frametuple_dict = master_frametuple_dict,
+                              nprod = nprod, writefreq = writefreq,
+                              kB = kB)
+
+
+    # save the logweights to a pickle
+    with open(logwfn, 'wb') as of:
+        pickle.dump(logw, of)
+
+