Merge remote-tracking branch 'origin/master' into lammps_gjf

2019-09-17 12:08:12 -07:00
parent 971f4763e3 d357259cc4
commit eba10b10e8
51 changed files with 3356 additions and 225 deletions
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@ -17,6 +17,8 @@ if(PKG_KOKKOS)
                         ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
+                         ${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp
+                         ${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp
--- a/doc/src/Commands_bond.txt
+++ b/doc/src/Commands_bond.txt
@ -108,7 +108,7 @@ OPT.
 "class2 (ko)"_dihedral_class2.html,
 "cosine/shift/exp (o)"_dihedral_cosine_shift_exp.html,
 "fourier (io)"_dihedral_fourier.html,
-"harmonic (io)"_dihedral_harmonic.html,
+"harmonic (iko)"_dihedral_harmonic.html,
 "helix (o)"_dihedral_helix.html,
 "multi/harmonic (o)"_dihedral_multi_harmonic.html,
 "nharmonic (o)"_dihedral_nharmonic.html,
--- a/doc/src/Run_options.txt
+++ b/doc/src/Run_options.txt
@ -126,9 +126,10 @@ are intended for computational work like running LAMMPS.  By default
 Ng = 1 and Ns is not set.

 Depending on which flavor of MPI you are running, LAMMPS will look for
-one of these 3 environment variables
+one of these 4 environment variables

 SLURM_LOCALID (various MPI variants compiled with SLURM support)
+MPT_LRANK (HPE MPI)
 MV2_COMM_WORLD_LOCAL_RANK (Mvapich)
 OMPI_COMM_WORLD_LOCAL_RANK (OpenMPI) :pre

--- a/doc/src/comm_modify.txt
+++ b/doc/src/comm_modify.txt
@ -40,11 +40,12 @@ coordinates and other properties are exchanged between neighboring
 processors and stored as properties of ghost atoms.

 NOTE: These options apply to the currently defined comm style.  When
-you specify a "comm_style"_comm_style.html command, all communication
-settings are restored to their default values, including those
+you specify a "comm_style"_comm_style.html or
+"read_restart"_read_restart.html command, all communication settings
+are restored to their default or stored values, including those
 previously reset by a comm_modify command.  Thus if your input script
-specifies a comm_style command, you should use the comm_modify command
-after it.
+specifies a comm_style or read_restart command, you should use the
+comm_modify command after it.

 The {mode} keyword determines whether a single or multiple cutoff
 distances are used to determine which atoms to communicate.
--- a/doc/src/compute_bond_local.txt
+++ b/doc/src/compute_bond_local.txt
@ -15,10 +15,11 @@ compute ID group-ID bond/local value1 value2 ... keyword args ... :pre
 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 bond/local = style name of this compute command :l
 one or more values may be appended :l
-value = {dist} or {engpot} or {force} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} or {v_name} :l
+value = {dist} or {engpot} or {force} or {fx} or {fy} or {fz} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} or {v_name} :l
  {dist} = bond distance
  {engpot} = bond potential energy
  {force} = bond force :pre
+  {fx},{fy},{fz} = components of bond force
  {engvib} = bond kinetic energy of vibration
  {engrot} = bond kinetic energy of rotation
  {engtrans} = bond kinetic energy of translation
@ -38,6 +39,7 @@ keyword = {set} :l

 compute 1 all bond/local engpot
 compute 1 all bond/local dist engpot force :pre
+compute 1 all bond/local dist fx fy fz :pre
 compute 1 all angle/local dist v_distsq set dist d :pre

 [Description:]
@ -59,6 +61,9 @@ based on the current separation of the pair of atoms in the bond.
 The value {force} is the magnitude of the force acting between the
 pair of atoms in the bond.

+The values {fx}, {fy}, and {fz} are the xyz components of
+{force} between the pair of atoms in the bond.
+
 The remaining properties are all computed for motion of the two atoms
 relative to the center of mass (COM) velocity of the 2 atoms in the
 bond.
--- a/doc/src/compute_orientorder_atom.txt
+++ b/doc/src/compute_orientorder_atom.txt
@ -19,6 +19,8 @@ keyword = {cutoff} or {nnn} or {degrees} or {components}
  {cutoff} value = distance cutoff
  {nnn} value = number of nearest neighbors
  {degrees} values = nlvalues, l1, l2,...
+  {wl} value = yes or no
+  {wl/hat} value = yes or no
  {components} value = ldegree  :pre

 :ule
@ -27,7 +29,8 @@ keyword = {cutoff} or {nnn} or {degrees} or {components}

 compute 1 all orientorder/atom
 compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5
-compute 1 all orientorder/atom degrees 4 6 components 6 nnn NULL cutoff 3.0 :pre
+compute 1 all orientorder/atom wl/hat yes
+compute 1 all orientorder/atom components 6 :pre

 [Description:]

@ -48,7 +51,7 @@ neighbors of the central atom.
 The angles theta and phi are the standard spherical polar angles
 defining the direction of the bond vector {rij}.
 The second equation defines {Ql}, which is a
-rotationally invariant scalar quantity obtained by summing
+rotationally invariant non-negative amplitude obtained by summing
 over all the components of degree {l}.

 The optional keyword {cutoff} defines the distance cutoff
@ -63,7 +66,7 @@ specified distance cutoff are used.

 The optional keyword {degrees} defines the list of order parameters to
 be computed.  The first argument {nlvalues} is the number of order
-parameters. This is followed by that number of integers giving the
+parameters. This is followed by that number of non-negative integers giving the
 degree of each order parameter. Because {Q}2 and all odd-degree order
 parameters are zero for atoms in cubic crystals (see
 "Steinhardt"_#Steinhardt), the default order parameters are {Q}4,
@ -71,7 +74,20 @@ parameters are zero for atoms in cubic crystals (see
 = sqrt(7/3)/8 = 0.19094....  The numerical values of all order
 parameters up to {Q}12 for a range of commonly encountered
 high-symmetry structures are given in Table I of "Mickel et
-al."_#Mickel.
+al."_#Mickel, and these can be reproduced with this compute
+
+The optional keyword {wl} will output the third-order invariants {Wl} 
+(see Eq. 1.4 in "Steinhardt"_#Steinhardt) for the same degrees as
+for the {Ql} parameters. For the FCC crystal with {nnn} =12,
+{W}4 = -sqrt(14/143).(49/4096)/Pi^1.5 = -0.0006722136...
+
+The optional keyword {wl/hat} will output the normalized third-order 
+invariants {Wlhat} (see Eq. 2.2 in "Steinhardt"_#Steinhardt) 
+for the same degrees as for the {Ql} parameters. For the FCC crystal 
+with {nnn} =12, {W}4hat = -7/3*sqrt(2/429) = -0.159317...The numerical
+values of {Wlhat} for a range of commonly encountered high-symmetry
+structures are given in Table I of "Steinhardt"_#Steinhardt, and these
+can be reproduced with this keyword.

 The optional keyword {components} will output the components of the
 normalized complex vector {Ybar_lm} of degree {ldegree}, which must be
@ -82,7 +98,7 @@ particles, as discussed in "ten Wolde"_#tenWolde2.

 The value of {Ql} is set to zero for atoms not in the
 specified compute group, as well as for atoms that have less than
-{nnn} neighbors within the distance cutoff.
+{nnn} neighbors within the distance cutoff, unless {nnn} is NULL.

 The neighbor list needed to compute this quantity is constructed each
 time the calculation is performed (i.e. each time a snapshot of atoms
@ -108,6 +124,12 @@ This compute calculates a per-atom array with {nlvalues} columns,
 giving the {Ql} values for each atom, which are real numbers on the
 range 0 <= {Ql} <= 1.

+If the keyword {wl} is set to yes, then the {Wl} values for each
+atom will be added to the output array, which are real numbers.
+
+If the keyword {wl/hat} is set to yes, then the {Wl_hat} 
+values for each atom will be added to the output array, which are real numbers.
+ 
 If the keyword {components} is set, then the real and imaginary parts
 of each component of (normalized) {Ybar_lm} will be added to the
 output array in the following order: Re({Ybar_-m}) Im({Ybar_-m})
@ -130,7 +152,8 @@ hexorder/atom"_compute_hexorder_atom.html
 [Default:]

 The option defaults are {cutoff} = pair style cutoff, {nnn} = 12,
-{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12.
+{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12,
+{wl} = no, {wl/hat} = no, and {components} off

 :line

--- a/doc/src/dihedral_harmonic.txt
+++ b/doc/src/dihedral_harmonic.txt
@ -8,6 +8,7 @@

 dihedral_style harmonic command :h3
 dihedral_style harmonic/intel command :h3
+dihedral_style harmonic/kk command :h3
 dihedral_style harmonic/omp command :h3

 [Syntax:]
--- a/doc/src/min_style.txt
+++ b/doc/src/min_style.txt
@ -11,7 +11,7 @@ min_style command :h3

 min_style style :pre

-style = {cg} or {hftn} or {sd} or {quickmin} or {fire} or {spin} :ul
+style = {cg} or {cg/kk} or {hftn} or {sd} or {quickmin} or {fire} or {spin} :ul

 [Examples:]

@ -74,9 +74,34 @@ defined via the "timestep"_timestep.html command.  Often they will
 converge more quickly if you use a timestep about 10x larger than you
 would normally use for dynamics simulations.

-NOTE: The {quickmin}, {fire}, and {hftn} styles do not yet support the
-use of the "fix box/relax"_fix_box_relax.html command or minimizations
-involving the electron radius in "eFF"_pair_eff.html models.
+NOTE: The {quickmin}, {fire}, {hftn}, and {cg/kk} styles do not yet 
+support the use of the "fix box/relax"_fix_box_relax.html command or 
+minimizations involving the electron radius in "eFF"_pair_eff.html 
+models. 
+
+:line
+
+Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
+functionally the same as the corresponding style without the suffix.
+They have been optimized to run faster, depending on your available
+hardware, as discussed on the "Speed packages"_Speed_packages.html doc
+page.  The accelerated styles take the same arguments and should
+produce the same results, except for round-off and precision issues.
+
+These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
+USER-OMP and OPT packages, respectively.  They are only enabled if
+LAMMPS was built with those packages.  See the "Build
+package"_Build_package.html doc page for more info.
+
+You can specify the accelerated styles explicitly in your input script
+by including their suffix, or you can use the "-suffix command-line
+switch"_Run_options.html when you invoke LAMMPS, or you can use the
+"suffix"_suffix.html command in your input script.
+
+See the "Speed packages"_Speed_packages.html doc page for more
+instructions on how to use the accelerated styles effectively.
+
+:line

 [Restrictions:] none

--- a/doc/src/minimize.txt
+++ b/doc/src/minimize.txt
@ -7,6 +7,7 @@
 :line

 minimize command :h3
+minimize/kk command :h3

 [Syntax:]

@ -256,6 +257,28 @@ info in the Restrictions section below.

 :line

+Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
+functionally the same as the corresponding style without the suffix.
+They have been optimized to run faster, depending on your available
+hardware, as discussed on the "Speed packages"_Speed_packages.html doc
+page.  The accelerated styles take the same arguments and should
+produce the same results, except for round-off and precision issues.
+
+These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
+USER-OMP and OPT packages, respectively.  They are only enabled if
+LAMMPS was built with those packages.  See the "Build
+package"_Build_package.html doc page for more info.
+
+You can specify the accelerated styles explicitly in your input script
+by including their suffix, or you can use the "-suffix command-line
+switch"_Run_options.html when you invoke LAMMPS, or you can use the
+"suffix"_suffix.html command in your input script.
+
+See the "Speed packages"_Speed_packages.html doc page for more
+instructions on how to use the accelerated styles effectively.
+
+:line
+
 [Restrictions:]

 Features that are not yet implemented are listed here, in case someone
--- a/examples/steinhardt/in.bcc
+++ b/examples/steinhardt/in.bcc
@ -0,0 +1,91 @@
+# Steinhardt-Nelson bond orientational order parameters for BCC
+
+variable  	rcut equal 3.0
+
+boundary	p p p
+
+atom_style	atomic
+neighbor	0.3 bin
+neigh_modify	delay 5
+
+# create geometry
+
+lattice		bcc 1.0
+region		box block 0 3 0 3 0 3
+create_box	1 box
+create_atoms	1 box
+
+mass		1 1.0
+
+# LJ potentials
+
+pair_style	lj/cut ${rcut}
+pair_coeff	* * 1.0 1.0 ${rcut}
+
+# 14 neighbors, perfect crystal 
+
+compute 	qlwlhat all orientorder/atom degrees 6 2 4 6 8 10 12 nnn 14 wl/hat yes
+compute 	avql all reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] c_qlwlhat[6] 
+compute 	avwlhat all reduce ave c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10] c_qlwlhat[11] c_qlwlhat[12] 
+
+thermo_style    custom step temp epair etotal c_avql[*] c_avwlhat[*]
+
+run		0
+
+# check Q_l values
+
+print " "
+print "*******************************************************************"
+print " "
+print "Comparison with reference values of Q_l "
+print "   [Table I in W. Mickel, S. C. Kapfer," 
+print "   G. E. Schroeder-Turkand, K. Mecke, "
+print "   J. Chem. Phys. 138, 044501 (2013).]"
+print " "
+
+variable 	q2ref equal 0.0
+variable 	q4ref equal 0.036
+variable 	q6ref equal 0.511
+variable 	q8ref equal 0.429
+variable 	q10ref equal 0.195
+variable 	q12ref equal 0.405
+
+variable 	q2 equal c_avql[1]
+variable 	q4 equal c_avql[2]
+variable 	q6 equal c_avql[3]
+variable 	q8 equal c_avql[4]
+variable 	q10 equal c_avql[5]
+variable 	q12 equal c_avql[6]
+
+print "q2     = $(v_q2:%10.6f) delta = $(v_q2-v_q2ref:%10.4f)"
+print "q4     = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)"
+print "q6     = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)"
+print "q8     = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)"
+print "q10    = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)"
+print "q12    = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)"
+
+# check W_l_hat values
+
+print " "
+print "Comparison with reference values of W_l_hat"
+print "   [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, "
+print "   Phys. Rev. B 28, 784 (1983).]"
+print " "
+
+variable 	w4hatref equal 0.159317
+variable 	w6hatref equal 0.013161
+variable 	w8hatref equal -0.058455
+variable 	w10hatref equal -0.090130
+
+variable 	w4hat equal c_avwlhat[2]
+variable 	w6hat equal c_avwlhat[3]
+variable 	w8hat equal c_avwlhat[4]
+variable 	w10hat equal c_avwlhat[5]
+
+print "w4hat  = $(v_w4hat:%10.6f) delta = $(v_w4hat-v_w4hatref:%10.6f)"
+print "w6hat  = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)"
+print "w8hat  = $(v_w8hat:%10.6f) delta = $(v_w8hat-v_w8hatref:%10.6f)"
+print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)"
+print " "
+print "*******************************************************************"
+print " "
--- a/examples/steinhardt/in.fcc
+++ b/examples/steinhardt/in.fcc
@ -0,0 +1,88 @@
+# Steinhardt-Nelson bond orientational order parameters for FCC
+
+variable  	rcut equal 3.0
+
+boundary	p p p
+
+atom_style	atomic
+neighbor	0.3 bin
+neigh_modify	delay 5
+
+# create geometry
+
+lattice		fcc 1.0
+region		box block 0 3 0 3 0 3
+create_box	1 box
+create_atoms	1 box
+
+mass		1 1.0
+
+# LJ potentials
+
+pair_style	lj/cut ${rcut}
+pair_coeff	* * 1.0 1.0 ${rcut}
+
+# 12 neighbors, perfect crystal
+
+compute 	qlwlhat all orientorder/atom wl/hat yes
+compute 	avql all reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] 
+compute 	avwlhat all reduce ave c_qlwlhat[6] c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10]
+
+thermo_style    custom step temp epair etotal c_avql[*] c_avwlhat[*]
+
+run		0
+
+# check Q_l values
+
+print " "
+print "*******************************************************************"
+print " "
+print "Comparison with reference values of Q_l "
+print "   [Table I in W. Mickel, S. C. Kapfer," 
+print "   G. E. Schroeder-Turkand, K. Mecke, "
+print "   J. Chem. Phys. 138, 044501 (2013).]"
+print " "
+
+variable 	q4ref equal 0.190
+variable 	q6ref equal 0.575
+variable 	q8ref equal 0.404
+variable 	q10ref equal 0.013
+variable 	q12ref equal 0.600
+
+variable 	q4 equal c_avql[1]
+variable 	q6 equal c_avql[2]
+variable 	q8 equal c_avql[3]
+variable 	q10 equal c_avql[4]
+variable 	q12 equal c_avql[5]
+
+print "q4     = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)"
+print "q6     = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)"
+print "q8     = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)"
+print "q10    = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)"
+print "q12    = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)"
+
+# check W_l_hat values
+
+print " "
+print "Comparison with reference values of W_l_hat"
+print "   [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, "
+print "   Phys. Rev. B 28, 784 (1983).]"
+print " "
+
+variable 	w4hatref equal -0.159316
+variable 	w6hatref equal -0.013161
+variable 	w8hatref equal 0.058454
+variable 	w10hatref equal -0.090128
+
+variable 	w4hat equal c_avwlhat[1]
+variable 	w6hat equal c_avwlhat[2]
+variable 	w8hat equal c_avwlhat[3]
+variable 	w10hat equal c_avwlhat[4]
+
+print "w4hat  = $(v_w4hat:%10.6f) delta = $(v_w4hat-v_w4hatref:%10.6f)"
+print "w6hat  = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)"
+print "w8hat  = $(v_w8hat:%10.6f) delta = $(v_w8hat-v_w8hatref:%10.6f)"
+print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)"
+print " "
+print "*******************************************************************"
+print " "
--- a/examples/steinhardt/in.icos
+++ b/examples/steinhardt/in.icos
@ -0,0 +1,106 @@
+# Steinhardt-Nelson bond orientational order parameters for icosahedral cluster
+# W_6_hat is sensitive to icosohedral order
+
+variable        rcut equal 1.2 # a bit bigger than LJ Rmin 
+variable        rcutred equal 0.75 # a bit bigger than 1/sqrt(2)
+
+# create a perfect fcc crystallite
+
+atom_style	atomic
+boundary	s s s
+lattice		fcc 1.0 # neighbors at LJ Rmin
+region		box block 0 2 0 2 0 2
+create_box	1 box
+create_atoms	1 box
+mass		1 1.0
+
+region 		centralatom sphere 1 1 1 0.0 side in
+group 		centralatom region centralatom
+
+region 		mysphere sphere 1 1 1 ${rcutred} side out
+delete_atoms 	region mysphere
+
+# LJ potential
+
+pair_style	lj/cut 100.0
+pair_coeff	* * 1.0 1.0 100.0
+
+# define output for central atom
+
+compute 	qlwlhat all orientorder/atom wl/hat yes cutoff ${rcut} nnn NULL
+compute 	avql centralatom reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] 
+compute 	avwlhat centralatom reduce ave c_qlwlhat[6] c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10] 
+variable	q6 equal c_avql[2]
+variable	w6hat equal c_avwlhat[2]
+
+compute 	mype all pe/atom
+compute 	centralatompe centralatom reduce ave c_mype
+
+# gently equilibrate the crystallite
+
+velocity     	all create 0.001 482748
+fix	     	1 all nve
+neighbor	0.3 bin
+neigh_modify	every 1 check no delay 0
+timestep	0.003
+thermo_style    custom step temp epair etotal c_centralatompe v_q6 v_w6hat
+thermo		10
+
+run		10
+
+# quench to icosehedral cluster
+
+minimize      	1.0e-10 1.0e-6 100 1000
+
+# check Q_l values
+
+print " "
+print "*******************************************************************"
+print " "
+print "Comparison with reference values of Q_l "
+print "   [Table I in W. Mickel, S. C. Kapfer," 
+print "   G. E. Schroeder-Turkand, K. Mecke, "
+print "   J. Chem. Phys. 138, 044501 (2013).]"
+print " "
+
+variable 	q4ref equal 0.0
+variable 	q6ref equal 0.663
+variable 	q8ref equal 0.0
+variable 	q10ref equal 0.363
+variable 	q12ref equal 0.585
+
+variable 	q4 equal c_avql[1]
+variable 	q6 equal c_avql[2]
+variable 	q8 equal c_avql[3]
+variable 	q10 equal c_avql[4]
+variable 	q12 equal c_avql[5]
+
+print "q4     = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)"
+print "q6     = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)"
+print "q8     = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)"
+print "q10    = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)"
+print "q12    = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)"
+
+# check W_l_hat values
+
+print " "
+print "Comparison with reference values of W_l_hat"
+print "   [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, "
+print "   Phys. Rev. B 28, 784 (1983).]"
+print " "
+
+variable 	w6hatref equal -0.169754
+variable 	w10hatref equal -0.093967
+
+variable 	w4hat equal c_avwlhat[1]
+variable 	w6hat equal c_avwlhat[2]
+variable 	w8hat equal c_avwlhat[3]
+variable 	w10hat equal c_avwlhat[4]
+variable 	w12hat equal c_avwlhat[5]
+
+print "w6hat  = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)"
+print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)"
+print " "
+print "*******************************************************************"
+print " "
+
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@ -91,6 +91,8 @@ action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp
 action dihedral_charmm_kokkos.h dihedral_charmm.h
 action dihedral_class2_kokkos.cpp dihedral_class2.cpp 
 action dihedral_class2_kokkos.h dihedral_class2.h
+action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp 
+action dihedral_harmonic_kokkos.h dihedral_harmonic.h
 action dihedral_opls_kokkos.cpp dihedral_opls.cpp
 action dihedral_opls_kokkos.h dihedral_opls.h
 action domain_kokkos.cpp
@ -107,6 +109,8 @@ action fix_gravity_kokkos.cpp
 action fix_gravity_kokkos.h
 action fix_langevin_kokkos.cpp
 action fix_langevin_kokkos.h
+action fix_minimize_kokkos.cpp
+action fix_minimize_kokkos.h
 action fix_neigh_history_kokkos.cpp
 action fix_neigh_history_kokkos.h
 action fix_nh_kokkos.cpp
@ -179,6 +183,12 @@ action nbin_ssa_kokkos.cpp nbin_ssa.cpp
 action nbin_ssa_kokkos.h nbin_ssa.h
 action math_special_kokkos.cpp
 action math_special_kokkos.h
+action min_cg_kokkos.cpp
+action min_cg_kokkos.h
+action min_kokkos.cpp
+action min_kokkos.h
+action min_linesearch_kokkos.cpp
+action min_linesearch_kokkos.h
 action pair_buck_coul_cut_kokkos.cpp
 action pair_buck_coul_cut_kokkos.h
 action pair_buck_coul_long_kokkos.cpp pair_buck_coul_long.cpp
--- a/src/KOKKOS/dihedral_harmonic_kokkos.cpp
+++ b/src/KOKKOS/dihedral_harmonic_kokkos.cpp
@ -0,0 +1,537 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "dihedral_harmonic_kokkos.h"
+#include <cmath>
+#include <cstdlib>
+#include "atom_kokkos.h"
+#include "comm.h"
+#include "neighbor_kokkos.h"
+#include "domain.h"
+#include "force.h"
+#include "update.h"
+#include "memory_kokkos.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+#define TOLERANCE 0.05
+#define SMALL     0.001
+#define SMALLER   0.00001
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+DihedralHarmonicKokkos<DeviceType>::DihedralHarmonicKokkos(LAMMPS *lmp) : DihedralHarmonic(lmp)
+{
+  atomKK = (AtomKokkos *) atom;
+  neighborKK = (NeighborKokkos *) neighbor;
+  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
+  datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
+  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
+
+  k_warning_flag = DAT::tdual_int_scalar("Dihedral:warning_flag");
+  d_warning_flag = k_warning_flag.view<DeviceType>();
+  h_warning_flag = k_warning_flag.h_view;
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+DihedralHarmonicKokkos<DeviceType>::~DihedralHarmonicKokkos()
+{
+  if (!copymode) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
+{
+  eflag = eflag_in;
+  vflag = vflag_in;
+
+  ev_init(eflag,vflag,0);
+
+  // reallocate per-atom arrays if necessary
+
+  if (eflag_atom) {
+    memoryKK->destroy_kokkos(k_eatom,eatom);
+    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
+    d_eatom = k_eatom.view<DeviceType>();
+  }
+  if (vflag_atom) {
+    memoryKK->destroy_kokkos(k_vatom,vatom);
+    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom");
+    d_vatom = k_vatom.view<DeviceType>();
+  }
+
+  k_k.template sync<DeviceType>();
+  k_cos_shift.template sync<DeviceType>();
+  k_sin_shift.template sync<DeviceType>();
+  k_sign.template sync<DeviceType>();
+  k_multiplicity.template sync<DeviceType>();
+
+  x = atomKK->k_x.view<DeviceType>();
+  f = atomKK->k_f.view<DeviceType>();
+  neighborKK->k_dihedrallist.template sync<DeviceType>();
+  dihedrallist = neighborKK->k_dihedrallist.view<DeviceType>();
+  int ndihedrallist = neighborKK->ndihedrallist;
+  nlocal = atom->nlocal;
+  newton_bond = force->newton_bond;
+
+  h_warning_flag() = 0;
+  k_warning_flag.template modify<LMPHostType>();
+  k_warning_flag.template sync<DeviceType>();
+
+  copymode = 1;
+
+  // loop over neighbors of my atoms
+
+  EV_FLOAT ev;
+
+  if (evflag) {
+    if (newton_bond) {
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<1,1> >(0,ndihedrallist),*this,ev);
+    } else {
+      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<0,1> >(0,ndihedrallist),*this,ev);
+    }
+  } else {
+    if (newton_bond) {
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<1,0> >(0,ndihedrallist),*this);
+    } else {
+      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<0,0> >(0,ndihedrallist),*this);
+    }
+  }
+
+  // error check
+
+  k_warning_flag.template modify<DeviceType>();
+  k_warning_flag.template sync<LMPHostType>();
+  if (h_warning_flag())
+    error->warning(FLERR,"Dihedral problem",0);
+
+  if (eflag_global) energy += ev.evdwl;
+  if (vflag_global) {
+    virial[0] += ev.v[0];
+    virial[1] += ev.v[1];
+    virial[2] += ev.v[2];
+    virial[3] += ev.v[3];
+    virial[4] += ev.v[4];
+    virial[5] += ev.v[5];
+  }
+
+  if (eflag_atom) {
+    k_eatom.template modify<DeviceType>();
+    k_eatom.template sync<LMPHostType>();
+  }
+
+  if (vflag_atom) {
+    k_vatom.template modify<DeviceType>();
+    k_vatom.template sync<LMPHostType>();
+  }
+
+  copymode = 0;
+}
+
+template<class DeviceType>
+template<int NEWTON_BOND, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void DihedralHarmonicKokkos<DeviceType>::operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
+
+  // The f array is atomic
+  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
+
+  const int i1 = dihedrallist(n,0);
+  const int i2 = dihedrallist(n,1);
+  const int i3 = dihedrallist(n,2);
+  const int i4 = dihedrallist(n,3);
+  const int type = dihedrallist(n,4);
+
+  // 1st bond
+
+  const F_FLOAT vb1x = x(i1,0) - x(i2,0);
+  const F_FLOAT vb1y = x(i1,1) - x(i2,1);
+  const F_FLOAT vb1z = x(i1,2) - x(i2,2);
+
+  // 2nd bond
+
+  const F_FLOAT vb2x = x(i3,0) - x(i2,0);
+  const F_FLOAT vb2y = x(i3,1) - x(i2,1);
+  const F_FLOAT vb2z = x(i3,2) - x(i2,2);
+
+  const F_FLOAT vb2xm = -vb2x;
+  const F_FLOAT vb2ym = -vb2y;
+  const F_FLOAT vb2zm = -vb2z;
+
+  // 3rd bond
+
+  const F_FLOAT vb3x = x(i4,0) - x(i3,0);
+  const F_FLOAT vb3y = x(i4,1) - x(i3,1);
+  const F_FLOAT vb3z = x(i4,2) - x(i3,2);
+
+  // c,s calculation
+
+  const F_FLOAT ax = vb1y*vb2zm - vb1z*vb2ym;
+  const F_FLOAT ay = vb1z*vb2xm - vb1x*vb2zm;
+  const F_FLOAT az = vb1x*vb2ym - vb1y*vb2xm;
+  const F_FLOAT bx = vb3y*vb2zm - vb3z*vb2ym;
+  const F_FLOAT by = vb3z*vb2xm - vb3x*vb2zm;
+  const F_FLOAT bz = vb3x*vb2ym - vb3y*vb2xm;
+
+  const F_FLOAT rasq = ax*ax + ay*ay + az*az;
+  const F_FLOAT rbsq = bx*bx + by*by + bz*bz;
+  const F_FLOAT rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
+  const F_FLOAT rg = sqrt(rgsq);
+
+  F_FLOAT rginv,ra2inv,rb2inv;
+  rginv = ra2inv = rb2inv = 0.0;
+  if (rg > 0) rginv = 1.0/rg;
+  if (rasq > 0) ra2inv = 1.0/rasq;
+  if (rbsq > 0) rb2inv = 1.0/rbsq;
+  const F_FLOAT rabinv = sqrt(ra2inv*rb2inv);
+
+  F_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv;
+  const F_FLOAT s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
+
+  // error check
+
+  if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag())
+    Kokkos::atomic_fetch_add(&d_warning_flag(),1);
+
+  if (c > 1.0) c = 1.0;
+  if (c < -1.0) c = -1.0;
+
+  const int m = d_multiplicity[type];
+  F_FLOAT p = 1.0;
+  F_FLOAT ddf1,df1;
+  ddf1 = df1 = 0.0;
+
+  for (int i = 0; i < m; i++) {
+    ddf1 = p*c - df1*s;
+    df1 = p*s + df1*c;
+    p = ddf1;
+  }
+
+  p = p*d_cos_shift[type] + df1*d_sin_shift[type];
+  df1 = df1*d_cos_shift[type] - ddf1*d_sin_shift[type];
+  df1 *= -m;
+  p += 1.0;
+
+  if (m == 0) {
+    p = 1.0 + d_cos_shift[type];
+    df1 = 0.0;
+  }
+
+  E_FLOAT edihedral = 0.0;
+  if (eflag) edihedral = d_k[type] * p;
+
+  const F_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
+  const F_FLOAT hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
+  const F_FLOAT fga = fg*ra2inv*rginv;
+  const F_FLOAT hgb = hg*rb2inv*rginv;
+  const F_FLOAT gaa = -ra2inv*rg;
+  const F_FLOAT gbb = rb2inv*rg;
+
+  const F_FLOAT dtfx = gaa*ax;
+  const F_FLOAT dtfy = gaa*ay;
+  const F_FLOAT dtfz = gaa*az;
+  const F_FLOAT dtgx = fga*ax - hgb*bx;
+  const F_FLOAT dtgy = fga*ay - hgb*by;
+  const F_FLOAT dtgz = fga*az - hgb*bz;
+  const F_FLOAT dthx = gbb*bx;
+  const F_FLOAT dthy = gbb*by;
+  const F_FLOAT dthz = gbb*bz;
+
+  const F_FLOAT df = -d_k[type] * df1;
+
+  const F_FLOAT sx2  = df*dtgx;;
+  const F_FLOAT sy2  = df*dtgy;;
+  const F_FLOAT sz2  = df*dtgz;;
+
+  F_FLOAT f1[3],f2[3],f3[3],f4[3];
+  f1[0] = df*dtfx;
+  f1[1] = df*dtfy;
+  f1[2] = df*dtfz;
+
+  f2[0] = sx2 - f1[0];
+  f2[1] = sy2 - f1[1];
+  f2[2] = sz2 - f1[2];
+
+  f4[0] = df*dthx;
+  f4[1] = df*dthy;
+  f4[2] = df*dthz;
+
+  f3[0] = -sx2 - f4[0];
+  f3[1] = -sy2 - f4[1];
+  f3[2] = -sz2 - f4[2];
+
+  // apply force to each of 4 atoms
+
+  if (NEWTON_BOND || i1 < nlocal) {
+    a_f(i1,0) += f1[0];
+    a_f(i1,1) += f1[1];
+    a_f(i1,2) += f1[2];
+  }
+
+  if (NEWTON_BOND || i2 < nlocal) {
+    a_f(i2,0) += f2[0];
+    a_f(i2,1) += f2[1];
+    a_f(i2,2) += f2[2];
+  }
+
+  if (NEWTON_BOND || i3 < nlocal) {
+    a_f(i3,0) += f3[0];
+    a_f(i3,1) += f3[1];
+    a_f(i3,2) += f3[2];
+  }
+
+  if (NEWTON_BOND || i4 < nlocal) {
+    a_f(i4,0) += f4[0];
+    a_f(i4,1) += f4[1];
+    a_f(i4,2) += f4[2];
+  }
+
+  if (EVFLAG)
+    ev_tally(ev,i1,i2,i3,i4,edihedral,f1,f3,f4,
+             vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z);
+}
+
+template<class DeviceType>
+template<int NEWTON_BOND, int EVFLAG>
+KOKKOS_INLINE_FUNCTION
+void DihedralHarmonicKokkos<DeviceType>::operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int &n) const {
+  EV_FLOAT ev;
+  this->template operator()<NEWTON_BOND,EVFLAG>(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>(), n, ev);
+}
+
+/* ---------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralHarmonicKokkos<DeviceType>::allocate()
+{
+  DihedralHarmonic::allocate();
+
+  int n = atom->ndihedraltypes;
+  k_k = DAT::tdual_ffloat_1d("DihedralHarmonic::k",n+1);
+  k_cos_shift = DAT::tdual_ffloat_1d("DihedralHarmonic::cos_shift",n+1);
+  k_sin_shift = DAT::tdual_ffloat_1d("DihedralHarmonic::sin_shift",n+1);
+  k_sign = DAT::tdual_int_1d("DihedralHarmonic::sign",n+1);
+  k_multiplicity = DAT::tdual_int_1d("DihedralHarmonic::multiplicity",n+1);
+
+  d_k = k_k.template view<DeviceType>();
+  d_cos_shift = k_cos_shift.template view<DeviceType>();
+  d_sin_shift = k_sin_shift.template view<DeviceType>();
+  d_sign = k_sign.template view<DeviceType>();
+  d_multiplicity = k_multiplicity.template view<DeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   set coeffs for one type
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralHarmonicKokkos<DeviceType>::coeff(int narg, char **arg)
+{
+  DihedralHarmonic::coeff(narg, arg);
+
+  int n = atom->ndihedraltypes;
+  for (int i = 1; i <= n; i++) {
+    k_k.h_view[i] = k[i];
+    k_cos_shift.h_view[i] = cos_shift[i];
+    k_sin_shift.h_view[i] = sin_shift[i];
+    k_sign.h_view[i] = sign[i];
+    k_multiplicity.h_view[i] = multiplicity[i];
+  }
+
+  k_k.template modify<LMPHostType>();
+  k_cos_shift.template modify<LMPHostType>();
+  k_sin_shift.template modify<LMPHostType>();
+  k_sign.template modify<LMPHostType>();
+  k_multiplicity.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   proc 0 reads coeffs from restart file, bcasts them
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+void DihedralHarmonicKokkos<DeviceType>::read_restart(FILE *fp)
+{
+  DihedralHarmonic::read_restart(fp);
+
+  int n = atom->ndihedraltypes;
+  for (int i = 1; i <= n; i++) {
+    k_k.h_view[i] = k[i];
+    k_cos_shift.h_view[i] = cos_shift[i];
+    k_sin_shift.h_view[i] = sin_shift[i];
+    k_sign.h_view[i] = sign[i];
+    k_multiplicity.h_view[i] = multiplicity[i];
+  }
+
+  k_k.template modify<LMPHostType>();
+  k_cos_shift.template modify<LMPHostType>();
+  k_sin_shift.template modify<LMPHostType>();
+  k_sign.template modify<LMPHostType>();
+  k_multiplicity.template modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   tally energy and virial into global and per-atom accumulators
+   virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
+          = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
+          = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
+------------------------------------------------------------------------- */
+
+template<class DeviceType>
+//template<int NEWTON_BOND>
+KOKKOS_INLINE_FUNCTION
+void DihedralHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, const int i2, const int i3, const int i4,
+                        F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4,
+                        const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z,
+                        const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
+                        const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const
+{
+  E_FLOAT edihedralquarter;
+  F_FLOAT v[6];
+
+  // The eatom and vatom arrays are atomic
+  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>();
+  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>();
+
+  if (eflag_either) {
+    if (eflag_global) {
+      if (newton_bond) ev.evdwl += edihedral;
+      else {
+        edihedralquarter = 0.25*edihedral;
+        if (i1 < nlocal) ev.evdwl += edihedralquarter;
+        if (i2 < nlocal) ev.evdwl += edihedralquarter;
+        if (i3 < nlocal) ev.evdwl += edihedralquarter;
+        if (i4 < nlocal) ev.evdwl += edihedralquarter;
+      }
+    }
+    if (eflag_atom) {
+      edihedralquarter = 0.25*edihedral;
+      if (newton_bond || i1 < nlocal) v_eatom[i1] += edihedralquarter;
+      if (newton_bond || i2 < nlocal) v_eatom[i2] += edihedralquarter;
+      if (newton_bond || i3 < nlocal) v_eatom[i3] += edihedralquarter;
+      if (newton_bond || i4 < nlocal) v_eatom[i4] += edihedralquarter;
+    }
+  }
+
+  if (vflag_either) {
+    v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
+    v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
+    v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
+    v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
+    v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
+    v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
+
+    if (vflag_global) {
+      if (newton_bond) {
+        ev.v[0] += v[0];
+        ev.v[1] += v[1];
+        ev.v[2] += v[2];
+        ev.v[3] += v[3];
+        ev.v[4] += v[4];
+        ev.v[5] += v[5];
+      } else {
+        if (i1 < nlocal) {
+          ev.v[0] += 0.25*v[0];
+          ev.v[1] += 0.25*v[1];
+          ev.v[2] += 0.25*v[2];
+          ev.v[3] += 0.25*v[3];
+          ev.v[4] += 0.25*v[4];
+          ev.v[5] += 0.25*v[5];
+        }
+        if (i2 < nlocal) {
+          ev.v[0] += 0.25*v[0];
+          ev.v[1] += 0.25*v[1];
+          ev.v[2] += 0.25*v[2];
+          ev.v[3] += 0.25*v[3];
+          ev.v[4] += 0.25*v[4];
+          ev.v[5] += 0.25*v[5];
+        }
+        if (i3 < nlocal) {
+          ev.v[0] += 0.25*v[0];
+          ev.v[1] += 0.25*v[1];
+          ev.v[2] += 0.25*v[2];
+          ev.v[3] += 0.25*v[3];
+          ev.v[4] += 0.25*v[4];
+          ev.v[5] += 0.25*v[5];
+        }
+        if (i4 < nlocal) {
+          ev.v[0] += 0.25*v[0];
+          ev.v[1] += 0.25*v[1];
+          ev.v[2] += 0.25*v[2];
+          ev.v[3] += 0.25*v[3];
+          ev.v[4] += 0.25*v[4];
+          ev.v[5] += 0.25*v[5];
+        }
+      }
+    }
+
+    if (vflag_atom) {
+      if (newton_bond || i1 < nlocal) {
+        v_vatom(i1,0) += 0.25*v[0];
+        v_vatom(i1,1) += 0.25*v[1];
+        v_vatom(i1,2) += 0.25*v[2];
+        v_vatom(i1,3) += 0.25*v[3];
+        v_vatom(i1,4) += 0.25*v[4];
+        v_vatom(i1,5) += 0.25*v[5];
+      }
+      if (newton_bond || i2 < nlocal) {
+        v_vatom(i2,0) += 0.25*v[0];
+        v_vatom(i2,1) += 0.25*v[1];
+        v_vatom(i2,2) += 0.25*v[2];
+        v_vatom(i2,3) += 0.25*v[3];
+        v_vatom(i2,4) += 0.25*v[4];
+        v_vatom(i2,5) += 0.25*v[5];
+      }
+      if (newton_bond || i3 < nlocal) {
+        v_vatom(i3,0) += 0.25*v[0];
+        v_vatom(i3,1) += 0.25*v[1];
+        v_vatom(i3,2) += 0.25*v[2];
+        v_vatom(i3,3) += 0.25*v[3];
+        v_vatom(i3,4) += 0.25*v[4];
+        v_vatom(i3,5) += 0.25*v[5];
+      }
+      if (newton_bond || i4 < nlocal) {
+        v_vatom(i4,0) += 0.25*v[0];
+        v_vatom(i4,1) += 0.25*v[1];
+        v_vatom(i4,2) += 0.25*v[2];
+        v_vatom(i4,3) += 0.25*v[3];
+        v_vatom(i4,4) += 0.25*v[4];
+        v_vatom(i4,5) += 0.25*v[5];
+      }
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+namespace LAMMPS_NS {
+template class DihedralHarmonicKokkos<LMPDeviceType>;
+#ifdef KOKKOS_ENABLE_CUDA
+template class DihedralHarmonicKokkos<LMPHostType>;
+#endif
+}
+
--- a/src/KOKKOS/dihedral_harmonic_kokkos.h
+++ b/src/KOKKOS/dihedral_harmonic_kokkos.h
@ -0,0 +1,104 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef DIHEDRAL_CLASS
+
+DihedralStyle(harmonic/kk,DihedralHarmonicKokkos<LMPDeviceType>)
+DihedralStyle(harmonic/kk/device,DihedralHarmonicKokkos<LMPDeviceType>)
+DihedralStyle(harmonic/kk/host,DihedralHarmonicKokkos<LMPHostType>)
+
+#else
+
+#ifndef LMP_DIHEDRAL_HARMONIC_KOKKOS_H
+#define LMP_DIHEDRAL_HARMONIC_KOKKOS_H
+
+#include "dihedral_harmonic.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+template<int NEWTON_BOND, int EVFLAG>
+struct TagDihedralHarmonicCompute{};
+
+template<class DeviceType>
+class DihedralHarmonicKokkos : public DihedralHarmonic {
+ public:
+  typedef DeviceType device_type;
+  typedef EV_FLOAT value_type;
+  typedef ArrayTypes<DeviceType> AT;
+
+  DihedralHarmonicKokkos(class LAMMPS *);
+  virtual ~DihedralHarmonicKokkos();
+  void compute(int, int);
+  void coeff(int, char **);
+  void read_restart(FILE *);
+
+  template<int NEWTON_BOND, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int&, EV_FLOAT&) const;
+
+  template<int NEWTON_BOND, int EVFLAG>
+  KOKKOS_INLINE_FUNCTION
+  void operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int&) const;
+
+  //template<int NEWTON_BOND>
+  KOKKOS_INLINE_FUNCTION
+  void ev_tally(EV_FLOAT &ev, const int i1, const int i2, const int i3, const int i4,
+                          F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4,
+                          const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z,
+                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
+                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;
+
+ protected:
+
+  class NeighborKokkos *neighborKK;
+
+  typename AT::t_x_array_randomread x;
+  typename AT::t_f_array f;
+  typename AT::t_int_2d dihedrallist;
+
+  DAT::tdual_efloat_1d k_eatom;
+  DAT::tdual_virial_array k_vatom;
+  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
+  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
+
+  int nlocal,newton_bond;
+  int eflag,vflag;
+
+  DAT::tdual_int_scalar k_warning_flag;
+  typename AT::t_int_scalar d_warning_flag;
+  HAT::t_int_scalar h_warning_flag;
+
+  DAT::tdual_ffloat_1d k_k;
+  DAT::tdual_ffloat_1d k_cos_shift;
+  DAT::tdual_ffloat_1d k_sin_shift;
+  DAT::tdual_int_1d k_sign;
+  DAT::tdual_int_1d k_multiplicity;
+
+  typename AT::t_ffloat_1d d_k;
+  typename AT::t_ffloat_1d d_cos_shift;
+  typename AT::t_ffloat_1d d_sin_shift;
+  typename AT::t_int_1d d_sign;
+  typename AT::t_int_1d d_multiplicity;
+
+  void allocate();
+};
+
+}
+
+#endif
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
--- a/src/KOKKOS/fix_minimize_kokkos.cpp
+++ b/src/KOKKOS/fix_minimize_kokkos.cpp
@ -0,0 +1,257 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "fix_minimize_kokkos.h"
+#include "atom_kokkos.h"
+#include "domain.h"
+#include "memory_kokkos.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+using namespace FixConst;
+
+/* ---------------------------------------------------------------------- */
+
+FixMinimizeKokkos::FixMinimizeKokkos(LAMMPS *lmp, int narg, char **arg) :
+  FixMinimize(lmp, narg, arg)
+{
+  atomKK = (AtomKokkos *) atom;
+}
+
+/* ---------------------------------------------------------------------- */
+
+FixMinimizeKokkos::~FixMinimizeKokkos()
+{
+  memoryKK->destroy_kokkos(k_vectors,vectors);
+  vectors = NULL;
+}
+
+/* ----------------------------------------------------------------------
+   allocate/initialize memory for a new vector with 3 elements per atom
+------------------------------------------------------------------------- */
+
+void FixMinimizeKokkos::add_vector_kokkos()
+{
+  int n = 3;
+
+  memory->grow(peratom,nvector+1,"minimize:peratom");
+  peratom[nvector] = n;
+
+  // d_vectors needs to be LayoutRight for subviews
+
+  k_vectors.sync<LMPDeviceType>();
+
+  memoryKK->grow_kokkos(k_vectors,vectors,nvector+1,atom->nmax*n,
+                      "minimize:vectors");
+  d_vectors = k_vectors.d_view;
+  h_vectors = k_vectors.h_view;
+
+  k_vectors.modify<LMPDeviceType>();
+
+  nvector++;
+}
+
+/* ----------------------------------------------------------------------
+   return a pointer to the Mth vector
+------------------------------------------------------------------------- */
+
+DAT::t_ffloat_1d FixMinimizeKokkos::request_vector_kokkos(int m)
+{
+  k_vectors.sync<LMPDeviceType>();
+
+  return Kokkos::subview(d_vectors,m,Kokkos::ALL);
+}
+
+/* ----------------------------------------------------------------------
+   reset x0 for atoms that moved across PBC via reneighboring in line search
+   x0 = 1st vector
+   must do minimum_image using original box stored at beginning of line search
+   swap & set_global_box() change to original box, then restore current box
+------------------------------------------------------------------------- */
+
+void FixMinimizeKokkos::reset_coords()
+{
+  box_swap();
+  domain->set_global_box();
+
+  int nlocal = atom->nlocal;
+
+  atomKK->sync(Device,X_MASK);
+  k_vectors.sync<LMPDeviceType>();
+
+  {
+    // local variables for lambda capture
+
+    auto triclinic = domain->triclinic;
+    auto xperiodic = domain->xperiodic;
+    auto xprd_half = domain->xprd_half;
+    auto xprd = domain->xprd;
+    auto yperiodic = domain->yperiodic;
+    auto yprd_half = domain->yprd_half;
+    auto yprd = domain->yprd;
+    auto zperiodic = domain->zperiodic;
+    auto zprd_half = domain->zprd_half;
+    auto zprd = domain->zprd;
+    auto xy = domain->xy;
+    auto xz = domain->xz;
+    auto yz = domain->yz;
+    auto l_x = atomKK->k_x.d_view;
+    auto l_x0 = Kokkos::subview(d_vectors,0,Kokkos::ALL);
+
+    Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(const int& i) {
+      const int n = i*3;
+      double dx0 = l_x(i,0) - l_x0[n];
+      double dy0 = l_x(i,1) - l_x0[n+1];
+      double dz0 = l_x(i,2) - l_x0[n+2];
+      double dx = dx0;
+      double dy = dy0;
+      double dz = dz0;
+      // domain->minimum_image(dx,dy,dz);
+      {
+        if (triclinic == 0) {
+          if (xperiodic) {
+            if (fabs(dx) > xprd_half) {
+              if (dx < 0.0) dx += xprd;
+              else dx -= xprd;
+            }
+          }
+          if (yperiodic) {
+            if (fabs(dy) > yprd_half) {
+              if (dy < 0.0) dy += yprd;
+              else dy -= yprd;
+            }
+          }
+          if (zperiodic) {
+            if (fabs(dz) > zprd_half) {
+              if (dz < 0.0) dz += zprd;
+              else dz -= zprd;
+            }
+          }
+
+        } else {
+          if (zperiodic) {
+            if (fabs(dz) > zprd_half) {
+              if (dz < 0.0) {
+                dz += zprd;
+                dy += yz;
+                dx += xz;
+              } else {
+                dz -= zprd;
+                dy -= yz;
+                dx -= xz;
+              }
+            }
+          }
+          if (yperiodic) {
+            if (fabs(dy) > yprd_half) {
+              if (dy < 0.0) {
+                dy += yprd;
+                dx += xy;
+              } else {
+                dy -= yprd;
+                dx -= xy;
+              }
+            }
+          }
+          if (xperiodic) {
+            if (fabs(dx) > xprd_half) {
+              if (dx < 0.0) dx += xprd;
+              else dx -= xprd;
+            }
+          }
+        }
+      } // end domain->minimum_image(dx,dy,dz);
+      if (dx != dx0) l_x0[n] = l_x(i,0) - dx;
+      if (dy != dy0) l_x0[n+1] = l_x(i,1) - dy;
+      if (dz != dz0) l_x0[n+2] = l_x(i,2) - dz;
+    });
+  }
+  k_vectors.modify<LMPDeviceType>();
+
+  box_swap();
+  domain->set_global_box();
+}
+
+/* ----------------------------------------------------------------------
+   allocate local atom-based arrays
+------------------------------------------------------------------------- */
+
+void FixMinimizeKokkos::grow_arrays(int nmax)
+{
+  k_vectors.sync<LMPDeviceType>();
+  memoryKK->grow_kokkos(k_vectors,vectors,nvector,3*nmax,"minimize:vector");
+  d_vectors = k_vectors.d_view;
+  h_vectors = k_vectors.h_view;
+  k_vectors.modify<LMPDeviceType>();
+}
+
+/* ----------------------------------------------------------------------
+   copy values within local atom-based arrays
+------------------------------------------------------------------------- */
+
+void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/)
+{
+  int m,iper,nper,ni,nj;
+
+  k_vectors.sync<LMPHostType>();
+
+  for (m = 0; m < nvector; m++) {
+    nper = 3;
+    ni = nper*i;
+    nj = nper*j;
+    for (iper = 0; iper < nper; iper++) h_vectors(m,nj++) = h_vectors(m,ni++);
+  }
+
+  k_vectors.modify<LMPHostType>();
+}
+
+/* ----------------------------------------------------------------------
+   pack values in local atom-based arrays for exchange with another proc
+------------------------------------------------------------------------- */
+
+int FixMinimizeKokkos::pack_exchange(int i, double *buf)
+{
+  int m,iper,nper,ni;
+
+  k_vectors.sync<LMPHostType>();
+
+  int n = 0;
+  for (m = 0; m < nvector; m++) {
+    nper = peratom[m];
+    ni = nper*i;
+    for (iper = 0; iper < nper; iper++) buf[n++] = h_vectors(m,ni++);
+  }
+  return n;
+}
+
+/* ----------------------------------------------------------------------
+   unpack values in local atom-based arrays from exchange with another proc
+------------------------------------------------------------------------- */
+
+int FixMinimizeKokkos::unpack_exchange(int nlocal, double *buf)
+{
+  int m,iper,nper,ni;
+
+  k_vectors.sync<LMPHostType>();
+
+  int n = 0;
+  for (m = 0; m < nvector; m++) {
+    nper = peratom[m];
+    ni = nper*nlocal;
+    for (iper = 0; iper < nper; iper++) h_vectors(m,ni++) = buf[n++];
+  }
+
+  k_vectors.modify<LMPHostType>();
+
+  return n;
+}
--- a/src/KOKKOS/fix_minimize_kokkos.h
+++ b/src/KOKKOS/fix_minimize_kokkos.h
@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef FIX_CLASS
+
+FixStyle(MINIMIZE/kk,FixMinimizeKokkos)
+FixStyle(MINIMIZE/kk/device,FixMinimizeKokkos)
+FixStyle(MINIMIZE/kk/host,FixMinimizeKokkos)
+
+#else
+
+#ifndef LMP_FIX_MINIMIZE_KOKKOS_H
+#define LMP_FIX_MINIMIZE_KOKKOS_H
+
+#include "fix_minimize.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class FixMinimizeKokkos : public FixMinimize {
+  friend class MinLineSearchKokkos;
+
+ public:
+  FixMinimizeKokkos(class LAMMPS *, int, char **);
+  virtual ~FixMinimizeKokkos();
+  void init() {}
+
+  void grow_arrays(int);
+  void copy_arrays(int, int, int);
+  int pack_exchange(int, double *);
+  int unpack_exchange(int, double *);
+
+  void add_vector_kokkos();
+  DAT::t_float_1d request_vector_kokkos(int);
+  void reset_coords();
+
+  DAT::tdual_float_2d k_vectors;
+  DAT::t_float_2d d_vectors;
+  HAT::t_float_2d h_vectors;
+};
+
+}
+
+#endif
+#endif
+/* ERROR/WARNING messages:
+
+*/
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@ -113,23 +113,37 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
      }
      iarg += 2;

+      int set_flag = 0;
      char *str;
      if ((str = getenv("SLURM_LOCALID"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
+        set_flag = 1;
+      }
+      if ((str = getenv("MPT_LRANK"))) {
+        int local_rank = atoi(str);
+        device = local_rank % ngpus;
+        if (device >= skip_gpu) device++;
+        set_flag = 1;
      }
      if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
+        set_flag = 1;
      }
      if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
+        set_flag = 1;
      }

+      if (ngpus > 1 && !set_flag)
+        error->all(FLERR,"Could not determine local MPI rank for multiple "
+                           "GPUs with Kokkos CUDA because MPI library not recognized");
+
    } else if (strcmp(arg[iarg],"t") == 0 ||
               strcmp(arg[iarg],"threads") == 0) {
      nthreads = atoi(arg[iarg+1]);
--- a/src/KOKKOS/kokkos.h
+++ b/src/KOKKOS/kokkos.h
@ -74,6 +74,11 @@ E: Invalid Kokkos command-line args

 Self-explanatory.  See Section 2.7 of the manual for details.

+E: Could not determine local MPI rank for multiple GPUs with Kokkos CUDA
+because MPI library not recognized
+
+The local MPI rank was not found in one of four supported environment variables.
+
 E: GPUs are requested but Kokkos has not been compiled for CUDA

 Recompile Kokkos with CUDA support to use GPUs.
--- a/src/KOKKOS/min_cg_kokkos.cpp
+++ b/src/KOKKOS/min_cg_kokkos.cpp
@ -0,0 +1,178 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#include "min_cg_kokkos.h"
+#include <mpi.h>
+#include <cmath>
+#include "update.h"
+#include "output.h"
+#include "timer.h"
+#include "atom_kokkos.h"
+#include "atom_masks.h"
+#include "fix_minimize_kokkos.h"
+
+using namespace LAMMPS_NS;
+
+// EPS_ENERGY = minimum normalization for energy tolerance
+
+#define EPS_ENERGY 1.0e-8
+
+/* ---------------------------------------------------------------------- */
+
+MinCGKokkos::MinCGKokkos(LAMMPS *lmp) : MinLineSearchKokkos(lmp)
+{
+  atomKK = (AtomKokkos *) atom;
+  kokkosable = 1;
+}
+
+/* ----------------------------------------------------------------------
+   minimization via conjugate gradient iterations
+------------------------------------------------------------------------- */
+
+int MinCGKokkos::iterate(int maxiter)
+{
+  int fail,ntimestep;
+  double beta,gg,dot[2],dotall[2];
+
+  fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
+  fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
+
+  // nlimit = max # of CG iterations before restarting
+  // set to ndoftotal unless too big
+
+  int nlimit = static_cast<int> (MIN(MAXSMALLINT,ndoftotal));
+
+  // initialize working vectors
+
+  {
+    // local variables for lambda capture
+
+    auto l_h = h;
+    auto l_g = g;
+    auto l_fvec = fvec;
+
+    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
+      l_h[i] = l_fvec[i];
+      l_g[i] = l_fvec[i];
+    });
+  }
+
+  gg = fnorm_sqr();
+
+  for (int iter = 0; iter < maxiter; iter++) {
+
+    if (timer->check_timeout(niter))
+      return TIMEOUT;
+
+    ntimestep = ++update->ntimestep;
+    niter++;
+
+    // line minimization along direction h from current atom->x
+
+    eprevious = ecurrent;
+    fail = (this->*linemin)(ecurrent,alpha_final);
+    if (fail) return fail;
+
+    // function evaluation criterion
+
+    if (neval >= update->max_eval) return MAXEVAL;
+
+    // energy tolerance criterion
+
+    if (fabs(ecurrent-eprevious) <
+        update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY))
+      return ETOL;
+
+    // force tolerance criterion
+
+    s_double2 sdot;
+    {
+      // local variables for lambda capture
+
+      auto l_g = g;
+      auto l_fvec = fvec;
+
+      Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
+        sdot.d0 += l_fvec[i]*l_fvec[i];
+        sdot.d1 += l_fvec[i]*l_g[i];
+      },sdot);
+    }
+    dot[0] = sdot.d0;
+    dot[1] = sdot.d1;
+    MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
+
+    if (dotall[0] < update->ftol*update->ftol) return FTOL;
+
+    // update new search direction h from new f = -Grad(x) and old g
+    // this is Polak-Ribieri formulation
+    // beta = dotall[0]/gg would be Fletcher-Reeves
+    // reinitialize CG every ndof iterations by setting beta = 0.0
+
+    beta = MAX(0.0,(dotall[0] - dotall[1])/gg);
+    if ((niter+1) % nlimit == 0) beta = 0.0;
+    gg = dotall[0];
+
+    {
+      // local variables for lambda capture
+
+      auto l_h = h;
+      auto l_g = g;
+      auto l_fvec = fvec;
+
+      Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
+        l_g[i] = l_fvec[i];
+        l_h[i] = l_g[i] + beta*l_h[i];
+      });
+    }
+
+    // reinitialize CG if new search direction h is not downhill
+
+    double dot_0 = 0.0;
+
+    {
+      // local variables for lambda capture
+
+      auto l_h = h;
+      auto l_g = g;
+
+      Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& dot_0) {
+        dot_0 += l_g[i]*l_h[i];
+      },dot_0);
+    }
+    dot[0] = dot_0;
+    MPI_Allreduce(dot,dotall,1,MPI_DOUBLE,MPI_SUM,world);
+
+    if (dotall[0] <= 0.0) {
+      // local variables for lambda capture
+
+      auto l_h = h;
+      auto l_g = g;
+
+      Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
+         l_h[i] = l_g[i];
+      });
+    }
+
+    // output for thermo, dump, restart files
+
+    if (output->next == ntimestep) {
+      atomKK->sync(Host,ALL_MASK);
+
+      timer->stamp();
+      output->write(ntimestep);
+      timer->stamp(Timer::OUTPUT);
+    }
+  }
+
+  return MAXITER;
+}
--- a/src/KOKKOS/min_cg_kokkos.h
+++ b/src/KOKKOS/min_cg_kokkos.h
@ -0,0 +1,38 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifdef MINIMIZE_CLASS
+
+MinimizeStyle(cg/kk,MinCGKokkos)
+MinimizeStyle(cg/kk/device,MinCGKokkos)
+MinimizeStyle(cg/kk/host,MinCGKokkos)
+
+#else
+
+#ifndef LMP_MIN_CG_KOKKOS_H
+#define LMP_MIN_CG_KOKKOS_H
+
+#include "min_linesearch_kokkos.h"
+
+namespace LAMMPS_NS {
+
+class MinCGKokkos : public MinLineSearchKokkos {
+ public:
+  MinCGKokkos(class LAMMPS *);
+  int iterate(int);
+};
+
+}
+
+#endif
+#endif
--- a/src/KOKKOS/min_kokkos.cpp
+++ b/src/KOKKOS/min_kokkos.cpp
@ -0,0 +1,631 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "min_kokkos.h"
+#include <mpi.h>
+#include <cmath>
+#include <cstring>
+#include "atom_kokkos.h"
+#include "atom_vec.h"
+#include "domain.h"
+#include "comm.h"
+#include "update.h"
+#include "modify.h"
+#include "fix_minimize_kokkos.h"
+#include "compute.h"
+#include "neighbor.h"
+#include "force.h"
+#include "pair.h"
+#include "bond.h"
+#include "angle.h"
+#include "dihedral.h"
+#include "improper.h"
+#include "kspace.h"
+#include "output.h"
+#include "thermo.h"
+#include "timer.h"
+#include "memory.h"
+#include "error.h"
+#include "kokkos.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+/* ---------------------------------------------------------------------- */
+
+MinKokkos::MinKokkos(LAMMPS *lmp) : Min(lmp)
+{
+  atomKK = (AtomKokkos *) atom;
+  fix_minimize_kk = NULL;
+}
+
+/* ---------------------------------------------------------------------- */
+
+MinKokkos::~MinKokkos()
+{
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+void MinKokkos::init()
+{
+  Min::init();
+
+  fix_minimize_kk = (FixMinimizeKokkos*) fix_minimize;
+}
+
+/* ----------------------------------------------------------------------
+   setup before run
+------------------------------------------------------------------------- */
+
+void MinKokkos::setup(int flag)
+{
+  if (comm->me == 0 && screen) {
+    fprintf(screen,"Setting up %s style minimization ...\n",
+            update->minimize_style);
+    if (flag) {
+      fprintf(screen,"  Unit style    : %s\n", update->unit_style);
+      fprintf(screen,"  Current step  : " BIGINT_FORMAT "\n",
+              update->ntimestep);
+      timer->print_timeout(screen);
+    }
+  }
+  update->setupflag = 1;
+
+  // setup extra global dof due to fixes
+  // cannot be done in init() b/c update init() is before modify init()
+
+  nextra_global = modify->min_dof();
+  if (nextra_global) {
+    fextra = new double[nextra_global];
+    if (comm->me == 0 && screen)
+      fprintf(screen,"WARNING: Energy due to %d extra global DOFs will"
+              " be included in minimizer energies\n",nextra_global);
+  }
+
+  // compute for potential energy
+
+  int id = modify->find_compute("thermo_pe");
+  if (id < 0) error->all(FLERR,"Minimization could not find thermo_pe compute");
+  pe_compute = modify->compute[id];
+
+  // style-specific setup does two tasks
+  // setup extra global dof vectors
+  // setup extra per-atom dof vectors due to requests from Pair classes
+  // cannot be done in init() b/c update init() is before modify/pair init()
+
+  setup_style();
+
+  // ndoftotal = total dof for entire minimization problem
+  // dof for atoms, extra per-atom, extra global
+
+  bigint ndofme = 3 * static_cast<bigint>(atom->nlocal);
+  for (int m = 0; m < nextra_atom; m++)
+    ndofme += extra_peratom[m]*atom->nlocal;
+  MPI_Allreduce(&ndofme,&ndoftotal,1,MPI_LMP_BIGINT,MPI_SUM,world);
+  ndoftotal += nextra_global;
+
+  // setup domain, communication and neighboring
+  // acquire ghosts
+  // build neighbor lists
+
+  atom->setup();
+  modify->setup_pre_exchange();
+  if (triclinic) domain->x2lamda(atom->nlocal);
+  domain->pbc();
+  domain->reset_box();
+  comm->setup();
+  if (neighbor->style) neighbor->setup_bins();
+  comm->exchange();
+  if (atom->sortfreq > 0) atom->sort();
+  comm->borders();
+  if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+  domain->image_check();
+  domain->box_too_small_check();
+  modify->setup_pre_neighbor();
+  neighbor->build(1);
+  modify->setup_post_neighbor();
+  neighbor->ncalls = 0;
+
+  // remove these restriction eventually
+
+  if (searchflag == 0) {
+    if (nextra_global)
+      error->all(FLERR,
+                 "Cannot use a damped dynamics min style with fix box/relax");
+    if (nextra_atom)
+      error->all(FLERR,
+                 "Cannot use a damped dynamics min style with per-atom DOF");
+  }
+
+  if (strcmp(update->minimize_style,"hftn") == 0) {
+    if (nextra_global)
+      error->all(FLERR, "Cannot use hftn min style with fix box/relax");
+    if (nextra_atom)
+      error->all(FLERR, "Cannot use hftn min style with per-atom DOF");
+  }
+
+  // atoms may have migrated in comm->exchange()
+
+  reset_vectors();
+
+  // compute all forces
+
+  force->setup();
+  ev_set(update->ntimestep);
+  force_clear();
+  modify->setup_pre_force(vflag);
+
+  if (pair_compute_flag) {
+    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
+    force->pair->compute(eflag,vflag);
+    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
+    timer->stamp(Timer::PAIR);
+  }
+  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
+
+  if (atomKK->molecular) {
+    if (force->bond) {
+      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
+      force->bond->compute(eflag,vflag);
+      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
+    }
+    if (force->angle) {
+      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
+      force->angle->compute(eflag,vflag);
+      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
+    }
+    if (force->dihedral) {
+      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
+      force->dihedral->compute(eflag,vflag);
+      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
+    }
+    if (force->improper) {
+      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
+      force->improper->compute(eflag,vflag);
+      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
+    }
+    timer->stamp(Timer::BOND);
+  }
+
+  if(force->kspace) {
+    force->kspace->setup();
+    if (kspace_compute_flag) {
+      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
+      force->kspace->compute(eflag,vflag);
+      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
+      timer->stamp(Timer::KSPACE);
+    } else force->kspace->compute_dummy(eflag,vflag);
+  }
+
+  modify->setup_pre_reverse(eflag,vflag);
+  if (force->newton) comm->reverse_comm();
+
+  // update per-atom minimization variables stored by pair styles
+
+  if (nextra_atom)
+    for (int m = 0; m < nextra_atom; m++)
+      requestor[m]->min_xf_get(m);
+
+  lmp->kokkos->auto_sync = 0;
+  modify->setup(vflag);
+  output->setup(flag);
+  lmp->kokkos->auto_sync = 1;
+  update->setupflag = 0;
+
+  // stats for initial thermo output
+
+  ecurrent = pe_compute->compute_scalar();
+  if (nextra_global) ecurrent += modify->min_energy(fextra);
+  if (output->thermo->normflag) ecurrent /= atom->natoms;
+
+  einitial = ecurrent;
+  fnorm2_init = sqrt(fnorm_sqr());
+  fnorminf_init = fnorm_inf();
+}
+
+/* ----------------------------------------------------------------------
+   setup without output or one-time post-init setup
+   flag = 0 = just force calculation
+   flag = 1 = reneighbor and force calculation
+------------------------------------------------------------------------- */
+
+void MinKokkos::setup_minimal(int flag)
+{
+  update->setupflag = 1;
+
+  // setup domain, communication and neighboring
+  // acquire ghosts
+  // build neighbor lists
+
+  if (flag) {
+    modify->setup_pre_exchange();
+    if (triclinic) domain->x2lamda(atom->nlocal);
+    domain->pbc();
+    domain->reset_box();
+    comm->setup();
+    if (neighbor->style) neighbor->setup_bins();
+    comm->exchange();
+    comm->borders();
+    if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+    domain->image_check();
+    domain->box_too_small_check();
+    modify->setup_pre_neighbor();
+    neighbor->build(1);
+    modify->setup_post_neighbor();
+    neighbor->ncalls = 0;
+  }
+
+  // atoms may have migrated in comm->exchange()
+
+  reset_vectors();
+
+  // compute all forces
+
+  ev_set(update->ntimestep);
+  force_clear();
+  modify->setup_pre_force(vflag);
+
+  if (pair_compute_flag) {
+    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
+    force->pair->compute(eflag,vflag);
+    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
+    timer->stamp(Timer::PAIR);
+  }
+  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
+
+  if (atomKK->molecular) {
+    if (force->bond) {
+      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
+      force->bond->compute(eflag,vflag);
+      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
+    }
+    if (force->angle) {
+      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
+      force->angle->compute(eflag,vflag);
+      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
+    }
+    if (force->dihedral) {
+      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
+      force->dihedral->compute(eflag,vflag);
+      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
+    }
+    if (force->improper) {
+      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
+      force->improper->compute(eflag,vflag);
+      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
+    }
+    timer->stamp(Timer::BOND);
+  }
+
+  if(force->kspace) {
+    force->kspace->setup();
+    if (kspace_compute_flag) {
+      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
+      force->kspace->compute(eflag,vflag);
+      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
+      timer->stamp(Timer::KSPACE);
+    } else force->kspace->compute_dummy(eflag,vflag);
+  }
+
+  modify->setup_pre_reverse(eflag,vflag);
+  if (force->newton) comm->reverse_comm();
+
+  // update per-atom minimization variables stored by pair styles
+
+  if (nextra_atom)
+    for (int m = 0; m < nextra_atom; m++)
+      requestor[m]->min_xf_get(m);
+
+  lmp->kokkos->auto_sync = 0;
+  modify->setup(vflag);
+  lmp->kokkos->auto_sync = 1;
+  update->setupflag = 0;
+
+  // stats for Finish to print
+
+  ecurrent = pe_compute->compute_scalar();
+  if (nextra_global) ecurrent += modify->min_energy(fextra);
+  if (output->thermo->normflag) ecurrent /= atom->natoms;
+
+  einitial = ecurrent;
+  fnorm2_init = sqrt(fnorm_sqr());
+  fnorminf_init = fnorm_inf();
+}
+
+/* ----------------------------------------------------------------------
+   perform minimization, calling iterate() for N steps
+------------------------------------------------------------------------- */
+
+void MinKokkos::run(int n)
+{
+  if (nextra_global)
+    error->all(FLERR,"Cannot yet use extra global DOFs (e.g. fix box/relax) "
+     "with Kokkos minimize");
+
+  if (nextra_global || nextra_atom)
+    error->all(FLERR,"Cannot yet use extra atom DOFs (e.g. USER-AWPMD and USER-EFF packages) "
+     "with Kokkos minimize");
+
+  // minimizer iterations
+
+  lmp->kokkos->auto_sync = 0;
+  atomKK->sync(Device,ALL_MASK);
+
+  stop_condition = iterate(n);
+  stopstr = stopstrings(stop_condition);
+
+  // if early exit from iterate loop:
+  // set update->nsteps to niter for Finish stats to print
+  // set output->next values to this timestep
+  // call energy_force() to insure vflag is set when forces computed
+  // output->write does final output for thermo, dump, restart files
+  // add ntimestep to all computes that store invocation times
+  //   since are hardwiring call to thermo/dumps and computes may not be ready
+
+  if (stop_condition != MAXITER) {
+    update->nsteps = niter;
+
+    if (update->restrict_output == 0) {
+      for (int idump = 0; idump < output->ndump; idump++)
+        output->next_dump[idump] = update->ntimestep;
+      output->next_dump_any = update->ntimestep;
+      if (output->restart_flag) {
+        output->next_restart = update->ntimestep;
+        if (output->restart_every_single)
+          output->next_restart_single = update->ntimestep;
+        if (output->restart_every_double)
+          output->next_restart_double = update->ntimestep;
+      }
+    }
+    output->next_thermo = update->ntimestep;
+
+    modify->addstep_compute_all(update->ntimestep);
+    ecurrent = energy_force(0);
+
+    atomKK->sync(Host,ALL_MASK);
+    output->write(update->ntimestep);
+  }
+
+  atomKK->sync(Host,ALL_MASK);
+  lmp->kokkos->auto_sync = 1;
+}
+
+/* ----------------------------------------------------------------------
+   evaluate potential energy and forces
+   may migrate atoms due to reneighboring
+   return new energy, which should include nextra_global dof
+   return negative gradient stored in atom->f
+   return negative gradient for nextra_global dof in fextra
+------------------------------------------------------------------------- */
+
+double MinKokkos::energy_force(int resetflag)
+{
+  // check for reneighboring
+  // always communicate since minimizer moved atoms
+
+  int nflag = neighbor->decide();
+
+  if (nflag == 0) {
+    timer->stamp();
+    comm->forward_comm();
+    timer->stamp(Timer::COMM);
+  } else {
+    if (modify->n_min_pre_exchange) {
+      timer->stamp();
+      modify->min_pre_exchange();
+      timer->stamp(Timer::MODIFY);
+    }
+    if (triclinic) domain->x2lamda(atom->nlocal);
+    domain->pbc();
+    if (domain->box_change) {
+      domain->reset_box();
+      comm->setup();
+      if (neighbor->style) neighbor->setup_bins();
+    }
+    timer->stamp();
+    comm->exchange();
+    if (atom->sortfreq > 0 &&
+        update->ntimestep >= atom->nextsort) atom->sort();
+    comm->borders();
+    if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
+    timer->stamp(Timer::COMM);
+    if (modify->n_min_pre_neighbor) {
+      modify->min_pre_neighbor();
+      timer->stamp(Timer::MODIFY);
+    }
+    neighbor->build(1);
+    timer->stamp(Timer::NEIGH);
+    if (modify->n_min_post_neighbor) {
+      modify->min_post_neighbor();
+      timer->stamp(Timer::MODIFY);
+    }
+  }
+
+  ev_set(update->ntimestep);
+  force_clear();
+
+  timer->stamp();
+
+  if (modify->n_min_pre_force) {
+    modify->min_pre_force(vflag);
+    timer->stamp(Timer::MODIFY);
+  }
+
+  if (pair_compute_flag) {
+    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
+    force->pair->compute(eflag,vflag);
+    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
+    timer->stamp(Timer::PAIR);
+  }
+
+  if (atom->molecular) {
+    if (force->bond) {
+      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
+      force->bond->compute(eflag,vflag);
+      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
+    }
+    if (force->angle) {
+      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
+      force->angle->compute(eflag,vflag);
+      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
+    }
+    if (force->dihedral) {
+      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
+      force->dihedral->compute(eflag,vflag);
+      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
+    }
+    if (force->improper) {
+      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
+      force->improper->compute(eflag,vflag);
+      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
+    }
+    timer->stamp(Timer::BOND);
+  }
+
+  if (kspace_compute_flag) {
+    atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
+    force->kspace->compute(eflag,vflag);
+    atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
+    timer->stamp(Timer::KSPACE);
+  }
+
+  if (modify->n_min_pre_reverse) {
+    modify->min_pre_reverse(eflag,vflag);
+    timer->stamp(Timer::MODIFY);
+  }
+
+  if (force->newton) {
+    comm->reverse_comm();
+    timer->stamp(Timer::COMM);
+  }
+
+  // fixes that affect minimization
+
+  if (modify->n_min_post_force) {
+     timer->stamp();
+     modify->min_post_force(vflag);
+     timer->stamp(Timer::MODIFY);
+  }
+
+  // compute potential energy of system
+  // normalize if thermo PE does
+
+  atomKK->sync(pe_compute->execution_space,pe_compute->datamask_read);
+  double energy = pe_compute->compute_scalar();
+  atomKK->modified(pe_compute->execution_space,pe_compute->datamask_modify);
+  if (nextra_global) energy += modify->min_energy(fextra);
+  if (output->thermo->normflag) energy /= atom->natoms;
+
+  // if reneighbored, atoms migrated
+  // if resetflag = 1, update x0 of atoms crossing PBC
+  // reset vectors used by lo-level minimizer
+
+  if (nflag) {
+    if (resetflag) fix_minimize_kk->reset_coords();
+    reset_vectors();
+  }
+  return energy;
+}
+
+/* ----------------------------------------------------------------------
+   clear force on own & ghost atoms
+   clear other arrays as needed
+------------------------------------------------------------------------- */
+
+void MinKokkos::force_clear()
+{
+  if (external_force_clear) return;
+
+  // clear global force array
+  // if either newton flag is set, also include ghosts
+
+  atomKK->k_f.clear_sync_state(); // ignore host forces/torques since device views
+  atomKK->k_torque.clear_sync_state(); // will be cleared below
+
+  int nzero = atom->nlocal;
+  if (force->newton) nzero += atom->nghost;
+
+  if (nzero) {
+    // local variables for lambda capture
+
+    auto l_f = atomKK->k_f.d_view;
+    auto l_torque = atomKK->k_torque.d_view;
+    auto l_torqueflag = torqueflag;
+
+    Kokkos::parallel_for(nzero, LAMMPS_LAMBDA(int i) {
+      l_f(i,0) = 0.0;
+      l_f(i,1) = 0.0;
+      l_f(i,2) = 0.0;
+      if (l_torqueflag) {
+        l_torque(i,0) = 0.0;
+        l_torque(i,1) = 0.0;
+        l_torque(i,2) = 0.0;
+      }
+    });
+  }
+  atomKK->modified(Device,F_MASK);
+}
+
+/* ----------------------------------------------------------------------
+   compute and return ||force||_2^2
+------------------------------------------------------------------------- */
+
+double MinKokkos::fnorm_sqr()
+{
+
+  double local_norm2_sqr = 0.0;
+  {
+    // local variables for lambda capture
+
+    auto l_fvec = fvec;
+
+    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm2_sqr) {
+      local_norm2_sqr += l_fvec[i]*l_fvec[i];
+    },local_norm2_sqr);
+  }
+
+  double norm2_sqr = 0.0;
+  MPI_Allreduce(&local_norm2_sqr,&norm2_sqr,1,MPI_DOUBLE,MPI_SUM,world);
+
+  return norm2_sqr;
+}
+
+/* ----------------------------------------------------------------------
+   compute and return ||force||_inf
+------------------------------------------------------------------------- */
+
+double MinKokkos::fnorm_inf()
+{
+
+  double local_norm_inf = 0.0;
+  {
+    // local variables for lambda capture
+
+    auto l_fvec = fvec;
+
+    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm_inf) {
+      local_norm_inf = MAX(fabs(l_fvec[i]),local_norm_inf);
+    },Kokkos::Max<double>(local_norm_inf));
+  }
+
+  double norm_inf = 0.0;
+  MPI_Allreduce(&local_norm_inf,&norm_inf,1,MPI_DOUBLE,MPI_MAX,world);
+
+  return norm_inf;
+}
--- a/src/KOKKOS/min_kokkos.h
+++ b/src/KOKKOS/min_kokkos.h
@ -0,0 +1,58 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_MIN_KOKKOS_H
+#define LMP_MIN_KOKKOS_H
+
+#include "min.h"
+#include "kokkos_type.h"
+
+namespace LAMMPS_NS {
+
+class MinKokkos : public Min {
+ public:
+  MinKokkos(class LAMMPS *);
+  virtual ~MinKokkos();
+  void init();
+  void setup(int flag=1);
+  void setup_minimal(int);
+  void run(int);
+  double fnorm_sqr();
+  double fnorm_inf();
+
+  virtual void init_style() {}
+  virtual void setup_style() = 0;
+  virtual void reset_vectors() = 0;
+  virtual int iterate(int) = 0;
+
+  // possible return values of iterate() method
+  enum{MAXITER,MAXEVAL,ETOL,FTOL,DOWNHILL,ZEROALPHA,ZEROFORCE,
+       ZEROQUAD,TRSMALL,INTERROR,TIMEOUT};
+
+ //protected: // won't compile with CUDA
+  class FixMinimizeKokkos *fix_minimize_kk;  // fix that stores auxiliary data
+
+  DAT::t_ffloat_1d xvec;            // variables for atomic dof, as 1d vector
+  DAT::t_ffloat_1d fvec;            // force vector for atomic dof, as 1d vector
+
+  double energy_force(int);
+  void force_clear();
+};
+
+}
+
+#endif
+
+/* ERROR/WARNING messages:
+
+*/
--- a/src/KOKKOS/min_linesearch_kokkos.cpp
+++ b/src/KOKKOS/min_linesearch_kokkos.cpp
@ -0,0 +1,401 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+   Contributing author: Stan Moore (SNL)
+------------------------------------------------------------------------- */
+
+#include "min_linesearch_kokkos.h"
+#include <mpi.h>
+#include <cmath>
+#include "atom_kokkos.h"
+#include "modify.h"
+#include "fix_minimize_kokkos.h"
+#include "pair.h"
+#include "output.h"
+#include "thermo.h"
+#include "error.h"
+#include "atom_masks.h"
+
+using namespace LAMMPS_NS;
+
+// ALPHA_MAX = max alpha allowed to avoid long backtracks
+// ALPHA_REDUCE = reduction ratio, should be in range [0.5,1)
+// BACKTRACK_SLOPE, should be in range (0,0.5]
+// QUADRATIC_TOL = tolerance on alpha0, should be in range [0.1,1)
+// EMACH = machine accuracy limit of energy changes (1.0e-8)
+// EPS_QUAD = tolerance for quadratic projection
+
+#define ALPHA_MAX 1.0
+#define ALPHA_REDUCE 0.5
+#define BACKTRACK_SLOPE 0.4
+#define QUADRATIC_TOL 0.1
+//#define EMACH 1.0e-8
+#define EMACH 1.0e-8
+#define EPS_QUAD 1.0e-28
+
+/* ---------------------------------------------------------------------- */
+
+MinLineSearchKokkos::MinLineSearchKokkos(LAMMPS *lmp) : MinKokkos(lmp)
+{
+  searchflag = 1;
+  atomKK = (AtomKokkos *) atom;
+}
+
+/* ---------------------------------------------------------------------- */
+
+MinLineSearchKokkos::~MinLineSearchKokkos()
+{
+
+}
+
+/* ---------------------------------------------------------------------- */
+
+void MinLineSearchKokkos::init()
+{
+  MinKokkos::init();
+
+  if (linestyle == 1) linemin = &MinLineSearchKokkos::linemin_quadratic;
+  else error->all(FLERR,"Kokkos minimize only supports the 'min_modify line "
+   "quadratic' option");
+}
+
+/* ---------------------------------------------------------------------- */
+
+void MinLineSearchKokkos::setup_style()
+{
+  // memory for x0,g,h for atomic dof
+
+  fix_minimize_kk->add_vector_kokkos();
+  fix_minimize_kk->add_vector_kokkos();
+  fix_minimize_kk->add_vector_kokkos();
+}
+
+/* ----------------------------------------------------------------------
+   set current vector lengths and pointers
+   called after atoms have migrated
+------------------------------------------------------------------------- */
+
+void MinLineSearchKokkos::reset_vectors()
+{
+  // atomic dof
+
+  nvec = 3 * atom->nlocal;
+  atomKK->sync(Device,F_MASK|X_MASK);
+  auto d_x = atomKK->k_x.d_view;
+  auto d_f = atomKK->k_f.d_view;
+
+  if (nvec) xvec = DAT::t_ffloat_1d(d_x.data(),d_x.size());
+  if (nvec) fvec = DAT::t_ffloat_1d(d_f.data(),d_f.size());
+  x0 = fix_minimize_kk->request_vector_kokkos(0);
+  g = fix_minimize_kk->request_vector_kokkos(1);
+  h = fix_minimize_kk->request_vector_kokkos(2);
+
+  auto h_fvec = Kokkos::create_mirror_view(fvec);
+  Kokkos::deep_copy(h_fvec,fvec);
+}
+
+/* ----------------------------------------------------------------------
+   line minimization methods
+   find minimum-energy starting at x along h direction
+   input args:   eoriginal = energy at initial x
+   input extra:  n,x,x0,f,h for atomic, extra global, extra per-atom dof
+   output args:  return 0 if successful move, non-zero alpha
+                 return non-zero if failed
+                 alpha = distance moved along h for x at min eng config
+                 update neval counter of eng/force function evaluations
+                 output extra: if fail, energy_force() of original x
+                 if succeed, energy_force() at x + alpha*h
+                 atom->x = coords at new configuration
+                 atom->f = force at new configuration
+                 ecurrent = energy of new configuration
+------------------------------------------------------------------------- */
+
+/* ----------------------------------------------------------------------
+    // linemin: quadratic line search (adapted from Dennis and Schnabel)
+    // The objective function is approximated by a quadratic
+    // function in alpha, for sufficiently small alpha.
+    // This idea is the same as that used in the well-known secant
+    // method. However, since the change in the objective function
+    // (difference of two finite numbers) is not known as accurately
+    // as the gradient (which is close to zero), all the expressions
+    // are written in terms of gradients. In this way, we can converge
+    // the LAMMPS forces much closer to zero.
+    //
+    // We know E,Eprev,fh,fhprev. The Taylor series about alpha_prev
+    // truncated at the quadratic term is:
+    //
+    //     E = Eprev - del_alpha*fhprev + (1/2)del_alpha^2*Hprev
+    //
+    // and
+    //
+    //     fh = fhprev - del_alpha*Hprev
+    //
+    // where del_alpha = alpha-alpha_prev
+    //
+    // We solve these two equations for Hprev and E=Esolve, giving:
+    //
+    //     Esolve = Eprev - del_alpha*(f+fprev)/2
+    //
+    // We define relerr to be:
+    //
+    //      relerr = |(Esolve-E)/Eprev|
+    //             = |1.0 - (0.5*del_alpha*(f+fprev)+E)/Eprev|
+    //
+    // If this is accurate to within a reasonable tolerance, then
+    // we go ahead and use a secant step to fh = 0:
+    //
+    //      alpha0 = alpha - (alpha-alphaprev)*fh/delfh;
+    //
+------------------------------------------------------------------------- */
+
+int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha)
+{
+  double fdothall,fdothme,hme,hmaxall;
+  double de_ideal,de;
+  double delfh,engprev,relerr,alphaprev,fhprev,ff,fh,alpha0;
+  double dot[2],dotall[2];
+  double alphamax;
+
+  fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
+  fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
+
+  // fdothall = projection of search dir along downhill gradient
+  // if search direction is not downhill, exit with error
+
+  fdothme = 0.0;
+  {
+    // local variables for lambda capture
+
+    auto l_fvec = fvec;
+    auto l_h = h;
+
+    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& fdothme) {
+      fdothme += l_fvec[i]*l_h[i];
+    },fdothme);
+  }
+  MPI_Allreduce(&fdothme,&fdothall,1,MPI_DOUBLE,MPI_SUM,world);
+  if (output->thermo->normflag) fdothall /= atom->natoms;
+
+  if (fdothall <= 0.0) return DOWNHILL;
+
+  // set alphamax so no dof is changed by more than max allowed amount
+  // for atom coords, max amount = dmax
+  // for extra per-atom dof, max amount = extra_max[]
+  // for extra global dof, max amount is set by fix
+  // also insure alphamax <= ALPHA_MAX
+  // else will have to backtrack from huge value when forces are tiny
+  // if all search dir components are already 0.0, exit with error
+
+
+  hme = 0.0;
+  {
+    // local variables for lambda capture
+
+    auto l_h = h;
+
+    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& hme) {
+      hme = MAX(hme,fabs(l_h[i]));
+    },Kokkos::Max<double>(hme));
+  }
+  MPI_Allreduce(&hme,&hmaxall,1,MPI_DOUBLE,MPI_MAX,world);
+  alphamax = MIN(ALPHA_MAX,dmax/hmaxall);
+
+  if (hmaxall == 0.0) return ZEROFORCE;
+
+  // store box and values of all dof at start of linesearch
+
+  {
+    // local variables for lambda capture
+
+    auto l_xvec = xvec;
+    auto l_x0 = x0;
+
+    fix_minimize_kk->store_box();
+    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
+      l_x0[i] = l_xvec[i];
+    });
+  }
+
+  // backtrack with alpha until energy decrease is sufficient
+  // or until get to small energy change, then perform quadratic projection
+
+  alpha = alphamax;
+  fhprev = fdothall;
+  engprev = eoriginal;
+  alphaprev = 0.0;
+
+  // // important diagnostic: test the gradient against energy
+  // double etmp;
+  // double alphatmp = alphamax*1.0e-4;
+  // etmp = alpha_step(alphatmp,1);
+  // printf("alpha = %g dele = %g dele_force = %g err = %g\n",
+  //        alphatmp,etmp-eoriginal,-alphatmp*fdothall,
+  //        etmp-eoriginal+alphatmp*fdothall);
+  // alpha_step(0.0,1);
+
+  while (1) {
+    ecurrent = alpha_step(alpha,1);
+
+    // compute new fh, alpha, delfh
+
+    s_double2 sdot;
+    {
+      // local variables for lambda capture
+
+      auto l_fvec = fvec;
+      auto l_h = h;
+
+      Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
+        sdot.d0 += l_fvec[i]*l_fvec[i];
+        sdot.d1 += l_fvec[i]*l_h[i];
+      },sdot);
+    }
+    dot[0] = sdot.d0;
+    dot[1] = sdot.d1;
+
+    MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
+    ff = dotall[0];
+    fh = dotall[1];
+    if (output->thermo->normflag) {
+      ff /= atom->natoms;
+      fh /= atom->natoms;
+    }
+
+    delfh = fh - fhprev;
+
+    // if fh or delfh is epsilon, reset to starting point, exit with error
+
+    if (fabs(fh) < EPS_QUAD || fabs(delfh) < EPS_QUAD) {
+      ecurrent = alpha_step(0.0,0);
+      return ZEROQUAD;
+    }
+
+    // Check if ready for quadratic projection, equivalent to secant method
+    // alpha0 = projected alpha
+
+    relerr = fabs(1.0-(0.5*(alpha-alphaprev)*(fh+fhprev)+ecurrent)/engprev);
+    alpha0 = alpha - (alpha-alphaprev)*fh/delfh;
+
+    if (relerr <= QUADRATIC_TOL && alpha0 > 0.0 && alpha0 < alphamax) {
+      ecurrent = alpha_step(alpha0,1);
+      if (ecurrent - eoriginal < EMACH) {
+        return 0;
+      }
+    }
+
+    // if backtracking energy change is better than ideal, exit with success
+
+    de_ideal = -BACKTRACK_SLOPE*alpha*fdothall;
+    de = ecurrent - eoriginal;
+
+    if (de <= de_ideal)
+      return 0;
+
+    // save previous state
+
+    fhprev = fh;
+    engprev = ecurrent;
+    alphaprev = alpha;
+
+    // reduce alpha
+
+    alpha *= ALPHA_REDUCE;
+
+    // backtracked all the way to 0.0
+    // reset to starting point, exit with error
+
+    if (alpha <= 0.0 || de_ideal >= -EMACH) {
+      ecurrent = alpha_step(0.0,0);
+      return ZEROALPHA;
+    }
+  }
+}
+
+/* ---------------------------------------------------------------------- */
+
+double MinLineSearchKokkos::alpha_step(double alpha, int resetflag)
+{
+  // reset to starting point
+
+  atomKK->k_x.clear_sync_state(); // ignore if host positions since device
+                                  //  positions will be reset below
+  {
+    // local variables for lambda capture
+
+    auto l_xvec = xvec;
+    auto l_x0 = x0;
+
+    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
+      l_xvec[i] = l_x0[i];
+    });
+  }
+
+  // step forward along h
+
+  if (alpha > 0.0) {
+    // local variables for lambda capture
+
+    auto l_xvec = xvec;
+    auto l_h = h;
+
+    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
+      l_xvec[i] += alpha*l_h[i];
+    });
+  }
+
+  atomKK->modified(Device,X_MASK);
+
+  // compute and return new energy
+
+  neval++;
+  return energy_force(resetflag);
+}
+
+/* ---------------------------------------------------------------------- */
+
+// compute projection of force on: itself and the search direction
+
+double MinLineSearchKokkos::compute_dir_deriv(double &ff)
+{
+  double dot[2],dotall[2];
+  double fh;
+
+  // compute new fh, alpha, delfh
+
+  s_double2 sdot;
+  {
+    // local variables for lambda capture
+
+    auto l_fvec = fvec;
+    auto l_h = h;
+
+    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
+      sdot.d0 += l_fvec[i]*l_fvec[i];
+      sdot.d1 += l_fvec[i]*l_h[i];
+    },sdot);
+  }
+  dot[0] = sdot.d0;
+  dot[1] = sdot.d1;
+
+  MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
+
+  ff = dotall[0];
+  fh = dotall[1];
+  if (output->thermo->normflag) {
+    ff /= atom->natoms;
+    fh /= atom->natoms;
+  }
+
+  return fh;
+}
--- a/src/KOKKOS/min_linesearch_kokkos.h
+++ b/src/KOKKOS/min_linesearch_kokkos.h
@ -0,0 +1,69 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_MIN_LSRCH_KOKKOS_H
+#define LMP_MIN_LSRCH_KOKKOS_H
+
+#include "min_kokkos.h"
+
+namespace LAMMPS_NS {
+
+  struct s_double2 {
+    double d0, d1;
+    KOKKOS_INLINE_FUNCTION
+    s_double2() {
+      d0 = d1 = 0.0;
+    }
+    KOKKOS_INLINE_FUNCTION
+    s_double2& operator+=(const s_double2 &rhs){
+      d0 += rhs.d0;
+      d1 += rhs.d1;
+      return *this;
+    }
+
+    KOKKOS_INLINE_FUNCTION
+    void operator+=(const volatile s_double2 &rhs) volatile {
+      d0 += rhs.d0;
+      d1 += rhs.d1;
+    }
+  };
+  //typedef s_double2 double2;
+
+class MinLineSearchKokkos : public MinKokkos {
+ public:
+  MinLineSearchKokkos(class LAMMPS *);
+  ~MinLineSearchKokkos();
+  void init();
+  void setup_style();
+  void reset_vectors();
+
+ //protected: // won't compile with CUDA
+  // vectors needed by linesearch minimizers
+  // allocated and stored by fix_minimize
+  // x,f are stored by parent or Atom class or Pair class
+
+  DAT::t_ffloat_1d x0;   // coords at start of linesearch
+  DAT::t_ffloat_1d g;    // old gradient vector
+  DAT::t_ffloat_1d h;    // search direction vector
+
+  typedef int (MinLineSearchKokkos::*FnPtr)(double, double &);
+  FnPtr linemin;
+  int linemin_quadratic(double, double &);
+
+  double alpha_step(double, int);
+  double compute_dir_deriv(double &);
+};
+
+}
+
+#endif
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@ -143,7 +143,6 @@ void VerletKokkos::setup(int flag)
  }
  else if (force->pair) force->pair->compute_dummy(eflag,vflag);

-
  if (atomKK->molecular) {
    if (force->bond) {
      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
@ -248,7 +247,6 @@ void VerletKokkos::setup_minimal(int flag)
  }
  else if (force->pair) force->pair->compute_dummy(eflag,vflag);

-
  if (atomKK->molecular) {
    if (force->bond) {
      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
@ -285,7 +283,9 @@ void VerletKokkos::setup_minimal(int flag)

  if (force->newton) comm->reverse_comm();

+  lmp->kokkos->auto_sync = 0;
  modify->setup(vflag);
+  lmp->kokkos->auto_sync = 1;
  update->setupflag = 0;
 }

--- a/src/MOLECULE/dihedral_harmonic.cpp
+++ b/src/MOLECULE/dihedral_harmonic.cpp
@ -42,7 +42,7 @@ DihedralHarmonic::DihedralHarmonic(LAMMPS *lmp) : Dihedral(lmp)

 DihedralHarmonic::~DihedralHarmonic()
 {
-  if (allocated) {
+  if (allocated && !copymode) {
    memory->destroy(setflag);
    memory->destroy(k);
    memory->destroy(sign);
--- a/src/MOLECULE/dihedral_harmonic.h
+++ b/src/MOLECULE/dihedral_harmonic.h
@ -29,16 +29,16 @@ class DihedralHarmonic : public Dihedral {
  DihedralHarmonic(class LAMMPS *);
  virtual ~DihedralHarmonic();
  virtual void compute(int, int);
-  void coeff(int, char **);
+  virtual void coeff(int, char **);
  void write_restart(FILE *);
-  void read_restart(FILE *);
+  virtual void read_restart(FILE *);
  void write_data(FILE *);

 protected:
  double *k,*cos_shift,*sin_shift;
  int *sign,*multiplicity;

-  void allocate();
+  virtual void allocate();
 };

 }
--- a/src/REPLICA/tad.cpp
+++ b/src/REPLICA/tad.cpp
@ -197,7 +197,7 @@ void TAD::command(int narg, char **arg)
  args = new char*[narg2];
  args[0] = min_style;

-  update->create_minimize(narg2,args);
+  update->create_minimize(narg2,args,1);

  delete [] args;

@ -711,7 +711,7 @@ void TAD::perform_neb(int ievent)
  args = new char*[narg2];
  args[0] = min_style_neb;

-  update->create_minimize(narg2,args);
+  update->create_minimize(narg2,args,1);

  delete [] args;

@ -777,7 +777,7 @@ void TAD::perform_neb(int ievent)
  args = new char*[narg2];
  args[0] = min_style;

-  update->create_minimize(narg2,args);
+  update->create_minimize(narg2,args,1);

  update->etol = etol;
  update->ftol = ftol;
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@ -34,25 +34,12 @@
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
+#include "rigid_const.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
-enum{SINGLE,MOLECULE,GROUP};
-enum{NONE,XYZ,XY,YZ,XZ};
-enum{ISO,ANISO,TRICLINIC};
-
-#define MAXLINE 1024
-#define CHUNK 1024
-#define ATTRIBUTE_PERBODY 20
-
-#define TOLERANCE 1.0e-6
-#define EPSILON 1.0e-7
-
-#define SINERTIA 0.4            // moment of inertia prefactor for sphere
-#define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
-#define LINERTIA (1.0/12.0)     // moment of inertia prefactor for line segment
+using namespace RigidConst;

 /* ---------------------------------------------------------------------- */

@ -605,21 +592,6 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) :
  for (ibody = 0; ibody < nbody; ibody++)
    if (nrigid[ibody] <= 1) error->all(FLERR,"One or zero atoms in rigid body");

-  // bitmasks for properties of extended particles
-
-  POINT = 1;
-  SPHERE = 2;
-  ELLIPSOID = 4;
-  LINE = 8;
-  TRIANGLE = 16;
-  DIPOLE = 32;
-  OMEGA = 64;
-  ANGMOM = 128;
-  TORQUE = 256;
-
-  MINUSPI = -MY_PI;
-  TWOPI = 2.0*MY_PI;
-
  // wait to setup bodies until first init() using current atom properties

  setupflag = 0;
@ -1472,8 +1444,8 @@ void FixRigid::set_xv()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega_one[i][0] = omega[ibody][0];
        omega_one[i][1] = omega[ibody][1];
@ -2018,8 +1990,8 @@ void FixRigid::setup_bodies_static()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        orient[i][0] = lbonus[line[i]].theta - theta_body;
-        while (orient[i][0] <= MINUSPI) orient[i][0] += TWOPI;
-        while (orient[i][0] > MY_PI) orient[i][0] -= TWOPI;
+        while (orient[i][0] <= -MY_PI) orient[i][0] += MY_2PI;
+        while (orient[i][0] > MY_PI) orient[i][0] -= MY_2PI;
        if (orientflag == 4) orient[i][1] = orient[i][2] = orient[i][3] = 0.0;
      } else if (eflags[i] & TRIANGLE) {
        quatatom = tbonus[tri[i]].quat;
--- a/src/RIGID/fix_rigid.h
+++ b/src/RIGID/fix_rigid.h
@ -65,7 +65,6 @@ class FixRigid : public Fix {
  double dtv,dtf,dtq;
  double *step_respa;
  int triclinic;
-  double MINUSPI,TWOPI;

  char *inpfile;             // file to read rigid body attributes from
  int rstyle;               // SINGLE,MOLECULE,GROUP
@ -137,9 +136,6 @@ class FixRigid : public Fix {
  class AtomVecLine *avec_line;
  class AtomVecTri *avec_tri;

-  int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE;   // bitmasks for eflags
-  int OMEGA,ANGMOM,TORQUE;
-
  void image_shift();
  void set_xv();
  void set_v();
--- a/src/RIGID/fix_rigid_nh.cpp
+++ b/src/RIGID/fix_rigid_nh.cpp
@ -33,14 +33,11 @@
 #include "kspace.h"
 #include "memory.h"
 #include "error.h"
+#include "rigid_const.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
-
-enum{NONE,XYZ,XY,YZ,XZ};     // same as in FixRigid
-enum{ISO,ANISO,TRICLINIC};   // same as in FixRigid
-
-#define EPSILON 1.0e-7
+using namespace RigidConst;

 /* ---------------------------------------------------------------------- */

--- a/src/RIGID/fix_rigid_nh_small.cpp
+++ b/src/RIGID/fix_rigid_nh_small.cpp
@ -34,17 +34,12 @@
 #include "kspace.h"
 #include "memory.h"
 #include "error.h"
+#include "rigid_const.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
-
-enum{NONE,XYZ,XY,YZ,XZ};     // same as in FixRigid
-enum{ISO,ANISO,TRICLINIC};   // same as in FixRigid
-
-#define EPSILON 1.0e-7
-
-enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
+using namespace RigidConst;

 /* ---------------------------------------------------------------------- */

--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@ -38,34 +38,17 @@
 #include "hashlittle.h"
 #include "memory.h"
 #include "error.h"
+#include "rigid_const.h"

 #include <map>

 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
+using namespace RigidConst;

 #define RVOUS 1   // 0 for irregular, 1 for all2all

-#define MAXLINE 1024
-#define CHUNK 1024
-#define ATTRIBUTE_PERBODY 20
-
-#define TOLERANCE 1.0e-6
-#define EPSILON 1.0e-7
-#define BIG 1.0e20
-
-#define SINERTIA 0.4            // moment of inertia prefactor for sphere
-#define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
-#define LINERTIA (1.0/12.0)     // moment of inertia prefactor for line segment
-
-#define DELTA_BODY 10000
-
-enum{NONE,XYZ,XY,YZ,XZ};        // same as in FixRigid
-enum{ISO,ANISO,TRICLINIC};      // same as in FixRigid
-
-enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
-
 /* ---------------------------------------------------------------------- */

 FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
@ -455,21 +438,6 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
  comm_forward = 1 + bodysize;
  comm_reverse = 6;

-  // bitmasks for properties of extended particles
-
-  POINT = 1;
-  SPHERE = 2;
-  ELLIPSOID = 4;
-  LINE = 8;
-  TRIANGLE = 16;
-  DIPOLE = 32;
-  OMEGA = 64;
-  ANGMOM = 128;
-  TORQUE = 256;
-
-  MINUSPI = -MY_PI;
-  TWOPI = 2.0*MY_PI;
-
  // atom style pointers to particles that store extra info

  avec_ellipsoid = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
@ -1384,8 +1352,8 @@ void FixRigidSmall::set_xv()
        if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]);
        else theta_body = -2.0*acos(b->quat[0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega[i][0] = b->omega[0];
        omega[i][1] = b->omega[1];
@ -2155,8 +2123,8 @@ void FixRigidSmall::setup_bodies_static()
        if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]);
        else theta_body = -2.0*acos(b->quat[0]);
        orient[i][0] = lbonus[line[i]].theta - theta_body;
-        while (orient[i][0] <= MINUSPI) orient[i][0] += TWOPI;
-        while (orient[i][0] > MY_PI) orient[i][0] -= TWOPI;
+        while (orient[i][0] <= -MY_PI) orient[i][0] += MY_2PI;
+        while (orient[i][0] > MY_PI) orient[i][0] -= MY_2PI;
        if (orientflag == 4) orient[i][1] = orient[i][2] = orient[i][3] = 0.0;
      } else if (eflags[i] & TRIANGLE) {
        quatatom = tbonus[tri[i]].quat;
--- a/src/RIGID/fix_rigid_small.h
+++ b/src/RIGID/fix_rigid_small.h
@ -72,7 +72,6 @@ class FixRigidSmall : public Fix {
  double dtv,dtf,dtq;
  double *step_respa;
  int triclinic;
-  double MINUSPI,TWOPI;

  char *inpfile;             // file to read rigid body attributes from
  int setupflag;            // 1 if body properties are setup, else 0
@ -129,9 +128,6 @@ class FixRigidSmall : public Fix {
  int dorientflag;      // 1 if particles store dipole orientation
  int reinitflag;       // 1 if re-initialize rigid bodies between runs

-  int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE;   // bitmasks for eflags
-  int OMEGA,ANGMOM,TORQUE;
-
  class AtomVecEllipsoid *avec_ellipsoid;
  class AtomVecLine *avec_line;
  class AtomVecTri *avec_tri;
--- a/src/RIGID/rigid_const.h
+++ b/src/RIGID/rigid_const.h
@ -0,0 +1,54 @@
+/* -*- c++ -*- ----------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+#ifndef LMP_RIGID_CONST_H
+#define LMP_RIGID_CONST_H
+
+namespace LAMMPS_NS {
+  namespace RigidConst {
+
+    enum{SINGLE,MOLECULE,GROUP};
+    enum{NONE,XYZ,XY,YZ,XZ};
+    enum{ISO,ANISO,TRICLINIC};
+    enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
+
+    enum {POINT     = 1<<0,
+          SPHERE    = 1<<1,
+          ELLIPSOID = 1<<2,
+          LINE      = 1<<3,
+          TRIANGLE  = 1<<4,
+          DIPOLE    = 1<<5,
+          OMEGA     = 1<<6,
+          ANGMOM    = 1<<7,
+          TORQUE    = 1<<8
+    };
+    
+    static const double TOLERANCE = 1.0e-6;
+    static const double EPSILON   = 1.0e-7;
+    static const double BIG       = 1.0e20;
+
+    // moment of inertia prefactor for sphere
+    static const double SINERTIA = 0.4;
+    // moment of inertia prefactor for ellipsoid
+    static const double EINERTIA = 0.2;
+    // moment of inertia prefactor for line segment
+    static const double LINERTIA = 1.0/12.0;
+
+    static const int MAXLINE    = 1024;
+    static const int CHUNK      = 1024;
+    static const int DELTA_BODY = 10000;
+    static const int ATTRIBUTE_PERBODY = 20;
+  }
+}
+
+#endif
--- a/src/USER-OMP/fix_rigid_nh_omp.cpp
+++ b/src/USER-OMP/fix_rigid_nh_omp.cpp
@ -38,15 +38,12 @@

 #include "math_extra.h"
 #include "math_const.h"
+#include "rigid_const.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
-enum{SINGLE,MOLECULE,GROUP};    // same as in FixRigid
-enum{ISO,ANISO,TRICLINIC};      // same as in FixRigid
-
-#define EINERTIA 0.4            // moment of inertia prefactor for ellipsoid
+using namespace RigidConst;

 typedef struct { double x,y,z; } dbl3_t;

@ -770,8 +767,8 @@ void FixRigidNHOMP::set_xv_thr()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega_one[i][0] = omega[ibody][0];
        omega_one[i][1] = omega[ibody][1];
--- a/src/USER-OMP/fix_rigid_omp.cpp
+++ b/src/USER-OMP/fix_rigid_omp.cpp
@ -33,14 +33,12 @@

 #include "math_extra.h"
 #include "math_const.h"
+#include "rigid_const.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
-enum{SINGLE,MOLECULE,GROUP};    // same as in FixRigid
-
-#define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
+using namespace RigidConst;

 typedef struct { double x,y,z; } dbl3_t;

@ -488,8 +486,8 @@ void FixRigidOMP::set_xv_thr()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega_one[i][0] = omega[ibody][0];
        omega_one[i][1] = omega[ibody][1];
--- a/src/USER-OMP/fix_rigid_small_omp.cpp
+++ b/src/USER-OMP/fix_rigid_small_omp.cpp
@ -31,14 +31,12 @@

 #include "math_extra.h"
 #include "math_const.h"
+#include "rigid_const.h"

 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
-#define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
-
-enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
+using namespace RigidConst;

 typedef struct { double x,y,z; } dbl3_t;

@ -427,8 +425,8 @@ void FixRigidSmallOMP::set_xv_thr()
        if (b.quat[3] >= 0.0) theta_body = 2.0*acos(b.quat[0]);
        else theta_body = -2.0*acos(b.quat[0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega[i][0] = b.omega[0];
        omega[i][1] = b.omega[1];
--- a/src/compute_bond_local.cpp
+++ b/src/compute_bond_local.cpp
@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 #define DELTA 10000
 #define EPSILON 1.0e-12

-enum{DIST,VELVIB,OMEGA,ENGTRANS,ENGVIB,ENGROT,ENGPOT,FORCE,VARIABLE};
+enum{DIST,VELVIB,OMEGA,ENGTRANS,ENGVIB,ENGROT,ENGPOT,FORCE,FX,FY,FZ,VARIABLE};

 /* ---------------------------------------------------------------------- */

@ -64,6 +64,9 @@ ComputeBondLocal::ComputeBondLocal(LAMMPS *lmp, int narg, char **arg) :
    if (strcmp(arg[iarg],"dist") == 0) bstyle[nvalues++] = DIST;
    else if (strcmp(arg[iarg],"engpot") == 0) bstyle[nvalues++] = ENGPOT;
    else if (strcmp(arg[iarg],"force") == 0) bstyle[nvalues++] = FORCE;
+    else if (strcmp(arg[iarg],"fx") == 0) bstyle[nvalues++] = FX;
+    else if (strcmp(arg[iarg],"fy") == 0) bstyle[nvalues++] = FY;
+    else if (strcmp(arg[iarg],"fz") == 0) bstyle[nvalues++] = FZ;
    else if (strcmp(arg[iarg],"engvib") == 0) bstyle[nvalues++] = ENGVIB;
    else if (strcmp(arg[iarg],"engrot") == 0) bstyle[nvalues++] = ENGROT;
    else if (strcmp(arg[iarg],"engtrans") == 0) bstyle[nvalues++] = ENGTRANS;
@ -127,7 +130,8 @@ ComputeBondLocal::ComputeBondLocal(LAMMPS *lmp, int narg, char **arg) :
  singleflag = 0;
  velflag = 0;
  for (int i = 0; i < nvalues; i++) {
-    if (bstyle[i] == ENGPOT || bstyle[i] == FORCE) singleflag = 1;
+    if (bstyle[i] == ENGPOT || bstyle[i] == FORCE || bstyle[i] == FX       || 
+        bstyle[i] == FY     || bstyle[i] == FZ) singleflag = 1;
    if (bstyle[i] == VELVIB || bstyle[i] == OMEGA || bstyle[i] == ENGTRANS ||
        bstyle[i] == ENGVIB || bstyle[i] == ENGROT) velflag = 1;
  }
@ -393,6 +397,15 @@ int ComputeBondLocal::compute_bonds(int flag)
          case FORCE:
            ptr[n] = sqrt(rsq)*fbond;
            break;
+          case FX:
+            ptr[n] = dx*fbond;
+            break;
+          case FY:
+            ptr[n] = dy*fbond;
+            break;
+          case FZ:
+            ptr[n] = dz*fbond;
+            break;
          case ENGVIB:
            ptr[n] = engvib;
            break;
--- a/src/compute_orientorder_atom.cpp
+++ b/src/compute_orientorder_atom.cpp
@ -43,12 +43,14 @@ using namespace std;
  #define MY_EPSILON (10.0*2.220446049250313e-16)
 #endif

+#define QEPSILON 1.0e-6
+
 /* ---------------------------------------------------------------------- */

 ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg) :
  Compute(lmp, narg, arg),
  qlist(NULL), distsq(NULL), nearest(NULL), rlist(NULL),
-  qnarray(NULL), qnm_r(NULL), qnm_i(NULL)
+  qnarray(NULL), qnm_r(NULL), qnm_i(NULL), cglist(NULL)
 {
  if (narg < 3 ) error->all(FLERR,"Illegal compute orientorder/atom command");

@ -56,6 +58,8 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg

  nnn = 12;
  cutsq = 0.0;
+  wlflag = 0;
+  wlhatflag = 0;
  qlcompflag = 0;

  // specify which orders to request
@ -96,27 +100,39 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
      if (iarg+nqlist > narg)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      qmax = 0;
-      for (int iw = 0; iw < nqlist; iw++) {
-        qlist[iw] = force->numeric(FLERR,arg[iarg+iw]);
-        if (qlist[iw] < 0)
+      for (int il = 0; il < nqlist; il++) {
+        qlist[il] = force->numeric(FLERR,arg[iarg+il]);
+        if (qlist[il] < 0)
          error->all(FLERR,"Illegal compute orientorder/atom command");
-        if (qlist[iw] > qmax) qmax = qlist[iw];
+        if (qlist[il] > qmax) qmax = qlist[il];
      }
      iarg += nqlist;
+    } else if (strcmp(arg[iarg],"wl") == 0) {
+      if (iarg+2 > narg)
+        error->all(FLERR,"Illegal compute orientorder/atom command");
+      if (strcmp(arg[iarg+1],"yes") == 0) wlflag = 1;
+      else if (strcmp(arg[iarg+1],"no") == 0) wlflag = 0;
+      else error->all(FLERR,"Illegal compute orientorder/atom command");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"wl/hat") == 0) {
+      if (iarg+2 > narg)
+        error->all(FLERR,"Illegal compute orientorder/atom command");
+      if (strcmp(arg[iarg+1],"yes") == 0) wlhatflag = 1;
+      else if (strcmp(arg[iarg+1],"no") == 0) wlhatflag = 0;
+      else error->all(FLERR,"Illegal compute orientorder/atom command");
+      iarg += 2;
    } else if (strcmp(arg[iarg],"components") == 0) {
      qlcompflag = 1;
      if (iarg+2 > narg)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      qlcomp = force->numeric(FLERR,arg[iarg+1]);
-      if (qlcomp <= 0)
-        error->all(FLERR,"Illegal compute orientorder/atom command");
      iqlcomp = -1;
-      for (int iw = 0; iw < nqlist; iw++)
-        if (qlcomp == qlist[iw]) {
-          iqlcomp = iw;
+      for (int il = 0; il < nqlist; il++)
+        if (qlcomp == qlist[il]) {
+          iqlcomp = il;
          break;
        }
-      if (iqlcomp < 0)
+      if (iqlcomp == -1)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      iarg += 2;
    } else if (strcmp(arg[iarg],"cutoff") == 0) {
@ -130,8 +146,10 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
    } else error->all(FLERR,"Illegal compute orientorder/atom command");
  }

-  if (qlcompflag) ncol = nqlist + 2*(2*qlcomp+1);
-  else ncol = nqlist;
+  ncol = nqlist;
+  if (wlflag) ncol += nqlist;
+  if (wlhatflag) ncol += nqlist;
+  if (qlcompflag) ncol += 2*(2*qlcomp+1);

  peratom_flag = 1;
  size_peratom_cols = ncol;
@ -151,7 +169,7 @@ ComputeOrientOrderAtom::~ComputeOrientOrderAtom()
  memory->destroy(qlist);
  memory->destroy(qnm_r);
  memory->destroy(qnm_i);
-
+  memory->destroy(cglist);
 }

 /* ---------------------------------------------------------------------- */
@ -166,8 +184,8 @@ void ComputeOrientOrderAtom::init()
    error->all(FLERR,"Compute orientorder/atom cutoff is "
               "longer than pairwise cutoff");

-  memory->create(qnm_r,qmax,2*qmax+1,"orientorder/atom:qnm_r");
-  memory->create(qnm_i,qmax,2*qmax+1,"orientorder/atom:qnm_i");
+  memory->create(qnm_r,nqlist,2*qmax+1,"orientorder/atom:qnm_r");
+  memory->create(qnm_i,nqlist,2*qmax+1,"orientorder/atom:qnm_i");

  // need an occasional full neighbor list

@ -183,6 +201,8 @@ void ComputeOrientOrderAtom::init()
    if (strcmp(modify->compute[i]->style,"orientorder/atom") == 0) count++;
  if (count > 1 && comm->me == 0)
    error->warning(FLERR,"More than one compute orientorder/atom");
+
+  if (wlflag || wlhatflag) init_clebsch_gordan();
 }

 /* ---------------------------------------------------------------------- */
@ -274,8 +294,8 @@ void ComputeOrientOrderAtom::compute_peratom()
      // if not nnn neighbors, order parameter = 0;

      if ((ncount == 0) || (ncount < nnn)) {
-        for (int iw = 0; iw < nqlist; iw++)
-          qn[iw] = 0.0;
+        for (int jj = 0; jj < ncol; jj++)
+          qn[jj] = 0.0;
        continue;
      }

@ -287,6 +307,7 @@ void ComputeOrientOrderAtom::compute_peratom()
      }

      calc_boop(rlist, ncount, qn, qlist, nqlist);
+
    }
  }
 }
@ -403,13 +424,12 @@ void ComputeOrientOrderAtom::select3(int k, int n, double *arr, int *iarr, doubl
 void ComputeOrientOrderAtom::calc_boop(double **rlist,
                                       int ncount, double qn[],
                                       int qlist[], int nqlist) {
-  for (int iw = 0; iw < nqlist; iw++) {
-    int n = qlist[iw];

-    qn[iw] = 0.0;
-    for(int m = 0; m < 2*n+1; m++) {
-      qnm_r[iw][m] = 0.0;
-      qnm_i[iw][m] = 0.0;
+  for (int il = 0; il < nqlist; il++) {
+    int l = qlist[il];
+    for(int m = 0; m < 2*l+1; m++) {
+      qnm_r[il][m] = 0.0;
+      qnm_i[il][m] = 0.0;
    }
  }

@ -433,24 +453,24 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
      expphi_i *= rxymaginv;
    }

-    for (int iw = 0; iw < nqlist; iw++) {
-      int n = qlist[iw];
+    for (int il = 0; il < nqlist; il++) {
+      int l = qlist[il];

-      qnm_r[iw][n] += polar_prefactor(n, 0, costheta);
+      qnm_r[il][l] += polar_prefactor(l, 0, costheta);
      double expphim_r = expphi_r;
      double expphim_i = expphi_i;
-      for(int m = 1; m <= +n; m++) {
-        double prefactor = polar_prefactor(n, m, costheta);
+      for(int m = 1; m <= +l; m++) {
+        double prefactor = polar_prefactor(l, m, costheta);
        double c_r = prefactor * expphim_r;
        double c_i = prefactor * expphim_i;
-        qnm_r[iw][m+n] += c_r;
-        qnm_i[iw][m+n] += c_i;
+        qnm_r[il][m+l] += c_r;
+        qnm_i[il][m+l] += c_i;
        if(m & 1) {
-          qnm_r[iw][-m+n] -= c_r;
-          qnm_i[iw][-m+n] += c_i;
+          qnm_r[il][-m+l] -= c_r;
+          qnm_i[il][-m+l] += c_i;
        } else {
-          qnm_r[iw][-m+n] += c_r;
-          qnm_i[iw][-m+n] -= c_i;
+          qnm_r[il][-m+l] += c_r;
+          qnm_i[il][-m+l] -= c_i;
        }
        double tmp_r = expphim_r*expphi_r - expphim_i*expphi_i;
        double tmp_i = expphim_r*expphi_i + expphim_i*expphi_r;
@ -461,30 +481,110 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
    }
  }

-  double fac = sqrt(MY_4PI) / ncount;
-  double normfac = 0.0;
-  for (int iw = 0; iw < nqlist; iw++) {
-    int n = qlist[iw];
-    double qm_sum = 0.0;
-    for(int m = 0; m < 2*n+1; m++) {
-      qm_sum += qnm_r[iw][m]*qnm_r[iw][m] + qnm_i[iw][m]*qnm_i[iw][m];
-      //      printf("Ylm^2 = %d %d %g\n",n,m,
-      //     qnm_r[iw][m]*qnm_r[iw][m] + qnm_i[iw][m]*qnm_i[iw][m]);
-    }
-    qn[iw] = fac * sqrt(qm_sum / (2*n+1));
-    if (qlcompflag && iqlcomp == iw) normfac = 1.0/sqrt(qm_sum);
+  // convert sums to averages

+  double facn = 1.0 / ncount;
+  for (int il = 0; il < nqlist; il++) {
+    int l = qlist[il];
+    for(int m = 0; m < 2*l+1; m++) {
+      qnm_r[il][m] *= facn;
+      qnm_i[il][m] *= facn;
+    }
  }

-  // output of the complex vector
+  // calculate Q_l
+  // NOTE: optional W_l_hat and components of Q_qlcomp use these stored Q_l values 
+
+  int jj = 0;
+  for (int il = 0; il < nqlist; il++) {
+    int l = qlist[il];
+    double qnormfac = sqrt(MY_4PI/(2*l+1));
+    double qm_sum = 0.0;
+    for(int m = 0; m < 2*l+1; m++)
+      qm_sum += qnm_r[il][m]*qnm_r[il][m] + qnm_i[il][m]*qnm_i[il][m];
+    qn[jj++] = qnormfac * sqrt(qm_sum);
+  }
+
+  // TODO: 
+  // 1. [done]Need to allocate extra memory in qnarray[] for this option
+  // 2. [done]Need to add keyword option
+  // 3. [done]Need to caclulate Clebsch-Gordan/Wigner 3j coefficients
+  //     (Can try getting them from boop.py first)
+  // 5. [done]Compare to bcc values in /Users/athomps/netapp/codes/MatMiner/matminer/matminer/featurizers/boop.py
+  // 6. [done]I get the right answer for W_l, but need to make sure that factor of 1/sqrt(l+1) is right for cglist
+  // 7. Add documentation
+  // 8. [done] run valgrind
+  // 9. [done] Add Wlhat
+  // 10. Update memory_usage()
+  // 11. Add exact FCC values for W_4, W_4_hat
+
+  // calculate W_l
+
+  if (wlflag) {
+    int idxcg_count = 0;
+    for (int il = 0; il < nqlist; il++) {
+      int l = qlist[il];
+      double wlsum = 0.0;
+      for(int m1 = 0; m1 < 2*l+1; m1++) {
+        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
+          int m = m1 + m2 - l;
+          double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
+          double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
+          wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
+          idxcg_count++;
+        }
+      }
+      qn[jj++] = wlsum/sqrt(2*l+1);
+    }
+  }
+
+  // calculate W_l_hat
+
+  if (wlhatflag) {
+    int idxcg_count = 0;
+    for (int il = 0; il < nqlist; il++) {
+      int l = qlist[il];
+      double wlsum = 0.0;
+      for(int m1 = 0; m1 < 2*l+1; m1++) {
+        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
+          int m = m1 + m2 - l;
+          double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
+          double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
+          wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
+          idxcg_count++;
+        }
+      }
+  //      Whats = [w/(q/np.sqrt(np.pi * 4 / (2 * l + 1)))**3 if abs(q) > 1.0e-6 else 0.0 for l,q,w in zip(range(1,max_l+1),Qs,Ws)]
+      if (qn[il] < QEPSILON)
+        qn[jj++] = 0.0;
+      else {
+        double qnormfac = sqrt(MY_4PI/(2*l+1));
+        double qnfac = qnormfac/qn[il];
+        qn[jj++] = wlsum/sqrt(2*l+1)*(qnfac*qnfac*qnfac);
+      }
+    }
+  }
+
+  // Calculate components of Q_l, for l=qlcomp

  if (qlcompflag) {
-    int j = nqlist;
-    for(int m = 0; m < 2*qlcomp+1; m++) {
-      qn[j++] = qnm_r[iqlcomp][m] * normfac;
-      qn[j++] = qnm_i[iqlcomp][m] * normfac;
+    int il = iqlcomp;
+    int l = qlcomp;
+    if (qn[il] < QEPSILON)
+      for(int m = 0; m < 2*l+1; m++) {
+        qn[jj++] = 0.0;
+        qn[jj++] = 0.0;
+      }
+    else {
+      double qnormfac = sqrt(MY_4PI/(2*l+1));
+      double qnfac = qnormfac/qn[il];
+      for(int m = 0; m < 2*l+1; m++) {
+        qn[jj++] = qnm_r[il][m] * qnfac;
+        qn[jj++] = qnm_i[il][m] * qnfac;
+      }
    }
  }
+
 }

 /* ----------------------------------------------------------------------
@ -542,3 +642,258 @@ double ComputeOrientOrderAtom::associated_legendre(int l, int m, double x)

  return p;
 }
+
+/* ----------------------------------------------------------------------
+   assign Clebsch-Gordan coefficients
+   using the quasi-binomial formula VMK 8.2.1(3)
+   specialized for case j1=j2=j=l
+------------------------------------------------------------------------- */
+
+void ComputeOrientOrderAtom::init_clebsch_gordan()
+{
+  double sum,dcg,sfaccg, sfac1, sfac2;
+  int m, aa2, bb2, cc2;
+  int ifac, idxcg_count;
+
+  idxcg_count = 0;
+  for (int il = 0; il < nqlist; il++) {
+    int l = qlist[il];
+    for(int m1 = 0; m1 < 2*l+1; m1++)
+      for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
+        idxcg_count++;
+  }
+  idxcg_max = idxcg_count;
+  memory->create(cglist, idxcg_max, "computeorientorderatom:cglist");
+
+  idxcg_count = 0;
+  for (int il = 0; il < nqlist; il++) {
+    int l = qlist[il];
+    for(int m1 = 0; m1 < 2*l+1; m1++) {
+        aa2 = m1 - l;
+        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
+          bb2 = m2 - l;
+          m = aa2 + bb2 + l;
+          
+          sum = 0.0;
+          for (int z = MAX(0, MAX(-aa2, bb2));
+               z <= MIN(l, MIN(l - aa2, l + bb2)); z++) {
+            ifac = z % 2 ? -1 : 1;
+            sum += ifac /
+              (factorial(z) *
+               factorial(l - z) *
+               factorial(l - aa2 - z) *
+               factorial(l + bb2 - z) *
+               factorial(aa2 + z) *
+               factorial(-bb2 + z));
+          }
+          
+          cc2 = m - l;
+          sfaccg = sqrt(factorial(l + aa2) *
+                        factorial(l - aa2) *
+                        factorial(l + bb2) *
+                        factorial(l - bb2) *
+                        factorial(l + cc2) *
+                        factorial(l - cc2) *
+                        (2*l + 1));
+          
+          sfac1 = factorial(3*l + 1);
+          sfac2 = factorial(l);
+          dcg = sqrt(sfac2*sfac2*sfac2 / sfac1);
+
+          cglist[idxcg_count] = sum * dcg * sfaccg;
+          idxcg_count++;
+        }
+      }
+  }
+}
+
+/* ----------------------------------------------------------------------
+   factorial n, wrapper for precomputed table
+------------------------------------------------------------------------- */
+
+double ComputeOrientOrderAtom::factorial(int n)
+{
+  if (n < 0 || n > nmaxfactorial) {
+    char str[128];
+    sprintf(str, "Invalid argument to factorial %d", n);
+    error->all(FLERR, str);
+  }
+
+  return nfac_table[n];
+}
+
+/* ----------------------------------------------------------------------
+   factorial n table, size SNA::nmaxfactorial+1
+------------------------------------------------------------------------- */
+
+const double ComputeOrientOrderAtom::nfac_table[] = {
+  1,
+  1,
+  2,
+  6,
+  24,
+  120,
+  720,
+  5040,
+  40320,
+  362880,
+  3628800,
+  39916800,
+  479001600,
+  6227020800,
+  87178291200,
+  1307674368000,
+  20922789888000,
+  355687428096000,
+  6.402373705728e+15,
+  1.21645100408832e+17,
+  2.43290200817664e+18,
+  5.10909421717094e+19,
+  1.12400072777761e+21,
+  2.5852016738885e+22,
+  6.20448401733239e+23,
+  1.5511210043331e+25,
+  4.03291461126606e+26,
+  1.08888694504184e+28,
+  3.04888344611714e+29,
+  8.8417619937397e+30,
+  2.65252859812191e+32,
+  8.22283865417792e+33,
+  2.63130836933694e+35,
+  8.68331761881189e+36,
+  2.95232799039604e+38,
+  1.03331479663861e+40,
+  3.71993326789901e+41,
+  1.37637530912263e+43,
+  5.23022617466601e+44,
+  2.03978820811974e+46,
+  8.15915283247898e+47,
+  3.34525266131638e+49,
+  1.40500611775288e+51,
+  6.04152630633738e+52,
+  2.65827157478845e+54,
+  1.1962222086548e+56,
+  5.50262215981209e+57,
+  2.58623241511168e+59,
+  1.24139155925361e+61,
+  6.08281864034268e+62,
+  3.04140932017134e+64,
+  1.55111875328738e+66,
+  8.06581751709439e+67,
+  4.27488328406003e+69,
+  2.30843697339241e+71,
+  1.26964033536583e+73,
+  7.10998587804863e+74,
+  4.05269195048772e+76,
+  2.35056133128288e+78,
+  1.3868311854569e+80,
+  8.32098711274139e+81,
+  5.07580213877225e+83,
+  3.14699732603879e+85,
+  1.98260831540444e+87,
+  1.26886932185884e+89,
+  8.24765059208247e+90,
+  5.44344939077443e+92,
+  3.64711109181887e+94,
+  2.48003554243683e+96,
+  1.71122452428141e+98,
+  1.19785716699699e+100,
+  8.50478588567862e+101,
+  6.12344583768861e+103,
+  4.47011546151268e+105,
+  3.30788544151939e+107,
+  2.48091408113954e+109,
+  1.88549470166605e+111,
+  1.45183092028286e+113,
+  1.13242811782063e+115,
+  8.94618213078297e+116,
+  7.15694570462638e+118,
+  5.79712602074737e+120,
+  4.75364333701284e+122,
+  3.94552396972066e+124,
+  3.31424013456535e+126,
+  2.81710411438055e+128,
+  2.42270953836727e+130,
+  2.10775729837953e+132,
+  1.85482642257398e+134,
+  1.65079551609085e+136,
+  1.48571596448176e+138,
+  1.3520015276784e+140,
+  1.24384140546413e+142,
+  1.15677250708164e+144,
+  1.08736615665674e+146,
+  1.03299784882391e+148,
+  9.91677934870949e+149,
+  9.61927596824821e+151,
+  9.42689044888324e+153,
+  9.33262154439441e+155,
+  9.33262154439441e+157,
+  9.42594775983835e+159,
+  9.61446671503512e+161,
+  9.90290071648618e+163,
+  1.02990167451456e+166,
+  1.08139675824029e+168,
+  1.14628056373471e+170,
+  1.22652020319614e+172,
+  1.32464181945183e+174,
+  1.44385958320249e+176,
+  1.58824554152274e+178,
+  1.76295255109024e+180,
+  1.97450685722107e+182,
+  2.23119274865981e+184,
+  2.54355973347219e+186,
+  2.92509369349301e+188,
+  3.3931086844519e+190,
+  3.96993716080872e+192,
+  4.68452584975429e+194,
+  5.5745857612076e+196,
+  6.68950291344912e+198,
+  8.09429852527344e+200,
+  9.8750442008336e+202,
+  1.21463043670253e+205,
+  1.50614174151114e+207,
+  1.88267717688893e+209,
+  2.37217324288005e+211,
+  3.01266001845766e+213,
+  3.8562048236258e+215,
+  4.97450422247729e+217,
+  6.46685548922047e+219,
+  8.47158069087882e+221,
+  1.118248651196e+224,
+  1.48727070609069e+226,
+  1.99294274616152e+228,
+  2.69047270731805e+230,
+  3.65904288195255e+232,
+  5.01288874827499e+234,
+  6.91778647261949e+236,
+  9.61572319694109e+238,
+  1.34620124757175e+241,
+  1.89814375907617e+243,
+  2.69536413788816e+245,
+  3.85437071718007e+247,
+  5.5502938327393e+249,
+  8.04792605747199e+251,
+  1.17499720439091e+254,
+  1.72724589045464e+256,
+  2.55632391787286e+258,
+  3.80892263763057e+260,
+  5.71338395644585e+262,
+  8.62720977423323e+264,
+  1.31133588568345e+267,
+  2.00634390509568e+269,
+  3.08976961384735e+271,
+  4.78914290146339e+273,
+  7.47106292628289e+275,
+  1.17295687942641e+278,
+  1.85327186949373e+280,
+  2.94670227249504e+282,
+  4.71472363599206e+284,
+  7.59070505394721e+286,
+  1.22969421873945e+289,
+  2.0044015765453e+291,
+  3.28721858553429e+293,
+  5.42391066613159e+295,
+  9.00369170577843e+297,
+  1.503616514865e+300, // nmaxfactorial = 167
+};
+
--- a/src/compute_orientorder_atom.h
+++ b/src/compute_orientorder_atom.h
@ -33,7 +33,7 @@ class ComputeOrientOrderAtom : public Compute {
  void compute_peratom();
  double memory_usage();
  double cutsq;
-  int iqlcomp, qlcomp, qlcompflag;
+  int iqlcomp, qlcomp, qlcompflag, wlflag, wlhatflag;
  int *qlist;
  int nqlist;

@ -55,6 +55,13 @@ class ComputeOrientOrderAtom : public Compute {

  double polar_prefactor(int, int, double);
  double associated_legendre(int, int, double);
+
+  static const int nmaxfactorial = 167;
+  static const double nfac_table[];
+  double factorial(int);
+  void init_clebsch_gordan();
+  double *cglist;                      // Clebsch-Gordan coeffs
+  int idxcg_max;
 };

 }
--- a/src/compute_pe.cpp
+++ b/src/compute_pe.cpp
@ -26,6 +26,7 @@
 #include "modify.h"
 #include "domain.h"
 #include "error.h"
+#include "atom_masks.h"

 using namespace LAMMPS_NS;

@ -65,6 +66,9 @@ ComputePE::ComputePE(LAMMPS *lmp, int narg, char **arg) :
      iarg++;
    }
  }
+
+  datamask_read = EMPTY_MASK;
+  datamask_modify = EMPTY_MASK;
 }

 /* ---------------------------------------------------------------------- */
--- a/src/fix_minimize.cpp
+++ b/src/fix_minimize.cpp
@ -42,8 +42,10 @@ FixMinimize::~FixMinimize()
  // delete locally stored data

  memory->destroy(peratom);
-  for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]);
-  memory->sfree(vectors);
+  if (vectors) {
+    for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]);
+    memory->sfree(vectors);
+  }
 }

 /* ---------------------------------------------------------------------- */
--- a/src/fix_minimize.h
+++ b/src/fix_minimize.h
@ -29,22 +29,22 @@ class FixMinimize : public Fix {

 public:
  FixMinimize(class LAMMPS *, int, char **);
-  ~FixMinimize();
+  virtual ~FixMinimize();
  int setmask();
-  void init() {}
+  virtual void init() {}

  double memory_usage();
-  void grow_arrays(int);
-  void copy_arrays(int, int, int);
-  int pack_exchange(int, double *);
-  int unpack_exchange(int, double *);
+  virtual void grow_arrays(int);
+  virtual void copy_arrays(int, int, int);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(int, double *);

-  void add_vector(int);
+  virtual void add_vector(int);
  double *request_vector(int);
  void store_box();
  void reset_coords();

- private:
+ protected:
  int nvector;
  int *peratom;
  double **vectors;
--- a/src/input.cpp
+++ b/src/input.cpp
@ -1672,7 +1672,7 @@ void Input::min_style()
 {
  if (domain->box_exist == 0)
    error->all(FLERR,"Min_style command before simulation box is defined");
-  update->create_minimize(narg,arg);
+  update->create_minimize(narg,arg,1);
 }

 /* ---------------------------------------------------------------------- */
--- a/src/min.cpp
+++ b/src/min.cpp
@ -68,6 +68,8 @@ Min::Min(LAMMPS *lmp) : Pointers(lmp)
  requestor = NULL;

  external_force_clear = 0;
+
+  kokkosable = 0;
 }

 /* ---------------------------------------------------------------------- */
@ -93,6 +95,10 @@ Min::~Min()

 void Min::init()
 {
+  if (lmp->kokkos && !kokkosable)
+    error->all(FLERR,"Must use a Kokkos-enabled min style (e.g. min_style cg/kk) "
+     "with Kokkos minimize");
+
  // create fix needed for storing atom-based quantities
  // will delete it at end of run

--- a/src/min.h
+++ b/src/min.h
@ -31,16 +31,16 @@ class Min : protected Pointers {
  Min(class LAMMPS *);
  virtual ~Min();
  virtual void init();
-  void setup(int flag=1);
-  void setup_minimal(int);
-  void run(int);
+  virtual void setup(int flag=1);
+  virtual void setup_minimal(int);
+  virtual void run(int);
  void cleanup();
  int request(class Pair *, int, double);
  virtual bigint memory_usage() {return 0;}
  void modify_params(int, char **);
  virtual int modify_param(int, char **) {return 0;}
-  double fnorm_sqr();
-  double fnorm_inf();
+  virtual double fnorm_sqr();
+  virtual double fnorm_inf();

  virtual void init_style() {}
  virtual void setup_style() = 0;
@ -97,10 +97,12 @@ class Min : protected Pointers {
  double *extra_max;          // max allowed change per iter for atom's var
  class Pair **requestor;     // Pair that stores/manipulates the variable

+  int kokkosable;             // 1 if this min style supports Kokkos
+
  int neigh_every,neigh_delay,neigh_dist_check;  // neighboring params

-  double energy_force(int);
-  void force_clear();
+  virtual double energy_force(int);
+  virtual void force_clear();

  double compute_force_norm_sqr();
  double compute_force_norm_inf();
--- a/src/minimize.cpp
+++ b/src/minimize.cpp
@ -52,9 +52,6 @@ void Minimize::command(int narg, char **arg)
  if (update->laststep < 0)
    error->all(FLERR,"Too many iterations");

-  if (lmp->kokkos)
-    error->all(FLERR,"Cannot yet use minimize with Kokkos");
-
  lmp->init();
  timer->init_timeout();
  update->minimize->setup();
--- a/src/update.cpp
+++ b/src/update.cpp
@ -78,7 +78,7 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp)
  create_integrate(1,&str,1);

  str = (char *) "cg";
-  create_minimize(1,&str);
+  create_minimize(1,&str,1);
 }

 /* ---------------------------------------------------------------------- */
@ -376,22 +376,69 @@ Integrate *Update::integrate_creator(LAMMPS *lmp, int narg, char ** arg)

 /* ---------------------------------------------------------------------- */

-void Update::create_minimize(int narg, char **arg)
+void Update::create_minimize(int narg, char **arg, int trysuffix)
 {
-  if (narg != 1) error->all(FLERR,"Illegal min_style command");
+  if (narg < 1) error->all(FLERR,"Illegal run_style command");

  delete [] minimize_style;
  delete minimize;

-  if (minimize_map->find(arg[0]) != minimize_map->end()) {
-    MinimizeCreator minimize_creator = (*minimize_map)[arg[0]];
-    minimize = minimize_creator(lmp);
-  }
-  else error->all(FLERR,"Illegal min_style command");
+  int sflag;
+  new_minimize(arg[0],narg-1,&arg[1],trysuffix,sflag);

-  int n = strlen(arg[0]) + 1;
-  minimize_style = new char[n];
-  strcpy(minimize_style,arg[0]);
+  if (sflag) {
+    char estyle[256];
+    if (sflag == 1) snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix);
+    else snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix2);
+    int n = strlen(estyle) + 1;
+    minimize_style = new char[n];
+    strcpy(minimize_style,estyle);
+  } else {
+    int n = strlen(arg[0]) + 1;
+    minimize_style = new char[n];
+    strcpy(minimize_style,arg[0]);
+  }
+}
+
+/* ----------------------------------------------------------------------
+   create the Minimize style, first with suffix appended
+------------------------------------------------------------------------- */
+
+void Update::new_minimize(char *style, int narg, char **arg,
+                           int trysuffix, int &sflag)
+{
+  if (trysuffix && lmp->suffix_enable) {
+    if (lmp->suffix) {
+      sflag = 1;
+      char estyle[256];
+      snprintf(estyle,256,"%s/%s",style,lmp->suffix);
+      if (minimize_map->find(estyle) != minimize_map->end()) {
+        MinimizeCreator minimize_creator = (*minimize_map)[estyle];
+        minimize = minimize_creator(lmp);
+        return;
+      }
+    }
+
+    if (lmp->suffix2) {
+      sflag = 2;
+      char estyle[256];
+      snprintf(estyle,256,"%s/%s",style,lmp->suffix2);
+      if (minimize_map->find(estyle) != minimize_map->end()) {
+        MinimizeCreator minimize_creator = (*minimize_map)[estyle];
+        minimize = minimize_creator(lmp);
+        return;
+      }
+    }
+  }
+
+  sflag = 0;
+  if (minimize_map->find(style) != minimize_map->end()) {
+    MinimizeCreator minimize_creator = (*minimize_map)[style];
+    minimize = minimize_creator(lmp);
+    return;
+  }
+
+  error->all(FLERR,"Illegal minimize style");
 }

 /* ----------------------------------------------------------------------
--- a/src/update.h
+++ b/src/update.h
@ -63,7 +63,7 @@ class Update : protected Pointers {
  void init();
  void set_units(const char *);
  void create_integrate(int, char **, int);
-  void create_minimize(int, char **);
+  void create_minimize(int, char **, int);
  void reset_timestep(int, char **);
  void reset_timestep(bigint);
  void update_time();
@ -71,6 +71,7 @@ class Update : protected Pointers {

 private:
  void new_integrate(char *, int, char **, int, int &);
+  void new_minimize(char *, int, char **, int, int &);

  template <typename T> static Integrate *integrate_creator(LAMMPS *, int, char **);
  template <typename T> static Min *minimize_creator(LAMMPS *);