Merge remote-tracking branch 'origin/master' into lammps_gjf

2019-09-17 12:08:12 -07:00
parent 971f4763e3 d357259cc4
commit eba10b10e8
51 changed files with 3356 additions and 225 deletions
--- a/cmake/Modules/Packages/KOKKOS.cmake
+++ b/cmake/Modules/Packages/KOKKOS.cmake
@ -17,6 +17,8 @@ if(PKG_KOKKOS)
                         ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/min_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/min_linesearch_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp
                         ${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp
--- a/doc/src/Commands_bond.txt
+++ b/doc/src/Commands_bond.txt
@ -108,7 +108,7 @@ OPT.
 "class2 (ko)"_dihedral_class2.html,
 "cosine/shift/exp (o)"_dihedral_cosine_shift_exp.html,
 "fourier (io)"_dihedral_fourier.html,
-"harmonic (io)"_dihedral_harmonic.html,
+"harmonic (iko)"_dihedral_harmonic.html,
 "helix (o)"_dihedral_helix.html,
 "multi/harmonic (o)"_dihedral_multi_harmonic.html,
 "nharmonic (o)"_dihedral_nharmonic.html,
--- a/doc/src/Run_options.txt
+++ b/doc/src/Run_options.txt
@ -126,9 +126,10 @@ are intended for computational work like running LAMMPS.  By default
 Ng = 1 and Ns is not set.
 Depending on which flavor of MPI you are running, LAMMPS will look for
-one of these 3 environment variables
+one of these 4 environment variables
 SLURM_LOCALID (various MPI variants compiled with SLURM support)
 MPT_LRANK (HPE MPI)
 MV2_COMM_WORLD_LOCAL_RANK (Mvapich)
 OMPI_COMM_WORLD_LOCAL_RANK (OpenMPI) :pre
--- a/doc/src/comm_modify.txt
+++ b/doc/src/comm_modify.txt
@ -40,11 +40,12 @@ coordinates and other properties are exchanged between neighboring
 processors and stored as properties of ghost atoms.
 NOTE: These options apply to the currently defined comm style.  When
-you specify a "comm_style"_comm_style.html command, all communication
+you specify a "comm_style"_comm_style.html or
-settings are restored to their default values, including those
+"read_restart"_read_restart.html command, all communication settings
 are restored to their default or stored values, including those
 previously reset by a comm_modify command.  Thus if your input script
-specifies a comm_style command, you should use the comm_modify command
+specifies a comm_style or read_restart command, you should use the
-after it.
+comm_modify command after it.
 The {mode} keyword determines whether a single or multiple cutoff
 distances are used to determine which atoms to communicate.
--- a/doc/src/compute_bond_local.txt
+++ b/doc/src/compute_bond_local.txt
@ -15,10 +15,11 @@ compute ID group-ID bond/local value1 value2 ... keyword args ... :pre
 ID, group-ID are documented in "compute"_compute.html command :ulb,l
 bond/local = style name of this compute command :l
 one or more values may be appended :l
-value = {dist} or {engpot} or {force} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} or {v_name} :l
+value = {dist} or {engpot} or {force} or {fx} or {fy} or {fz} or {engvib} or {engrot} or {engtrans} or {omega} or {velvib} or {v_name} :l
  {dist} = bond distance
  {engpot} = bond potential energy
  {force} = bond force :pre
  {fx},{fy},{fz} = components of bond force
  {engvib} = bond kinetic energy of vibration
  {engrot} = bond kinetic energy of rotation
  {engtrans} = bond kinetic energy of translation
@ -38,6 +39,7 @@ keyword = {set} :l
 compute 1 all bond/local engpot
 compute 1 all bond/local dist engpot force :pre
 compute 1 all bond/local dist fx fy fz :pre
 compute 1 all angle/local dist v_distsq set dist d :pre
 [Description:]
@ -59,6 +61,9 @@ based on the current separation of the pair of atoms in the bond.
 The value {force} is the magnitude of the force acting between the
 pair of atoms in the bond.
 The values {fx}, {fy}, and {fz} are the xyz components of
 {force} between the pair of atoms in the bond.
 The remaining properties are all computed for motion of the two atoms
 relative to the center of mass (COM) velocity of the 2 atoms in the
 bond.
--- a/doc/src/compute_orientorder_atom.txt
+++ b/doc/src/compute_orientorder_atom.txt
@ -19,6 +19,8 @@ keyword = {cutoff} or {nnn} or {degrees} or {components}
  {cutoff} value = distance cutoff
  {nnn} value = number of nearest neighbors
  {degrees} values = nlvalues, l1, l2,...
  {wl} value = yes or no
  {wl/hat} value = yes or no
  {components} value = ldegree  :pre
 :ule
@ -27,7 +29,8 @@ keyword = {cutoff} or {nnn} or {degrees} or {components}
 compute 1 all orientorder/atom
 compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5
-compute 1 all orientorder/atom degrees 4 6 components 6 nnn NULL cutoff 3.0 :pre
+compute 1 all orientorder/atom wl/hat yes
 compute 1 all orientorder/atom components 6 :pre
 [Description:]
@ -48,7 +51,7 @@ neighbors of the central atom.
 The angles theta and phi are the standard spherical polar angles
 defining the direction of the bond vector {rij}.
 The second equation defines {Ql}, which is a
-rotationally invariant scalar quantity obtained by summing
+rotationally invariant non-negative amplitude obtained by summing
 over all the components of degree {l}.
 The optional keyword {cutoff} defines the distance cutoff
@ -63,7 +66,7 @@ specified distance cutoff are used.
 The optional keyword {degrees} defines the list of order parameters to
 be computed.  The first argument {nlvalues} is the number of order
-parameters. This is followed by that number of integers giving the
+parameters. This is followed by that number of non-negative integers giving the
 degree of each order parameter. Because {Q}2 and all odd-degree order
 parameters are zero for atoms in cubic crystals (see
 "Steinhardt"_#Steinhardt), the default order parameters are {Q}4,
@ -71,7 +74,20 @@ parameters are zero for atoms in cubic crystals (see
 = sqrt(7/3)/8 = 0.19094....  The numerical values of all order
 parameters up to {Q}12 for a range of commonly encountered
 high-symmetry structures are given in Table I of "Mickel et
-al."_#Mickel.
+al."_#Mickel, and these can be reproduced with this compute
 The optional keyword {wl} will output the third-order invariants {Wl} 
 (see Eq. 1.4 in "Steinhardt"_#Steinhardt) for the same degrees as
 for the {Ql} parameters. For the FCC crystal with {nnn} =12,
 {W}4 = -sqrt(14/143).(49/4096)/Pi^1.5 = -0.0006722136...
 The optional keyword {wl/hat} will output the normalized third-order 
 invariants {Wlhat} (see Eq. 2.2 in "Steinhardt"_#Steinhardt) 
 for the same degrees as for the {Ql} parameters. For the FCC crystal 
 with {nnn} =12, {W}4hat = -7/3*sqrt(2/429) = -0.159317...The numerical
 values of {Wlhat} for a range of commonly encountered high-symmetry
 structures are given in Table I of "Steinhardt"_#Steinhardt, and these
 can be reproduced with this keyword.
 The optional keyword {components} will output the components of the
 normalized complex vector {Ybar_lm} of degree {ldegree}, which must be
@ -82,7 +98,7 @@ particles, as discussed in "ten Wolde"_#tenWolde2.
 The value of {Ql} is set to zero for atoms not in the
 specified compute group, as well as for atoms that have less than
-{nnn} neighbors within the distance cutoff.
+{nnn} neighbors within the distance cutoff, unless {nnn} is NULL.
 The neighbor list needed to compute this quantity is constructed each
 time the calculation is performed (i.e. each time a snapshot of atoms
@ -108,6 +124,12 @@ This compute calculates a per-atom array with {nlvalues} columns,
 giving the {Ql} values for each atom, which are real numbers on the
 range 0 <= {Ql} <= 1.
 If the keyword {wl} is set to yes, then the {Wl} values for each
 atom will be added to the output array, which are real numbers.
 If the keyword {wl/hat} is set to yes, then the {Wl_hat} 
 values for each atom will be added to the output array, which are real numbers.
 If the keyword {components} is set, then the real and imaginary parts
 of each component of (normalized) {Ybar_lm} will be added to the
 output array in the following order: Re({Ybar_-m}) Im({Ybar_-m})
@ -130,7 +152,8 @@ hexorder/atom"_compute_hexorder_atom.html
 [Default:]
 The option defaults are {cutoff} = pair style cutoff, {nnn} = 12,
-{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12.
+{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12,
 {wl} = no, {wl/hat} = no, and {components} off
 :line
--- a/doc/src/dihedral_harmonic.txt
+++ b/doc/src/dihedral_harmonic.txt
@ -8,6 +8,7 @@
 dihedral_style harmonic command :h3
 dihedral_style harmonic/intel command :h3
 dihedral_style harmonic/kk command :h3
 dihedral_style harmonic/omp command :h3
 [Syntax:]
--- a/doc/src/min_style.txt
+++ b/doc/src/min_style.txt
@ -11,7 +11,7 @@ min_style command :h3
 min_style style :pre
-style = {cg} or {hftn} or {sd} or {quickmin} or {fire} or {spin} :ul
+style = {cg} or {cg/kk} or {hftn} or {sd} or {quickmin} or {fire} or {spin} :ul
 [Examples:]
@ -74,9 +74,34 @@ defined via the "timestep"_timestep.html command.  Often they will
 converge more quickly if you use a timestep about 10x larger than you
 would normally use for dynamics simulations.
-NOTE: The {quickmin}, {fire}, and {hftn} styles do not yet support the
+NOTE: The {quickmin}, {fire}, {hftn}, and {cg/kk} styles do not yet 
-use of the "fix box/relax"_fix_box_relax.html command or minimizations
+support the use of the "fix box/relax"_fix_box_relax.html command or 
-involving the electron radius in "eFF"_pair_eff.html models.
+minimizations involving the electron radius in "eFF"_pair_eff.html 
 models. 
 :line
 Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
 functionally the same as the corresponding style without the suffix.
 They have been optimized to run faster, depending on your available
 hardware, as discussed on the "Speed packages"_Speed_packages.html doc
 page.  The accelerated styles take the same arguments and should
 produce the same results, except for round-off and precision issues.
 These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
 USER-OMP and OPT packages, respectively.  They are only enabled if
 LAMMPS was built with those packages.  See the "Build
 package"_Build_package.html doc page for more info.
 You can specify the accelerated styles explicitly in your input script
 by including their suffix, or you can use the "-suffix command-line
 switch"_Run_options.html when you invoke LAMMPS, or you can use the
 "suffix"_suffix.html command in your input script.
 See the "Speed packages"_Speed_packages.html doc page for more
 instructions on how to use the accelerated styles effectively.
 :line
 [Restrictions:] none
--- a/doc/src/minimize.txt
+++ b/doc/src/minimize.txt
@ -7,6 +7,7 @@
 :line
 minimize command :h3
 minimize/kk command :h3
 [Syntax:]
@ -256,6 +257,28 @@ info in the Restrictions section below.
 :line
 Styles with a {gpu}, {intel}, {kk}, {omp}, or {opt} suffix are
 functionally the same as the corresponding style without the suffix.
 They have been optimized to run faster, depending on your available
 hardware, as discussed on the "Speed packages"_Speed_packages.html doc
 page.  The accelerated styles take the same arguments and should
 produce the same results, except for round-off and precision issues.
 These accelerated styles are part of the GPU, USER-INTEL, KOKKOS,
 USER-OMP and OPT packages, respectively.  They are only enabled if
 LAMMPS was built with those packages.  See the "Build
 package"_Build_package.html doc page for more info.
 You can specify the accelerated styles explicitly in your input script
 by including their suffix, or you can use the "-suffix command-line
 switch"_Run_options.html when you invoke LAMMPS, or you can use the
 "suffix"_suffix.html command in your input script.
 See the "Speed packages"_Speed_packages.html doc page for more
 instructions on how to use the accelerated styles effectively.
 :line
 [Restrictions:]
 Features that are not yet implemented are listed here, in case someone
--- a/examples/steinhardt/in.bcc
+++ b/examples/steinhardt/in.bcc
@ -0,0 +1,91 @@
 # Steinhardt-Nelson bond orientational order parameters for BCC
 variable  	rcut equal 3.0
 boundary	p p p
 atom_style	atomic
 neighbor	0.3 bin
 neigh_modify	delay 5
 # create geometry
 lattice		bcc 1.0
 region		box block 0 3 0 3 0 3
 create_box	1 box
 create_atoms	1 box
 mass		1 1.0
 # LJ potentials
 pair_style	lj/cut ${rcut}
 pair_coeff	* * 1.0 1.0 ${rcut}
 # 14 neighbors, perfect crystal 
 compute 	qlwlhat all orientorder/atom degrees 6 2 4 6 8 10 12 nnn 14 wl/hat yes
 compute 	avql all reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] c_qlwlhat[6] 
 compute 	avwlhat all reduce ave c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10] c_qlwlhat[11] c_qlwlhat[12] 
 thermo_style    custom step temp epair etotal c_avql[*] c_avwlhat[*]
 run		0
 # check Q_l values
 print " "
 print "*******************************************************************"
 print " "
 print "Comparison with reference values of Q_l "
 print "   [Table I in W. Mickel, S. C. Kapfer," 
 print "   G. E. Schroeder-Turkand, K. Mecke, "
 print "   J. Chem. Phys. 138, 044501 (2013).]"
 print " "
 variable 	q2ref equal 0.0
 variable 	q4ref equal 0.036
 variable 	q6ref equal 0.511
 variable 	q8ref equal 0.429
 variable 	q10ref equal 0.195
 variable 	q12ref equal 0.405
 variable 	q2 equal c_avql[1]
 variable 	q4 equal c_avql[2]
 variable 	q6 equal c_avql[3]
 variable 	q8 equal c_avql[4]
 variable 	q10 equal c_avql[5]
 variable 	q12 equal c_avql[6]
 print "q2     = $(v_q2:%10.6f) delta = $(v_q2-v_q2ref:%10.4f)"
 print "q4     = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)"
 print "q6     = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)"
 print "q8     = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)"
 print "q10    = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)"
 print "q12    = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)"
 # check W_l_hat values
 print " "
 print "Comparison with reference values of W_l_hat"
 print "   [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, "
 print "   Phys. Rev. B 28, 784 (1983).]"
 print " "
 variable 	w4hatref equal 0.159317
 variable 	w6hatref equal 0.013161
 variable 	w8hatref equal -0.058455
 variable 	w10hatref equal -0.090130
 variable 	w4hat equal c_avwlhat[2]
 variable 	w6hat equal c_avwlhat[3]
 variable 	w8hat equal c_avwlhat[4]
 variable 	w10hat equal c_avwlhat[5]
 print "w4hat  = $(v_w4hat:%10.6f) delta = $(v_w4hat-v_w4hatref:%10.6f)"
 print "w6hat  = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)"
 print "w8hat  = $(v_w8hat:%10.6f) delta = $(v_w8hat-v_w8hatref:%10.6f)"
 print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)"
 print " "
 print "*******************************************************************"
 print " "
--- a/examples/steinhardt/in.fcc
+++ b/examples/steinhardt/in.fcc
@ -0,0 +1,88 @@
 # Steinhardt-Nelson bond orientational order parameters for FCC
 variable  	rcut equal 3.0
 boundary	p p p
 atom_style	atomic
 neighbor	0.3 bin
 neigh_modify	delay 5
 # create geometry
 lattice		fcc 1.0
 region		box block 0 3 0 3 0 3
 create_box	1 box
 create_atoms	1 box
 mass		1 1.0
 # LJ potentials
 pair_style	lj/cut ${rcut}
 pair_coeff	* * 1.0 1.0 ${rcut}
 # 12 neighbors, perfect crystal
 compute 	qlwlhat all orientorder/atom wl/hat yes
 compute 	avql all reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] 
 compute 	avwlhat all reduce ave c_qlwlhat[6] c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10]
 thermo_style    custom step temp epair etotal c_avql[*] c_avwlhat[*]
 run		0
 # check Q_l values
 print " "
 print "*******************************************************************"
 print " "
 print "Comparison with reference values of Q_l "
 print "   [Table I in W. Mickel, S. C. Kapfer," 
 print "   G. E. Schroeder-Turkand, K. Mecke, "
 print "   J. Chem. Phys. 138, 044501 (2013).]"
 print " "
 variable 	q4ref equal 0.190
 variable 	q6ref equal 0.575
 variable 	q8ref equal 0.404
 variable 	q10ref equal 0.013
 variable 	q12ref equal 0.600
 variable 	q4 equal c_avql[1]
 variable 	q6 equal c_avql[2]
 variable 	q8 equal c_avql[3]
 variable 	q10 equal c_avql[4]
 variable 	q12 equal c_avql[5]
 print "q4     = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)"
 print "q6     = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)"
 print "q8     = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)"
 print "q10    = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)"
 print "q12    = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)"
 # check W_l_hat values
 print " "
 print "Comparison with reference values of W_l_hat"
 print "   [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, "
 print "   Phys. Rev. B 28, 784 (1983).]"
 print " "
 variable 	w4hatref equal -0.159316
 variable 	w6hatref equal -0.013161
 variable 	w8hatref equal 0.058454
 variable 	w10hatref equal -0.090128
 variable 	w4hat equal c_avwlhat[1]
 variable 	w6hat equal c_avwlhat[2]
 variable 	w8hat equal c_avwlhat[3]
 variable 	w10hat equal c_avwlhat[4]
 print "w4hat  = $(v_w4hat:%10.6f) delta = $(v_w4hat-v_w4hatref:%10.6f)"
 print "w6hat  = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)"
 print "w8hat  = $(v_w8hat:%10.6f) delta = $(v_w8hat-v_w8hatref:%10.6f)"
 print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)"
 print " "
 print "*******************************************************************"
 print " "
--- a/examples/steinhardt/in.icos
+++ b/examples/steinhardt/in.icos
@ -0,0 +1,106 @@
 # Steinhardt-Nelson bond orientational order parameters for icosahedral cluster
 # W_6_hat is sensitive to icosohedral order
 variable        rcut equal 1.2 # a bit bigger than LJ Rmin 
 variable        rcutred equal 0.75 # a bit bigger than 1/sqrt(2)
 # create a perfect fcc crystallite
 atom_style	atomic
 boundary	s s s
 lattice		fcc 1.0 # neighbors at LJ Rmin
 region		box block 0 2 0 2 0 2
 create_box	1 box
 create_atoms	1 box
 mass		1 1.0
 region 		centralatom sphere 1 1 1 0.0 side in
 group 		centralatom region centralatom
 region 		mysphere sphere 1 1 1 ${rcutred} side out
 delete_atoms 	region mysphere
 # LJ potential
 pair_style	lj/cut 100.0
 pair_coeff	* * 1.0 1.0 100.0
 # define output for central atom
 compute 	qlwlhat all orientorder/atom wl/hat yes cutoff ${rcut} nnn NULL
 compute 	avql centralatom reduce ave c_qlwlhat[1] c_qlwlhat[2] c_qlwlhat[3] c_qlwlhat[4] c_qlwlhat[5] 
 compute 	avwlhat centralatom reduce ave c_qlwlhat[6] c_qlwlhat[7] c_qlwlhat[8] c_qlwlhat[9] c_qlwlhat[10] 
 variable	q6 equal c_avql[2]
 variable	w6hat equal c_avwlhat[2]
 compute 	mype all pe/atom
 compute 	centralatompe centralatom reduce ave c_mype
 # gently equilibrate the crystallite
 velocity     	all create 0.001 482748
 fix	     	1 all nve
 neighbor	0.3 bin
 neigh_modify	every 1 check no delay 0
 timestep	0.003
 thermo_style    custom step temp epair etotal c_centralatompe v_q6 v_w6hat
 thermo		10
 run		10
 # quench to icosehedral cluster
 minimize      	1.0e-10 1.0e-6 100 1000
 # check Q_l values
 print " "
 print "*******************************************************************"
 print " "
 print "Comparison with reference values of Q_l "
 print "   [Table I in W. Mickel, S. C. Kapfer," 
 print "   G. E. Schroeder-Turkand, K. Mecke, "
 print "   J. Chem. Phys. 138, 044501 (2013).]"
 print " "
 variable 	q4ref equal 0.0
 variable 	q6ref equal 0.663
 variable 	q8ref equal 0.0
 variable 	q10ref equal 0.363
 variable 	q12ref equal 0.585
 variable 	q4 equal c_avql[1]
 variable 	q6 equal c_avql[2]
 variable 	q8 equal c_avql[3]
 variable 	q10 equal c_avql[4]
 variable 	q12 equal c_avql[5]
 print "q4     = $(v_q4:%10.6f) delta = $(v_q4-v_q4ref:%10.4f)"
 print "q6     = $(v_q6:%10.6f) delta = $(v_q6-v_q6ref:%10.4f)"
 print "q8     = $(v_q8:%10.6f) delta = $(v_q8-v_q8ref:%10.4f)"
 print "q10    = $(v_q10:%10.6f) delta = $(v_q10-v_q10ref:%10.4f)"
 print "q12    = $(v_q12:%10.6f) delta = $(v_q12-v_q12ref:%10.4f)"
 # check W_l_hat values
 print " "
 print "Comparison with reference values of W_l_hat"
 print "   [Table I in P. Steinhardt, D. Nelson, and M. Ronchetti, "
 print "   Phys. Rev. B 28, 784 (1983).]"
 print " "
 variable 	w6hatref equal -0.169754
 variable 	w10hatref equal -0.093967
 variable 	w4hat equal c_avwlhat[1]
 variable 	w6hat equal c_avwlhat[2]
 variable 	w8hat equal c_avwlhat[3]
 variable 	w10hat equal c_avwlhat[4]
 variable 	w12hat equal c_avwlhat[5]
 print "w6hat  = $(v_w6hat:%10.6f) delta = $(v_w6hat-v_w6hatref:%10.6f)"
 print "w10hat = $(v_w10hat:%10.6f) delta = $(v_w10hat-v_w10hatref:%10.6f)"
 print " "
 print "*******************************************************************"
 print " "
--- a/src/KOKKOS/Install.sh
+++ b/src/KOKKOS/Install.sh
@ -91,6 +91,8 @@ action dihedral_charmm_kokkos.cpp dihedral_charmm.cpp
 action dihedral_charmm_kokkos.h dihedral_charmm.h
 action dihedral_class2_kokkos.cpp dihedral_class2.cpp 
 action dihedral_class2_kokkos.h dihedral_class2.h
 action dihedral_harmonic_kokkos.cpp dihedral_harmonic.cpp 
 action dihedral_harmonic_kokkos.h dihedral_harmonic.h
 action dihedral_opls_kokkos.cpp dihedral_opls.cpp
 action dihedral_opls_kokkos.h dihedral_opls.h
 action domain_kokkos.cpp
@ -107,6 +109,8 @@ action fix_gravity_kokkos.cpp
 action fix_gravity_kokkos.h
 action fix_langevin_kokkos.cpp
 action fix_langevin_kokkos.h
 action fix_minimize_kokkos.cpp
 action fix_minimize_kokkos.h
 action fix_neigh_history_kokkos.cpp
 action fix_neigh_history_kokkos.h
 action fix_nh_kokkos.cpp
@ -179,6 +183,12 @@ action nbin_ssa_kokkos.cpp nbin_ssa.cpp
 action nbin_ssa_kokkos.h nbin_ssa.h
 action math_special_kokkos.cpp
 action math_special_kokkos.h
 action min_cg_kokkos.cpp
 action min_cg_kokkos.h
 action min_kokkos.cpp
 action min_kokkos.h
 action min_linesearch_kokkos.cpp
 action min_linesearch_kokkos.h
 action pair_buck_coul_cut_kokkos.cpp
 action pair_buck_coul_cut_kokkos.h
 action pair_buck_coul_long_kokkos.cpp pair_buck_coul_long.cpp
--- a/src/KOKKOS/dihedral_harmonic_kokkos.cpp
+++ b/src/KOKKOS/dihedral_harmonic_kokkos.cpp
@ -0,0 +1,537 @@
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
   Contributing author: Stan Moore (SNL)
 ------------------------------------------------------------------------- */
 #include "dihedral_harmonic_kokkos.h"
 #include <cmath>
 #include <cstdlib>
 #include "atom_kokkos.h"
 #include "comm.h"
 #include "neighbor_kokkos.h"
 #include "domain.h"
 #include "force.h"
 #include "update.h"
 #include "memory_kokkos.h"
 #include "error.h"
 #include "atom_masks.h"
 using namespace LAMMPS_NS;
 #define TOLERANCE 0.05
 #define SMALL     0.001
 #define SMALLER   0.00001
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 DihedralHarmonicKokkos<DeviceType>::DihedralHarmonicKokkos(LAMMPS *lmp) : DihedralHarmonic(lmp)
 {
  atomKK = (AtomKokkos *) atom;
  neighborKK = (NeighborKokkos *) neighbor;
  execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
  datamask_read = X_MASK | F_MASK | Q_MASK | ENERGY_MASK | VIRIAL_MASK;
  datamask_modify = F_MASK | ENERGY_MASK | VIRIAL_MASK;
  k_warning_flag = DAT::tdual_int_scalar("Dihedral:warning_flag");
  d_warning_flag = k_warning_flag.view<DeviceType>();
  h_warning_flag = k_warning_flag.h_view;
 }
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 DihedralHarmonicKokkos<DeviceType>::~DihedralHarmonicKokkos()
 {
  if (!copymode) {
    memoryKK->destroy_kokkos(k_eatom,eatom);
    memoryKK->destroy_kokkos(k_vatom,vatom);
  }
 }
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 void DihedralHarmonicKokkos<DeviceType>::compute(int eflag_in, int vflag_in)
 {
  eflag = eflag_in;
  vflag = vflag_in;
  ev_init(eflag,vflag,0);
  // reallocate per-atom arrays if necessary
  if (eflag_atom) {
    memoryKK->destroy_kokkos(k_eatom,eatom);
    memoryKK->create_kokkos(k_eatom,eatom,maxeatom,"dihedral:eatom");
    d_eatom = k_eatom.view<DeviceType>();
  }
  if (vflag_atom) {
    memoryKK->destroy_kokkos(k_vatom,vatom);
    memoryKK->create_kokkos(k_vatom,vatom,maxvatom,6,"dihedral:vatom");
    d_vatom = k_vatom.view<DeviceType>();
  }
  k_k.template sync<DeviceType>();
  k_cos_shift.template sync<DeviceType>();
  k_sin_shift.template sync<DeviceType>();
  k_sign.template sync<DeviceType>();
  k_multiplicity.template sync<DeviceType>();
  x = atomKK->k_x.view<DeviceType>();
  f = atomKK->k_f.view<DeviceType>();
  neighborKK->k_dihedrallist.template sync<DeviceType>();
  dihedrallist = neighborKK->k_dihedrallist.view<DeviceType>();
  int ndihedrallist = neighborKK->ndihedrallist;
  nlocal = atom->nlocal;
  newton_bond = force->newton_bond;
  h_warning_flag() = 0;
  k_warning_flag.template modify<LMPHostType>();
  k_warning_flag.template sync<DeviceType>();
  copymode = 1;
  // loop over neighbors of my atoms
  EV_FLOAT ev;
  if (evflag) {
    if (newton_bond) {
      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<1,1> >(0,ndihedrallist),*this,ev);
    } else {
      Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<0,1> >(0,ndihedrallist),*this,ev);
    }
  } else {
    if (newton_bond) {
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<1,0> >(0,ndihedrallist),*this);
    } else {
      Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagDihedralHarmonicCompute<0,0> >(0,ndihedrallist),*this);
    }
  }
  // error check
  k_warning_flag.template modify<DeviceType>();
  k_warning_flag.template sync<LMPHostType>();
  if (h_warning_flag())
    error->warning(FLERR,"Dihedral problem",0);
  if (eflag_global) energy += ev.evdwl;
  if (vflag_global) {
    virial[0] += ev.v[0];
    virial[1] += ev.v[1];
    virial[2] += ev.v[2];
    virial[3] += ev.v[3];
    virial[4] += ev.v[4];
    virial[5] += ev.v[5];
  }
  if (eflag_atom) {
    k_eatom.template modify<DeviceType>();
    k_eatom.template sync<LMPHostType>();
  }
  if (vflag_atom) {
    k_vatom.template modify<DeviceType>();
    k_vatom.template sync<LMPHostType>();
  }
  copymode = 0;
 }
 template<class DeviceType>
 template<int NEWTON_BOND, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void DihedralHarmonicKokkos<DeviceType>::operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int &n, EV_FLOAT& ev) const {
  // The f array is atomic
  Kokkos::View<F_FLOAT*[3], typename DAT::t_f_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > a_f = f;
  const int i1 = dihedrallist(n,0);
  const int i2 = dihedrallist(n,1);
  const int i3 = dihedrallist(n,2);
  const int i4 = dihedrallist(n,3);
  const int type = dihedrallist(n,4);
  // 1st bond
  const F_FLOAT vb1x = x(i1,0) - x(i2,0);
  const F_FLOAT vb1y = x(i1,1) - x(i2,1);
  const F_FLOAT vb1z = x(i1,2) - x(i2,2);
  // 2nd bond
  const F_FLOAT vb2x = x(i3,0) - x(i2,0);
  const F_FLOAT vb2y = x(i3,1) - x(i2,1);
  const F_FLOAT vb2z = x(i3,2) - x(i2,2);
  const F_FLOAT vb2xm = -vb2x;
  const F_FLOAT vb2ym = -vb2y;
  const F_FLOAT vb2zm = -vb2z;
  // 3rd bond
  const F_FLOAT vb3x = x(i4,0) - x(i3,0);
  const F_FLOAT vb3y = x(i4,1) - x(i3,1);
  const F_FLOAT vb3z = x(i4,2) - x(i3,2);
  // c,s calculation
  const F_FLOAT ax = vb1y*vb2zm - vb1z*vb2ym;
  const F_FLOAT ay = vb1z*vb2xm - vb1x*vb2zm;
  const F_FLOAT az = vb1x*vb2ym - vb1y*vb2xm;
  const F_FLOAT bx = vb3y*vb2zm - vb3z*vb2ym;
  const F_FLOAT by = vb3z*vb2xm - vb3x*vb2zm;
  const F_FLOAT bz = vb3x*vb2ym - vb3y*vb2xm;
  const F_FLOAT rasq = ax*ax + ay*ay + az*az;
  const F_FLOAT rbsq = bx*bx + by*by + bz*bz;
  const F_FLOAT rgsq = vb2xm*vb2xm + vb2ym*vb2ym + vb2zm*vb2zm;
  const F_FLOAT rg = sqrt(rgsq);
  F_FLOAT rginv,ra2inv,rb2inv;
  rginv = ra2inv = rb2inv = 0.0;
  if (rg > 0) rginv = 1.0/rg;
  if (rasq > 0) ra2inv = 1.0/rasq;
  if (rbsq > 0) rb2inv = 1.0/rbsq;
  const F_FLOAT rabinv = sqrt(ra2inv*rb2inv);
  F_FLOAT c = (ax*bx + ay*by + az*bz)*rabinv;
  const F_FLOAT s = rg*rabinv*(ax*vb3x + ay*vb3y + az*vb3z);
  // error check
  if ((c > 1.0 + TOLERANCE || c < (-1.0 - TOLERANCE)) && !d_warning_flag())
    Kokkos::atomic_fetch_add(&d_warning_flag(),1);
  if (c > 1.0) c = 1.0;
  if (c < -1.0) c = -1.0;
  const int m = d_multiplicity[type];
  F_FLOAT p = 1.0;
  F_FLOAT ddf1,df1;
  ddf1 = df1 = 0.0;
  for (int i = 0; i < m; i++) {
    ddf1 = p*c - df1*s;
    df1 = p*s + df1*c;
    p = ddf1;
  }
  p = p*d_cos_shift[type] + df1*d_sin_shift[type];
  df1 = df1*d_cos_shift[type] - ddf1*d_sin_shift[type];
  df1 *= -m;
  p += 1.0;
  if (m == 0) {
    p = 1.0 + d_cos_shift[type];
    df1 = 0.0;
  }
  E_FLOAT edihedral = 0.0;
  if (eflag) edihedral = d_k[type] * p;
  const F_FLOAT fg = vb1x*vb2xm + vb1y*vb2ym + vb1z*vb2zm;
  const F_FLOAT hg = vb3x*vb2xm + vb3y*vb2ym + vb3z*vb2zm;
  const F_FLOAT fga = fg*ra2inv*rginv;
  const F_FLOAT hgb = hg*rb2inv*rginv;
  const F_FLOAT gaa = -ra2inv*rg;
  const F_FLOAT gbb = rb2inv*rg;
  const F_FLOAT dtfx = gaa*ax;
  const F_FLOAT dtfy = gaa*ay;
  const F_FLOAT dtfz = gaa*az;
  const F_FLOAT dtgx = fga*ax - hgb*bx;
  const F_FLOAT dtgy = fga*ay - hgb*by;
  const F_FLOAT dtgz = fga*az - hgb*bz;
  const F_FLOAT dthx = gbb*bx;
  const F_FLOAT dthy = gbb*by;
  const F_FLOAT dthz = gbb*bz;
  const F_FLOAT df = -d_k[type] * df1;
  const F_FLOAT sx2  = df*dtgx;;
  const F_FLOAT sy2  = df*dtgy;;
  const F_FLOAT sz2  = df*dtgz;;
  F_FLOAT f1[3],f2[3],f3[3],f4[3];
  f1[0] = df*dtfx;
  f1[1] = df*dtfy;
  f1[2] = df*dtfz;
  f2[0] = sx2 - f1[0];
  f2[1] = sy2 - f1[1];
  f2[2] = sz2 - f1[2];
  f4[0] = df*dthx;
  f4[1] = df*dthy;
  f4[2] = df*dthz;
  f3[0] = -sx2 - f4[0];
  f3[1] = -sy2 - f4[1];
  f3[2] = -sz2 - f4[2];
  // apply force to each of 4 atoms
  if (NEWTON_BOND || i1 < nlocal) {
    a_f(i1,0) += f1[0];
    a_f(i1,1) += f1[1];
    a_f(i1,2) += f1[2];
  }
  if (NEWTON_BOND || i2 < nlocal) {
    a_f(i2,0) += f2[0];
    a_f(i2,1) += f2[1];
    a_f(i2,2) += f2[2];
  }
  if (NEWTON_BOND || i3 < nlocal) {
    a_f(i3,0) += f3[0];
    a_f(i3,1) += f3[1];
    a_f(i3,2) += f3[2];
  }
  if (NEWTON_BOND || i4 < nlocal) {
    a_f(i4,0) += f4[0];
    a_f(i4,1) += f4[1];
    a_f(i4,2) += f4[2];
  }
  if (EVFLAG)
    ev_tally(ev,i1,i2,i3,i4,edihedral,f1,f3,f4,
             vb1x,vb1y,vb1z,vb2x,vb2y,vb2z,vb3x,vb3y,vb3z);
 }
 template<class DeviceType>
 template<int NEWTON_BOND, int EVFLAG>
 KOKKOS_INLINE_FUNCTION
 void DihedralHarmonicKokkos<DeviceType>::operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int &n) const {
  EV_FLOAT ev;
  this->template operator()<NEWTON_BOND,EVFLAG>(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>(), n, ev);
 }
 /* ---------------------------------------------------------------------- */
 template<class DeviceType>
 void DihedralHarmonicKokkos<DeviceType>::allocate()
 {
  DihedralHarmonic::allocate();
  int n = atom->ndihedraltypes;
  k_k = DAT::tdual_ffloat_1d("DihedralHarmonic::k",n+1);
  k_cos_shift = DAT::tdual_ffloat_1d("DihedralHarmonic::cos_shift",n+1);
  k_sin_shift = DAT::tdual_ffloat_1d("DihedralHarmonic::sin_shift",n+1);
  k_sign = DAT::tdual_int_1d("DihedralHarmonic::sign",n+1);
  k_multiplicity = DAT::tdual_int_1d("DihedralHarmonic::multiplicity",n+1);
  d_k = k_k.template view<DeviceType>();
  d_cos_shift = k_cos_shift.template view<DeviceType>();
  d_sin_shift = k_sin_shift.template view<DeviceType>();
  d_sign = k_sign.template view<DeviceType>();
  d_multiplicity = k_multiplicity.template view<DeviceType>();
 }
 /* ----------------------------------------------------------------------
   set coeffs for one type
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 void DihedralHarmonicKokkos<DeviceType>::coeff(int narg, char **arg)
 {
  DihedralHarmonic::coeff(narg, arg);
  int n = atom->ndihedraltypes;
  for (int i = 1; i <= n; i++) {
    k_k.h_view[i] = k[i];
    k_cos_shift.h_view[i] = cos_shift[i];
    k_sin_shift.h_view[i] = sin_shift[i];
    k_sign.h_view[i] = sign[i];
    k_multiplicity.h_view[i] = multiplicity[i];
  }
  k_k.template modify<LMPHostType>();
  k_cos_shift.template modify<LMPHostType>();
  k_sin_shift.template modify<LMPHostType>();
  k_sign.template modify<LMPHostType>();
  k_multiplicity.template modify<LMPHostType>();
 }
 /* ----------------------------------------------------------------------
   proc 0 reads coeffs from restart file, bcasts them
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 void DihedralHarmonicKokkos<DeviceType>::read_restart(FILE *fp)
 {
  DihedralHarmonic::read_restart(fp);
  int n = atom->ndihedraltypes;
  for (int i = 1; i <= n; i++) {
    k_k.h_view[i] = k[i];
    k_cos_shift.h_view[i] = cos_shift[i];
    k_sin_shift.h_view[i] = sin_shift[i];
    k_sign.h_view[i] = sign[i];
    k_multiplicity.h_view[i] = multiplicity[i];
  }
  k_k.template modify<LMPHostType>();
  k_cos_shift.template modify<LMPHostType>();
  k_sin_shift.template modify<LMPHostType>();
  k_sign.template modify<LMPHostType>();
  k_multiplicity.template modify<LMPHostType>();
 }
 /* ----------------------------------------------------------------------
   tally energy and virial into global and per-atom accumulators
   virial = r1F1 + r2F2 + r3F3 + r4F4 = (r1-r2) F1 + (r3-r2) F3 + (r4-r2) F4
          = (r1-r2) F1 + (r3-r2) F3 + (r4-r3 + r3-r2) F4
          = vb1*f1 + vb2*f3 + (vb3+vb2)*f4
 ------------------------------------------------------------------------- */
 template<class DeviceType>
 //template<int NEWTON_BOND>
 KOKKOS_INLINE_FUNCTION
 void DihedralHarmonicKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int i1, const int i2, const int i3, const int i4,
                        F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4,
                        const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z,
                        const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
                        const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const
 {
  E_FLOAT edihedralquarter;
  F_FLOAT v[6];
  // The eatom and vatom arrays are atomic
  Kokkos::View<E_FLOAT*, typename DAT::t_efloat_1d::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_eatom = k_eatom.view<DeviceType>();
  Kokkos::View<F_FLOAT*[6], typename DAT::t_virial_array::array_layout,DeviceType,Kokkos::MemoryTraits<Kokkos::Atomic|Kokkos::Unmanaged> > v_vatom = k_vatom.view<DeviceType>();
  if (eflag_either) {
    if (eflag_global) {
      if (newton_bond) ev.evdwl += edihedral;
      else {
        edihedralquarter = 0.25*edihedral;
        if (i1 < nlocal) ev.evdwl += edihedralquarter;
        if (i2 < nlocal) ev.evdwl += edihedralquarter;
        if (i3 < nlocal) ev.evdwl += edihedralquarter;
        if (i4 < nlocal) ev.evdwl += edihedralquarter;
      }
    }
    if (eflag_atom) {
      edihedralquarter = 0.25*edihedral;
      if (newton_bond || i1 < nlocal) v_eatom[i1] += edihedralquarter;
      if (newton_bond || i2 < nlocal) v_eatom[i2] += edihedralquarter;
      if (newton_bond || i3 < nlocal) v_eatom[i3] += edihedralquarter;
      if (newton_bond || i4 < nlocal) v_eatom[i4] += edihedralquarter;
    }
  }
  if (vflag_either) {
    v[0] = vb1x*f1[0] + vb2x*f3[0] + (vb3x+vb2x)*f4[0];
    v[1] = vb1y*f1[1] + vb2y*f3[1] + (vb3y+vb2y)*f4[1];
    v[2] = vb1z*f1[2] + vb2z*f3[2] + (vb3z+vb2z)*f4[2];
    v[3] = vb1x*f1[1] + vb2x*f3[1] + (vb3x+vb2x)*f4[1];
    v[4] = vb1x*f1[2] + vb2x*f3[2] + (vb3x+vb2x)*f4[2];
    v[5] = vb1y*f1[2] + vb2y*f3[2] + (vb3y+vb2y)*f4[2];
    if (vflag_global) {
      if (newton_bond) {
        ev.v[0] += v[0];
        ev.v[1] += v[1];
        ev.v[2] += v[2];
        ev.v[3] += v[3];
        ev.v[4] += v[4];
        ev.v[5] += v[5];
      } else {
        if (i1 < nlocal) {
          ev.v[0] += 0.25*v[0];
          ev.v[1] += 0.25*v[1];
          ev.v[2] += 0.25*v[2];
          ev.v[3] += 0.25*v[3];
          ev.v[4] += 0.25*v[4];
          ev.v[5] += 0.25*v[5];
        }
        if (i2 < nlocal) {
          ev.v[0] += 0.25*v[0];
          ev.v[1] += 0.25*v[1];
          ev.v[2] += 0.25*v[2];
          ev.v[3] += 0.25*v[3];
          ev.v[4] += 0.25*v[4];
          ev.v[5] += 0.25*v[5];
        }
        if (i3 < nlocal) {
          ev.v[0] += 0.25*v[0];
          ev.v[1] += 0.25*v[1];
          ev.v[2] += 0.25*v[2];
          ev.v[3] += 0.25*v[3];
          ev.v[4] += 0.25*v[4];
          ev.v[5] += 0.25*v[5];
        }
        if (i4 < nlocal) {
          ev.v[0] += 0.25*v[0];
          ev.v[1] += 0.25*v[1];
          ev.v[2] += 0.25*v[2];
          ev.v[3] += 0.25*v[3];
          ev.v[4] += 0.25*v[4];
          ev.v[5] += 0.25*v[5];
        }
      }
    }
    if (vflag_atom) {
      if (newton_bond || i1 < nlocal) {
        v_vatom(i1,0) += 0.25*v[0];
        v_vatom(i1,1) += 0.25*v[1];
        v_vatom(i1,2) += 0.25*v[2];
        v_vatom(i1,3) += 0.25*v[3];
        v_vatom(i1,4) += 0.25*v[4];
        v_vatom(i1,5) += 0.25*v[5];
      }
      if (newton_bond || i2 < nlocal) {
        v_vatom(i2,0) += 0.25*v[0];
        v_vatom(i2,1) += 0.25*v[1];
        v_vatom(i2,2) += 0.25*v[2];
        v_vatom(i2,3) += 0.25*v[3];
        v_vatom(i2,4) += 0.25*v[4];
        v_vatom(i2,5) += 0.25*v[5];
      }
      if (newton_bond || i3 < nlocal) {
        v_vatom(i3,0) += 0.25*v[0];
        v_vatom(i3,1) += 0.25*v[1];
        v_vatom(i3,2) += 0.25*v[2];
        v_vatom(i3,3) += 0.25*v[3];
        v_vatom(i3,4) += 0.25*v[4];
        v_vatom(i3,5) += 0.25*v[5];
      }
      if (newton_bond || i4 < nlocal) {
        v_vatom(i4,0) += 0.25*v[0];
        v_vatom(i4,1) += 0.25*v[1];
        v_vatom(i4,2) += 0.25*v[2];
        v_vatom(i4,3) += 0.25*v[3];
        v_vatom(i4,4) += 0.25*v[4];
        v_vatom(i4,5) += 0.25*v[5];
      }
    }
  }
 }
 /* ---------------------------------------------------------------------- */
 namespace LAMMPS_NS {
 template class DihedralHarmonicKokkos<LMPDeviceType>;
 #ifdef KOKKOS_ENABLE_CUDA
 template class DihedralHarmonicKokkos<LMPHostType>;
 #endif
 }
--- a/src/KOKKOS/dihedral_harmonic_kokkos.h
+++ b/src/KOKKOS/dihedral_harmonic_kokkos.h
@ -0,0 +1,104 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifdef DIHEDRAL_CLASS
 DihedralStyle(harmonic/kk,DihedralHarmonicKokkos<LMPDeviceType>)
 DihedralStyle(harmonic/kk/device,DihedralHarmonicKokkos<LMPDeviceType>)
 DihedralStyle(harmonic/kk/host,DihedralHarmonicKokkos<LMPHostType>)
 #else
 #ifndef LMP_DIHEDRAL_HARMONIC_KOKKOS_H
 #define LMP_DIHEDRAL_HARMONIC_KOKKOS_H
 #include "dihedral_harmonic.h"
 #include "kokkos_type.h"
 namespace LAMMPS_NS {
 template<int NEWTON_BOND, int EVFLAG>
 struct TagDihedralHarmonicCompute{};
 template<class DeviceType>
 class DihedralHarmonicKokkos : public DihedralHarmonic {
 public:
  typedef DeviceType device_type;
  typedef EV_FLOAT value_type;
  typedef ArrayTypes<DeviceType> AT;
  DihedralHarmonicKokkos(class LAMMPS *);
  virtual ~DihedralHarmonicKokkos();
  void compute(int, int);
  void coeff(int, char **);
  void read_restart(FILE *);
  template<int NEWTON_BOND, int EVFLAG>
  KOKKOS_INLINE_FUNCTION
  void operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int&, EV_FLOAT&) const;
  template<int NEWTON_BOND, int EVFLAG>
  KOKKOS_INLINE_FUNCTION
  void operator()(TagDihedralHarmonicCompute<NEWTON_BOND,EVFLAG>, const int&) const;
  //template<int NEWTON_BOND>
  KOKKOS_INLINE_FUNCTION
  void ev_tally(EV_FLOAT &ev, const int i1, const int i2, const int i3, const int i4,
                          F_FLOAT &edihedral, F_FLOAT *f1, F_FLOAT *f3, F_FLOAT *f4,
                          const F_FLOAT &vb1x, const F_FLOAT &vb1y, const F_FLOAT &vb1z,
                          const F_FLOAT &vb2x, const F_FLOAT &vb2y, const F_FLOAT &vb2z,
                          const F_FLOAT &vb3x, const F_FLOAT &vb3y, const F_FLOAT &vb3z) const;
 protected:
  class NeighborKokkos *neighborKK;
  typename AT::t_x_array_randomread x;
  typename AT::t_f_array f;
  typename AT::t_int_2d dihedrallist;
  DAT::tdual_efloat_1d k_eatom;
  DAT::tdual_virial_array k_vatom;
  typename ArrayTypes<DeviceType>::t_efloat_1d d_eatom;
  typename ArrayTypes<DeviceType>::t_virial_array d_vatom;
  int nlocal,newton_bond;
  int eflag,vflag;
  DAT::tdual_int_scalar k_warning_flag;
  typename AT::t_int_scalar d_warning_flag;
  HAT::t_int_scalar h_warning_flag;
  DAT::tdual_ffloat_1d k_k;
  DAT::tdual_ffloat_1d k_cos_shift;
  DAT::tdual_ffloat_1d k_sin_shift;
  DAT::tdual_int_1d k_sign;
  DAT::tdual_int_1d k_multiplicity;
  typename AT::t_ffloat_1d d_k;
  typename AT::t_ffloat_1d d_cos_shift;
  typename AT::t_ffloat_1d d_sin_shift;
  typename AT::t_int_1d d_sign;
  typename AT::t_int_1d d_multiplicity;
  void allocate();
 };
 }
 #endif
 #endif
 /* ERROR/WARNING messages:
 */
--- a/src/KOKKOS/fix_minimize_kokkos.cpp
+++ b/src/KOKKOS/fix_minimize_kokkos.cpp
@ -0,0 +1,257 @@
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "fix_minimize_kokkos.h"
 #include "atom_kokkos.h"
 #include "domain.h"
 #include "memory_kokkos.h"
 #include "atom_masks.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
 /* ---------------------------------------------------------------------- */
 FixMinimizeKokkos::FixMinimizeKokkos(LAMMPS *lmp, int narg, char **arg) :
  FixMinimize(lmp, narg, arg)
 {
  atomKK = (AtomKokkos *) atom;
 }
 /* ---------------------------------------------------------------------- */
 FixMinimizeKokkos::~FixMinimizeKokkos()
 {
  memoryKK->destroy_kokkos(k_vectors,vectors);
  vectors = NULL;
 }
 /* ----------------------------------------------------------------------
   allocate/initialize memory for a new vector with 3 elements per atom
 ------------------------------------------------------------------------- */
 void FixMinimizeKokkos::add_vector_kokkos()
 {
  int n = 3;
  memory->grow(peratom,nvector+1,"minimize:peratom");
  peratom[nvector] = n;
  // d_vectors needs to be LayoutRight for subviews
  k_vectors.sync<LMPDeviceType>();
  memoryKK->grow_kokkos(k_vectors,vectors,nvector+1,atom->nmax*n,
                      "minimize:vectors");
  d_vectors = k_vectors.d_view;
  h_vectors = k_vectors.h_view;
  k_vectors.modify<LMPDeviceType>();
  nvector++;
 }
 /* ----------------------------------------------------------------------
   return a pointer to the Mth vector
 ------------------------------------------------------------------------- */
 DAT::t_ffloat_1d FixMinimizeKokkos::request_vector_kokkos(int m)
 {
  k_vectors.sync<LMPDeviceType>();
  return Kokkos::subview(d_vectors,m,Kokkos::ALL);
 }
 /* ----------------------------------------------------------------------
   reset x0 for atoms that moved across PBC via reneighboring in line search
   x0 = 1st vector
   must do minimum_image using original box stored at beginning of line search
   swap & set_global_box() change to original box, then restore current box
 ------------------------------------------------------------------------- */
 void FixMinimizeKokkos::reset_coords()
 {
  box_swap();
  domain->set_global_box();
  int nlocal = atom->nlocal;
  atomKK->sync(Device,X_MASK);
  k_vectors.sync<LMPDeviceType>();
  {
    // local variables for lambda capture
    auto triclinic = domain->triclinic;
    auto xperiodic = domain->xperiodic;
    auto xprd_half = domain->xprd_half;
    auto xprd = domain->xprd;
    auto yperiodic = domain->yperiodic;
    auto yprd_half = domain->yprd_half;
    auto yprd = domain->yprd;
    auto zperiodic = domain->zperiodic;
    auto zprd_half = domain->zprd_half;
    auto zprd = domain->zprd;
    auto xy = domain->xy;
    auto xz = domain->xz;
    auto yz = domain->yz;
    auto l_x = atomKK->k_x.d_view;
    auto l_x0 = Kokkos::subview(d_vectors,0,Kokkos::ALL);
    Kokkos::parallel_for(nlocal, LAMMPS_LAMBDA(const int& i) {
      const int n = i*3;
      double dx0 = l_x(i,0) - l_x0[n];
      double dy0 = l_x(i,1) - l_x0[n+1];
      double dz0 = l_x(i,2) - l_x0[n+2];
      double dx = dx0;
      double dy = dy0;
      double dz = dz0;
      // domain->minimum_image(dx,dy,dz);
      {
        if (triclinic == 0) {
          if (xperiodic) {
            if (fabs(dx) > xprd_half) {
              if (dx < 0.0) dx += xprd;
              else dx -= xprd;
            }
          }
          if (yperiodic) {
            if (fabs(dy) > yprd_half) {
              if (dy < 0.0) dy += yprd;
              else dy -= yprd;
            }
          }
          if (zperiodic) {
            if (fabs(dz) > zprd_half) {
              if (dz < 0.0) dz += zprd;
              else dz -= zprd;
            }
          }
        } else {
          if (zperiodic) {
            if (fabs(dz) > zprd_half) {
              if (dz < 0.0) {
                dz += zprd;
                dy += yz;
                dx += xz;
              } else {
                dz -= zprd;
                dy -= yz;
                dx -= xz;
              }
            }
          }
          if (yperiodic) {
            if (fabs(dy) > yprd_half) {
              if (dy < 0.0) {
                dy += yprd;
                dx += xy;
              } else {
                dy -= yprd;
                dx -= xy;
              }
            }
          }
          if (xperiodic) {
            if (fabs(dx) > xprd_half) {
              if (dx < 0.0) dx += xprd;
              else dx -= xprd;
            }
          }
        }
      } // end domain->minimum_image(dx,dy,dz);
      if (dx != dx0) l_x0[n] = l_x(i,0) - dx;
      if (dy != dy0) l_x0[n+1] = l_x(i,1) - dy;
      if (dz != dz0) l_x0[n+2] = l_x(i,2) - dz;
    });
  }
  k_vectors.modify<LMPDeviceType>();
  box_swap();
  domain->set_global_box();
 }
 /* ----------------------------------------------------------------------
   allocate local atom-based arrays
 ------------------------------------------------------------------------- */
 void FixMinimizeKokkos::grow_arrays(int nmax)
 {
  k_vectors.sync<LMPDeviceType>();
  memoryKK->grow_kokkos(k_vectors,vectors,nvector,3*nmax,"minimize:vector");
  d_vectors = k_vectors.d_view;
  h_vectors = k_vectors.h_view;
  k_vectors.modify<LMPDeviceType>();
 }
 /* ----------------------------------------------------------------------
   copy values within local atom-based arrays
 ------------------------------------------------------------------------- */
 void FixMinimizeKokkos::copy_arrays(int i, int j, int /*delflag*/)
 {
  int m,iper,nper,ni,nj;
  k_vectors.sync<LMPHostType>();
  for (m = 0; m < nvector; m++) {
    nper = 3;
    ni = nper*i;
    nj = nper*j;
    for (iper = 0; iper < nper; iper++) h_vectors(m,nj++) = h_vectors(m,ni++);
  }
  k_vectors.modify<LMPHostType>();
 }
 /* ----------------------------------------------------------------------
   pack values in local atom-based arrays for exchange with another proc
 ------------------------------------------------------------------------- */
 int FixMinimizeKokkos::pack_exchange(int i, double *buf)
 {
  int m,iper,nper,ni;
  k_vectors.sync<LMPHostType>();
  int n = 0;
  for (m = 0; m < nvector; m++) {
    nper = peratom[m];
    ni = nper*i;
    for (iper = 0; iper < nper; iper++) buf[n++] = h_vectors(m,ni++);
  }
  return n;
 }
 /* ----------------------------------------------------------------------
   unpack values in local atom-based arrays from exchange with another proc
 ------------------------------------------------------------------------- */
 int FixMinimizeKokkos::unpack_exchange(int nlocal, double *buf)
 {
  int m,iper,nper,ni;
  k_vectors.sync<LMPHostType>();
  int n = 0;
  for (m = 0; m < nvector; m++) {
    nper = peratom[m];
    ni = nper*nlocal;
    for (iper = 0; iper < nper; iper++) h_vectors(m,ni++) = buf[n++];
  }
  k_vectors.modify<LMPHostType>();
  return n;
 }
--- a/src/KOKKOS/fix_minimize_kokkos.h
+++ b/src/KOKKOS/fix_minimize_kokkos.h
@ -0,0 +1,58 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifdef FIX_CLASS
 FixStyle(MINIMIZE/kk,FixMinimizeKokkos)
 FixStyle(MINIMIZE/kk/device,FixMinimizeKokkos)
 FixStyle(MINIMIZE/kk/host,FixMinimizeKokkos)
 #else
 #ifndef LMP_FIX_MINIMIZE_KOKKOS_H
 #define LMP_FIX_MINIMIZE_KOKKOS_H
 #include "fix_minimize.h"
 #include "kokkos_type.h"
 namespace LAMMPS_NS {
 class FixMinimizeKokkos : public FixMinimize {
  friend class MinLineSearchKokkos;
 public:
  FixMinimizeKokkos(class LAMMPS *, int, char **);
  virtual ~FixMinimizeKokkos();
  void init() {}
  void grow_arrays(int);
  void copy_arrays(int, int, int);
  int pack_exchange(int, double *);
  int unpack_exchange(int, double *);
  void add_vector_kokkos();
  DAT::t_float_1d request_vector_kokkos(int);
  void reset_coords();
  DAT::tdual_float_2d k_vectors;
  DAT::t_float_2d d_vectors;
  HAT::t_float_2d h_vectors;
 };
 }
 #endif
 #endif
 /* ERROR/WARNING messages:
 */
--- a/src/KOKKOS/kokkos.cpp
+++ b/src/KOKKOS/kokkos.cpp
@ -113,23 +113,37 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
      }
      iarg += 2;
      int set_flag = 0;
      char *str;
      if ((str = getenv("SLURM_LOCALID"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
        set_flag = 1;
      }
      if ((str = getenv("MPT_LRANK"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
        set_flag = 1;
      }
      if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
        set_flag = 1;
      }
      if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
        int local_rank = atoi(str);
        device = local_rank % ngpus;
        if (device >= skip_gpu) device++;
        set_flag = 1;
      }
      if (ngpus > 1 && !set_flag)
        error->all(FLERR,"Could not determine local MPI rank for multiple "
                           "GPUs with Kokkos CUDA because MPI library not recognized");
    } else if (strcmp(arg[iarg],"t") == 0 ||
               strcmp(arg[iarg],"threads") == 0) {
      nthreads = atoi(arg[iarg+1]);
--- a/src/KOKKOS/kokkos.h
+++ b/src/KOKKOS/kokkos.h
@ -74,6 +74,11 @@ E: Invalid Kokkos command-line args
 Self-explanatory.  See Section 2.7 of the manual for details.
 E: Could not determine local MPI rank for multiple GPUs with Kokkos CUDA
 because MPI library not recognized
 The local MPI rank was not found in one of four supported environment variables.
 E: GPUs are requested but Kokkos has not been compiled for CUDA
 Recompile Kokkos with CUDA support to use GPUs.
--- a/src/KOKKOS/min_cg_kokkos.cpp
+++ b/src/KOKKOS/min_cg_kokkos.cpp
@ -0,0 +1,178 @@
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #include "min_cg_kokkos.h"
 #include <mpi.h>
 #include <cmath>
 #include "update.h"
 #include "output.h"
 #include "timer.h"
 #include "atom_kokkos.h"
 #include "atom_masks.h"
 #include "fix_minimize_kokkos.h"
 using namespace LAMMPS_NS;
 // EPS_ENERGY = minimum normalization for energy tolerance
 #define EPS_ENERGY 1.0e-8
 /* ---------------------------------------------------------------------- */
 MinCGKokkos::MinCGKokkos(LAMMPS *lmp) : MinLineSearchKokkos(lmp)
 {
  atomKK = (AtomKokkos *) atom;
  kokkosable = 1;
 }
 /* ----------------------------------------------------------------------
   minimization via conjugate gradient iterations
 ------------------------------------------------------------------------- */
 int MinCGKokkos::iterate(int maxiter)
 {
  int fail,ntimestep;
  double beta,gg,dot[2],dotall[2];
  fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
  fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
  // nlimit = max # of CG iterations before restarting
  // set to ndoftotal unless too big
  int nlimit = static_cast<int> (MIN(MAXSMALLINT,ndoftotal));
  // initialize working vectors
  {
    // local variables for lambda capture
    auto l_h = h;
    auto l_g = g;
    auto l_fvec = fvec;
    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
      l_h[i] = l_fvec[i];
      l_g[i] = l_fvec[i];
    });
  }
  gg = fnorm_sqr();
  for (int iter = 0; iter < maxiter; iter++) {
    if (timer->check_timeout(niter))
      return TIMEOUT;
    ntimestep = ++update->ntimestep;
    niter++;
    // line minimization along direction h from current atom->x
    eprevious = ecurrent;
    fail = (this->*linemin)(ecurrent,alpha_final);
    if (fail) return fail;
    // function evaluation criterion
    if (neval >= update->max_eval) return MAXEVAL;
    // energy tolerance criterion
    if (fabs(ecurrent-eprevious) <
        update->etol * 0.5*(fabs(ecurrent) + fabs(eprevious) + EPS_ENERGY))
      return ETOL;
    // force tolerance criterion
    s_double2 sdot;
    {
      // local variables for lambda capture
      auto l_g = g;
      auto l_fvec = fvec;
      Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
        sdot.d0 += l_fvec[i]*l_fvec[i];
        sdot.d1 += l_fvec[i]*l_g[i];
      },sdot);
    }
    dot[0] = sdot.d0;
    dot[1] = sdot.d1;
    MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
    if (dotall[0] < update->ftol*update->ftol) return FTOL;
    // update new search direction h from new f = -Grad(x) and old g
    // this is Polak-Ribieri formulation
    // beta = dotall[0]/gg would be Fletcher-Reeves
    // reinitialize CG every ndof iterations by setting beta = 0.0
    beta = MAX(0.0,(dotall[0] - dotall[1])/gg);
    if ((niter+1) % nlimit == 0) beta = 0.0;
    gg = dotall[0];
    {
      // local variables for lambda capture
      auto l_h = h;
      auto l_g = g;
      auto l_fvec = fvec;
      Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
        l_g[i] = l_fvec[i];
        l_h[i] = l_g[i] + beta*l_h[i];
      });
    }
    // reinitialize CG if new search direction h is not downhill
    double dot_0 = 0.0;
    {
      // local variables for lambda capture
      auto l_h = h;
      auto l_g = g;
      Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& dot_0) {
        dot_0 += l_g[i]*l_h[i];
      },dot_0);
    }
    dot[0] = dot_0;
    MPI_Allreduce(dot,dotall,1,MPI_DOUBLE,MPI_SUM,world);
    if (dotall[0] <= 0.0) {
      // local variables for lambda capture
      auto l_h = h;
      auto l_g = g;
      Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
         l_h[i] = l_g[i];
      });
    }
    // output for thermo, dump, restart files
    if (output->next == ntimestep) {
      atomKK->sync(Host,ALL_MASK);
      timer->stamp();
      output->write(ntimestep);
      timer->stamp(Timer::OUTPUT);
    }
  }
  return MAXITER;
 }
--- a/src/KOKKOS/min_cg_kokkos.h
+++ b/src/KOKKOS/min_cg_kokkos.h
@ -0,0 +1,38 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifdef MINIMIZE_CLASS
 MinimizeStyle(cg/kk,MinCGKokkos)
 MinimizeStyle(cg/kk/device,MinCGKokkos)
 MinimizeStyle(cg/kk/host,MinCGKokkos)
 #else
 #ifndef LMP_MIN_CG_KOKKOS_H
 #define LMP_MIN_CG_KOKKOS_H
 #include "min_linesearch_kokkos.h"
 namespace LAMMPS_NS {
 class MinCGKokkos : public MinLineSearchKokkos {
 public:
  MinCGKokkos(class LAMMPS *);
  int iterate(int);
 };
 }
 #endif
 #endif
--- a/src/KOKKOS/min_kokkos.cpp
+++ b/src/KOKKOS/min_kokkos.cpp
@ -0,0 +1,631 @@
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
   Contributing author: Stan Moore (SNL)
 ------------------------------------------------------------------------- */
 #include "min_kokkos.h"
 #include <mpi.h>
 #include <cmath>
 #include <cstring>
 #include "atom_kokkos.h"
 #include "atom_vec.h"
 #include "domain.h"
 #include "comm.h"
 #include "update.h"
 #include "modify.h"
 #include "fix_minimize_kokkos.h"
 #include "compute.h"
 #include "neighbor.h"
 #include "force.h"
 #include "pair.h"
 #include "bond.h"
 #include "angle.h"
 #include "dihedral.h"
 #include "improper.h"
 #include "kspace.h"
 #include "output.h"
 #include "thermo.h"
 #include "timer.h"
 #include "memory.h"
 #include "error.h"
 #include "kokkos.h"
 #include "atom_masks.h"
 using namespace LAMMPS_NS;
 /* ---------------------------------------------------------------------- */
 MinKokkos::MinKokkos(LAMMPS *lmp) : Min(lmp)
 {
  atomKK = (AtomKokkos *) atom;
  fix_minimize_kk = NULL;
 }
 /* ---------------------------------------------------------------------- */
 MinKokkos::~MinKokkos()
 {
 }
 /* ---------------------------------------------------------------------- */
 void MinKokkos::init()
 {
  Min::init();
  fix_minimize_kk = (FixMinimizeKokkos*) fix_minimize;
 }
 /* ----------------------------------------------------------------------
   setup before run
 ------------------------------------------------------------------------- */
 void MinKokkos::setup(int flag)
 {
  if (comm->me == 0 && screen) {
    fprintf(screen,"Setting up %s style minimization ...\n",
            update->minimize_style);
    if (flag) {
      fprintf(screen,"  Unit style    : %s\n", update->unit_style);
      fprintf(screen,"  Current step  : " BIGINT_FORMAT "\n",
              update->ntimestep);
      timer->print_timeout(screen);
    }
  }
  update->setupflag = 1;
  // setup extra global dof due to fixes
  // cannot be done in init() b/c update init() is before modify init()
  nextra_global = modify->min_dof();
  if (nextra_global) {
    fextra = new double[nextra_global];
    if (comm->me == 0 && screen)
      fprintf(screen,"WARNING: Energy due to %d extra global DOFs will"
              " be included in minimizer energies\n",nextra_global);
  }
  // compute for potential energy
  int id = modify->find_compute("thermo_pe");
  if (id < 0) error->all(FLERR,"Minimization could not find thermo_pe compute");
  pe_compute = modify->compute[id];
  // style-specific setup does two tasks
  // setup extra global dof vectors
  // setup extra per-atom dof vectors due to requests from Pair classes
  // cannot be done in init() b/c update init() is before modify/pair init()
  setup_style();
  // ndoftotal = total dof for entire minimization problem
  // dof for atoms, extra per-atom, extra global
  bigint ndofme = 3 * static_cast<bigint>(atom->nlocal);
  for (int m = 0; m < nextra_atom; m++)
    ndofme += extra_peratom[m]*atom->nlocal;
  MPI_Allreduce(&ndofme,&ndoftotal,1,MPI_LMP_BIGINT,MPI_SUM,world);
  ndoftotal += nextra_global;
  // setup domain, communication and neighboring
  // acquire ghosts
  // build neighbor lists
  atom->setup();
  modify->setup_pre_exchange();
  if (triclinic) domain->x2lamda(atom->nlocal);
  domain->pbc();
  domain->reset_box();
  comm->setup();
  if (neighbor->style) neighbor->setup_bins();
  comm->exchange();
  if (atom->sortfreq > 0) atom->sort();
  comm->borders();
  if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
  domain->image_check();
  domain->box_too_small_check();
  modify->setup_pre_neighbor();
  neighbor->build(1);
  modify->setup_post_neighbor();
  neighbor->ncalls = 0;
  // remove these restriction eventually
  if (searchflag == 0) {
    if (nextra_global)
      error->all(FLERR,
                 "Cannot use a damped dynamics min style with fix box/relax");
    if (nextra_atom)
      error->all(FLERR,
                 "Cannot use a damped dynamics min style with per-atom DOF");
  }
  if (strcmp(update->minimize_style,"hftn") == 0) {
    if (nextra_global)
      error->all(FLERR, "Cannot use hftn min style with fix box/relax");
    if (nextra_atom)
      error->all(FLERR, "Cannot use hftn min style with per-atom DOF");
  }
  // atoms may have migrated in comm->exchange()
  reset_vectors();
  // compute all forces
  force->setup();
  ev_set(update->ntimestep);
  force_clear();
  modify->setup_pre_force(vflag);
  if (pair_compute_flag) {
    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
    force->pair->compute(eflag,vflag);
    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
    timer->stamp(Timer::PAIR);
  }
  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
  if (atomKK->molecular) {
    if (force->bond) {
      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
      force->bond->compute(eflag,vflag);
      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
    }
    if (force->angle) {
      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
      force->angle->compute(eflag,vflag);
      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
    }
    if (force->dihedral) {
      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
      force->dihedral->compute(eflag,vflag);
      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
    }
    if (force->improper) {
      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
      force->improper->compute(eflag,vflag);
      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
    }
    timer->stamp(Timer::BOND);
  }
  if(force->kspace) {
    force->kspace->setup();
    if (kspace_compute_flag) {
      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
      force->kspace->compute(eflag,vflag);
      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
      timer->stamp(Timer::KSPACE);
    } else force->kspace->compute_dummy(eflag,vflag);
  }
  modify->setup_pre_reverse(eflag,vflag);
  if (force->newton) comm->reverse_comm();
  // update per-atom minimization variables stored by pair styles
  if (nextra_atom)
    for (int m = 0; m < nextra_atom; m++)
      requestor[m]->min_xf_get(m);
  lmp->kokkos->auto_sync = 0;
  modify->setup(vflag);
  output->setup(flag);
  lmp->kokkos->auto_sync = 1;
  update->setupflag = 0;
  // stats for initial thermo output
  ecurrent = pe_compute->compute_scalar();
  if (nextra_global) ecurrent += modify->min_energy(fextra);
  if (output->thermo->normflag) ecurrent /= atom->natoms;
  einitial = ecurrent;
  fnorm2_init = sqrt(fnorm_sqr());
  fnorminf_init = fnorm_inf();
 }
 /* ----------------------------------------------------------------------
   setup without output or one-time post-init setup
   flag = 0 = just force calculation
   flag = 1 = reneighbor and force calculation
 ------------------------------------------------------------------------- */
 void MinKokkos::setup_minimal(int flag)
 {
  update->setupflag = 1;
  // setup domain, communication and neighboring
  // acquire ghosts
  // build neighbor lists
  if (flag) {
    modify->setup_pre_exchange();
    if (triclinic) domain->x2lamda(atom->nlocal);
    domain->pbc();
    domain->reset_box();
    comm->setup();
    if (neighbor->style) neighbor->setup_bins();
    comm->exchange();
    comm->borders();
    if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
    domain->image_check();
    domain->box_too_small_check();
    modify->setup_pre_neighbor();
    neighbor->build(1);
    modify->setup_post_neighbor();
    neighbor->ncalls = 0;
  }
  // atoms may have migrated in comm->exchange()
  reset_vectors();
  // compute all forces
  ev_set(update->ntimestep);
  force_clear();
  modify->setup_pre_force(vflag);
  if (pair_compute_flag) {
    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
    force->pair->compute(eflag,vflag);
    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
    timer->stamp(Timer::PAIR);
  }
  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
  if (atomKK->molecular) {
    if (force->bond) {
      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
      force->bond->compute(eflag,vflag);
      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
    }
    if (force->angle) {
      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
      force->angle->compute(eflag,vflag);
      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
    }
    if (force->dihedral) {
      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
      force->dihedral->compute(eflag,vflag);
      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
    }
    if (force->improper) {
      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
      force->improper->compute(eflag,vflag);
      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
    }
    timer->stamp(Timer::BOND);
  }
  if(force->kspace) {
    force->kspace->setup();
    if (kspace_compute_flag) {
      atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
      force->kspace->compute(eflag,vflag);
      atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
      timer->stamp(Timer::KSPACE);
    } else force->kspace->compute_dummy(eflag,vflag);
  }
  modify->setup_pre_reverse(eflag,vflag);
  if (force->newton) comm->reverse_comm();
  // update per-atom minimization variables stored by pair styles
  if (nextra_atom)
    for (int m = 0; m < nextra_atom; m++)
      requestor[m]->min_xf_get(m);
  lmp->kokkos->auto_sync = 0;
  modify->setup(vflag);
  lmp->kokkos->auto_sync = 1;
  update->setupflag = 0;
  // stats for Finish to print
  ecurrent = pe_compute->compute_scalar();
  if (nextra_global) ecurrent += modify->min_energy(fextra);
  if (output->thermo->normflag) ecurrent /= atom->natoms;
  einitial = ecurrent;
  fnorm2_init = sqrt(fnorm_sqr());
  fnorminf_init = fnorm_inf();
 }
 /* ----------------------------------------------------------------------
   perform minimization, calling iterate() for N steps
 ------------------------------------------------------------------------- */
 void MinKokkos::run(int n)
 {
  if (nextra_global)
    error->all(FLERR,"Cannot yet use extra global DOFs (e.g. fix box/relax) "
     "with Kokkos minimize");
  if (nextra_global || nextra_atom)
    error->all(FLERR,"Cannot yet use extra atom DOFs (e.g. USER-AWPMD and USER-EFF packages) "
     "with Kokkos minimize");
  // minimizer iterations
  lmp->kokkos->auto_sync = 0;
  atomKK->sync(Device,ALL_MASK);
  stop_condition = iterate(n);
  stopstr = stopstrings(stop_condition);
  // if early exit from iterate loop:
  // set update->nsteps to niter for Finish stats to print
  // set output->next values to this timestep
  // call energy_force() to insure vflag is set when forces computed
  // output->write does final output for thermo, dump, restart files
  // add ntimestep to all computes that store invocation times
  //   since are hardwiring call to thermo/dumps and computes may not be ready
  if (stop_condition != MAXITER) {
    update->nsteps = niter;
    if (update->restrict_output == 0) {
      for (int idump = 0; idump < output->ndump; idump++)
        output->next_dump[idump] = update->ntimestep;
      output->next_dump_any = update->ntimestep;
      if (output->restart_flag) {
        output->next_restart = update->ntimestep;
        if (output->restart_every_single)
          output->next_restart_single = update->ntimestep;
        if (output->restart_every_double)
          output->next_restart_double = update->ntimestep;
      }
    }
    output->next_thermo = update->ntimestep;
    modify->addstep_compute_all(update->ntimestep);
    ecurrent = energy_force(0);
    atomKK->sync(Host,ALL_MASK);
    output->write(update->ntimestep);
  }
  atomKK->sync(Host,ALL_MASK);
  lmp->kokkos->auto_sync = 1;
 }
 /* ----------------------------------------------------------------------
   evaluate potential energy and forces
   may migrate atoms due to reneighboring
   return new energy, which should include nextra_global dof
   return negative gradient stored in atom->f
   return negative gradient for nextra_global dof in fextra
 ------------------------------------------------------------------------- */
 double MinKokkos::energy_force(int resetflag)
 {
  // check for reneighboring
  // always communicate since minimizer moved atoms
  int nflag = neighbor->decide();
  if (nflag == 0) {
    timer->stamp();
    comm->forward_comm();
    timer->stamp(Timer::COMM);
  } else {
    if (modify->n_min_pre_exchange) {
      timer->stamp();
      modify->min_pre_exchange();
      timer->stamp(Timer::MODIFY);
    }
    if (triclinic) domain->x2lamda(atom->nlocal);
    domain->pbc();
    if (domain->box_change) {
      domain->reset_box();
      comm->setup();
      if (neighbor->style) neighbor->setup_bins();
    }
    timer->stamp();
    comm->exchange();
    if (atom->sortfreq > 0 &&
        update->ntimestep >= atom->nextsort) atom->sort();
    comm->borders();
    if (triclinic) domain->lamda2x(atom->nlocal+atom->nghost);
    timer->stamp(Timer::COMM);
    if (modify->n_min_pre_neighbor) {
      modify->min_pre_neighbor();
      timer->stamp(Timer::MODIFY);
    }
    neighbor->build(1);
    timer->stamp(Timer::NEIGH);
    if (modify->n_min_post_neighbor) {
      modify->min_post_neighbor();
      timer->stamp(Timer::MODIFY);
    }
  }
  ev_set(update->ntimestep);
  force_clear();
  timer->stamp();
  if (modify->n_min_pre_force) {
    modify->min_pre_force(vflag);
    timer->stamp(Timer::MODIFY);
  }
  if (pair_compute_flag) {
    atomKK->sync(force->pair->execution_space,force->pair->datamask_read);
    force->pair->compute(eflag,vflag);
    atomKK->modified(force->pair->execution_space,force->pair->datamask_modify);
    timer->stamp(Timer::PAIR);
  }
  if (atom->molecular) {
    if (force->bond) {
      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
      force->bond->compute(eflag,vflag);
      atomKK->modified(force->bond->execution_space,force->bond->datamask_modify);
    }
    if (force->angle) {
      atomKK->sync(force->angle->execution_space,force->angle->datamask_read);
      force->angle->compute(eflag,vflag);
      atomKK->modified(force->angle->execution_space,force->angle->datamask_modify);
    }
    if (force->dihedral) {
      atomKK->sync(force->dihedral->execution_space,force->dihedral->datamask_read);
      force->dihedral->compute(eflag,vflag);
      atomKK->modified(force->dihedral->execution_space,force->dihedral->datamask_modify);
    }
    if (force->improper) {
      atomKK->sync(force->improper->execution_space,force->improper->datamask_read);
      force->improper->compute(eflag,vflag);
      atomKK->modified(force->improper->execution_space,force->improper->datamask_modify);
    }
    timer->stamp(Timer::BOND);
  }
  if (kspace_compute_flag) {
    atomKK->sync(force->kspace->execution_space,force->kspace->datamask_read);
    force->kspace->compute(eflag,vflag);
    atomKK->modified(force->kspace->execution_space,force->kspace->datamask_modify);
    timer->stamp(Timer::KSPACE);
  }
  if (modify->n_min_pre_reverse) {
    modify->min_pre_reverse(eflag,vflag);
    timer->stamp(Timer::MODIFY);
  }
  if (force->newton) {
    comm->reverse_comm();
    timer->stamp(Timer::COMM);
  }
  // fixes that affect minimization
  if (modify->n_min_post_force) {
     timer->stamp();
     modify->min_post_force(vflag);
     timer->stamp(Timer::MODIFY);
  }
  // compute potential energy of system
  // normalize if thermo PE does
  atomKK->sync(pe_compute->execution_space,pe_compute->datamask_read);
  double energy = pe_compute->compute_scalar();
  atomKK->modified(pe_compute->execution_space,pe_compute->datamask_modify);
  if (nextra_global) energy += modify->min_energy(fextra);
  if (output->thermo->normflag) energy /= atom->natoms;
  // if reneighbored, atoms migrated
  // if resetflag = 1, update x0 of atoms crossing PBC
  // reset vectors used by lo-level minimizer
  if (nflag) {
    if (resetflag) fix_minimize_kk->reset_coords();
    reset_vectors();
  }
  return energy;
 }
 /* ----------------------------------------------------------------------
   clear force on own & ghost atoms
   clear other arrays as needed
 ------------------------------------------------------------------------- */
 void MinKokkos::force_clear()
 {
  if (external_force_clear) return;
  // clear global force array
  // if either newton flag is set, also include ghosts
  atomKK->k_f.clear_sync_state(); // ignore host forces/torques since device views
  atomKK->k_torque.clear_sync_state(); // will be cleared below
  int nzero = atom->nlocal;
  if (force->newton) nzero += atom->nghost;
  if (nzero) {
    // local variables for lambda capture
    auto l_f = atomKK->k_f.d_view;
    auto l_torque = atomKK->k_torque.d_view;
    auto l_torqueflag = torqueflag;
    Kokkos::parallel_for(nzero, LAMMPS_LAMBDA(int i) {
      l_f(i,0) = 0.0;
      l_f(i,1) = 0.0;
      l_f(i,2) = 0.0;
      if (l_torqueflag) {
        l_torque(i,0) = 0.0;
        l_torque(i,1) = 0.0;
        l_torque(i,2) = 0.0;
      }
    });
  }
  atomKK->modified(Device,F_MASK);
 }
 /* ----------------------------------------------------------------------
   compute and return ||force||_2^2
 ------------------------------------------------------------------------- */
 double MinKokkos::fnorm_sqr()
 {
  double local_norm2_sqr = 0.0;
  {
    // local variables for lambda capture
    auto l_fvec = fvec;
    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm2_sqr) {
      local_norm2_sqr += l_fvec[i]*l_fvec[i];
    },local_norm2_sqr);
  }
  double norm2_sqr = 0.0;
  MPI_Allreduce(&local_norm2_sqr,&norm2_sqr,1,MPI_DOUBLE,MPI_SUM,world);
  return norm2_sqr;
 }
 /* ----------------------------------------------------------------------
   compute and return ||force||_inf
 ------------------------------------------------------------------------- */
 double MinKokkos::fnorm_inf()
 {
  double local_norm_inf = 0.0;
  {
    // local variables for lambda capture
    auto l_fvec = fvec;
    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(int i, double& local_norm_inf) {
      local_norm_inf = MAX(fabs(l_fvec[i]),local_norm_inf);
    },Kokkos::Max<double>(local_norm_inf));
  }
  double norm_inf = 0.0;
  MPI_Allreduce(&local_norm_inf,&norm_inf,1,MPI_DOUBLE,MPI_MAX,world);
  return norm_inf;
 }
--- a/src/KOKKOS/min_kokkos.h
+++ b/src/KOKKOS/min_kokkos.h
@ -0,0 +1,58 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifndef LMP_MIN_KOKKOS_H
 #define LMP_MIN_KOKKOS_H
 #include "min.h"
 #include "kokkos_type.h"
 namespace LAMMPS_NS {
 class MinKokkos : public Min {
 public:
  MinKokkos(class LAMMPS *);
  virtual ~MinKokkos();
  void init();
  void setup(int flag=1);
  void setup_minimal(int);
  void run(int);
  double fnorm_sqr();
  double fnorm_inf();
  virtual void init_style() {}
  virtual void setup_style() = 0;
  virtual void reset_vectors() = 0;
  virtual int iterate(int) = 0;
  // possible return values of iterate() method
  enum{MAXITER,MAXEVAL,ETOL,FTOL,DOWNHILL,ZEROALPHA,ZEROFORCE,
       ZEROQUAD,TRSMALL,INTERROR,TIMEOUT};
 //protected: // won't compile with CUDA
  class FixMinimizeKokkos *fix_minimize_kk;  // fix that stores auxiliary data
  DAT::t_ffloat_1d xvec;            // variables for atomic dof, as 1d vector
  DAT::t_ffloat_1d fvec;            // force vector for atomic dof, as 1d vector
  double energy_force(int);
  void force_clear();
 };
 }
 #endif
 /* ERROR/WARNING messages:
 */
--- a/src/KOKKOS/min_linesearch_kokkos.cpp
+++ b/src/KOKKOS/min_linesearch_kokkos.cpp
@ -0,0 +1,401 @@
 /* ----------------------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
   Contributing author: Stan Moore (SNL)
 ------------------------------------------------------------------------- */
 #include "min_linesearch_kokkos.h"
 #include <mpi.h>
 #include <cmath>
 #include "atom_kokkos.h"
 #include "modify.h"
 #include "fix_minimize_kokkos.h"
 #include "pair.h"
 #include "output.h"
 #include "thermo.h"
 #include "error.h"
 #include "atom_masks.h"
 using namespace LAMMPS_NS;
 // ALPHA_MAX = max alpha allowed to avoid long backtracks
 // ALPHA_REDUCE = reduction ratio, should be in range [0.5,1)
 // BACKTRACK_SLOPE, should be in range (0,0.5]
 // QUADRATIC_TOL = tolerance on alpha0, should be in range [0.1,1)
 // EMACH = machine accuracy limit of energy changes (1.0e-8)
 // EPS_QUAD = tolerance for quadratic projection
 #define ALPHA_MAX 1.0
 #define ALPHA_REDUCE 0.5
 #define BACKTRACK_SLOPE 0.4
 #define QUADRATIC_TOL 0.1
 //#define EMACH 1.0e-8
 #define EMACH 1.0e-8
 #define EPS_QUAD 1.0e-28
 /* ---------------------------------------------------------------------- */
 MinLineSearchKokkos::MinLineSearchKokkos(LAMMPS *lmp) : MinKokkos(lmp)
 {
  searchflag = 1;
  atomKK = (AtomKokkos *) atom;
 }
 /* ---------------------------------------------------------------------- */
 MinLineSearchKokkos::~MinLineSearchKokkos()
 {
 }
 /* ---------------------------------------------------------------------- */
 void MinLineSearchKokkos::init()
 {
  MinKokkos::init();
  if (linestyle == 1) linemin = &MinLineSearchKokkos::linemin_quadratic;
  else error->all(FLERR,"Kokkos minimize only supports the 'min_modify line "
   "quadratic' option");
 }
 /* ---------------------------------------------------------------------- */
 void MinLineSearchKokkos::setup_style()
 {
  // memory for x0,g,h for atomic dof
  fix_minimize_kk->add_vector_kokkos();
  fix_minimize_kk->add_vector_kokkos();
  fix_minimize_kk->add_vector_kokkos();
 }
 /* ----------------------------------------------------------------------
   set current vector lengths and pointers
   called after atoms have migrated
 ------------------------------------------------------------------------- */
 void MinLineSearchKokkos::reset_vectors()
 {
  // atomic dof
  nvec = 3 * atom->nlocal;
  atomKK->sync(Device,F_MASK|X_MASK);
  auto d_x = atomKK->k_x.d_view;
  auto d_f = atomKK->k_f.d_view;
  if (nvec) xvec = DAT::t_ffloat_1d(d_x.data(),d_x.size());
  if (nvec) fvec = DAT::t_ffloat_1d(d_f.data(),d_f.size());
  x0 = fix_minimize_kk->request_vector_kokkos(0);
  g = fix_minimize_kk->request_vector_kokkos(1);
  h = fix_minimize_kk->request_vector_kokkos(2);
  auto h_fvec = Kokkos::create_mirror_view(fvec);
  Kokkos::deep_copy(h_fvec,fvec);
 }
 /* ----------------------------------------------------------------------
   line minimization methods
   find minimum-energy starting at x along h direction
   input args:   eoriginal = energy at initial x
   input extra:  n,x,x0,f,h for atomic, extra global, extra per-atom dof
   output args:  return 0 if successful move, non-zero alpha
                 return non-zero if failed
                 alpha = distance moved along h for x at min eng config
                 update neval counter of eng/force function evaluations
                 output extra: if fail, energy_force() of original x
                 if succeed, energy_force() at x + alpha*h
                 atom->x = coords at new configuration
                 atom->f = force at new configuration
                 ecurrent = energy of new configuration
 ------------------------------------------------------------------------- */
 /* ----------------------------------------------------------------------
    // linemin: quadratic line search (adapted from Dennis and Schnabel)
    // The objective function is approximated by a quadratic
    // function in alpha, for sufficiently small alpha.
    // This idea is the same as that used in the well-known secant
    // method. However, since the change in the objective function
    // (difference of two finite numbers) is not known as accurately
    // as the gradient (which is close to zero), all the expressions
    // are written in terms of gradients. In this way, we can converge
    // the LAMMPS forces much closer to zero.
    //
    // We know E,Eprev,fh,fhprev. The Taylor series about alpha_prev
    // truncated at the quadratic term is:
    //
    //     E = Eprev - del_alpha*fhprev + (1/2)del_alpha^2*Hprev
    //
    // and
    //
    //     fh = fhprev - del_alpha*Hprev
    //
    // where del_alpha = alpha-alpha_prev
    //
    // We solve these two equations for Hprev and E=Esolve, giving:
    //
    //     Esolve = Eprev - del_alpha*(f+fprev)/2
    //
    // We define relerr to be:
    //
    //      relerr = |(Esolve-E)/Eprev|
    //             = |1.0 - (0.5*del_alpha*(f+fprev)+E)/Eprev|
    //
    // If this is accurate to within a reasonable tolerance, then
    // we go ahead and use a secant step to fh = 0:
    //
    //      alpha0 = alpha - (alpha-alphaprev)*fh/delfh;
    //
 ------------------------------------------------------------------------- */
 int MinLineSearchKokkos::linemin_quadratic(double eoriginal, double &alpha)
 {
  double fdothall,fdothme,hme,hmaxall;
  double de_ideal,de;
  double delfh,engprev,relerr,alphaprev,fhprev,ff,fh,alpha0;
  double dot[2],dotall[2];
  double alphamax;
  fix_minimize_kk->k_vectors.sync<LMPDeviceType>();
  fix_minimize_kk->k_vectors.modify<LMPDeviceType>();
  // fdothall = projection of search dir along downhill gradient
  // if search direction is not downhill, exit with error
  fdothme = 0.0;
  {
    // local variables for lambda capture
    auto l_fvec = fvec;
    auto l_h = h;
    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& fdothme) {
      fdothme += l_fvec[i]*l_h[i];
    },fdothme);
  }
  MPI_Allreduce(&fdothme,&fdothall,1,MPI_DOUBLE,MPI_SUM,world);
  if (output->thermo->normflag) fdothall /= atom->natoms;
  if (fdothall <= 0.0) return DOWNHILL;
  // set alphamax so no dof is changed by more than max allowed amount
  // for atom coords, max amount = dmax
  // for extra per-atom dof, max amount = extra_max[]
  // for extra global dof, max amount is set by fix
  // also insure alphamax <= ALPHA_MAX
  // else will have to backtrack from huge value when forces are tiny
  // if all search dir components are already 0.0, exit with error
  hme = 0.0;
  {
    // local variables for lambda capture
    auto l_h = h;
    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, double& hme) {
      hme = MAX(hme,fabs(l_h[i]));
    },Kokkos::Max<double>(hme));
  }
  MPI_Allreduce(&hme,&hmaxall,1,MPI_DOUBLE,MPI_MAX,world);
  alphamax = MIN(ALPHA_MAX,dmax/hmaxall);
  if (hmaxall == 0.0) return ZEROFORCE;
  // store box and values of all dof at start of linesearch
  {
    // local variables for lambda capture
    auto l_xvec = xvec;
    auto l_x0 = x0;
    fix_minimize_kk->store_box();
    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
      l_x0[i] = l_xvec[i];
    });
  }
  // backtrack with alpha until energy decrease is sufficient
  // or until get to small energy change, then perform quadratic projection
  alpha = alphamax;
  fhprev = fdothall;
  engprev = eoriginal;
  alphaprev = 0.0;
  // // important diagnostic: test the gradient against energy
  // double etmp;
  // double alphatmp = alphamax*1.0e-4;
  // etmp = alpha_step(alphatmp,1);
  // printf("alpha = %g dele = %g dele_force = %g err = %g\n",
  //        alphatmp,etmp-eoriginal,-alphatmp*fdothall,
  //        etmp-eoriginal+alphatmp*fdothall);
  // alpha_step(0.0,1);
  while (1) {
    ecurrent = alpha_step(alpha,1);
    // compute new fh, alpha, delfh
    s_double2 sdot;
    {
      // local variables for lambda capture
      auto l_fvec = fvec;
      auto l_h = h;
      Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
        sdot.d0 += l_fvec[i]*l_fvec[i];
        sdot.d1 += l_fvec[i]*l_h[i];
      },sdot);
    }
    dot[0] = sdot.d0;
    dot[1] = sdot.d1;
    MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
    ff = dotall[0];
    fh = dotall[1];
    if (output->thermo->normflag) {
      ff /= atom->natoms;
      fh /= atom->natoms;
    }
    delfh = fh - fhprev;
    // if fh or delfh is epsilon, reset to starting point, exit with error
    if (fabs(fh) < EPS_QUAD || fabs(delfh) < EPS_QUAD) {
      ecurrent = alpha_step(0.0,0);
      return ZEROQUAD;
    }
    // Check if ready for quadratic projection, equivalent to secant method
    // alpha0 = projected alpha
    relerr = fabs(1.0-(0.5*(alpha-alphaprev)*(fh+fhprev)+ecurrent)/engprev);
    alpha0 = alpha - (alpha-alphaprev)*fh/delfh;
    if (relerr <= QUADRATIC_TOL && alpha0 > 0.0 && alpha0 < alphamax) {
      ecurrent = alpha_step(alpha0,1);
      if (ecurrent - eoriginal < EMACH) {
        return 0;
      }
    }
    // if backtracking energy change is better than ideal, exit with success
    de_ideal = -BACKTRACK_SLOPE*alpha*fdothall;
    de = ecurrent - eoriginal;
    if (de <= de_ideal)
      return 0;
    // save previous state
    fhprev = fh;
    engprev = ecurrent;
    alphaprev = alpha;
    // reduce alpha
    alpha *= ALPHA_REDUCE;
    // backtracked all the way to 0.0
    // reset to starting point, exit with error
    if (alpha <= 0.0 || de_ideal >= -EMACH) {
      ecurrent = alpha_step(0.0,0);
      return ZEROALPHA;
    }
  }
 }
 /* ---------------------------------------------------------------------- */
 double MinLineSearchKokkos::alpha_step(double alpha, int resetflag)
 {
  // reset to starting point
  atomKK->k_x.clear_sync_state(); // ignore if host positions since device
                                  //  positions will be reset below
  {
    // local variables for lambda capture
    auto l_xvec = xvec;
    auto l_x0 = x0;
    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
      l_xvec[i] = l_x0[i];
    });
  }
  // step forward along h
  if (alpha > 0.0) {
    // local variables for lambda capture
    auto l_xvec = xvec;
    auto l_h = h;
    Kokkos::parallel_for(nvec, LAMMPS_LAMBDA(const int& i) {
      l_xvec[i] += alpha*l_h[i];
    });
  }
  atomKK->modified(Device,X_MASK);
  // compute and return new energy
  neval++;
  return energy_force(resetflag);
 }
 /* ---------------------------------------------------------------------- */
 // compute projection of force on: itself and the search direction
 double MinLineSearchKokkos::compute_dir_deriv(double &ff)
 {
  double dot[2],dotall[2];
  double fh;
  // compute new fh, alpha, delfh
  s_double2 sdot;
  {
    // local variables for lambda capture
    auto l_fvec = fvec;
    auto l_h = h;
    Kokkos::parallel_reduce(nvec, LAMMPS_LAMBDA(const int& i, s_double2& sdot) {
      sdot.d0 += l_fvec[i]*l_fvec[i];
      sdot.d1 += l_fvec[i]*l_h[i];
    },sdot);
  }
  dot[0] = sdot.d0;
  dot[1] = sdot.d1;
  MPI_Allreduce(dot,dotall,2,MPI_DOUBLE,MPI_SUM,world);
  ff = dotall[0];
  fh = dotall[1];
  if (output->thermo->normflag) {
    ff /= atom->natoms;
    fh /= atom->natoms;
  }
  return fh;
 }
--- a/src/KOKKOS/min_linesearch_kokkos.h
+++ b/src/KOKKOS/min_linesearch_kokkos.h
@ -0,0 +1,69 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifndef LMP_MIN_LSRCH_KOKKOS_H
 #define LMP_MIN_LSRCH_KOKKOS_H
 #include "min_kokkos.h"
 namespace LAMMPS_NS {
  struct s_double2 {
    double d0, d1;
    KOKKOS_INLINE_FUNCTION
    s_double2() {
      d0 = d1 = 0.0;
    }
    KOKKOS_INLINE_FUNCTION
    s_double2& operator+=(const s_double2 &rhs){
      d0 += rhs.d0;
      d1 += rhs.d1;
      return *this;
    }
    KOKKOS_INLINE_FUNCTION
    void operator+=(const volatile s_double2 &rhs) volatile {
      d0 += rhs.d0;
      d1 += rhs.d1;
    }
  };
  //typedef s_double2 double2;
 class MinLineSearchKokkos : public MinKokkos {
 public:
  MinLineSearchKokkos(class LAMMPS *);
  ~MinLineSearchKokkos();
  void init();
  void setup_style();
  void reset_vectors();
 //protected: // won't compile with CUDA
  // vectors needed by linesearch minimizers
  // allocated and stored by fix_minimize
  // x,f are stored by parent or Atom class or Pair class
  DAT::t_ffloat_1d x0;   // coords at start of linesearch
  DAT::t_ffloat_1d g;    // old gradient vector
  DAT::t_ffloat_1d h;    // search direction vector
  typedef int (MinLineSearchKokkos::*FnPtr)(double, double &);
  FnPtr linemin;
  int linemin_quadratic(double, double &);
  double alpha_step(double, int);
  double compute_dir_deriv(double &);
 };
 }
 #endif
--- a/src/KOKKOS/verlet_kokkos.cpp
+++ b/src/KOKKOS/verlet_kokkos.cpp
@ -143,7 +143,6 @@ void VerletKokkos::setup(int flag)
  }
  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
  if (atomKK->molecular) {
    if (force->bond) {
      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
@ -248,7 +247,6 @@ void VerletKokkos::setup_minimal(int flag)
  }
  else if (force->pair) force->pair->compute_dummy(eflag,vflag);
  if (atomKK->molecular) {
    if (force->bond) {
      atomKK->sync(force->bond->execution_space,force->bond->datamask_read);
@ -285,7 +283,9 @@ void VerletKokkos::setup_minimal(int flag)
  if (force->newton) comm->reverse_comm();
  lmp->kokkos->auto_sync = 0;
  modify->setup(vflag);
  lmp->kokkos->auto_sync = 1;
  update->setupflag = 0;
 }
--- a/src/MOLECULE/dihedral_harmonic.cpp
+++ b/src/MOLECULE/dihedral_harmonic.cpp
@ -42,7 +42,7 @@ DihedralHarmonic::DihedralHarmonic(LAMMPS *lmp) : Dihedral(lmp)
 DihedralHarmonic::~DihedralHarmonic()
 {
-  if (allocated) {
+  if (allocated && !copymode) {
    memory->destroy(setflag);
    memory->destroy(k);
    memory->destroy(sign);
--- a/src/MOLECULE/dihedral_harmonic.h
+++ b/src/MOLECULE/dihedral_harmonic.h
@ -29,16 +29,16 @@ class DihedralHarmonic : public Dihedral {
  DihedralHarmonic(class LAMMPS *);
  virtual ~DihedralHarmonic();
  virtual void compute(int, int);
-  void coeff(int, char **);
+  virtual void coeff(int, char **);
  void write_restart(FILE *);
-  void read_restart(FILE *);
+  virtual void read_restart(FILE *);
  void write_data(FILE *);
 protected:
  double *k,*cos_shift,*sin_shift;
  int *sign,*multiplicity;
-  void allocate();
+  virtual void allocate();
 };
 }
--- a/src/REPLICA/tad.cpp
+++ b/src/REPLICA/tad.cpp
@ -197,7 +197,7 @@ void TAD::command(int narg, char **arg)
  args = new char*[narg2];
  args[0] = min_style;
-  update->create_minimize(narg2,args);
+  update->create_minimize(narg2,args,1);
  delete [] args;
@ -711,7 +711,7 @@ void TAD::perform_neb(int ievent)
  args = new char*[narg2];
  args[0] = min_style_neb;
-  update->create_minimize(narg2,args);
+  update->create_minimize(narg2,args,1);
  delete [] args;
@ -777,7 +777,7 @@ void TAD::perform_neb(int ievent)
  args = new char*[narg2];
  args[0] = min_style;
-  update->create_minimize(narg2,args);
+  update->create_minimize(narg2,args,1);
  update->etol = etol;
  update->ftol = ftol;
--- a/src/RIGID/fix_rigid.cpp
+++ b/src/RIGID/fix_rigid.cpp
@ -34,25 +34,12 @@
 #include "math_const.h"
 #include "memory.h"
 #include "error.h"
 #include "rigid_const.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
+using namespace RigidConst;
 enum{SINGLE,MOLECULE,GROUP};
 enum{NONE,XYZ,XY,YZ,XZ};
 enum{ISO,ANISO,TRICLINIC};
 #define MAXLINE 1024
 #define CHUNK 1024
 #define ATTRIBUTE_PERBODY 20
 #define TOLERANCE 1.0e-6
 #define EPSILON 1.0e-7
 #define SINERTIA 0.4            // moment of inertia prefactor for sphere
 #define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
 #define LINERTIA (1.0/12.0)     // moment of inertia prefactor for line segment
 /* ---------------------------------------------------------------------- */
@ -605,21 +592,6 @@ FixRigid::FixRigid(LAMMPS *lmp, int narg, char **arg) :
  for (ibody = 0; ibody < nbody; ibody++)
    if (nrigid[ibody] <= 1) error->all(FLERR,"One or zero atoms in rigid body");
  // bitmasks for properties of extended particles
  POINT = 1;
  SPHERE = 2;
  ELLIPSOID = 4;
  LINE = 8;
  TRIANGLE = 16;
  DIPOLE = 32;
  OMEGA = 64;
  ANGMOM = 128;
  TORQUE = 256;
  MINUSPI = -MY_PI;
  TWOPI = 2.0*MY_PI;
  // wait to setup bodies until first init() using current atom properties
  setupflag = 0;
@ -1472,8 +1444,8 @@ void FixRigid::set_xv()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega_one[i][0] = omega[ibody][0];
        omega_one[i][1] = omega[ibody][1];
@ -2018,8 +1990,8 @@ void FixRigid::setup_bodies_static()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        orient[i][0] = lbonus[line[i]].theta - theta_body;
-        while (orient[i][0] <= MINUSPI) orient[i][0] += TWOPI;
+        while (orient[i][0] <= -MY_PI) orient[i][0] += MY_2PI;
-        while (orient[i][0] > MY_PI) orient[i][0] -= TWOPI;
+        while (orient[i][0] > MY_PI) orient[i][0] -= MY_2PI;
        if (orientflag == 4) orient[i][1] = orient[i][2] = orient[i][3] = 0.0;
      } else if (eflags[i] & TRIANGLE) {
        quatatom = tbonus[tri[i]].quat;
--- a/src/RIGID/fix_rigid.h
+++ b/src/RIGID/fix_rigid.h
@ -65,7 +65,6 @@ class FixRigid : public Fix {
  double dtv,dtf,dtq;
  double *step_respa;
  int triclinic;
  double MINUSPI,TWOPI;
  char *inpfile;             // file to read rigid body attributes from
  int rstyle;               // SINGLE,MOLECULE,GROUP
@ -137,9 +136,6 @@ class FixRigid : public Fix {
  class AtomVecLine *avec_line;
  class AtomVecTri *avec_tri;
  int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE;   // bitmasks for eflags
  int OMEGA,ANGMOM,TORQUE;
  void image_shift();
  void set_xv();
  void set_v();
--- a/src/RIGID/fix_rigid_nh.cpp
+++ b/src/RIGID/fix_rigid_nh.cpp
@ -33,14 +33,11 @@
 #include "kspace.h"
 #include "memory.h"
 #include "error.h"
 #include "rigid_const.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
-
+using namespace RigidConst;
 enum{NONE,XYZ,XY,YZ,XZ};     // same as in FixRigid
 enum{ISO,ANISO,TRICLINIC};   // same as in FixRigid
 #define EPSILON 1.0e-7
 /* ---------------------------------------------------------------------- */
--- a/src/RIGID/fix_rigid_nh_small.cpp
+++ b/src/RIGID/fix_rigid_nh_small.cpp
@ -34,17 +34,12 @@
 #include "kspace.h"
 #include "memory.h"
 #include "error.h"
 #include "rigid_const.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathExtra;
-
+using namespace RigidConst;
 enum{NONE,XYZ,XY,YZ,XZ};     // same as in FixRigid
 enum{ISO,ANISO,TRICLINIC};   // same as in FixRigid
 #define EPSILON 1.0e-7
 enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
 /* ---------------------------------------------------------------------- */
--- a/src/RIGID/fix_rigid_small.cpp
+++ b/src/RIGID/fix_rigid_small.cpp
@ -38,34 +38,17 @@
 #include "hashlittle.h"
 #include "memory.h"
 #include "error.h"
 #include "rigid_const.h"
 #include <map>
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
 using namespace RigidConst;
 #define RVOUS 1   // 0 for irregular, 1 for all2all
 #define MAXLINE 1024
 #define CHUNK 1024
 #define ATTRIBUTE_PERBODY 20
 #define TOLERANCE 1.0e-6
 #define EPSILON 1.0e-7
 #define BIG 1.0e20
 #define SINERTIA 0.4            // moment of inertia prefactor for sphere
 #define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
 #define LINERTIA (1.0/12.0)     // moment of inertia prefactor for line segment
 #define DELTA_BODY 10000
 enum{NONE,XYZ,XY,YZ,XZ};        // same as in FixRigid
 enum{ISO,ANISO,TRICLINIC};      // same as in FixRigid
 enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
 /* ---------------------------------------------------------------------- */
 FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
@ -455,21 +438,6 @@ FixRigidSmall::FixRigidSmall(LAMMPS *lmp, int narg, char **arg) :
  comm_forward = 1 + bodysize;
  comm_reverse = 6;
  // bitmasks for properties of extended particles
  POINT = 1;
  SPHERE = 2;
  ELLIPSOID = 4;
  LINE = 8;
  TRIANGLE = 16;
  DIPOLE = 32;
  OMEGA = 64;
  ANGMOM = 128;
  TORQUE = 256;
  MINUSPI = -MY_PI;
  TWOPI = 2.0*MY_PI;
  // atom style pointers to particles that store extra info
  avec_ellipsoid = (AtomVecEllipsoid *) atom->style_match("ellipsoid");
@ -1384,8 +1352,8 @@ void FixRigidSmall::set_xv()
        if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]);
        else theta_body = -2.0*acos(b->quat[0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega[i][0] = b->omega[0];
        omega[i][1] = b->omega[1];
@ -2155,8 +2123,8 @@ void FixRigidSmall::setup_bodies_static()
        if (b->quat[3] >= 0.0) theta_body = 2.0*acos(b->quat[0]);
        else theta_body = -2.0*acos(b->quat[0]);
        orient[i][0] = lbonus[line[i]].theta - theta_body;
-        while (orient[i][0] <= MINUSPI) orient[i][0] += TWOPI;
+        while (orient[i][0] <= -MY_PI) orient[i][0] += MY_2PI;
-        while (orient[i][0] > MY_PI) orient[i][0] -= TWOPI;
+        while (orient[i][0] > MY_PI) orient[i][0] -= MY_2PI;
        if (orientflag == 4) orient[i][1] = orient[i][2] = orient[i][3] = 0.0;
      } else if (eflags[i] & TRIANGLE) {
        quatatom = tbonus[tri[i]].quat;
--- a/src/RIGID/fix_rigid_small.h
+++ b/src/RIGID/fix_rigid_small.h
@ -72,7 +72,6 @@ class FixRigidSmall : public Fix {
  double dtv,dtf,dtq;
  double *step_respa;
  int triclinic;
  double MINUSPI,TWOPI;
  char *inpfile;             // file to read rigid body attributes from
  int setupflag;            // 1 if body properties are setup, else 0
@ -129,9 +128,6 @@ class FixRigidSmall : public Fix {
  int dorientflag;      // 1 if particles store dipole orientation
  int reinitflag;       // 1 if re-initialize rigid bodies between runs
  int POINT,SPHERE,ELLIPSOID,LINE,TRIANGLE,DIPOLE;   // bitmasks for eflags
  int OMEGA,ANGMOM,TORQUE;
  class AtomVecEllipsoid *avec_ellipsoid;
  class AtomVecLine *avec_line;
  class AtomVecTri *avec_tri;
--- a/src/RIGID/rigid_const.h
+++ b/src/RIGID/rigid_const.h
@ -0,0 +1,54 @@
 /* -*- c++ -*- ----------------------------------------------------------
   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
   http://lammps.sandia.gov, Sandia National Laboratories
   Steve Plimpton, sjplimp@sandia.gov
   Copyright (2003) Sandia Corporation.  Under the terms of Contract
   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
   certain rights in this software.  This software is distributed under
   the GNU General Public License.
   See the README file in the top-level LAMMPS directory.
 ------------------------------------------------------------------------- */
 #ifndef LMP_RIGID_CONST_H
 #define LMP_RIGID_CONST_H
 namespace LAMMPS_NS {
  namespace RigidConst {
    enum{SINGLE,MOLECULE,GROUP};
    enum{NONE,XYZ,XY,YZ,XZ};
    enum{ISO,ANISO,TRICLINIC};
    enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
    enum {POINT     = 1<<0,
          SPHERE    = 1<<1,
          ELLIPSOID = 1<<2,
          LINE      = 1<<3,
          TRIANGLE  = 1<<4,
          DIPOLE    = 1<<5,
          OMEGA     = 1<<6,
          ANGMOM    = 1<<7,
          TORQUE    = 1<<8
    };
    static const double TOLERANCE = 1.0e-6;
    static const double EPSILON   = 1.0e-7;
    static const double BIG       = 1.0e20;
    // moment of inertia prefactor for sphere
    static const double SINERTIA = 0.4;
    // moment of inertia prefactor for ellipsoid
    static const double EINERTIA = 0.2;
    // moment of inertia prefactor for line segment
    static const double LINERTIA = 1.0/12.0;
    static const int MAXLINE    = 1024;
    static const int CHUNK      = 1024;
    static const int DELTA_BODY = 10000;
    static const int ATTRIBUTE_PERBODY = 20;
  }
 }
 #endif
--- a/src/USER-OMP/fix_rigid_nh_omp.cpp
+++ b/src/USER-OMP/fix_rigid_nh_omp.cpp
@ -38,15 +38,12 @@
 #include "math_extra.h"
 #include "math_const.h"
 #include "rigid_const.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
+using namespace RigidConst;
 enum{SINGLE,MOLECULE,GROUP};    // same as in FixRigid
 enum{ISO,ANISO,TRICLINIC};      // same as in FixRigid
 #define EINERTIA 0.4            // moment of inertia prefactor for ellipsoid
 typedef struct { double x,y,z; } dbl3_t;
@ -770,8 +767,8 @@ void FixRigidNHOMP::set_xv_thr()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega_one[i][0] = omega[ibody][0];
        omega_one[i][1] = omega[ibody][1];
--- a/src/USER-OMP/fix_rigid_omp.cpp
+++ b/src/USER-OMP/fix_rigid_omp.cpp
@ -33,14 +33,12 @@
 #include "math_extra.h"
 #include "math_const.h"
 #include "rigid_const.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
+using namespace RigidConst;
 enum{SINGLE,MOLECULE,GROUP};    // same as in FixRigid
 #define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
 typedef struct { double x,y,z; } dbl3_t;
@ -488,8 +486,8 @@ void FixRigidOMP::set_xv_thr()
        if (quat[ibody][3] >= 0.0) theta_body = 2.0*acos(quat[ibody][0]);
        else theta_body = -2.0*acos(quat[ibody][0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega_one[i][0] = omega[ibody][0];
        omega_one[i][1] = omega[ibody][1];
--- a/src/USER-OMP/fix_rigid_small_omp.cpp
+++ b/src/USER-OMP/fix_rigid_small_omp.cpp
@ -31,14 +31,12 @@
 #include "math_extra.h"
 #include "math_const.h"
 #include "rigid_const.h"
 using namespace LAMMPS_NS;
 using namespace FixConst;
 using namespace MathConst;
-
+using namespace RigidConst;
 #define EINERTIA 0.2            // moment of inertia prefactor for ellipsoid
 enum{FULL_BODY,INITIAL,FINAL,FORCE_TORQUE,VCM_ANGMOM,XCM_MASS,ITENSOR,DOF};
 typedef struct { double x,y,z; } dbl3_t;
@ -427,8 +425,8 @@ void FixRigidSmallOMP::set_xv_thr()
        if (b.quat[3] >= 0.0) theta_body = 2.0*acos(b.quat[0]);
        else theta_body = -2.0*acos(b.quat[0]);
        theta = orient[i][0] + theta_body;
-        while (theta <= MINUSPI) theta += TWOPI;
+        while (theta <= -MY_PI) theta += MY_2PI;
-        while (theta > MY_PI) theta -= TWOPI;
+        while (theta > MY_PI) theta -= MY_2PI;
        lbonus[line[i]].theta = theta;
        omega[i][0] = b.omega[0];
        omega[i][1] = b.omega[1];
--- a/src/compute_bond_local.cpp
+++ b/src/compute_bond_local.cpp
@ -33,7 +33,7 @@ using namespace LAMMPS_NS;
 #define DELTA 10000
 #define EPSILON 1.0e-12
-enum{DIST,VELVIB,OMEGA,ENGTRANS,ENGVIB,ENGROT,ENGPOT,FORCE,VARIABLE};
+enum{DIST,VELVIB,OMEGA,ENGTRANS,ENGVIB,ENGROT,ENGPOT,FORCE,FX,FY,FZ,VARIABLE};
 /* ---------------------------------------------------------------------- */
@ -64,6 +64,9 @@ ComputeBondLocal::ComputeBondLocal(LAMMPS *lmp, int narg, char **arg) :
    if (strcmp(arg[iarg],"dist") == 0) bstyle[nvalues++] = DIST;
    else if (strcmp(arg[iarg],"engpot") == 0) bstyle[nvalues++] = ENGPOT;
    else if (strcmp(arg[iarg],"force") == 0) bstyle[nvalues++] = FORCE;
    else if (strcmp(arg[iarg],"fx") == 0) bstyle[nvalues++] = FX;
    else if (strcmp(arg[iarg],"fy") == 0) bstyle[nvalues++] = FY;
    else if (strcmp(arg[iarg],"fz") == 0) bstyle[nvalues++] = FZ;
    else if (strcmp(arg[iarg],"engvib") == 0) bstyle[nvalues++] = ENGVIB;
    else if (strcmp(arg[iarg],"engrot") == 0) bstyle[nvalues++] = ENGROT;
    else if (strcmp(arg[iarg],"engtrans") == 0) bstyle[nvalues++] = ENGTRANS;
@ -127,7 +130,8 @@ ComputeBondLocal::ComputeBondLocal(LAMMPS *lmp, int narg, char **arg) :
  singleflag = 0;
  velflag = 0;
  for (int i = 0; i < nvalues; i++) {
-    if (bstyle[i] == ENGPOT || bstyle[i] == FORCE) singleflag = 1;
+    if (bstyle[i] == ENGPOT || bstyle[i] == FORCE || bstyle[i] == FX       || 
        bstyle[i] == FY     || bstyle[i] == FZ) singleflag = 1;
    if (bstyle[i] == VELVIB || bstyle[i] == OMEGA || bstyle[i] == ENGTRANS ||
        bstyle[i] == ENGVIB || bstyle[i] == ENGROT) velflag = 1;
  }
@ -393,6 +397,15 @@ int ComputeBondLocal::compute_bonds(int flag)
          case FORCE:
            ptr[n] = sqrt(rsq)*fbond;
            break;
          case FX:
            ptr[n] = dx*fbond;
            break;
          case FY:
            ptr[n] = dy*fbond;
            break;
          case FZ:
            ptr[n] = dz*fbond;
            break;
          case ENGVIB:
            ptr[n] = engvib;
            break;
--- a/src/compute_orientorder_atom.cpp
+++ b/src/compute_orientorder_atom.cpp
@ -43,12 +43,14 @@ using namespace std;
  #define MY_EPSILON (10.0*2.220446049250313e-16)
 #endif
 #define QEPSILON 1.0e-6
 /* ---------------------------------------------------------------------- */
 ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg) :
  Compute(lmp, narg, arg),
  qlist(NULL), distsq(NULL), nearest(NULL), rlist(NULL),
-  qnarray(NULL), qnm_r(NULL), qnm_i(NULL)
+  qnarray(NULL), qnm_r(NULL), qnm_i(NULL), cglist(NULL)
 {
  if (narg < 3 ) error->all(FLERR,"Illegal compute orientorder/atom command");
@ -56,6 +58,8 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
  nnn = 12;
  cutsq = 0.0;
  wlflag = 0;
  wlhatflag = 0;
  qlcompflag = 0;
  // specify which orders to request
@ -96,27 +100,39 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
      if (iarg+nqlist > narg)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      qmax = 0;
-      for (int iw = 0; iw < nqlist; iw++) {
+      for (int il = 0; il < nqlist; il++) {
-        qlist[iw] = force->numeric(FLERR,arg[iarg+iw]);
+        qlist[il] = force->numeric(FLERR,arg[iarg+il]);
-        if (qlist[iw] < 0)
+        if (qlist[il] < 0)
          error->all(FLERR,"Illegal compute orientorder/atom command");
-        if (qlist[iw] > qmax) qmax = qlist[iw];
+        if (qlist[il] > qmax) qmax = qlist[il];
      }
      iarg += nqlist;
    } else if (strcmp(arg[iarg],"wl") == 0) {
      if (iarg+2 > narg)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      if (strcmp(arg[iarg+1],"yes") == 0) wlflag = 1;
      else if (strcmp(arg[iarg+1],"no") == 0) wlflag = 0;
      else error->all(FLERR,"Illegal compute orientorder/atom command");
      iarg += 2;
    } else if (strcmp(arg[iarg],"wl/hat") == 0) {
      if (iarg+2 > narg)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      if (strcmp(arg[iarg+1],"yes") == 0) wlhatflag = 1;
      else if (strcmp(arg[iarg+1],"no") == 0) wlhatflag = 0;
      else error->all(FLERR,"Illegal compute orientorder/atom command");
      iarg += 2;
    } else if (strcmp(arg[iarg],"components") == 0) {
      qlcompflag = 1;
      if (iarg+2 > narg)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      qlcomp = force->numeric(FLERR,arg[iarg+1]);
      if (qlcomp <= 0)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      iqlcomp = -1;
-      for (int iw = 0; iw < nqlist; iw++)
+      for (int il = 0; il < nqlist; il++)
-        if (qlcomp == qlist[iw]) {
+        if (qlcomp == qlist[il]) {
-          iqlcomp = iw;
+          iqlcomp = il;
          break;
        }
-      if (iqlcomp < 0)
+      if (iqlcomp == -1)
        error->all(FLERR,"Illegal compute orientorder/atom command");
      iarg += 2;
    } else if (strcmp(arg[iarg],"cutoff") == 0) {
@ -130,8 +146,10 @@ ComputeOrientOrderAtom::ComputeOrientOrderAtom(LAMMPS *lmp, int narg, char **arg
    } else error->all(FLERR,"Illegal compute orientorder/atom command");
  }
-  if (qlcompflag) ncol = nqlist + 2*(2*qlcomp+1);
+  ncol = nqlist;
-  else ncol = nqlist;
+  if (wlflag) ncol += nqlist;
  if (wlhatflag) ncol += nqlist;
  if (qlcompflag) ncol += 2*(2*qlcomp+1);
  peratom_flag = 1;
  size_peratom_cols = ncol;
@ -151,7 +169,7 @@ ComputeOrientOrderAtom::~ComputeOrientOrderAtom()
  memory->destroy(qlist);
  memory->destroy(qnm_r);
  memory->destroy(qnm_i);
-
+  memory->destroy(cglist);
 }
 /* ---------------------------------------------------------------------- */
@ -166,8 +184,8 @@ void ComputeOrientOrderAtom::init()
    error->all(FLERR,"Compute orientorder/atom cutoff is "
               "longer than pairwise cutoff");
-  memory->create(qnm_r,qmax,2*qmax+1,"orientorder/atom:qnm_r");
+  memory->create(qnm_r,nqlist,2*qmax+1,"orientorder/atom:qnm_r");
-  memory->create(qnm_i,qmax,2*qmax+1,"orientorder/atom:qnm_i");
+  memory->create(qnm_i,nqlist,2*qmax+1,"orientorder/atom:qnm_i");
  // need an occasional full neighbor list
@ -183,6 +201,8 @@ void ComputeOrientOrderAtom::init()
    if (strcmp(modify->compute[i]->style,"orientorder/atom") == 0) count++;
  if (count > 1 && comm->me == 0)
    error->warning(FLERR,"More than one compute orientorder/atom");
  if (wlflag || wlhatflag) init_clebsch_gordan();
 }
 /* ---------------------------------------------------------------------- */
@ -274,8 +294,8 @@ void ComputeOrientOrderAtom::compute_peratom()
      // if not nnn neighbors, order parameter = 0;
      if ((ncount == 0) || (ncount < nnn)) {
-        for (int iw = 0; iw < nqlist; iw++)
+        for (int jj = 0; jj < ncol; jj++)
-          qn[iw] = 0.0;
+          qn[jj] = 0.0;
        continue;
      }
@ -287,6 +307,7 @@ void ComputeOrientOrderAtom::compute_peratom()
      }
      calc_boop(rlist, ncount, qn, qlist, nqlist);
    }
  }
 }
@ -403,13 +424,12 @@ void ComputeOrientOrderAtom::select3(int k, int n, double *arr, int *iarr, doubl
 void ComputeOrientOrderAtom::calc_boop(double **rlist,
                                       int ncount, double qn[],
                                       int qlist[], int nqlist) {
  for (int iw = 0; iw < nqlist; iw++) {
    int n = qlist[iw];
-    qn[iw] = 0.0;
+  for (int il = 0; il < nqlist; il++) {
-    for(int m = 0; m < 2*n+1; m++) {
+    int l = qlist[il];
-      qnm_r[iw][m] = 0.0;
+    for(int m = 0; m < 2*l+1; m++) {
-      qnm_i[iw][m] = 0.0;
+      qnm_r[il][m] = 0.0;
      qnm_i[il][m] = 0.0;
    }
  }
@ -433,24 +453,24 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
      expphi_i *= rxymaginv;
    }
-    for (int iw = 0; iw < nqlist; iw++) {
+    for (int il = 0; il < nqlist; il++) {
-      int n = qlist[iw];
+      int l = qlist[il];
-      qnm_r[iw][n] += polar_prefactor(n, 0, costheta);
+      qnm_r[il][l] += polar_prefactor(l, 0, costheta);
      double expphim_r = expphi_r;
      double expphim_i = expphi_i;
-      for(int m = 1; m <= +n; m++) {
+      for(int m = 1; m <= +l; m++) {
-        double prefactor = polar_prefactor(n, m, costheta);
+        double prefactor = polar_prefactor(l, m, costheta);
        double c_r = prefactor * expphim_r;
        double c_i = prefactor * expphim_i;
-        qnm_r[iw][m+n] += c_r;
+        qnm_r[il][m+l] += c_r;
-        qnm_i[iw][m+n] += c_i;
+        qnm_i[il][m+l] += c_i;
        if(m & 1) {
-          qnm_r[iw][-m+n] -= c_r;
+          qnm_r[il][-m+l] -= c_r;
-          qnm_i[iw][-m+n] += c_i;
+          qnm_i[il][-m+l] += c_i;
        } else {
-          qnm_r[iw][-m+n] += c_r;
+          qnm_r[il][-m+l] += c_r;
-          qnm_i[iw][-m+n] -= c_i;
+          qnm_i[il][-m+l] -= c_i;
        }
        double tmp_r = expphim_r*expphi_r - expphim_i*expphi_i;
        double tmp_i = expphim_r*expphi_i + expphim_i*expphi_r;
@ -461,30 +481,110 @@ void ComputeOrientOrderAtom::calc_boop(double **rlist,
    }
  }
-  double fac = sqrt(MY_4PI) / ncount;
+  // convert sums to averages
  double normfac = 0.0;
  for (int iw = 0; iw < nqlist; iw++) {
    int n = qlist[iw];
    double qm_sum = 0.0;
    for(int m = 0; m < 2*n+1; m++) {
      qm_sum += qnm_r[iw][m]*qnm_r[iw][m] + qnm_i[iw][m]*qnm_i[iw][m];
      //      printf("Ylm^2 = %d %d %g\n",n,m,
      //     qnm_r[iw][m]*qnm_r[iw][m] + qnm_i[iw][m]*qnm_i[iw][m]);
    }
    qn[iw] = fac * sqrt(qm_sum / (2*n+1));
    if (qlcompflag && iqlcomp == iw) normfac = 1.0/sqrt(qm_sum);
  double facn = 1.0 / ncount;
  for (int il = 0; il < nqlist; il++) {
    int l = qlist[il];
    for(int m = 0; m < 2*l+1; m++) {
      qnm_r[il][m] *= facn;
      qnm_i[il][m] *= facn;
    }
  }
-  // output of the complex vector
+  // calculate Q_l
  // NOTE: optional W_l_hat and components of Q_qlcomp use these stored Q_l values 
  int jj = 0;
  for (int il = 0; il < nqlist; il++) {
    int l = qlist[il];
    double qnormfac = sqrt(MY_4PI/(2*l+1));
    double qm_sum = 0.0;
    for(int m = 0; m < 2*l+1; m++)
      qm_sum += qnm_r[il][m]*qnm_r[il][m] + qnm_i[il][m]*qnm_i[il][m];
    qn[jj++] = qnormfac * sqrt(qm_sum);
  }
  // TODO: 
  // 1. [done]Need to allocate extra memory in qnarray[] for this option
  // 2. [done]Need to add keyword option
  // 3. [done]Need to caclulate Clebsch-Gordan/Wigner 3j coefficients
  //     (Can try getting them from boop.py first)
  // 5. [done]Compare to bcc values in /Users/athomps/netapp/codes/MatMiner/matminer/matminer/featurizers/boop.py
  // 6. [done]I get the right answer for W_l, but need to make sure that factor of 1/sqrt(l+1) is right for cglist
  // 7. Add documentation
  // 8. [done] run valgrind
  // 9. [done] Add Wlhat
  // 10. Update memory_usage()
  // 11. Add exact FCC values for W_4, W_4_hat
  // calculate W_l
  if (wlflag) {
    int idxcg_count = 0;
    for (int il = 0; il < nqlist; il++) {
      int l = qlist[il];
      double wlsum = 0.0;
      for(int m1 = 0; m1 < 2*l+1; m1++) {
        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
          int m = m1 + m2 - l;
          double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
          double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
          wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
          idxcg_count++;
        }
      }
      qn[jj++] = wlsum/sqrt(2*l+1);
    }
  }
  // calculate W_l_hat
  if (wlhatflag) {
    int idxcg_count = 0;
    for (int il = 0; il < nqlist; il++) {
      int l = qlist[il];
      double wlsum = 0.0;
      for(int m1 = 0; m1 < 2*l+1; m1++) {
        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
          int m = m1 + m2 - l;
          double qm1qm2_r = qnm_r[il][m1]*qnm_r[il][m2] - qnm_i[il][m1]*qnm_i[il][m2];
          double qm1qm2_i = qnm_r[il][m1]*qnm_i[il][m2] + qnm_i[il][m1]*qnm_r[il][m2];
          wlsum += (qm1qm2_r*qnm_r[il][m] + qm1qm2_i*qnm_i[il][m])*cglist[idxcg_count];
          idxcg_count++;
        }
      }
  //      Whats = [w/(q/np.sqrt(np.pi * 4 / (2 * l + 1)))**3 if abs(q) > 1.0e-6 else 0.0 for l,q,w in zip(range(1,max_l+1),Qs,Ws)]
      if (qn[il] < QEPSILON)
        qn[jj++] = 0.0;
      else {
        double qnormfac = sqrt(MY_4PI/(2*l+1));
        double qnfac = qnormfac/qn[il];
        qn[jj++] = wlsum/sqrt(2*l+1)*(qnfac*qnfac*qnfac);
      }
    }
  }
  // Calculate components of Q_l, for l=qlcomp
  if (qlcompflag) {
-    int j = nqlist;
+    int il = iqlcomp;
-    for(int m = 0; m < 2*qlcomp+1; m++) {
+    int l = qlcomp;
-      qn[j++] = qnm_r[iqlcomp][m] * normfac;
+    if (qn[il] < QEPSILON)
-      qn[j++] = qnm_i[iqlcomp][m] * normfac;
+      for(int m = 0; m < 2*l+1; m++) {
        qn[jj++] = 0.0;
        qn[jj++] = 0.0;
      }
    else {
      double qnormfac = sqrt(MY_4PI/(2*l+1));
      double qnfac = qnormfac/qn[il];
      for(int m = 0; m < 2*l+1; m++) {
        qn[jj++] = qnm_r[il][m] * qnfac;
        qn[jj++] = qnm_i[il][m] * qnfac;
      }
    }
  }
 }
 /* ----------------------------------------------------------------------
@ -542,3 +642,258 @@ double ComputeOrientOrderAtom::associated_legendre(int l, int m, double x)
  return p;
 }
 /* ----------------------------------------------------------------------
   assign Clebsch-Gordan coefficients
   using the quasi-binomial formula VMK 8.2.1(3)
   specialized for case j1=j2=j=l
 ------------------------------------------------------------------------- */
 void ComputeOrientOrderAtom::init_clebsch_gordan()
 {
  double sum,dcg,sfaccg, sfac1, sfac2;
  int m, aa2, bb2, cc2;
  int ifac, idxcg_count;
  idxcg_count = 0;
  for (int il = 0; il < nqlist; il++) {
    int l = qlist[il];
    for(int m1 = 0; m1 < 2*l+1; m1++)
      for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
        idxcg_count++;
  }
  idxcg_max = idxcg_count;
  memory->create(cglist, idxcg_max, "computeorientorderatom:cglist");
  idxcg_count = 0;
  for (int il = 0; il < nqlist; il++) {
    int l = qlist[il];
    for(int m1 = 0; m1 < 2*l+1; m1++) {
        aa2 = m1 - l;
        for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
          bb2 = m2 - l;
          m = aa2 + bb2 + l;
          sum = 0.0;
          for (int z = MAX(0, MAX(-aa2, bb2));
               z <= MIN(l, MIN(l - aa2, l + bb2)); z++) {
            ifac = z % 2 ? -1 : 1;
            sum += ifac /
              (factorial(z) *
               factorial(l - z) *
               factorial(l - aa2 - z) *
               factorial(l + bb2 - z) *
               factorial(aa2 + z) *
               factorial(-bb2 + z));
          }
          cc2 = m - l;
          sfaccg = sqrt(factorial(l + aa2) *
                        factorial(l - aa2) *
                        factorial(l + bb2) *
                        factorial(l - bb2) *
                        factorial(l + cc2) *
                        factorial(l - cc2) *
                        (2*l + 1));
          sfac1 = factorial(3*l + 1);
          sfac2 = factorial(l);
          dcg = sqrt(sfac2*sfac2*sfac2 / sfac1);
          cglist[idxcg_count] = sum * dcg * sfaccg;
          idxcg_count++;
        }
      }
  }
 }
 /* ----------------------------------------------------------------------
   factorial n, wrapper for precomputed table
 ------------------------------------------------------------------------- */
 double ComputeOrientOrderAtom::factorial(int n)
 {
  if (n < 0 || n > nmaxfactorial) {
    char str[128];
    sprintf(str, "Invalid argument to factorial %d", n);
    error->all(FLERR, str);
  }
  return nfac_table[n];
 }
 /* ----------------------------------------------------------------------
   factorial n table, size SNA::nmaxfactorial+1
 ------------------------------------------------------------------------- */
 const double ComputeOrientOrderAtom::nfac_table[] = {
  1,
  1,
  2,
  6,
  24,
  120,
  720,
  5040,
  40320,
  362880,
  3628800,
  39916800,
  479001600,
  6227020800,
  87178291200,
  1307674368000,
  20922789888000,
  355687428096000,
  6.402373705728e+15,
  1.21645100408832e+17,
  2.43290200817664e+18,
  5.10909421717094e+19,
  1.12400072777761e+21,
  2.5852016738885e+22,
  6.20448401733239e+23,
  1.5511210043331e+25,
  4.03291461126606e+26,
  1.08888694504184e+28,
  3.04888344611714e+29,
  8.8417619937397e+30,
  2.65252859812191e+32,
  8.22283865417792e+33,
  2.63130836933694e+35,
  8.68331761881189e+36,
  2.95232799039604e+38,
  1.03331479663861e+40,
  3.71993326789901e+41,
  1.37637530912263e+43,
  5.23022617466601e+44,
  2.03978820811974e+46,
  8.15915283247898e+47,
  3.34525266131638e+49,
  1.40500611775288e+51,
  6.04152630633738e+52,
  2.65827157478845e+54,
  1.1962222086548e+56,
  5.50262215981209e+57,
  2.58623241511168e+59,
  1.24139155925361e+61,
  6.08281864034268e+62,
  3.04140932017134e+64,
  1.55111875328738e+66,
  8.06581751709439e+67,
  4.27488328406003e+69,
  2.30843697339241e+71,
  1.26964033536583e+73,
  7.10998587804863e+74,
  4.05269195048772e+76,
  2.35056133128288e+78,
  1.3868311854569e+80,
  8.32098711274139e+81,
  5.07580213877225e+83,
  3.14699732603879e+85,
  1.98260831540444e+87,
  1.26886932185884e+89,
  8.24765059208247e+90,
  5.44344939077443e+92,
  3.64711109181887e+94,
  2.48003554243683e+96,
  1.71122452428141e+98,
  1.19785716699699e+100,
  8.50478588567862e+101,
  6.12344583768861e+103,
  4.47011546151268e+105,
  3.30788544151939e+107,
  2.48091408113954e+109,
  1.88549470166605e+111,
  1.45183092028286e+113,
  1.13242811782063e+115,
  8.94618213078297e+116,
  7.15694570462638e+118,
  5.79712602074737e+120,
  4.75364333701284e+122,
  3.94552396972066e+124,
  3.31424013456535e+126,
  2.81710411438055e+128,
  2.42270953836727e+130,
  2.10775729837953e+132,
  1.85482642257398e+134,
  1.65079551609085e+136,
  1.48571596448176e+138,
  1.3520015276784e+140,
  1.24384140546413e+142,
  1.15677250708164e+144,
  1.08736615665674e+146,
  1.03299784882391e+148,
  9.91677934870949e+149,
  9.61927596824821e+151,
  9.42689044888324e+153,
  9.33262154439441e+155,
  9.33262154439441e+157,
  9.42594775983835e+159,
  9.61446671503512e+161,
  9.90290071648618e+163,
  1.02990167451456e+166,
  1.08139675824029e+168,
  1.14628056373471e+170,
  1.22652020319614e+172,
  1.32464181945183e+174,
  1.44385958320249e+176,
  1.58824554152274e+178,
  1.76295255109024e+180,
  1.97450685722107e+182,
  2.23119274865981e+184,
  2.54355973347219e+186,
  2.92509369349301e+188,
  3.3931086844519e+190,
  3.96993716080872e+192,
  4.68452584975429e+194,
  5.5745857612076e+196,
  6.68950291344912e+198,
  8.09429852527344e+200,
  9.8750442008336e+202,
  1.21463043670253e+205,
  1.50614174151114e+207,
  1.88267717688893e+209,
  2.37217324288005e+211,
  3.01266001845766e+213,
  3.8562048236258e+215,
  4.97450422247729e+217,
  6.46685548922047e+219,
  8.47158069087882e+221,
  1.118248651196e+224,
  1.48727070609069e+226,
  1.99294274616152e+228,
  2.69047270731805e+230,
  3.65904288195255e+232,
  5.01288874827499e+234,
  6.91778647261949e+236,
  9.61572319694109e+238,
  1.34620124757175e+241,
  1.89814375907617e+243,
  2.69536413788816e+245,
  3.85437071718007e+247,
  5.5502938327393e+249,
  8.04792605747199e+251,
  1.17499720439091e+254,
  1.72724589045464e+256,
  2.55632391787286e+258,
  3.80892263763057e+260,
  5.71338395644585e+262,
  8.62720977423323e+264,
  1.31133588568345e+267,
  2.00634390509568e+269,
  3.08976961384735e+271,
  4.78914290146339e+273,
  7.47106292628289e+275,
  1.17295687942641e+278,
  1.85327186949373e+280,
  2.94670227249504e+282,
  4.71472363599206e+284,
  7.59070505394721e+286,
  1.22969421873945e+289,
  2.0044015765453e+291,
  3.28721858553429e+293,
  5.42391066613159e+295,
  9.00369170577843e+297,
  1.503616514865e+300, // nmaxfactorial = 167
 };
--- a/src/compute_orientorder_atom.h
+++ b/src/compute_orientorder_atom.h
@ -33,7 +33,7 @@ class ComputeOrientOrderAtom : public Compute {
  void compute_peratom();
  double memory_usage();
  double cutsq;
-  int iqlcomp, qlcomp, qlcompflag;
+  int iqlcomp, qlcomp, qlcompflag, wlflag, wlhatflag;
  int *qlist;
  int nqlist;
@ -55,6 +55,13 @@ class ComputeOrientOrderAtom : public Compute {
  double polar_prefactor(int, int, double);
  double associated_legendre(int, int, double);
  static const int nmaxfactorial = 167;
  static const double nfac_table[];
  double factorial(int);
  void init_clebsch_gordan();
  double *cglist;                      // Clebsch-Gordan coeffs
  int idxcg_max;
 };
 }
--- a/src/compute_pe.cpp
+++ b/src/compute_pe.cpp
@ -26,6 +26,7 @@
 #include "modify.h"
 #include "domain.h"
 #include "error.h"
 #include "atom_masks.h"
 using namespace LAMMPS_NS;
@ -65,6 +66,9 @@ ComputePE::ComputePE(LAMMPS *lmp, int narg, char **arg) :
      iarg++;
    }
  }
  datamask_read = EMPTY_MASK;
  datamask_modify = EMPTY_MASK;
 }
 /* ---------------------------------------------------------------------- */
--- a/src/fix_minimize.cpp
+++ b/src/fix_minimize.cpp
@ -42,8 +42,10 @@ FixMinimize::~FixMinimize()
  // delete locally stored data
  memory->destroy(peratom);
-  for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]);
+  if (vectors) {
-  memory->sfree(vectors);
+    for (int m = 0; m < nvector; m++) memory->destroy(vectors[m]);
    memory->sfree(vectors);
  }
 }
 /* ---------------------------------------------------------------------- */
--- a/src/fix_minimize.h
+++ b/src/fix_minimize.h
@ -29,22 +29,22 @@ class FixMinimize : public Fix {
 public:
  FixMinimize(class LAMMPS *, int, char **);
-  ~FixMinimize();
+  virtual ~FixMinimize();
  int setmask();
-  void init() {}
+  virtual void init() {}
  double memory_usage();
-  void grow_arrays(int);
+  virtual void grow_arrays(int);
-  void copy_arrays(int, int, int);
+  virtual void copy_arrays(int, int, int);
-  int pack_exchange(int, double *);
+  virtual int pack_exchange(int, double *);
-  int unpack_exchange(int, double *);
+  virtual int unpack_exchange(int, double *);
-  void add_vector(int);
+  virtual void add_vector(int);
  double *request_vector(int);
  void store_box();
  void reset_coords();
- private:
+ protected:
  int nvector;
  int *peratom;
  double **vectors;
--- a/src/input.cpp
+++ b/src/input.cpp
@ -1672,7 +1672,7 @@ void Input::min_style()
 {
  if (domain->box_exist == 0)
    error->all(FLERR,"Min_style command before simulation box is defined");
-  update->create_minimize(narg,arg);
+  update->create_minimize(narg,arg,1);
 }
 /* ---------------------------------------------------------------------- */
--- a/src/min.cpp
+++ b/src/min.cpp
@ -68,6 +68,8 @@ Min::Min(LAMMPS *lmp) : Pointers(lmp)
  requestor = NULL;
  external_force_clear = 0;
  kokkosable = 0;
 }
 /* ---------------------------------------------------------------------- */
@ -93,6 +95,10 @@ Min::~Min()
 void Min::init()
 {
  if (lmp->kokkos && !kokkosable)
    error->all(FLERR,"Must use a Kokkos-enabled min style (e.g. min_style cg/kk) "
     "with Kokkos minimize");
  // create fix needed for storing atom-based quantities
  // will delete it at end of run
--- a/src/min.h
+++ b/src/min.h
@ -31,16 +31,16 @@ class Min : protected Pointers {
  Min(class LAMMPS *);
  virtual ~Min();
  virtual void init();
-  void setup(int flag=1);
+  virtual void setup(int flag=1);
-  void setup_minimal(int);
+  virtual void setup_minimal(int);
-  void run(int);
+  virtual void run(int);
  void cleanup();
  int request(class Pair *, int, double);
  virtual bigint memory_usage() {return 0;}
  void modify_params(int, char **);
  virtual int modify_param(int, char **) {return 0;}
-  double fnorm_sqr();
+  virtual double fnorm_sqr();
-  double fnorm_inf();
+  virtual double fnorm_inf();
  virtual void init_style() {}
  virtual void setup_style() = 0;
@ -97,10 +97,12 @@ class Min : protected Pointers {
  double *extra_max;          // max allowed change per iter for atom's var
  class Pair **requestor;     // Pair that stores/manipulates the variable
  int kokkosable;             // 1 if this min style supports Kokkos
  int neigh_every,neigh_delay,neigh_dist_check;  // neighboring params
-  double energy_force(int);
+  virtual double energy_force(int);
-  void force_clear();
+  virtual void force_clear();
  double compute_force_norm_sqr();
  double compute_force_norm_inf();
--- a/src/minimize.cpp
+++ b/src/minimize.cpp
@ -52,9 +52,6 @@ void Minimize::command(int narg, char **arg)
  if (update->laststep < 0)
    error->all(FLERR,"Too many iterations");
  if (lmp->kokkos)
    error->all(FLERR,"Cannot yet use minimize with Kokkos");
  lmp->init();
  timer->init_timeout();
  update->minimize->setup();
--- a/src/update.cpp
+++ b/src/update.cpp
@ -78,7 +78,7 @@ Update::Update(LAMMPS *lmp) : Pointers(lmp)
  create_integrate(1,&str,1);
  str = (char *) "cg";
-  create_minimize(1,&str);
+  create_minimize(1,&str,1);
 }
 /* ---------------------------------------------------------------------- */
@ -376,22 +376,69 @@ Integrate *Update::integrate_creator(LAMMPS *lmp, int narg, char ** arg)
 /* ---------------------------------------------------------------------- */
-void Update::create_minimize(int narg, char **arg)
+void Update::create_minimize(int narg, char **arg, int trysuffix)
 {
-  if (narg != 1) error->all(FLERR,"Illegal min_style command");
+  if (narg < 1) error->all(FLERR,"Illegal run_style command");
  delete [] minimize_style;
  delete minimize;
-  if (minimize_map->find(arg[0]) != minimize_map->end()) {
+  int sflag;
-    MinimizeCreator minimize_creator = (*minimize_map)[arg[0]];
+  new_minimize(arg[0],narg-1,&arg[1],trysuffix,sflag);
    minimize = minimize_creator(lmp);
  }
  else error->all(FLERR,"Illegal min_style command");
-  int n = strlen(arg[0]) + 1;
+  if (sflag) {
-  minimize_style = new char[n];
+    char estyle[256];
-  strcpy(minimize_style,arg[0]);
+    if (sflag == 1) snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix);
    else snprintf(estyle,256,"%s/%s",arg[0],lmp->suffix2);
    int n = strlen(estyle) + 1;
    minimize_style = new char[n];
    strcpy(minimize_style,estyle);
  } else {
    int n = strlen(arg[0]) + 1;
    minimize_style = new char[n];
    strcpy(minimize_style,arg[0]);
  }
 }
 /* ----------------------------------------------------------------------
   create the Minimize style, first with suffix appended
 ------------------------------------------------------------------------- */
 void Update::new_minimize(char *style, int narg, char **arg,
                           int trysuffix, int &sflag)
 {
  if (trysuffix && lmp->suffix_enable) {
    if (lmp->suffix) {
      sflag = 1;
      char estyle[256];
      snprintf(estyle,256,"%s/%s",style,lmp->suffix);
      if (minimize_map->find(estyle) != minimize_map->end()) {
        MinimizeCreator minimize_creator = (*minimize_map)[estyle];
        minimize = minimize_creator(lmp);
        return;
      }
    }
    if (lmp->suffix2) {
      sflag = 2;
      char estyle[256];
      snprintf(estyle,256,"%s/%s",style,lmp->suffix2);
      if (minimize_map->find(estyle) != minimize_map->end()) {
        MinimizeCreator minimize_creator = (*minimize_map)[estyle];
        minimize = minimize_creator(lmp);
        return;
      }
    }
  }
  sflag = 0;
  if (minimize_map->find(style) != minimize_map->end()) {
    MinimizeCreator minimize_creator = (*minimize_map)[style];
    minimize = minimize_creator(lmp);
    return;
  }
  error->all(FLERR,"Illegal minimize style");
 }
 /* ----------------------------------------------------------------------
--- a/src/update.h
+++ b/src/update.h
@ -63,7 +63,7 @@ class Update : protected Pointers {
  void init();
  void set_units(const char *);
  void create_integrate(int, char **, int);
-  void create_minimize(int, char **);
+  void create_minimize(int, char **, int);
  void reset_timestep(int, char **);
  void reset_timestep(bigint);
  void update_time();
@ -71,6 +71,7 @@ class Update : protected Pointers {
 private:
  void new_integrate(char *, int, char **, int, int &);
  void new_minimize(char *, int, char **, int, int &);
  template <typename T> static Integrate *integrate_creator(LAMMPS *, int, char **);
  template <typename T> static Min *minimize_creator(LAMMPS *);