Merge branch 'master' of ssh://yona.ccs.ornl.gov/~/git/lammps

2011-05-24 11:34:47 -04:00
parent d80daafc56 5495848345
commit d2ca45aef6
119 changed files with 1605 additions and 778 deletions
--- a/doc/Eqs/box_inverse.jpg
+++ b/doc/Eqs/box_inverse.jpg
--- a/doc/Eqs/box_inverse.tex
+++ b/doc/Eqs/box_inverse.tex
@ -0,0 +1,14 @@
+\documentclass[12pt]{article}
+
+\begin{document}
+
+\begin{eqnarray*}
+{\rm lx} &=& a \\
+{\rm xy} &=& b \cos{\gamma}  \\
+{\rm xz} &=& c \cos{\beta}\\
+{\rm ly}^2 &=&   b^2 - {\rm xy}^2 \\
+{\rm yz} &=& \frac{b*c \cos{\alpha} - {\rm xy}*{\rm xz}}{\rm ly} \\
+{\rm lz}^2 &=&  c^2 - {\rm xz}^2 - {\rm yz}^2 \\
+\end{eqnarray*}
+
+\end{document}
--- a/doc/Section_commands.html
+++ b/doc/Section_commands.html
@ -311,19 +311,20 @@ default LAMMPS build.  These dependencies are listed as Restrictions
 in the command's documentation.
 </P>
 <DIV ALIGN=center><TABLE  BORDER=1 >
-<TR ALIGN="center"><TD ><A HREF = "angle_coeff.html">angle_coeff</A></TD><TD ><A HREF = "angle_style.html">angle_style</A></TD><TD ><A HREF = "atom_modify.html">atom_modify</A></TD><TD ><A HREF = "atom_style.html">atom_style</A></TD><TD ><A HREF = "bond_coeff.html">bond_coeff</A></TD><TD ><A HREF = "bond_style.html">bond_style</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "boundary.html">boundary</A></TD><TD ><A HREF = "change_box.html">change_box</A></TD><TD ><A HREF = "clear.html">clear</A></TD><TD ><A HREF = "communicate.html">communicate</A></TD><TD ><A HREF = "compute.html">compute</A></TD><TD ><A HREF = "compute_modify.html">compute_modify</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "create_atoms.html">create_atoms</A></TD><TD ><A HREF = "create_box.html">create_box</A></TD><TD ><A HREF = "delete_atoms.html">delete_atoms</A></TD><TD ><A HREF = "delete_bonds.html">delete_bonds</A></TD><TD ><A HREF = "dielectric.html">dielectric</A></TD><TD ><A HREF = "dihedral_coeff.html">dihedral_coeff</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "dihedral_style.html">dihedral_style</A></TD><TD ><A HREF = "dimension.html">dimension</A></TD><TD ><A HREF = "displace_atoms.html">displace_atoms</A></TD><TD ><A HREF = "displace_box.html">displace_box</A></TD><TD ><A HREF = "dump.html">dump</A></TD><TD ><A HREF = "dump_modify.html">dump_modify</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "echo.html">echo</A></TD><TD ><A HREF = "fix.html">fix</A></TD><TD ><A HREF = "fix_modify.html">fix_modify</A></TD><TD ><A HREF = "group.html">group</A></TD><TD ><A HREF = "if.html">if</A></TD><TD ><A HREF = "improper_coeff.html">improper_coeff</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "improper_style.html">improper_style</A></TD><TD ><A HREF = "include.html">include</A></TD><TD ><A HREF = "jump.html">jump</A></TD><TD ><A HREF = "kspace_modify.html">kspace_modify</A></TD><TD ><A HREF = "kspace_style.html">kspace_style</A></TD><TD ><A HREF = "label.html">label</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "lattice.html">lattice</A></TD><TD ><A HREF = "log.html">log</A></TD><TD ><A HREF = "mass.html">mass</A></TD><TD ><A HREF = "minimize.html">minimize</A></TD><TD ><A HREF = "min_modify.html">min_modify</A></TD><TD ><A HREF = "min_style.html">min_style</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "neb.html">neb</A></TD><TD ><A HREF = "neigh_modify.html">neigh_modify</A></TD><TD ><A HREF = "neighbor.html">neighbor</A></TD><TD ><A HREF = "newton.html">newton</A></TD><TD ><A HREF = "next.html">next</A></TD><TD ><A HREF = "pair_coeff.html">pair_coeff</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "pair_modify.html">pair_modify</A></TD><TD ><A HREF = "pair_style.html">pair_style</A></TD><TD ><A HREF = "pair_write.html">pair_write</A></TD><TD ><A HREF = "prd.html">prd</A></TD><TD ><A HREF = "print.html">print</A></TD><TD ><A HREF = "processors.html">processors</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "read_data.html">read_data</A></TD><TD ><A HREF = "read_restart.html">read_restart</A></TD><TD ><A HREF = "region.html">region</A></TD><TD ><A HREF = "replicate.html">replicate</A></TD><TD ><A HREF = "reset_timestep.html">reset_timestep</A></TD><TD ><A HREF = "restart.html">restart</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "run.html">run</A></TD><TD ><A HREF = "run_style.html">run_style</A></TD><TD ><A HREF = "set.html">set</A></TD><TD ><A HREF = "shell.html">shell</A></TD><TD ><A HREF = "special_bonds.html">special_bonds</A></TD><TD ><A HREF = "tad.html">tad</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "temper.html">temper</A></TD><TD ><A HREF = "thermo.html">thermo</A></TD><TD ><A HREF = "thermo_modify.html">thermo_modify</A></TD><TD ><A HREF = "thermo_style.html">thermo_style</A></TD><TD ><A HREF = "timestep.html">timestep</A></TD><TD ><A HREF = "uncompute.html">uncompute</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "undump.html">undump</A></TD><TD ><A HREF = "unfix.html">unfix</A></TD><TD ><A HREF = "units.html">units</A></TD><TD ><A HREF = "variable.html">variable</A></TD><TD ><A HREF = "velocity.html">velocity</A></TD><TD ><A HREF = "write_restart.html">write_restart</A> 
+<TR ALIGN="center"><TD ><A HREF = "accelerator.html">accelerator</A></TD><TD ><A HREF = "angle_coeff.html">angle_coeff</A></TD><TD ><A HREF = "angle_style.html">angle_style</A></TD><TD ><A HREF = "atom_modify.html">atom_modify</A></TD><TD ><A HREF = "atom_style.html">atom_style</A></TD><TD ><A HREF = "bond_coeff.html">bond_coeff</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "bond_style.html">bond_style</A></TD><TD ><A HREF = "boundary.html">boundary</A></TD><TD ><A HREF = "change_box.html">change_box</A></TD><TD ><A HREF = "clear.html">clear</A></TD><TD ><A HREF = "communicate.html">communicate</A></TD><TD ><A HREF = "compute.html">compute</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "compute_modify.html">compute_modify</A></TD><TD ><A HREF = "create_atoms.html">create_atoms</A></TD><TD ><A HREF = "create_box.html">create_box</A></TD><TD ><A HREF = "delete_atoms.html">delete_atoms</A></TD><TD ><A HREF = "delete_bonds.html">delete_bonds</A></TD><TD ><A HREF = "dielectric.html">dielectric</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "dihedral_coeff.html">dihedral_coeff</A></TD><TD ><A HREF = "dihedral_style.html">dihedral_style</A></TD><TD ><A HREF = "dimension.html">dimension</A></TD><TD ><A HREF = "displace_atoms.html">displace_atoms</A></TD><TD ><A HREF = "displace_box.html">displace_box</A></TD><TD ><A HREF = "dump.html">dump</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "dump_modify.html">dump_modify</A></TD><TD ><A HREF = "echo.html">echo</A></TD><TD ><A HREF = "fix.html">fix</A></TD><TD ><A HREF = "fix_modify.html">fix_modify</A></TD><TD ><A HREF = "group.html">group</A></TD><TD ><A HREF = "if.html">if</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "improper_coeff.html">improper_coeff</A></TD><TD ><A HREF = "improper_style.html">improper_style</A></TD><TD ><A HREF = "include.html">include</A></TD><TD ><A HREF = "jump.html">jump</A></TD><TD ><A HREF = "kspace_modify.html">kspace_modify</A></TD><TD ><A HREF = "kspace_style.html">kspace_style</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "label.html">label</A></TD><TD ><A HREF = "lattice.html">lattice</A></TD><TD ><A HREF = "log.html">log</A></TD><TD ><A HREF = "mass.html">mass</A></TD><TD ><A HREF = "minimize.html">minimize</A></TD><TD ><A HREF = "min_modify.html">min_modify</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "min_style.html">min_style</A></TD><TD ><A HREF = "neb.html">neb</A></TD><TD ><A HREF = "neigh_modify.html">neigh_modify</A></TD><TD ><A HREF = "neighbor.html">neighbor</A></TD><TD ><A HREF = "newton.html">newton</A></TD><TD ><A HREF = "next.html">next</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "pair_coeff.html">pair_coeff</A></TD><TD ><A HREF = "pair_modify.html">pair_modify</A></TD><TD ><A HREF = "pair_style.html">pair_style</A></TD><TD ><A HREF = "pair_write.html">pair_write</A></TD><TD ><A HREF = "prd.html">prd</A></TD><TD ><A HREF = "print.html">print</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "processors.html">processors</A></TD><TD ><A HREF = "read_data.html">read_data</A></TD><TD ><A HREF = "read_restart.html">read_restart</A></TD><TD ><A HREF = "region.html">region</A></TD><TD ><A HREF = "replicate.html">replicate</A></TD><TD ><A HREF = "reset_timestep.html">reset_timestep</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "restart.html">restart</A></TD><TD ><A HREF = "run.html">run</A></TD><TD ><A HREF = "run_style.html">run_style</A></TD><TD ><A HREF = "set.html">set</A></TD><TD ><A HREF = "shell.html">shell</A></TD><TD ><A HREF = "special_bonds.html">special_bonds</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "tad.html">tad</A></TD><TD ><A HREF = "temper.html">temper</A></TD><TD ><A HREF = "thermo.html">thermo</A></TD><TD ><A HREF = "thermo_modify.html">thermo_modify</A></TD><TD ><A HREF = "thermo_style.html">thermo_style</A></TD><TD ><A HREF = "timestep.html">timestep</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "uncompute.html">uncompute</A></TD><TD ><A HREF = "undump.html">undump</A></TD><TD ><A HREF = "unfix.html">unfix</A></TD><TD ><A HREF = "units.html">units</A></TD><TD ><A HREF = "variable.html">variable</A></TD><TD ><A HREF = "velocity.html">velocity</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "write_restart.html">write_restart</A> 
 </TD></TR></TABLE></DIV>

 <HR>
@ -336,14 +337,15 @@ of each style or click on the style itself for a full description:
 <DIV ALIGN=center><TABLE  BORDER=1 >
 <TR ALIGN="center"><TD ><A HREF = "fix_adapt.html">adapt</A></TD><TD ><A HREF = "fix_addforce.html">addforce</A></TD><TD ><A HREF = "fix_aveforce.html">aveforce</A></TD><TD ><A HREF = "fix_ave_atom.html">ave/atom</A></TD><TD ><A HREF = "fix_ave_correlate.html">ave/correlate</A></TD><TD ><A HREF = "fix_ave_histo.html">ave/histo</A></TD><TD ><A HREF = "fix_ave_spatial.html">ave/spatial</A></TD><TD ><A HREF = "fix_ave_time.html">ave/time</A></TD></TR>
 <TR ALIGN="center"><TD ><A HREF = "fix_bond_break.html">bond/break</A></TD><TD ><A HREF = "fix_bond_create.html">bond/create</A></TD><TD ><A HREF = "fix_bond_swap.html">bond/swap</A></TD><TD ><A HREF = "fix_box_relax.html">box/relax</A></TD><TD ><A HREF = "fix_deform.html">deform</A></TD><TD ><A HREF = "fix_deposit.html">deposit</A></TD><TD ><A HREF = "fix_drag.html">drag</A></TD><TD ><A HREF = "fix_dt_reset.html">dt/reset</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_efield.html">efield</A></TD><TD ><A HREF = "fix_enforce2d.html">enforce2d</A></TD><TD ><A HREF = "fix_evaporate.html">evaporate</A></TD><TD ><A HREF = "fix_external.html">external</A></TD><TD ><A HREF = "fix_freeze.html">freeze</A></TD><TD ><A HREF = "fix_gravity.html">gravity</A></TD><TD ><A HREF = "fix_heat.html">heat</A></TD><TD ><A HREF = "fix_indent.html">indent</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_langevin.html">langevin</A></TD><TD ><A HREF = "fix_lineforce.html">lineforce</A></TD><TD ><A HREF = "fix_momentum.html">momentum</A></TD><TD ><A HREF = "fix_move.html">move</A></TD><TD ><A HREF = "fix_msst.html">msst</A></TD><TD ><A HREF = "fix_neb.html">neb</A></TD><TD ><A HREF = "fix_nh.html">nph</A></TD><TD ><A HREF = "fix_nph_asphere.html">nph/asphere</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_nph_sphere.html">nph/sphere</A></TD><TD ><A HREF = "fix_nh.html">npt</A></TD><TD ><A HREF = "fix_npt_asphere.html">npt/asphere</A></TD><TD ><A HREF = "fix_npt_sphere.html">npt/sphere</A></TD><TD ><A HREF = "fix_nve.html">nve</A></TD><TD ><A HREF = "fix_nve_asphere.html">nve/asphere</A></TD><TD ><A HREF = "fix_nve_limit.html">nve/limit</A></TD><TD ><A HREF = "fix_nve_noforce.html">nve/noforce</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_nve_sphere.html">nve/sphere</A></TD><TD ><A HREF = "fix_nh.html">nvt</A></TD><TD ><A HREF = "fix_nvt_asphere.html">nvt/asphere</A></TD><TD ><A HREF = "fix_nvt_sllod.html">nvt/sllod</A></TD><TD ><A HREF = "fix_nvt_sphere.html">nvt/sphere</A></TD><TD ><A HREF = "fix_orient_fcc.html">orient/fcc</A></TD><TD ><A HREF = "fix_planeforce.html">planeforce</A></TD><TD ><A HREF = "fix_poems.html">poems</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_pour.html">pour</A></TD><TD ><A HREF = "fix_press_berendsen.html">press/berendsen</A></TD><TD ><A HREF = "fix_print.html">print</A></TD><TD ><A HREF = "fix_qeq_comb.html">qeq/comb</A></TD><TD ><A HREF = "fix_reax_bonds.html">reax/bonds</A></TD><TD ><A HREF = "fix_recenter.html">recenter</A></TD><TD ><A HREF = "fix_rigid.html">rigid</A></TD><TD ><A HREF = "fix_rigid.html">rigid/nve</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_rigid.html">rigid/nvt</A></TD><TD ><A HREF = "fix_setforce.html">setforce</A></TD><TD ><A HREF = "fix_shake.html">shake</A></TD><TD ><A HREF = "fix_spring.html">spring</A></TD><TD ><A HREF = "fix_spring_rg.html">spring/rg</A></TD><TD ><A HREF = "fix_spring_self.html">spring/self</A></TD><TD ><A HREF = "fix_srd.html">srd</A></TD><TD ><A HREF = "fix_store_force.html">store/force</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_store_state.html">store/state</A></TD><TD ><A HREF = "fix_temp_berendsen.html">temp/berendsen</A></TD><TD ><A HREF = "fix_temp_rescale.html">temp/rescale</A></TD><TD ><A HREF = "fix_thermal_conductivity.html">thermal/conductivity</A></TD><TD ><A HREF = "fix_tmd.html">tmd</A></TD><TD ><A HREF = "fix_ttm.html">ttm</A></TD><TD ><A HREF = "fix_viscosity.html">viscosity</A></TD><TD ><A HREF = "fix_viscous.html">viscous</A></TD></TR>
-<TR ALIGN="center"><TD ><A HREF = "fix_wall.html">wall/colloid</A></TD><TD ><A HREF = "fix_wall_gran.html">wall/gran</A></TD><TD ><A HREF = "fix_wall.html">wall/harmonic</A></TD><TD ><A HREF = "fix_wall.html">wall/lj126</A></TD><TD ><A HREF = "fix_wall.html">wall/lj93</A></TD><TD ><A HREF = "fix_wall_reflect.html">wall/reflect</A></TD><TD ><A HREF = "fix_wall_region.html">wall/region</A></TD><TD ><A HREF = "fix_wall_srd.html">wall/srd</A> 
+<TR ALIGN="center"><TD ><A HREF = "fix_efield.html">efield</A></TD><TD ><A HREF = "fix_enforce2d.html">enforce2d</A></TD><TD ><A HREF = "fix_evaporate.html">evaporate</A></TD><TD ><A HREF = "fix_external.html">external</A></TD><TD ><A HREF = "fix_freeze.html">freeze</A></TD><TD ><A HREF = "fix_gpu.html">gpu</A></TD><TD ><A HREF = "fix_gravity.html">gravity</A></TD><TD ><A HREF = "fix_heat.html">heat</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_indent.html">indent</A></TD><TD ><A HREF = "fix_langevin.html">langevin</A></TD><TD ><A HREF = "fix_lineforce.html">lineforce</A></TD><TD ><A HREF = "fix_momentum.html">momentum</A></TD><TD ><A HREF = "fix_move.html">move</A></TD><TD ><A HREF = "fix_msst.html">msst</A></TD><TD ><A HREF = "fix_neb.html">neb</A></TD><TD ><A HREF = "fix_nh.html">nph</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_nph_asphere.html">nph/asphere</A></TD><TD ><A HREF = "fix_nph_sphere.html">nph/sphere</A></TD><TD ><A HREF = "fix_nh.html">npt</A></TD><TD ><A HREF = "fix_npt_asphere.html">npt/asphere</A></TD><TD ><A HREF = "fix_npt_sphere.html">npt/sphere</A></TD><TD ><A HREF = "fix_nve.html">nve</A></TD><TD ><A HREF = "fix_nve_asphere.html">nve/asphere</A></TD><TD ><A HREF = "fix_nve_limit.html">nve/limit</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_nve_noforce.html">nve/noforce</A></TD><TD ><A HREF = "fix_nve_sphere.html">nve/sphere</A></TD><TD ><A HREF = "fix_nh.html">nvt</A></TD><TD ><A HREF = "fix_nvt_asphere.html">nvt/asphere</A></TD><TD ><A HREF = "fix_nvt_sllod.html">nvt/sllod</A></TD><TD ><A HREF = "fix_nvt_sphere.html">nvt/sphere</A></TD><TD ><A HREF = "fix_orient_fcc.html">orient/fcc</A></TD><TD ><A HREF = "fix_planeforce.html">planeforce</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_poems.html">poems</A></TD><TD ><A HREF = "fix_pour.html">pour</A></TD><TD ><A HREF = "fix_press_berendsen.html">press/berendsen</A></TD><TD ><A HREF = "fix_print.html">print</A></TD><TD ><A HREF = "fix_qeq_comb.html">qeq/comb</A></TD><TD ><A HREF = "fix_reax_bonds.html">reax/bonds</A></TD><TD ><A HREF = "fix_recenter.html">recenter</A></TD><TD ><A HREF = "fix_rigid.html">rigid</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_rigid.html">rigid/nve</A></TD><TD ><A HREF = "fix_rigid.html">rigid/nvt</A></TD><TD ><A HREF = "fix_setforce.html">setforce</A></TD><TD ><A HREF = "fix_shake.html">shake</A></TD><TD ><A HREF = "fix_spring.html">spring</A></TD><TD ><A HREF = "fix_spring_rg.html">spring/rg</A></TD><TD ><A HREF = "fix_spring_self.html">spring/self</A></TD><TD ><A HREF = "fix_srd.html">srd</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_store_force.html">store/force</A></TD><TD ><A HREF = "fix_store_state.html">store/state</A></TD><TD ><A HREF = "fix_temp_berendsen.html">temp/berendsen</A></TD><TD ><A HREF = "fix_temp_rescale.html">temp/rescale</A></TD><TD ><A HREF = "fix_thermal_conductivity.html">thermal/conductivity</A></TD><TD ><A HREF = "fix_tmd.html">tmd</A></TD><TD ><A HREF = "fix_ttm.html">ttm</A></TD><TD ><A HREF = "fix_viscosity.html">viscosity</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_viscous.html">viscous</A></TD><TD ><A HREF = "fix_wall.html">wall/colloid</A></TD><TD ><A HREF = "fix_wall_gran.html">wall/gran</A></TD><TD ><A HREF = "fix_wall.html">wall/harmonic</A></TD><TD ><A HREF = "fix_wall.html">wall/lj126</A></TD><TD ><A HREF = "fix_wall.html">wall/lj93</A></TD><TD ><A HREF = "fix_wall_reflect.html">wall/reflect</A></TD><TD ><A HREF = "fix_wall_region.html">wall/region</A></TD></TR>
+<TR ALIGN="center"><TD ><A HREF = "fix_wall_srd.html">wall/srd</A> 
 </TD></TR></TABLE></DIV>

 <P>These are fix styles contributed by users, which can be used if
--- a/doc/Section_commands.txt
+++ b/doc/Section_commands.txt
@ -307,6 +307,7 @@ included when LAMMPS was built.  Not all packages are included in a
 default LAMMPS build.  These dependencies are listed as Restrictions
 in the command's documentation.

+"accelerator"_accelerator.html,
 "angle_coeff"_angle_coeff.html,
 "angle_style"_angle_style.html,
 "atom_modify"_atom_modify.html,
@ -414,6 +415,7 @@ of each style or click on the style itself for a full description:
 "evaporate"_fix_evaporate.html,
 "external"_fix_external.html,
 "freeze"_fix_freeze.html,
+"gpu"_fix_gpu.html,
 "gravity"_fix_gravity.html,
 "heat"_fix_heat.html,
 "indent"_fix_indent.html,
--- a/doc/Section_howto.html
+++ b/doc/Section_howto.html
@ -825,6 +825,10 @@ factors (xy,xz,yz) is as follows:
 </P>
 <CENTER><IMG SRC = "Eqs/box.jpg">
 </CENTER>
+<P>The inverse relationship can be written as follows:
+</P>
+<CENTER><IMG SRC = "Eqs/box_inverse.jpg">
+</CENTER>
 <P>As discussed on the <A HREF = "dump.html">dump</A> command doc page, when the BOX
 BOUNDS for a snapshot is written to a dump file for a triclinic box,
 an orthogonal bounding box which encloses the triclinic simulation box
@ -1158,7 +1162,7 @@ discussed below, it can be referenced via the following bracket
 notation, where ID in this case is the ID of a compute.  The leading
 "c_" would be replaced by "f_" for a fix, or "v_" for a variable:
 </P>
-<DIV ALIGN=center><TABLE  BORDER=1 >
+<DIV ALIGN=center><TABLE  WIDTH="0%"  BORDER=1 >
 <TR><TD >c_ID </TD><TD > entire scalar, vector, or array</TD></TR>
 <TR><TD >c_ID[I] </TD><TD > one element of vector, one column of array</TD></TR>
 <TR><TD >c_ID[I][J] </TD><TD > one element of array 
@ -1352,7 +1356,7 @@ data and scalar/vector/array data.
 input, that could be an element of a vector or array.  Likewise a
 vector input could be a column of an array.
 </P>
-<DIV ALIGN=center><TABLE  BORDER=1 >
+<DIV ALIGN=center><TABLE  WIDTH="0%"  BORDER=1 >
 <TR><TD >Command</TD><TD > Input</TD><TD > Output</TD><TD ></TD></TR>
 <TR><TD ><A HREF = "thermo_style.html">thermo_style custom</A></TD><TD > global scalars</TD><TD > screen, log file</TD><TD ></TD></TR>
 <TR><TD ><A HREF = "dump.html">dump custom</A></TD><TD > per-atom vectors</TD><TD > dump file</TD><TD ></TD></TR>
--- a/doc/Section_start.html
+++ b/doc/Section_start.html
@ -787,7 +787,8 @@ more processors or setup a smaller problem.
 which may be used in any order.  Either the full word or the
 one-letter abbreviation can be used:
 </P>
-<UL><LI>-echo or -e
+<UL><LI>-accelerator or -a
+<LI>-echo or -e
 <LI>-partition or -p
 <LI>-in or -i
 <LI>-log or -l
@ -800,6 +801,27 @@ one-letter abbreviation can be used:
 </PRE>
 <P>Here are the details on the options:
 </P>
+<PRE>-accelerator style 
+</PRE>
+<P>Use accelerated variants of various styles if they exist.  The style
+can be <I>opt</I> or <I>gpu</I> or <I>cuda</I>.  The variant styles are part of
+optional packages that LAMMPS can be built with, as described above in
+<A HREF = "#2_3">Section 2.3</A>.  Also see the <A HREF = "accelerator.html">accelerator</A>
+command doc page.  The "opt" style corresponds to the OPT package, the
+"gpu" style to the GPU package, and the "cuda" style to the USER-CUDA
+package.  For example, all of the packages provide a <A HREF = "pair_lj.html">pair_style
+lj/cut</A> variant, with style names lj/cut/opt or
+lj/cut/gpu or lj/cut/cuda.
+</P>
+<P>These accelerated styles can be specified explicitly in your input
+script, e.g. pair_style lj/cut/gpu.  If the -accelerator switch is
+used, you do not need to modify your input script.  The accelerator
+suffix (opt,gpu,cuda) is automatically appended when the style is
+created for atom, pair, fix, compute, and integrate styles.  If an
+accelerated version does not exist, the standard version is created.
+See the <A HREF = "accelerator.html">accelerator</A> command for info on how to
+temporarily turn off this option.
+</P>
 <PRE>-echo style 
 </PRE>
 <P>Set the style of command echoing.  The style can be <I>none</I> or <I>screen</I>
@ -994,12 +1016,11 @@ processing units (GPUs).  We plan to add more over time.  Currently,
 they only support NVIDIA GPU cards.  To use them you need to install
 certain NVIDIA CUDA software on your system:
 </P>
-<UL><LI>Check if you have an NVIDIA card: cat /proc/driver/nvidia/cards/0 Go
-<LI>to http://www.nvidia.com/object/cuda_get.html Install a driver and
-<LI>toolkit appropriate for your system (SDK is not necessary) Follow the
-<LI>instructions in README in lammps/lib/gpu to build the library.  Run
-<LI>lammps/lib/gpu/nvc_get_devices to list supported devices and
-<LI>properties 
+<UL><LI>Check if you have an NVIDIA card: cat /proc/driver/nvidia/cards/0
+<LI>Go to http://www.nvidia.com/object/cuda_get.html
+<LI>Install a driver and toolkit appropriate for your system (SDK is not necessary)
+<LI>Follow the instructions in README in lammps/lib/gpu to build the library
+<LI>Run lammps/lib/gpu/nvc_get_devices to list supported devices and properties 
 </UL>
 <H4>GPU configuration 
 </H4>
--- a/doc/Section_start.txt
+++ b/doc/Section_start.txt
@ -777,6 +777,7 @@ At run time, LAMMPS recognizes several optional command-line switches
 which may be used in any order.  Either the full word or the
 one-letter abbreviation can be used:

+-accelerator or -a
 -echo or -e
 -partition or -p
 -in or -i
@ -790,6 +791,27 @@ mpirun -np 16 lmp_ibm -var f tmp.out -log my.log -screen none < in.alloy :pre

 Here are the details on the options:

+-accelerator style :pre
+
+Use accelerated variants of various styles if they exist.  The style
+can be {opt} or {gpu} or {cuda}.  The variant styles are part of
+optional packages that LAMMPS can be built with, as described above in
+"Section 2.3"_#2_3.  Also see the "accelerator"_accelerator.html
+command doc page.  The "opt" style corresponds to the OPT package, the
+"gpu" style to the GPU package, and the "cuda" style to the USER-CUDA
+package.  For example, all of the packages provide a "pair_style
+lj/cut"_pair_lj.html variant, with style names lj/cut/opt or
+lj/cut/gpu or lj/cut/cuda.
+
+These accelerated styles can be specified explicitly in your input
+script, e.g. pair_style lj/cut/gpu.  If the -accelerator switch is
+used, you do not need to modify your input script.  The accelerator
+suffix (opt,gpu,cuda) is automatically appended when the style is
+created for atom, pair, fix, compute, and integrate styles.  If an
+accelerated version does not exist, the standard version is created.
+See the "accelerator"_accelerator.html command for info on how to
+temporarily turn off this option.
+
 -echo style :pre

 Set the style of command echoing.  The style can be {none} or {screen}
@ -984,12 +1006,11 @@ processing units (GPUs).  We plan to add more over time.  Currently,
 they only support NVIDIA GPU cards.  To use them you need to install
 certain NVIDIA CUDA software on your system:

-Check if you have an NVIDIA card: cat /proc/driver/nvidia/cards/0 Go
-to http://www.nvidia.com/object/cuda_get.html Install a driver and
-toolkit appropriate for your system (SDK is not necessary) Follow the
-instructions in README in lammps/lib/gpu to build the library.  Run
-lammps/lib/gpu/nvc_get_devices to list supported devices and
-properties :ul
+Check if you have an NVIDIA card: cat /proc/driver/nvidia/cards/0
+Go to http://www.nvidia.com/object/cuda_get.html
+Install a driver and toolkit appropriate for your system (SDK is not necessary)
+Follow the instructions in README in lammps/lib/gpu to build the library
+Run lammps/lib/gpu/nvc_get_devices to list supported devices and properties :ul

 GPU configuration :h4

--- a/doc/accelerator.html
+++ b/doc/accelerator.html
@ -0,0 +1,89 @@
+<HTML>
+<CENTER><A HREF = "http://lammps.sandia.gov">LAMMPS WWW Site</A> - <A HREF = "Manual.html">LAMMPS Documentation</A> - <A HREF = "Section_commands.html#comm">LAMMPS Commands</A> 
+</CENTER>
+
+
+
+
+
+
+<HR>
+
+<H3>accelerator command 
+</H3>
+<P><B>Syntax:</B>
+</P>
+<PRE>accelerator style args 
+</PRE>
+<UL><LI>style = <I>off</I> or <I>on</I> or <I>cuda</I> 
+
+<LI>args = 0 or more args specific to the style 
+
+<PRE>  <I>off</I> args = none
+  <I>on</I> args = none
+  <I>cuda</I> args = to be determined 
+</PRE>
+
+</UL>
+<P><B>Examples:</B>
+</P>
+<PRE>accelerator off
+accelerator on
+accelerator cuda blah 
+</PRE>
+<P><B>Description:</B>
+</P>
+<P>Alter settings for use of accelerated versions of various styles.
+LAMMPS can be built with optional packages which provide accelerated
+versions of specific <A HREF = "atom_style.html">atom</A>, <A HREF = "pair_style.html">pair</A>,
+<A HREF = "fix.html">fix</A>, <A HREF = "compute.html">compute</A>, and <A HREF = "run_style.html">integrate</A>
+styles.
+</P>
+<P>These are the relevant packages:
+</P>
+<UL><LI>OPT = a handful of pair styles, cache-optimized for faster CPU performance
+<LI>GPU = a handful of pair styles and the PPPM kspace_style, optimized to run on one or more GPUs or multicore CPU/GPU nodes
+<LI>USER-CUDA = a collection of atom, pair, fix, compute, and integrate styles, optimized to run on one or more NVIDIA GPUs 
+</UL>
+<P>See <A HREF = "Section_start.html#2_3">this section</A> of the manual for
+instructions on how to build LAMMPS with any of these packages.
+</P>
+<P>These styles can be specified explicitly in your input script,
+e.g. <A HREF = "pair_lj.html">pair_style lj/cut/gpu</A>.  If the -accelerator
+command-line switch is used, you do not need to modify your input
+script, as discussed in <A HREF = "Section_start.html#2_6">this section</A> of the
+manual.  The command-line suffix (opt,gpu,cuda) is automatically
+appended when the style is created for atom, pair, fix, compute, and
+integrate styles.  If an accelerated version does not exist, the
+standard version is created.
+</P>
+<P>If the -accelerator command-line switch is used, you may wish to
+disable it for one or more input script commands, so that the standard
+version of the style is used instead of the accelerated one.  This can
+be useful for performance testing or debugging.
+</P>
+<P>The <I>off</I> style allows you to do this.  The effect of the -accelerator
+command-line switch is effectively turned off until another
+accelerator command is used with the <I>on</I> style.  The <I>on</I> style can
+only be used if the -accelerator command-line switch was used.
+</P>
+<P>The <I>cuda</I> style invokes options associated with the use of the
+USER-CUDA package.  These will be described when the USER-CUDA package
+is released with LAMMPS.
+</P>
+<P><B>Restrictions:</B>
+</P>
+<P>The <I>cuda</I> style can only be invoked if LAMMPS was built with the
+USER-CUDA package.  See the <A HREF = "Section_start.html#2_3">Making LAMMPS</A>
+section for more info.
+</P>
+<P>Obviously, you must have GPU hardware and associated software to
+build LAMMPS with GPU support.
+</P>
+<P><B>Related commands:</B>
+</P>
+<P><A HREF = "fix_gpu.html">fix gpu</A>
+</P>
+<P><B>Default:</B> none
+</P>
+</HTML>
--- a/doc/accelerator.txt
+++ b/doc/accelerator.txt
@ -0,0 +1,81 @@
+"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
+
+:link(lws,http://lammps.sandia.gov)
+:link(ld,Manual.html)
+:link(lc,Section_commands.html#comm)
+
+:line
+
+accelerator command :h3
+
+[Syntax:]
+
+accelerator style args :pre
+
+style = {off} or {on} or {cuda} :ulb,l
+args = 0 or more args specific to the style :l
+  {off} args = none
+  {on} args = none
+  {cuda} args = to be determined :pre
+:ule
+
+[Examples:]
+
+accelerator off
+accelerator on
+accelerator cuda blah :pre
+
+[Description:]
+
+Alter settings for use of accelerated versions of various styles.
+LAMMPS can be built with optional packages which provide accelerated
+versions of specific "atom"_atom_style.html, "pair"_pair_style.html,
+"fix"_fix.html, "compute"_compute.html, and "integrate"_run_style.html
+styles.
+
+These are the relevant packages:
+
+OPT = a handful of pair styles, cache-optimized for faster CPU performance
+GPU = a handful of pair styles and the PPPM kspace_style, optimized to run on one or more GPUs or multicore CPU/GPU nodes
+USER-CUDA = a collection of atom, pair, fix, compute, and integrate styles, optimized to run on one or more NVIDIA GPUs :ul
+
+See "this section"_Section_start.html#2_3 of the manual for
+instructions on how to build LAMMPS with any of these packages.
+
+These styles can be specified explicitly in your input script,
+e.g. "pair_style lj/cut/gpu"_pair_lj.html.  If the -accelerator
+command-line switch is used, you do not need to modify your input
+script, as discussed in "this section"_Section_start.html#2_6 of the
+manual.  The command-line suffix (opt,gpu,cuda) is automatically
+appended when the style is created for atom, pair, fix, compute, and
+integrate styles.  If an accelerated version does not exist, the
+standard version is created.
+
+If the -accelerator command-line switch is used, you may wish to
+disable it for one or more input script commands, so that the standard
+version of the style is used instead of the accelerated one.  This can
+be useful for performance testing or debugging.
+
+The {off} style allows you to do this.  The effect of the -accelerator
+command-line switch is effectively turned off until another
+accelerator command is used with the {on} style.  The {on} style can
+only be used if the -accelerator command-line switch was used.
+
+The {cuda} style invokes options associated with the use of the
+USER-CUDA package.  These will be described when the USER-CUDA package
+is released with LAMMPS.
+
+[Restrictions:]
+
+The {cuda} style can only be invoked if LAMMPS was built with the
+USER-CUDA package.  See the "Making LAMMPS"_Section_start.html#2_3
+section for more info.
+
+Obviously, you must have GPU hardware and associated software to
+build LAMMPS with GPU support.
+
+[Related commands:]
+
+"fix gpu"_fix_gpu.html
+
+[Default:] none
--- a/doc/compute_group_group.html
+++ b/doc/compute_group_group.html
@ -49,7 +49,7 @@ section</A> for an overview of LAMMPS output
 options.
 </P>
 <P>Both the scalar and vector values calculated by this compute are
-"extensive"., The scalar value will be in energy <A HREF = "units.html">units</A>.
+"extensive".  The scalar value will be in energy <A HREF = "units.html">units</A>.
 The vector values will be in force <A HREF = "units.html">units</A>.
 </P>
 <P><B>Restrictions:</B>
--- a/doc/compute_group_group.txt
+++ b/doc/compute_group_group.txt
@ -46,7 +46,7 @@ section"_Section_howto.html#4_15 for an overview of LAMMPS output
 options.

 Both the scalar and vector values calculated by this compute are
-"extensive"., The scalar value will be in energy "units"_units.html.
+"extensive".  The scalar value will be in energy "units"_units.html.
 The vector values will be in force "units"_units.html.

 [Restrictions:]
--- a/doc/fix_adapt.html
+++ b/doc/fix_adapt.html
@ -54,7 +54,7 @@
 </P>
 <PRE>fix 1 all adapt 1 pair soft a 1 1 v_prefactor
 fix 1 all adapt 1 pair soft a 2* 3 v_prefactor
-fix 1 all adapt 1 pair lj/cut epsilon * * v_scale1 coul/cut pre 3 3 v_scale2 scale yes reset yes
+fix 1 all adapt 1 pair lj/cut epsilon * * v_scale1 coul/cut scale 3 3 v_scale2 scale yes reset yes
 fix 1 all adapt 10 atom diameter v_size 
 </PRE>
 <P><B>Description:</B>
--- a/doc/fix_adapt.txt
+++ b/doc/fix_adapt.txt
@ -41,7 +41,7 @@ keyword = {scale} or {reset} :l

 fix 1 all adapt 1 pair soft a 1 1 v_prefactor
 fix 1 all adapt 1 pair soft a 2* 3 v_prefactor
-fix 1 all adapt 1 pair lj/cut epsilon * * v_scale1 coul/cut pre 3 3 v_scale2 scale yes reset yes
+fix 1 all adapt 1 pair lj/cut epsilon * * v_scale1 coul/cut scale 3 3 v_scale2 scale yes reset yes
 fix 1 all adapt 10 atom diameter v_size :pre

 [Description:]
--- a/doc/fix_gpu.html
+++ b/doc/fix_gpu.html
@ -45,39 +45,38 @@ specified for a run or an error will be generated. The fix will not have an
 effect on any LAMMPS computations that do not use GPU acceleration, so there 
 should not be any problems with specifying this fix first in input scripts.
 </P>
-<P><I>mode</I> specifies where neighbor list calculations will be performed.
-If <I>mode</I> is force, neighbor list calculation is performed on the
-CPU. If <I>mode</I> is force/neigh, neighbor list calculation is 
-performed on the GPU. GPU neighbor list calculation currently cannot be
-used with a triclinic box. GPU neighbor list calculation currently
-cannot be used with <A HREF = "pair_hybrid.html">hybrid</A> pair styles.
-GPU neighbor lists are not compatible with styles that are not GPU-enabled.
-When a non-GPU enabled style requires a neighbor list, it will also be
-built using CPU routines. In these cases, it will typically be more efficient
-to only use CPU neighbor list builds.
+<P>The <I>mode</I> setting specifies where neighbor list calculations will be
+performed.  If <I>mode</I> is force, neighbor list calculation is performed
+on the CPU. If <I>mode</I> is force/neigh, neighbor list calculation is
+performed on the GPU. GPU neighbor list calculation currently cannot
+be used with a triclinic box. GPU neighbor list calculation currently
+cannot be used with <A HREF = "pair_hybrid.html">hybrid</A> pair styles.  GPU
+neighbor lists are not compatible with styles that are not
+GPU-enabled.  When a non-GPU enabled style requires a neighbor list,
+it will also be built using CPU routines. In these cases, it will
+typically be more efficient to only use CPU neighbor list builds.
 </P>
-<P><I>first</I> and <I>last</I> specify the GPUs that will be used for simulation.
-On each node, the GPU IDs in the inclusive range from <I>first</I> to <I>last</I> will
-be used.
+<P>The <I>first</I> and <I>last</I> settings specify the GPUs that will be used for
+simulation.  On each node, the GPU IDs in the inclusive range from
+<I>first</I> to <I>last</I> will be used.
 </P>
-<P><I>split</I> can be used for load balancing force calculation work between
-CPU and GPU cores in GPU-enabled pair styles. If 0<<I>split</I><1.0, 
-a fixed fraction of particles is offloaded to the GPU while force calculation
-for the other particles occurs simulataneously on the CPU. If <I>split</I><0,
-the optimal fraction (based on CPU and GPU timings) is calculated
-every 25 timesteps. If <I>split</I>=1.0, all force calculations for 
-GPU accelerated pair styles are performed
-on the GPU. In this case, <A HREF = "pair_hybrid.html">hybrid</A>, 
-<A HREF = "bond_style.html">bond</A>, <A HREF = "angle_style.html">angle</A>, 
-<A HREF = "dihedral_style.html">dihedral</A>, <A HREF = "improper_style.html">improper</A>, 
-and <A HREF = "kspace_style.html">long-range</A> calculations can be performed on the CPU 
-while the GPU is performing force calculations for the GPU-enabled pair
-style.
+<P>The <I>split</I> setting can be used for load balancing force calculation
+work between CPU and GPU cores in GPU-enabled pair styles. If
+0<<I>split</I><1.0, a fixed fraction of particles is offloaded to the GPU
+while force calculation for the other particles occurs simultaneously
+on the CPU. If <I>split</I><0, the optimal fraction (based on CPU and GPU
+timings) is calculated every 25 timesteps. If <I>split</I>=1.0, all force
+calculations for GPU accelerated pair styles are performed on the
+GPU. In this case, <A HREF = "pair_hybrid.html">hybrid</A>, <A HREF = "bond_style.html">bond</A>,
+<A HREF = "angle_style.html">angle</A>, <A HREF = "dihedral_style.html">dihedral</A>,
+<A HREF = "improper_style.html">improper</A>, and <A HREF = "kspace_style.html">long-range</A>
+calculations can be performed on the CPU while the GPU is performing
+force calculations for the GPU-enabled pair style.
 </P>
-<P>In order to use GPU acceleration, a GPU enabled style must be
-selected in the input script in addition to this fix. Currently,
-this is limited to a few <A HREF = "pair_style.html">pair styles</A> and
-the PPPM <A HREF = "kspace_style.html">kspace style</A>.
+<P>In order to use GPU acceleration, a GPU enabled style must be selected
+in the input script in addition to this fix. Currently, this is
+limited to a few <A HREF = "pair_style.html">pair styles</A> and the PPPM <A HREF = "kspace_style.html">kspace
+style</A>.
 </P>
 <P>More details about these settings and various possible hardware
 configuration are in <A HREF = "Section_start.html#2_8">this section</A> of the
@ -85,6 +84,10 @@ manual.
 </P>
 <P><B>Restart, fix_modify, output, run start/stop, minimize info:</B>
 </P>
+<P>This fix is part of the "gpu" package.  It is only enabled if LAMMPS
+was built with that package.  See the <A HREF = "Section_start.html#2_3">Making
+LAMMPS</A> section for more info.
+</P>
 <P>No information about this fix is written to <A HREF = "restart.html">binary restart
 files</A>.  None of the <A HREF = "fix_modify.html">fix_modify</A> options
 are relevant to this fix.
@ -98,7 +101,8 @@ the <A HREF = "run.html">run</A> command.
 <I>mode</I> should not be used with a triclinic box or <A HREF = "pair_hybrid.html">hybrid</A>
 pair styles.
 </P>
-<P><I>split</I> must be positive when using <A HREF = "pair_hybrid.html">hybrid</A> pair styles.
+<P>The <I>split</I> setting must be positive when using
+<A HREF = "pair_hybrid.html">hybrid</A> pair styles.
 </P>
 <P>Currently, group-ID must be all.
 </P>
--- a/doc/fix_gpu.txt
+++ b/doc/fix_gpu.txt
@ -36,39 +36,38 @@ specified for a run or an error will be generated. The fix will not have an
 effect on any LAMMPS computations that do not use GPU acceleration, so there 
 should not be any problems with specifying this fix first in input scripts.

-{mode} specifies where neighbor list calculations will be performed.
-If {mode} is force, neighbor list calculation is performed on the
-CPU. If {mode} is force/neigh, neighbor list calculation is 
-performed on the GPU. GPU neighbor list calculation currently cannot be
-used with a triclinic box. GPU neighbor list calculation currently
-cannot be used with "hybrid"_pair_hybrid.html pair styles.
-GPU neighbor lists are not compatible with styles that are not GPU-enabled.
-When a non-GPU enabled style requires a neighbor list, it will also be
-built using CPU routines. In these cases, it will typically be more efficient
-to only use CPU neighbor list builds.
+The {mode} setting specifies where neighbor list calculations will be
+performed.  If {mode} is force, neighbor list calculation is performed
+on the CPU. If {mode} is force/neigh, neighbor list calculation is
+performed on the GPU. GPU neighbor list calculation currently cannot
+be used with a triclinic box. GPU neighbor list calculation currently
+cannot be used with "hybrid"_pair_hybrid.html pair styles.  GPU
+neighbor lists are not compatible with styles that are not
+GPU-enabled.  When a non-GPU enabled style requires a neighbor list,
+it will also be built using CPU routines. In these cases, it will
+typically be more efficient to only use CPU neighbor list builds.

-{first} and {last} specify the GPUs that will be used for simulation.
-On each node, the GPU IDs in the inclusive range from {first} to {last} will
-be used.
+The {first} and {last} settings specify the GPUs that will be used for
+simulation.  On each node, the GPU IDs in the inclusive range from
+{first} to {last} will be used.

-{split} can be used for load balancing force calculation work between
-CPU and GPU cores in GPU-enabled pair styles. If 0<{split}<1.0, 
-a fixed fraction of particles is offloaded to the GPU while force calculation
-for the other particles occurs simulataneously on the CPU. If {split}<0,
-the optimal fraction (based on CPU and GPU timings) is calculated
-every 25 timesteps. If {split}=1.0, all force calculations for 
-GPU accelerated pair styles are performed
-on the GPU. In this case, "hybrid"_pair_hybrid.html, 
-"bond"_bond_style.html, "angle"_angle_style.html, 
-"dihedral"_dihedral_style.html, "improper"_improper_style.html, 
-and "long-range"_kspace_style.html calculations can be performed on the CPU 
-while the GPU is performing force calculations for the GPU-enabled pair
-style.
+The {split} setting can be used for load balancing force calculation
+work between CPU and GPU cores in GPU-enabled pair styles. If
+0<{split}<1.0, a fixed fraction of particles is offloaded to the GPU
+while force calculation for the other particles occurs simultaneously
+on the CPU. If {split}<0, the optimal fraction (based on CPU and GPU
+timings) is calculated every 25 timesteps. If {split}=1.0, all force
+calculations for GPU accelerated pair styles are performed on the
+GPU. In this case, "hybrid"_pair_hybrid.html, "bond"_bond_style.html,
+"angle"_angle_style.html, "dihedral"_dihedral_style.html,
+"improper"_improper_style.html, and "long-range"_kspace_style.html
+calculations can be performed on the CPU while the GPU is performing
+force calculations for the GPU-enabled pair style.

-In order to use GPU acceleration, a GPU enabled style must be
-selected in the input script in addition to this fix. Currently,
-this is limited to a few "pair styles"_pair_style.html and
-the PPPM "kspace style"_kspace_style.html.
+In order to use GPU acceleration, a GPU enabled style must be selected
+in the input script in addition to this fix. Currently, this is
+limited to a few "pair styles"_pair_style.html and the PPPM "kspace
+style"_kspace_style.html.

 More details about these settings and various possible hardware
 configuration are in "this section"_Section_start.html#2_8 of the
@ -76,6 +75,10 @@ manual.

 [Restart, fix_modify, output, run start/stop, minimize info:]

+This fix is part of the "gpu" package.  It is only enabled if LAMMPS
+was built with that package.  See the "Making
+LAMMPS"_Section_start.html#2_3 section for more info.
+
 No information about this fix is written to "binary restart
 files"_restart.html.  None of the "fix_modify"_fix_modify.html options
 are relevant to this fix.
@ -89,7 +92,8 @@ The fix must be the first fix specified for a given run. The force/neigh
 {mode} should not be used with a triclinic box or "hybrid"_pair_hybrid.html
 pair styles.

-{split} must be positive when using "hybrid"_pair_hybrid.html pair styles.
+The {split} setting must be positive when using
+"hybrid"_pair_hybrid.html pair styles.

 Currently, group-ID must be all.

--- a/examples/reax/control.reax_c.tatb
+++ b/examples/reax/control.reax_c.tatb
@ -11,11 +11,12 @@ thb_cutoff              0.001 ! cutoff value for three body interactions
 q_err                   1e-6  ! average per atom error norm allowed in GMRES convergence

 geo_format              0    ! 0: xyz, 1: pdb, 2: bgf
-write_freq              0    ! write trajectory after so many steps
+write_freq              25   ! write trajectory after so many steps
 traj_compress           0    ! 0: no compression  1: uses zlib to compress trajectory output
 traj_title              TATB ! (no white spaces)
 atom_info               0    ! 0: no atom info, 1: print basic atom info in the trajectory file
 atom_forces             0    ! 0: basic atom format, 1: print force on each atom in the trajectory file
 atom_velocities         0    ! 0: basic atom format, 1: print the velocity of each atom in the trajectory file
-bond_info               0    ! 0: do not print bonds, 1: print bonds in the trajectory file
+bond_info               1    ! 0: do not print bonds, 1: print bonds in the trajectory file
 angle_info              0    ! 0: do not print angles, 1: print angles in the trajectory file 
+
--- a/lib/gpu/Makefile.linux
+++ b/lib/gpu/Makefile.linux
@ -20,7 +20,11 @@
 CUDA_HOME = /usr/local/cuda
 NVCC = nvcc

+# newer CUDA
 CUDA_ARCH = -arch=sm_13
+# older CUDA
+#CUDA_ARCH = -arch=sm_10 -DCUDA_PRE_THREE
+
 CUDA_PRECISION = -D_SINGLE_SINGLE
 CUDA_INCLUDE = -I$(CUDA_HOME)/include
 CUDA_LIB = -L$(CUDA_HOME)/lib64
--- a/lib/gpu/README
+++ b/lib/gpu/README
@ -33,13 +33,17 @@ NOTE: Installation of the CUDA SDK is not required.

 Current pair styles supporting GPU acceleration:

-  1. lj/cut/gpu
-  2. lj/cut/coul/cut/gpu
-  3. lj/cut/coul/long/gpu
-  4. lj96/cut/gpu
-  5. gayberne/gpu
-  6. cmm/cg/gpu
-  7. cmm/cg/coul/long/gpu
+  1. lj/cut
+  2. lj96/cut
+  3. lj/expand
+  4. lj/cut/coul/cut
+  5. lj/cut/coul/long
+  6. lj/charmm/coul/long
+  7. morse
+  8. cg/cmm
+  9. cg/cmm/coul/long
+ 10. gayberne
+ 11. pppm

                     MULTIPLE LAMMPS PROCESSES
                     
@ -52,12 +56,12 @@ LAMMPS user manual for details on running with GPU acceleration.

                    BUILDING AND PRECISION MODES

-To build, edit the CUDA_ARCH, CUDA_PRECISION, CUDA_HOME, NVCC, CUDA_INCLUD,
-CUDA_LIB and CUDA_OPTS variables in one of the Makefiles. CUDA_ARCH should
-be set based on the compute capability of your GPU. This can be verified by
-running the nvc_get_devices executable after the build is complete.
-Additionally, the GPU package must be installed and compiled for LAMMPS.
-This may require editing the gpu_SYSPATH variable in the LAMMPS makefile.
+To build, edit the CUDA_ARCH, CUDA_PRECISION, and CUDA_HOME variables in one of 
+the Makefiles. CUDA_ARCH should be set based on the compute capability of 
+your GPU. This can be verified by running the nvc_get_devices executable after
+the build is complete. Additionally, the GPU package must be installed and
+compiled for LAMMPS. This may require editing the gpu_SYSPATH variable in the
+LAMMPS makefile.

 Please note that the GPU library accesses the CUDA driver library directly,
 so it needs to be linked not only to the CUDA runtime library (libcudart.so)
@ -74,6 +78,10 @@ the CUDA_PRECISION variable:
  CUDA_PREC = -D_DOUBLE_DOUBLE  # Double precision for all calculations
  CUDA_PREC = -D_SINGLE_DOUBLE  # Accumulation of forces, etc. in double

+NOTE: PPPM acceleration can only be run on GPUs with compute capability>=1.1.
+      You will get the error "GPU library not compiled for this accelerator."
+      when attempting to run PPPM on a GPU with compute capability 1.0.
+
 NOTE: Double precision is only supported on certain GPUs (with
      compute capability>=1.3).
      
@ -83,15 +91,17 @@ NOTE: For Tesla and other graphics cards with compute capability>=1.3,
 NOTE: For Fermi, make sure that -arch=sm_20 is set on the CUDA_ARCH line.

 NOTE: The gayberne/gpu pair style will only be installed if the ASPHERE
-      package has been installed before installing the GPU package in LAMMPS.
+      package has been installed.

 NOTE: The cg/cmm/gpu and cg/cmm/coul/long/gpu pair styles will only be
-      installed if the USER-CG-CMM package has been installed before
-      installing the GPU package in LAMMPS.
+      installed if the USER-CG-CMM package has been installed.

-NOTE: The lj/cut/coul/long/gpu and cg/cmm/coul/long/gpu style will only be 
-      installed if the KSPACE package has been installed before installing
-      the GPU package in LAMMPS.
+NOTE: The lj/cut/coul/long/gpu, cg/cmm/coul/long/gpu, pppm/gpu/single, and
+      pppm/gpu/double styles will only be installed if the KSPACE package has
+      been installed.
+
+NOTE: The lj/charmm/coul/long/gpu style will only be installed if the MOLECULE
+      package has been installed.

                      EXAMPLE BUILD PROCESS
                    
@ -105,7 +115,3 @@ make yes-asphere
 make yes-kspace
 make yes-gpu
 make linux
-
------------------------------------------------------------------------
-Last merge with gpulammps: r561 on 2010-11-12
------------------------------------------------------------------------
--- a/lib/gpu/cmm_cut_gpu_kernel.cu
+++ b/lib/gpu/cmm_cut_gpu_kernel.cu
@ -18,30 +18,6 @@
 #ifndef CMM_GPU_KERNEL
 #define CMM_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/cmmc_long_gpu_kernel.cu
+++ b/lib/gpu/cmmc_long_gpu_kernel.cu
@ -18,38 +18,6 @@
 #ifndef CMML_GPU_KERNEL
 #define CMML_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
-#define EWALD_F (numtyp)1.12837917
-#define EWALD_P (numtyp)0.3275911
-#define A1 (numtyp)0.254829592
-#define A2 (numtyp)-0.284496736
-#define A3 (numtyp)1.421413741
-#define A4 (numtyp)-1.453152027
-#define A5 (numtyp)1.061405429
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
+#define EWALD_F (numtyp)1.12837917
+#define EWALD_P (numtyp)0.3275911
+#define A1 (numtyp)0.254829592
+#define A2 (numtyp)-0.284496736
+#define A3 (numtyp)1.421413741
+#define A4 (numtyp)-1.453152027
+#define A5 (numtyp)1.061405429
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/crml_gpu_kernel.cu
+++ b/lib/gpu/crml_gpu_kernel.cu
@ -18,40 +18,6 @@
 #ifndef CRML_GPU_KERNEL
 #define CRML_GPU_KERNEL

-#define MAX_BIO_SHARED_TYPES 128
-
-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
-#define EWALD_F (numtyp)1.12837917
-#define EWALD_P (numtyp)0.3275911
-#define A1 (numtyp)0.254829592
-#define A2 (numtyp)-0.284496736
-#define A3 (numtyp)1.421413741
-#define A4 (numtyp)-1.453152027
-#define A5 (numtyp)1.061405429
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -94,6 +60,40 @@ __inline float fetch_q(const int& i, const float *q)

 #endif

+#define MAX_BIO_SHARED_TYPES 128
+
+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
+#define EWALD_F (numtyp)1.12837917
+#define EWALD_P (numtyp)0.3275911
+#define A1 (numtyp)0.254829592
+#define A2 (numtyp)-0.284496736
+#define A3 (numtyp)1.421413741
+#define A4 (numtyp)-1.453152027
+#define A5 (numtyp)1.061405429
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/ellipsoid_nbor.cu
+++ b/lib/gpu/ellipsoid_nbor.cu
@ -16,16 +16,6 @@
 #ifndef ELLIPSOID_NBOR_H
 #define ELLIPSOID_NBOR_H

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#else
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -42,6 +32,16 @@

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#else
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#endif
+
 // ---------------------------------------------------------------------------
 // Unpack neighbors from dev_ij array into dev_nbor matrix for coalesced access
 // -- Only unpack neighbors matching the specified inclusive range of forms
--- a/lib/gpu/geryon/ucl_nv_kernel.h
+++ b/lib/gpu/geryon/ucl_nv_kernel.h
@ -33,6 +33,14 @@
 #define MEM_THREADS 32
 #endif

+#ifdef CUDA_PRE_THREE
+struct __builtin_align__(16) _double4
+{
+  double x, y, z, w;
+};
+typedef struct _double4 double4;
+#endif
+
 #define GLOBAL_ID_X threadIdx.x+mul24(blockIdx.x,blockDim.x)
 #define GLOBAL_ID_Y threadIdx.y+mul24(blockIdx.y,blockDim.y)
 #define GLOBAL_SIZE_X mul24(gridDim.x,blockDim.x);
--- a/lib/gpu/lj96_cut_gpu_kernel.cu
+++ b/lib/gpu/lj96_cut_gpu_kernel.cu
@ -18,30 +18,6 @@
 #ifndef LJ96_GPU_KERNEL
 #define LJ96_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/lj_cut_gpu_kernel.cu
+++ b/lib/gpu/lj_cut_gpu_kernel.cu
@ -18,30 +18,6 @@
 #ifndef LJ_GPU_KERNEL
 #define LJ_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/lj_expand_gpu_kernel.cu
+++ b/lib/gpu/lj_expand_gpu_kernel.cu
@ -18,30 +18,6 @@
 #ifndef LJE_GPU_KERNEL
 #define LJE_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/ljc_cut_gpu_kernel.cu
+++ b/lib/gpu/ljc_cut_gpu_kernel.cu
@ -18,30 +18,6 @@
 #ifndef LJC_GPU_KERNEL
 #define LJC_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -85,6 +61,30 @@ __inline float fetch_q(const int& i, const float *q)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/ljcl_cut_gpu_kernel.cu
+++ b/lib/gpu/ljcl_cut_gpu_kernel.cu
@ -18,38 +18,6 @@
 #ifndef LJCL_GPU_KERNEL
 #define LJCL_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
-#define EWALD_F (numtyp)1.12837917
-#define EWALD_P (numtyp)0.3275911
-#define A1 (numtyp)0.254829592
-#define A2 (numtyp)-0.284496736
-#define A3 (numtyp)1.421413741
-#define A4 (numtyp)-1.453152027
-#define A5 (numtyp)1.061405429
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -93,6 +61,38 @@ __inline float fetch_q(const int& i, const float *q)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
+#define EWALD_F (numtyp)1.12837917
+#define EWALD_P (numtyp)0.3275911
+#define A1 (numtyp)0.254829592
+#define A2 (numtyp)-0.284496736
+#define A3 (numtyp)1.421413741
+#define A4 (numtyp)-1.453152027
+#define A5 (numtyp)1.061405429
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/morse_gpu_kernel.cu
+++ b/lib/gpu/morse_gpu_kernel.cu
@ -18,30 +18,6 @@
 #ifndef MORSE_GPU_KERNEL
 #define MORSE_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp2 double2
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp2 float2
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "nv_kernel_def.h"
@ -75,6 +51,30 @@ __inline float4 fetch_pos(const int& i, const float4 *pos)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp2 double2
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp2 float2
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
 #define SBBITS 30
 #define NEIGHMASK 0x3FFFFFFF
 __inline int sbmask(int j) { return j >> SBBITS & 3; }
--- a/lib/gpu/pair_gpu_atom_kernel.cu
+++ b/lib/gpu/pair_gpu_atom_kernel.cu
@ -15,6 +15,13 @@
   Contributing authors: Mike Brown (ORNL), brownw@ornl.gov
 ------------------------------------------------------------------------- */

+#ifdef NV_KERNEL
+#include "geryon/ucl_nv_kernel.h"
+#else
+#pragma OPENCL EXTENSION cl_khr_fp64: enable
+#define GLOBAL_ID_X get_global_id(0)
+#endif
+
 #ifdef _DOUBLE_DOUBLE
 #define numtyp double
 #define numtyp4 double4
@ -23,13 +30,6 @@
 #define numtyp4 float4
 #endif

-#ifdef NV_KERNEL
-#include "geryon/ucl_nv_kernel.h"
-#else
-#pragma OPENCL EXTENSION cl_khr_fp64: enable
-#define GLOBAL_ID_X get_global_id(0)
-#endif
-
 __kernel void kernel_cast_x(__global numtyp4 *x_type, __global double *x,
                            __global int *type, const int nall) {
  int ii=GLOBAL_ID_X;
--- a/lib/gpu/pair_gpu_device.cpp
+++ b/lib/gpu/pair_gpu_device.cpp
@ -549,8 +549,9 @@ int PairGPUDeviceT::compile_kernels() {
  k_info.run(&d_gpu_lib_data.begin());
  ucl_copy(h_gpu_lib_data,d_gpu_lib_data,false);
  
+  _ptx_arch=static_cast<double>(h_gpu_lib_data[0])/100.0;
  #ifndef USE_OPENCL
-  if (static_cast<double>(h_gpu_lib_data[0])/100.0>gpu->arch())
+  if (_ptx_arch>gpu->arch())
    return -4;
  #endif

--- a/lib/gpu/pair_gpu_device.h
+++ b/lib/gpu/pair_gpu_device.h
@ -226,6 +226,8 @@ class PairGPUDevice {
  inline int block_bio_pair() const { return _block_bio_pair; }
  /// Return the maximum number of atom types for shared mem with "bio" styles
  inline int max_bio_shared_types() const { return _max_bio_shared_types; }
+  /// Architecture gpu code compiled for (returns 0 for OpenCL)
+  inline double ptx_arch() const { return _ptx_arch; }

  // -------------------- SHARED DEVICE ROUTINES -------------------- 
  // Perform asynchronous zero of integer array 
@ -281,6 +283,7 @@ class PairGPUDevice {
  int _gpu_mode, _first_device, _last_device, _nthreads;
  double _particle_split;
  double _cpu_full;
+  double _ptx_arch;

  int _num_mem_threads, _warp_size, _threads_per_atom, _threads_per_charge;
  int _pppm_max_spline, _pppm_block;
--- a/lib/gpu/pppm_gpu_kernel.cu
+++ b/lib/gpu/pppm_gpu_kernel.cu
@ -18,27 +18,6 @@
 #ifndef PPPM_GPU_KERNEL
 #define PPPM_GPU_KERNEL

-#ifdef _DOUBLE_DOUBLE
-#define numtyp double
-#define numtyp4 double4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifdef _SINGLE_DOUBLE
-#define numtyp float
-#define numtyp4 float4
-#define acctyp double
-#define acctyp4 double4
-#endif
-
-#ifndef numtyp
-#define numtyp float
-#define numtyp4 float4
-#define acctyp float
-#define acctyp4 float4
-#endif
-
 #ifdef NV_KERNEL

 #include "geryon/ucl_nv_kernel.h"
@ -67,6 +46,12 @@ __inline float fetch_q(const int& i, const float *q)

 #endif

+// Allow PPPM to compile without atomics for NVIDIA 1.0 cards, error
+// generated at runtime with use of pppm/gpu
+#if (__CUDA_ARCH__ < 110)
+#define atom_add(x,y) 0
+#endif
+
 #else

 #pragma OPENCL EXTENSION cl_khr_fp64: enable
@ -85,6 +70,27 @@ __inline float fetch_q(const int& i, const float *q)

 #endif

+#ifdef _DOUBLE_DOUBLE
+#define numtyp double
+#define numtyp4 double4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifdef _SINGLE_DOUBLE
+#define numtyp float
+#define numtyp4 float4
+#define acctyp double
+#define acctyp4 double4
+#endif
+
+#ifndef numtyp
+#define numtyp float
+#define numtyp4 float4
+#define acctyp float
+#define acctyp4 float4
+#endif
+
 // Maximum order for spline
 #define PPPM_MAX_SPLINE 8
 // Thread block size for PPPM kernels
--- a/lib/gpu/pppm_gpu_memory.cpp
+++ b/lib/gpu/pppm_gpu_memory.cpp
@ -66,7 +66,11 @@ grdtyp * PPPMGPUMemoryT::init(const int nlocal, const int nall, FILE *_screen,
    flag=-5;
    return 0;
  }
-  
+  if (device->ptx_arch()>0.0 && device->ptx_arch()<1.1) {
+    flag=-4;
+    return 0;
+  }
+
  ucl_device=device->gpu;
  atom=&device->atom;

--- a/src/ASPHERE/Install.sh
+++ b/src/ASPHERE/Install.sh
@ -1,7 +1,4 @@
 # Install/unInstall package files in LAMMPS
-# for unInstall, also unInstall/Install GPU package if installed
-#   so it will remove GPU files that depend on ASPHERE files,
-#   then replace others

 if (test $1 = 1) then

@ -25,10 +22,6 @@ if (test $1 = 1) then
  cp pair_gayberne.h ..
  cp pair_resquared.h ..

-  if (test -e ../pair_lj_cut_gpu.h) then
-    cd ../GPU; /bin/sh Install.sh 1
-  fi
-
 elif (test $1 = 0) then

  rm ../compute_erotate_asphere.cpp
@ -51,8 +44,4 @@ elif (test $1 = 0) then
  rm ../pair_gayberne.h
  rm ../pair_resquared.h

-  if (test -e ../pair_gayberne_gpu.h) then
-    cd ../GPU; /bin/sh Install.sh 0; /bin/sh Install.sh 1
-  fi
-
 fi
--- a/src/ASPHERE/pair_resquared.cpp
+++ b/src/ASPHERE/pair_resquared.cpp
@ -503,8 +503,8 @@ void PairRESquared::precompute_i(const int i,RE2Vars &ws)
  int *ellipsoid = atom->ellipsoid;
  AtomVecEllipsoid::Bonus *bonus = avec->bonus;
  MathExtra::quat_to_mat_trans(bonus[ellipsoid[i]].quat,ws.A);
-  MathExtra::transpose_times_diag3(ws.A,well[atom->type[i]],ws.aTe);
-  MathExtra::transpose_times_diag3(ws.A,shape2[atom->type[i]],aTs);
+  MathExtra::transpose_diag3(ws.A,well[atom->type[i]],ws.aTe);
+  MathExtra::transpose_diag3(ws.A,shape2[atom->type[i]],aTs);
  MathExtra::diag_times3(shape2[atom->type[i]],ws.A,ws.sa);
  MathExtra::times3(aTs,ws.A,ws.gamma);
  MathExtra::rotation_generator_x(ws.A,ws.lA[0]);
@ -885,7 +885,7 @@ double PairRESquared::resquared_lj(const int i, const int j,
  scorrect[0] = scorrect[0] * scorrect[0] / 2.0;
  scorrect[1] = scorrect[1] * scorrect[1] / 2.0;
  scorrect[2] = scorrect[2] * scorrect[2] / 2.0;
-  MathExtra::transpose_times_diag3(wi.A,scorrect,aTs);
+  MathExtra::transpose_diag3(wi.A,scorrect,aTs);
  MathExtra::times3(aTs,wi.A,gamma);
  for (int ii=0; ii<3; ii++)
    MathExtra::times3(aTs,wi.lA[ii],lAtwo[ii]);
--- a/src/CLASS2/pair_lj_class2.h
+++ b/src/CLASS2/pair_lj_class2.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PairLJClass2 : public Pair {
 public:
  PairLJClass2(class LAMMPS *);
-  ~PairLJClass2();
+  virtual ~PairLJClass2();
  void compute(int, int);
  void settings(int, char **);
  void coeff(int, char **);
@ -38,7 +38,7 @@ class PairLJClass2 : public Pair {
  void read_restart_settings(FILE *);
  double single(int, int, int, int, double, double, double, double &);

- private:
+ protected:
  double cut_global;
  double **cut;
  double **epsilon,**sigma;
--- a/src/CLASS2/pair_lj_class2_coul_cut.h
+++ b/src/CLASS2/pair_lj_class2_coul_cut.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PairLJClass2CoulCut : public Pair {
 public:
  PairLJClass2CoulCut(class LAMMPS *);
-  ~PairLJClass2CoulCut();
+  virtual ~PairLJClass2CoulCut();
  void compute(int, int);
  void settings(int, char **);
  void coeff(int, char **);
@ -39,7 +39,7 @@ class PairLJClass2CoulCut : public Pair {
  void read_restart_settings(FILE *);
  double single(int, int, int, int, double, double, double, double &);

- private:
+ protected:
  double cut_lj_global,cut_coul_global;
  double **cut_lj,**cut_ljsq;
  double **cut_coul,**cut_coulsq;
--- a/src/CLASS2/pair_lj_class2_coul_long.h
+++ b/src/CLASS2/pair_lj_class2_coul_long.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PairLJClass2CoulLong : public Pair {
 public:
  PairLJClass2CoulLong(class LAMMPS *);
-  ~PairLJClass2CoulLong();
+  virtual ~PairLJClass2CoulLong();
  void compute(int, int);
  void settings(int, char **);
  void coeff(int, char **);
@ -40,7 +40,7 @@ class PairLJClass2CoulLong : public Pair {
  double single(int, int, int, int, double, double, double, double &);
  void *extract(char *, int &);

- private:
+ protected:
  double cut_lj_global;
  double **cut_lj,**cut_ljsq;
  double cut_coul,cut_coulsq;
--- a/src/Depend.sh
+++ b/src/Depend.sh
@ -0,0 +1,36 @@
+# Depend.sh = Install/unInstall files from dependent packages
+# only Install/unInstall if dependent package is already installed,
+#   detected by the presence of one of its header files in this directory
+# $1 = 1: install child files; $1 = 0: uninstall them, then re-install the rest
+
+if (test $1 = 1) then
+
+  if (test -e pair_lj_cut_opt.h) then
+    cd OPT; /bin/sh Install.sh 1; cd ..
+  fi
+  if (test -e pair_lj_cut_gpu.h) then
+    cd GPU; /bin/sh Install.sh 1; cd ..
+  fi
+  if (test -e cg_cmm_parms.h) then
+    cd USER-CG-CMM; /bin/sh Install.sh 1; cd ..
+  fi
+  if (test -e pair_lj_cut_cuda.h) then
+    cd USER-CUDA; /bin/sh Install.sh 1; cd ..
+  fi
+
+elif (test $1 = 0) then
+
+  if (test -e pair_lj_cut_opt.h) then
+    cd OPT; /bin/sh Install.sh 0; /bin/sh Install.sh 1; cd ..
+  fi
+  if (test -e pair_lj_cut_gpu.h) then
+    cd GPU; /bin/sh Install.sh 0; /bin/sh Install.sh 1; cd ..
+  fi
+  if (test -e cg_cmm_parms.h) then
+    cd USER-CG-CMM; /bin/sh Install.sh 0; /bin/sh Install.sh 1; cd ..
+  fi
+  if (test -e pair_lj_cut_cuda.h) then
+    cd USER-CUDA; /bin/sh Install.sh 0; /bin/sh Install.sh 1; cd ..
+  fi
+
+fi
--- a/src/GPU/Install.sh
+++ b/src/GPU/Install.sh
@ -1,7 +1,6 @@
 # Install/unInstall package files in LAMMPS
-# edit Makefile.package to include/exclude GPU library
-# do not copy gayberne files if non-GPU version does not exist
-# do not copy charmm files if non-GPU version does not exist
+# edit Makefile.package to include/exclude GPU info
+# do not install child files if parent does not exist

 if (test $1 = 1) then

@ -10,17 +9,9 @@ if (test $1 = 1) then
    sed -i -e 's/[^ \t]*gpu_[^ \t]*) //' ../Makefile.package
    sed -i -e 's|^PKG_PATH =[ \t]*|&-L../../lib/gpu |' ../Makefile.package
    sed -i -e 's|^PKG_LIB =[ \t]*|&-lgpu |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(gpu_SYSPATH) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(gpu_SYSINC) |' ../Makefile.package
    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(gpu_SYSLIB) |' ../Makefile.package
-  fi
-  
-  if (test -e ../pppm.cpp) then
-    cp pppm_gpu.cpp ..
-    cp pppm_gpu_single.cpp ..
-    cp pppm_gpu_double.cpp ..
-    cp pppm_gpu.h ..
-    cp pppm_gpu_single.h ..
-    cp pppm_gpu_double.h ..
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(gpu_SYSPATH) |' ../Makefile.package
  fi
  
  if (test -e ../pair_gayberne.cpp) then
@ -54,12 +45,24 @@ if (test $1 = 1) then
    cp pair_cg_cmm_coul_msm_gpu.h ..
  fi

+  if (test -e ../pppm.cpp) then
+    cp pppm_gpu.cpp ..
+    cp pppm_gpu_single.cpp ..
+    cp pppm_gpu_double.cpp ..
+    cp pppm_gpu.h ..
+    cp pppm_gpu_single.h ..
+    cp pppm_gpu_double.h ..
+  fi
+
  cp pair_lj_cut_gpu.cpp ..
  cp pair_morse_gpu.cpp ..
  cp pair_lj96_cut_gpu.cpp ..
  cp pair_lj_expand_gpu.cpp ..
  cp pair_lj_cut_coul_cut_gpu.cpp ..
  cp pair_lj_cut_tgpu.cpp ..
+
+  cp fix_gpu.cpp ..
+
  cp pair_lj_cut_gpu.h ..
  cp pair_morse_gpu.h ..
  cp pair_lj96_cut_gpu.h ..
@ -67,7 +70,6 @@ if (test $1 = 1) then
  cp pair_lj_cut_coul_cut_gpu.h ..
  cp pair_lj_cut_tgpu.h ..
  
-  cp fix_gpu.cpp ..
  cp fix_gpu.h ..
  cp gpu_extra.h ..

@ -98,6 +100,7 @@ elif (test $1 = 0) then
  rm ../pair_cg_cmm_coul_long_gpu.cpp
  rm ../pair_cg_cmm_coul_msm.cpp
  rm ../pair_cg_cmm_coul_msm_gpu.cpp
+
  rm ../fix_gpu.cpp
  rm ../pair_omp_gpu.cpp

@ -118,6 +121,7 @@ elif (test $1 = 0) then
  rm ../pair_cg_cmm_coul_long_gpu.h
  rm ../pair_cg_cmm_coul_msm.h
  rm ../pair_cg_cmm_coul_msm_gpu.h
+
  rm ../fix_gpu.h
  rm ../gpu_extra.h
  rm ../pair_omp_gpu.h
--- a/src/GRANULAR/fix_pour.h
+++ b/src/GRANULAR/fix_pour.h
@ -28,6 +28,7 @@ class FixPour : public Fix {
  friend class PairGranHertzHistory;
  friend class PairGranHooke;
  friend class PairGranHookeHistory;
+  friend class PairGranHookeCuda;

 public:
  FixPour(class LAMMPS *, int, char **);
--- a/src/GRANULAR/pair_gran_hooke_history.h
+++ b/src/GRANULAR/pair_gran_hooke_history.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PairGranHookeHistory : public Pair {
 public:
  PairGranHookeHistory(class LAMMPS *);
-  ~PairGranHookeHistory();
+  virtual ~PairGranHookeHistory();
  virtual void compute(int, int);
  virtual void settings(int, char **);
  void coeff(int, char **);
--- a/src/KSPACE/Install.sh
+++ b/src/KSPACE/Install.sh
@ -1,7 +1,4 @@
 # Install/unInstall package files in LAMMPS
-# for unInstall, also unInstall/Install OPT package if installed
-#   so it will remove OPT files that depend on KSPACE files,
-#   then replace others

 if (test $1 = 1) then

@ -63,8 +60,4 @@ elif (test $1 = 0) then
  rm ../remap.h
  rm ../remap_wrap.h

-  if (test -e ../pair_lj_charmm_coul_long_opt.h) then
-    cd ../OPT; sh Install.sh 0; sh Install.sh 1
-  fi
-
 fi
--- a/src/KSPACE/pair_born_coul_long.h
+++ b/src/KSPACE/pair_born_coul_long.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PairBornCoulLong : public Pair {
 public:
  PairBornCoulLong(class LAMMPS *);
-  ~PairBornCoulLong();
+  virtual ~PairBornCoulLong();
  void compute(int, int);
  void settings(int, char **);
  void coeff(int, char **);
@ -40,7 +40,7 @@ class PairBornCoulLong : public Pair {
  double single(int, int, int, int, double, double, double, double &);
  void *extract(char *, int &);

- private:
+ protected:
  double cut_lj_global;
  double **cut_lj,**cut_ljsq;
  double cut_coul,cut_coulsq;
--- a/src/KSPACE/pair_buck_coul_long.h
+++ b/src/KSPACE/pair_buck_coul_long.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PairBuckCoulLong : public Pair {
 public:
  PairBuckCoulLong(class LAMMPS *);
-  ~PairBuckCoulLong();
+  virtual ~PairBuckCoulLong();
  void compute(int, int);
  void settings(int, char **);
  void coeff(int, char **);
@ -40,7 +40,7 @@ class PairBuckCoulLong : public Pair {
  double single(int, int, int, int, double, double, double, double &);
  void *extract(char *, int &);

- private:
+ protected:
  double cut_lj_global;
  double **cut_lj,**cut_ljsq;
  double cut_coul,cut_coulsq;
--- a/src/KSPACE/pppm.h
+++ b/src/KSPACE/pppm.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PPPM : public KSpace {
 public:
  PPPM(class LAMMPS *, int, char **);
-  ~PPPM();
+  virtual ~PPPM();
  void init();
  void setup();
  void compute(int, int);
--- a/src/MAKE/Makefile.g++
+++ b/src/MAKE/Makefile.g++
@ -38,7 +38,7 @@ MPI_LIB =	-lmpich -lpthread
 # PATH = path for FFT library
 # LIB = name of FFT library

-FFT_INC =       -DFFT_FFTW
+FFT_INC =       -DFFT_FFTW 
 FFT_PATH = 
 FFT_LIB =	-lfftw

@ -51,21 +51,30 @@ JPG_INC =
 JPG_PATH = 	
 JPG_LIB =	

-# additional system libraries needed by LAMMPS package libraries
+# additional system settings needed by LAMMPS package libraries
 # these settings are IGNORED if the corresponding LAMMPS package
 #   (e.g. gpu, meam) is NOT included in the LAMMPS build
-# SYSLIB = names of libraries
-# SYSPATH = paths of libraries
+# SYSINC = settings to compile with
+# SYSLIB = libraries to link with
+# SYSPATH = paths to libraries
+
+gpu_SYSINC =
+meam_SYSINC =
+reax_SYSINC =
+user-atc_SYSINC =
+user-cuda_SYSINC = -I/usr/local/cuda/include -DCUDA -DCUDA_ARCH=20 -DFFT_CUFFT

 gpu_SYSLIB =       -lcudart -lcuda
 meam_SYSLIB =      -lgfortran
 reax_SYSLIB =      -lgfortran
 user-atc_SYSLIB =  -lblas -llapack
+user-cuda_SYSLIB = -lcudart -lcuda

 gpu_SYSPATH =      -L/usr/local/cuda/lib64
 meam_SYSPATH =
 reax_SYSPATH =
 user-atc_SYSPATH = 	
+user-cuda_SYSPATH = 	

 # ---------------------------------------------------------------------
 # build rules and dependencies
@ -73,7 +82,7 @@ user-atc_SYSPATH =

 include	Makefile.package

-EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC)
+EXTRA_INC = $(LMP_INC) $(PKG_INC) $(MPI_INC) $(FFT_INC) $(JPG_INC) $(PKG_SYSINC)
 EXTRA_PATH = $(PKG_PATH) $(MPI_PATH) $(FFT_PATH) $(JPG_PATH) $(PKG_SYSPATH)
 EXTRA_LIB = $(PKG_LIB) $(MPI_LIB) $(FFT_LIB) $(JPG_LIB) $(PKG_SYSLIB)

--- a/src/MANYBODY/Install.sh
+++ b/src/MANYBODY/Install.sh
@ -1,7 +1,4 @@
 # Install/unInstall package files in LAMMPS
-# for unInstall, also unInstall/Install OPT package if installed
-#   so it will remove OPT files that depend on MANYBODY files,
-#   then replace others

 if (test $1 = 1) then

@ -27,10 +24,6 @@ if (test $1 = 1) then
  cp pair_tersoff.h ..
  cp pair_tersoff_zbl.h ..

-  if (test -e ../pair_lj_cut_opt.h) then
-    cd ../OPT; sh Install.sh 1
-  fi
-
 elif (test $1 = 0) then

  rm ../fix_qeq_comb.cpp
@ -55,8 +48,4 @@ elif (test $1 = 0) then
  rm ../pair_tersoff.h
  rm ../pair_tersoff_zbl.h

-  if (test -e ../pair_eam_opt.h) then
-    cd ../OPT; sh Install.sh 0; sh Install.sh 1
-  fi
-
 fi
--- a/src/MEAM/Install.sh
+++ b/src/MEAM/Install.sh
@ -1,5 +1,5 @@
 # Install/unInstall package files in LAMMPS
-# edit Makefile.package to include/exclude MEAM library
+# edit Makefile.package to include/exclude MEAM info

 if (test $1 = 1) then

@ -9,8 +9,9 @@ if (test $1 = 1) then
    sed -i -e 's|^PKG_INC =[ \t]*|&-I../../lib/meam |' ../Makefile.package
    sed -i -e 's|^PKG_PATH =[ \t]*|&-L../../lib/meam |' ../Makefile.package
    sed -i -e 's|^PKG_LIB =[ \t]*|&-lmeam |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(meam_SYSPATH) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(meam_SYSINC) |' ../Makefile.package
    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(meam_SYSLIB) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(meam_SYSPATH) |' ../Makefile.package
  fi

  cp pair_meam.cpp ..
--- a/src/MOLECULE/atom_vec_angle.h
+++ b/src/MOLECULE/atom_vec_angle.h
@ -27,23 +27,24 @@ namespace LAMMPS_NS {
 class AtomVecAngle : public AtomVec {
 public:
  AtomVecAngle(class LAMMPS *, int, char **);
+  virtual ~AtomVecAngle() {}
  void grow(int);
  void grow_reset();
  void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
  int pack_reverse(int, int, double *);
  void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
  int pack_border_hybrid(int, int *, double *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
  int unpack_border_hybrid(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
  int size_restart();
  int pack_restart(int, double *);
  int unpack_restart(double *);
@ -52,7 +53,7 @@ class AtomVecAngle : public AtomVec {
  int data_atom_hybrid(int, char **);
  bigint memory_usage();

- private:
+ protected:
  int *tag,*type,*mask,*image;
  double **x,**v,**f;
  int *molecule;
--- a/src/MOLECULE/atom_vec_full.h
+++ b/src/MOLECULE/atom_vec_full.h
@ -27,23 +27,24 @@ namespace LAMMPS_NS {
 class AtomVecFull : public AtomVec {
 public:
  AtomVecFull(class LAMMPS *, int, char **);
+  virtual ~AtomVecFull() {}
  void grow(int);
  void grow_reset();
  void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
  int pack_reverse(int, int, double *);
  void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
  int pack_border_hybrid(int, int *, double *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
  int unpack_border_hybrid(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
  int size_restart();
  int pack_restart(int, double *);
  int unpack_restart(double *);
@ -52,7 +53,7 @@ class AtomVecFull : public AtomVec {
  int data_atom_hybrid(int, char **);
  bigint memory_usage();

- private:
+ protected:
  int *tag,*type,*mask,*image;
  double **x,**v,**f;
  double *q;
--- a/src/MOLECULE/pair_lj_charmm_coul_charmm.h
+++ b/src/MOLECULE/pair_lj_charmm_coul_charmm.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class PairLJCharmmCoulCharmm : public Pair {
 public:
  PairLJCharmmCoulCharmm(class LAMMPS *);
-  ~PairLJCharmmCoulCharmm();
+  virtual ~PairLJCharmmCoulCharmm();
  virtual void compute(int, int);
  void settings(int, char **);
  void coeff(int, char **);
--- a/src/Makefile
+++ b/src/Makefile
@ -149,7 +149,7 @@ yes-%:
 	  echo "Package $(@:yes-%=%) does not exist"; \
 	else \
 	  echo "Installing package $(@:yes-%=%)"; \
-	  cd $(YESDIR); $(SHELL) Install.sh 1; \
+	  cd $(YESDIR); $(SHELL) Install.sh 1; cd ..; $(SHELL) Depend.sh 1; \
 	fi;

 no-%:
@ -157,7 +157,7 @@ no-%:
 	  echo "Package $(@:no-%=%) does not exist"; \
 	else \
 	  echo "Uninstalling package $(@:no-%=%), ignore errors"; \
-	  cd $(NODIR); $(SHELL) Install.sh 0; cd ..; \
+	  cd $(NODIR); $(SHELL) Install.sh 0; cd ..; $(SHELL) Depend.sh 0; \
        fi;

 # status = list differences between src and package files
--- a/src/Makefile.package.empty
+++ b/src/Makefile.package.empty
@ -5,5 +5,6 @@ PKG_INC =
 PKG_PATH =  
 PKG_LIB =   

-PKG_SYSPATH = 
+PKG_SYSINC = 
 PKG_SYSLIB =  
+PKG_SYSPATH = 
--- a/src/OPT/Install.sh
+++ b/src/OPT/Install.sh
@ -1,5 +1,5 @@
 # Install/unInstall package files in LAMMPS
-# do not copy eam and charmm files if non-OPT versions do not exist
+# do not install child files if parent does not exist

 if (test $1 = 1) then

--- a/src/OPT/pair_eam_opt.h
+++ b/src/OPT/pair_eam_opt.h
@ -193,6 +193,7 @@ void PairEAMOpt::eval()

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
+      j &= NEIGHMASK;

      double delx = xtmp - xx[j].x;
      double dely = ytmp - xx[j].y;
@ -269,7 +270,8 @@ void PairEAMOpt::eval()

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
-      
+      j &= NEIGHMASK;
+
      double delx = xtmp - xx[j].x;
      double dely = ytmp - xx[j].y;
      double delz = ztmp - xx[j].z;
--- a/src/OPT/pair_lj_charmm_coul_long_opt.h
+++ b/src/OPT/pair_lj_charmm_coul_long_opt.h
@ -66,7 +66,7 @@ void PairLJCharmmCoulLongOpt::eval()
    double _pad[2];
  } fast_alpha_t;
  
-  int i,j,ii,jj,inum,jnum,itype,jtype,itable;
+  int i,j,ii,jj,inum,jnum,itype,jtype,itable,sbindex;
  double fraction,table;
  double r,r2inv,r6inv,forcecoul,forcelj,factor_coul,factor_lj;
  double grij,expm2,prefactor,t,erfc;
@ -132,8 +132,9 @@ void PairLJCharmmCoulLongOpt::eval()

    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
-      
-      if (j <= NEIGHMASK) {
+      sbindex = sbmask(j);
+
+      if (sbindex == 0) {
 	double delx = xtmp - xx[j].x;
 	double dely = ytmp - xx[j].y;
 	double delz = ztmp - xx[j].z;
@ -219,8 +220,8 @@ void PairLJCharmmCoulLongOpt::eval()
 	}

      } else {
-	factor_lj = special_lj[sbmask(j)];
-	factor_coul = special_coul[sbmask(j)];
+	factor_lj = special_lj[sbindex];
+	factor_coul = special_coul[sbindex];
 	j &= NEIGHMASK;

 	double delx = xtmp - xx[j].x;
--- a/src/OPT/pair_lj_cut_opt.h
+++ b/src/OPT/pair_lj_cut_opt.h
@ -54,7 +54,8 @@ void PairLJCutOpt::eval()
    double _pad[2];
  } fast_alpha_t;
  
-  int i,j,ii,jj,inum,jnum,itype,jtype;
+  int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
+  double factor_lj;
  double evdwl = 0.0;
  
  double** __restrict__ x = atom->x;
@ -106,9 +107,9 @@ void PairLJCutOpt::eval()
    
    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
-      double factor_lj;
+      sbindex = sbmask(j);

-      if (j <= NEIGHMASK) {
+      if (sbindex == 0) {
 	double delx = xtmp - xx[j].x;
 	double dely = ytmp - xx[j].y;
 	double delz = ztmp - xx[j].z;
@ -141,7 +142,7 @@ void PairLJCutOpt::eval()
 	}

      } else {
-	factor_lj = special_lj[sbmask(j)];
+	factor_lj = special_lj[sbindex];
 	j &= NEIGHMASK;

 	double delx = xtmp - xx[j].x;
--- a/src/OPT/pair_morse_opt.h
+++ b/src/OPT/pair_morse_opt.h
@ -55,7 +55,8 @@ void PairMorseOpt::eval()
    double _pad[2];
  } fast_alpha_t;
  
-  int i,j,ii,jj,inum,jnum,itype,jtype;
+  int i,j,ii,jj,inum,jnum,itype,jtype,sbindex;
+  double factor_lj;
  double evdwl = 0.0;
  
  double** __restrict__ x = atom->x;
@ -107,9 +108,9 @@ void PairMorseOpt::eval()
   
    for (jj = 0; jj < jnum; jj++) {
      j = jlist[jj];
-      double factor_lj;
+      sbindex = sbmask(j);

-      if (j <= NEIGHMASK) {
+      if (sbindex == 0) {
 	double delx = xtmp - xx[j].x;
 	double dely = ytmp - xx[j].y;
 	double delz = ztmp - xx[j].z;
@ -140,7 +141,7 @@ void PairMorseOpt::eval()
 	}

      } else {
-	factor_lj = special_lj[sbmask(j)];
+	factor_lj = special_lj[sbindex];
 	j &= NEIGHMASK;
 	
 	double delx = xtmp - xx[j].x;
--- a/src/PERI/atom_vec_peri.cpp
+++ b/src/PERI/atom_vec_peri.cpp
@ -240,7 +240,7 @@ int AtomVecPeri::pack_comm_hybrid(int n, int *list, double *buf)
  m = 0;
  for (i = 0; i < n; i++) {
    j = list[i];
-    buf[m++] = s0[i];
+    buf[m++] = s0[j];
  }
  return m;
 }
--- a/src/POEMS/Install.sh
+++ b/src/POEMS/Install.sh
@ -1,5 +1,5 @@
 # Install/unInstall package files in LAMMPS
-# edit Makefile.package to include/exclude POEMS library
+# edit Makefile.package to include/exclude POEMS info

 if (test $1 = 1) then

--- a/src/REAX/Install.sh
+++ b/src/REAX/Install.sh
@ -1,5 +1,5 @@
 # Install/unInstall package files in LAMMPS
-# edit Makefile.package to include/exclude REAX library
+# edit Makefile.package to include/exclude REAX info

 if (test $1 = 1) then

@ -9,8 +9,9 @@ if (test $1 = 1) then
    sed -i -e 's|^PKG_INC =[ \t]*|&-I../../lib/reax |' ../Makefile.package
    sed -i -e 's|^PKG_PATH =[ \t]*|&-L../../lib/reax |' ../Makefile.package
    sed -i -e 's|^PKG_LIB =[ \t]*|&-lreax |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(reax_SYSPATH) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(reax_SYSINC) |' ../Makefile.package
    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(reax_SYSLIB) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(reax_SYSPATH) |' ../Makefile.package
  fi

  cp pair_reax.cpp ..
--- a/src/USER-ATC/Install.sh
+++ b/src/USER-ATC/Install.sh
@ -1,5 +1,5 @@
 # Install/unInstall package files in LAMMPS
-# edit Makefile.package to include/exclude ATC library
+# edit Makefile.package to include/exclude ATC info

 if (test $1 = 1) then

@ -9,8 +9,9 @@ if (test $1 = 1) then
    sed -i -e 's|^PKG_INC =[ \t]*|&-I../../lib/atc |' ../Makefile.package
    sed -i -e 's|^PKG_PATH =[ \t]*|&-L../../lib/atc |' ../Makefile.package
    sed -i -e 's|^PKG_LIB =[ \t]*|&-latc |' ../Makefile.package
-    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(user-atc_SYSPATH) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSINC =[ \t]*|&$(user-atc_SYSINC) |' ../Makefile.package
    sed -i -e 's|^PKG_SYSLIB =[ \t]*|&$(user-atc_SYSLIB) |' ../Makefile.package
+    sed -i -e 's|^PKG_SYSPATH =[ \t]*|&$(user-atc_SYSPATH) |' ../Makefile.package
  fi

  cp fix_atc.h ..
--- a/src/USER-CG-CMM/Install.sh
+++ b/src/USER-CG-CMM/Install.sh
@ -1,4 +1,5 @@
 # Install/unInstall package files in LAMMPS
+# do not install child files if parent does not exist

 if (test $1 = 1) then

@ -7,6 +8,11 @@ if (test $1 = 1) then
    cp angle_cg_cmm.cpp ..
  fi

+  if (test -e ../pppm.cpp) then
+    cp pair_cg_cmm_coul_long.cpp ..
+    cp pair_cg_cmm_coul_long.h ..
+  fi
+
  cp cg_cmm_parms.h ..
  cp cg_cmm_parms.cpp ..

@ -17,15 +23,10 @@ if (test $1 = 1) then
  cp pair_cg_cmm_coul_cut.cpp ..
  cp pair_cg_cmm_coul_cut.h ..

-  if (test -e ../pppm.cpp) then
-    cp pair_cg_cmm_coul_long.cpp ..
-    cp pair_cg_cmm_coul_long.h ..
-  fi
-
 elif (test $1 = 0) then

-  rm -f ../angle_cg_cmm.h
-  rm -f ../angle_cg_cmm.cpp
+  rm ../angle_cg_cmm.h
+  rm ../angle_cg_cmm.cpp

  rm ../cg_cmm_parms.h
  rm ../cg_cmm_parms.cpp
@ -37,7 +38,7 @@ elif (test $1 = 0) then
  rm ../pair_cg_cmm_coul_cut.cpp
  rm ../pair_cg_cmm_coul_cut.h

-  rm -f ../pair_cg_cmm_coul_long.cpp
-  rm -f ../pair_cg_cmm_coul_long.h
+  rm ../pair_cg_cmm_coul_long.cpp
+  rm ../pair_cg_cmm_coul_long.h

 fi
--- a/src/USER-EFF/atom_vec_electron.cpp
+++ b/src/USER-EFF/atom_vec_electron.cpp
@ -237,7 +237,7 @@ int AtomVecElectron::pack_comm_hybrid(int n, int *list, double *buf)
  m = 0;
  for (i = 0; i < n; i++) {
    j = list[i];
-    buf[m++] = eradius[i];
+    buf[m++] = eradius[j];
  }
  return m;
 }
--- a/src/accelerator.h
+++ b/src/accelerator.h
@ -0,0 +1,73 @@
+/* ----------------------------------------------------------------------
+   LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
+   http://lammps.sandia.gov, Sandia National Laboratories
+   Steve Plimpton, sjplimp@sandia.gov
+
+   Copyright (2003) Sandia Corporation.  Under the terms of Contract
+   DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
+   certain rights in this software.  This software is distributed under 
+   the GNU General Public License.
+
+   See the README file in the top-level LAMMPS directory.
+------------------------------------------------------------------------- */
+
+// dummy interface to USER-CUDA
+// used when USER-CUDA is not installed
+
+#ifndef LMP_ACCELERATOR_H
+#define LMP_ACCELERATOR_H
+
+#include "comm.h"
+#include "modify.h"
+#include "verlet.h"
+
+namespace LAMMPS_NS {
+
+class Cuda {
+ public:
+  int cuda_exists;                     // always 0 in this dummy version
+  int oncpu;                           // Atom::sort skips GPU download/upload when nonzero
+  int neighbor_decide_by_integrator;
+  // initialize every public flag so no caller reads an indeterminate value
+  Cuda(class LAMMPS *) : cuda_exists(0), oncpu(1), neighbor_decide_by_integrator(0) {}
+  ~Cuda() {}
+  void setDevice(class LAMMPS *) {}
+  void accelerator(int, char **) {}
+  void evsetup_eatom_vatom(int, int) {}
+  void downloadAll() {}
+  void uploadAll() {}
+};
+
+class CommCuda : public Comm {   // dummy stand-in: adds no members or overrides to Comm
+ public:
+ CommCuda(class LAMMPS *lmp) : Comm(lmp) {}   // forwards lmp to the Comm constructor
+  ~CommCuda() {}
+};
+
+class DomainCuda : public Domain {   // dummy stand-in; NOTE(review): this header does not include domain.h itself - confirm Domain is fully defined here
+ public:
+ DomainCuda(class LAMMPS *lmp) : Domain(lmp) {}   // forwards lmp to the Domain constructor
+  ~DomainCuda() {}
+};
+
+class NeighborCuda : public Neighbor {   // dummy stand-in; NOTE(review): this header does not include neighbor.h itself - confirm Neighbor is fully defined here
+ public:
+ NeighborCuda(class LAMMPS *lmp) : Neighbor(lmp) {}   // forwards lmp to the Neighbor constructor
+  ~NeighborCuda() {}
+};
+
+class ModifyCuda : public Modify {   // dummy stand-in: adds no members or overrides to Modify
+ public:
+ ModifyCuda(class LAMMPS *lmp) : Modify(lmp) {}   // forwards lmp to the Modify constructor
+  ~ModifyCuda() {}
+};
+ 
+class VerletCuda : public Verlet {   // dummy stand-in: adds no members or overrides to Verlet
+ public:
+ VerletCuda(class LAMMPS *lmp, int narg, char **arg) : Verlet(lmp,narg,arg) {}   // forwards all arguments to the Verlet constructor
+  ~VerletCuda() {}
+};
+
+}
+
+#endif
--- a/src/atom.cpp
+++ b/src/atom.cpp
@ -12,6 +12,7 @@
 ------------------------------------------------------------------------- */

 #include "mpi.h"
+#include "math.h"
 #include "stdio.h"
 #include "stdlib.h"
 #include "string.h"
@ -30,6 +31,7 @@
 #include "update.h"
 #include "domain.h"
 #include "group.h"
+#include "accelerator.h"
 #include "memory.h"
 #include "error.h"

@ -38,6 +40,9 @@ using namespace LAMMPS_NS;
 #define DELTA 1
 #define DELTA_MEMSTR 1024
 #define EPSILON 1.0e-6
+#define CUDA_CHUNK 3000
+
+enum{NOACCEL,OPT,GPU,USERCUDA};     // same as lammps.cpp

 #define MIN(A,B) ((A) < (B)) ? (A) : (B)
 #define MAX(A,B) ((A) > (B)) ? (A) : (B)
@ -241,7 +246,7 @@ void Atom::settings(Atom *old)
   called from input script, restart file, replicate
 ------------------------------------------------------------------------- */

-void Atom::create_avec(const char *style, int narg, char **arg)
+void Atom::create_avec(const char *style, int narg, char **arg, char *suffix)
 {
  delete [] atom_style;
  if (avec) delete avec;
@ -256,10 +261,20 @@ void Atom::create_avec(const char *style, int narg, char **arg)
  rmass_flag = radius_flag = omega_flag = torque_flag = angmom_flag = 0;
  vfrac_flag = spin_flag = eradius_flag = ervel_flag = erforce_flag = 0;

-  avec = new_avec(style,narg,arg);
-  int n = strlen(style) + 1;
-  atom_style = new char[n];
-  strcpy(atom_style,style);
+  int sflag;
+  avec = new_avec(style,narg,arg,suffix,sflag);
+
+  if (sflag) {
+    char estyle[256];
+    sprintf(estyle,"%s/%s",style,suffix);
+    int n = strlen(estyle) + 1;
+    atom_style = new char[n];
+    strcpy(atom_style,estyle);
+  } else {
+    int n = strlen(style) + 1;
+    atom_style = new char[n];
+    strcpy(atom_style,style);
+  }

  // if molecular system, default is to have array map

@ -268,11 +283,30 @@ void Atom::create_avec(const char *style, int narg, char **arg)
 }

 /* ----------------------------------------------------------------------
-   generate an AtomVec class
+   generate an AtomVec class, first with suffix appended
 ------------------------------------------------------------------------- */

-AtomVec *Atom::new_avec(const char *style, int narg, char **arg)
+AtomVec *Atom::new_avec(const char *style, int narg, char **arg,
+			char *suffix, int &sflag)
 {
+  if (suffix && lmp->offaccel == 0) {
+    sflag = 1;
+    char estyle[256];
+    sprintf(estyle,"%s/%s",style,suffix);
+
+    if (0) return NULL;
+
+#define ATOM_CLASS
+#define AtomStyle(key,Class) \
+    else if (strcmp(estyle,#key) == 0) return new Class(lmp,narg,arg);
+#include "style_atom.h"
+#undef AtomStyle
+#undef ATOM_CLASS
+
+  }
+
+  sflag = 0;
+
  if (0) return NULL;

 #define ATOM_CLASS
@ -282,6 +316,7 @@ AtomVec *Atom::new_avec(const char *style, int narg, char **arg)
 #undef ATOM_CLASS

  else error->all("Invalid atom style");
+
  return NULL;
 }

@ -1298,6 +1333,11 @@ void Atom::sort()

  nextsort = (update->ntimestep/sortfreq)*sortfreq + sortfreq;

+  // download data from GPU if necessary
+
+  if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu) 
+    lmp->cuda->downloadAll();
+
  // re-setup sort bins if needed

  if (domain->box_change) setup_sort_bins();
@ -1373,6 +1413,11 @@ void Atom::sort()
    current[empty] = permute[empty];
  }

+  // upload data back to GPU if necessary
+
+  if (lmp->accelerator == USERCUDA && !lmp->cuda->oncpu)
+    lmp->cuda->uploadAll();
+
  // sanity check that current = permute

  //int flag = 0;
@ -1389,12 +1434,25 @@ void Atom::sort()

 void Atom::setup_sort_bins()
 {
-  // binsize = user setting or 1/2 of neighbor cutoff
-  // neighbor cutoff can be 0.0
+  // binsize = user setting or default
+  // default = 1/2 of neighbor cutoff for non-CUDA
+  //           CUDA_CHUNK atoms/proc for CUDA
+  // check if neighbor cutoff = 0.0

  double binsize;
  if (userbinsize > 0.0) binsize = userbinsize;
-  else binsize = 0.5 * neighbor->cutneighmax;
+  else if (lmp->accelerator == USERCUDA) {
+    if (domain->dimension == 3) {
+      double vol = (domain->boxhi[0]-domain->boxlo[0]) * 
+	(domain->boxhi[1]-domain->boxlo[1]) * 
+	(domain->boxhi[2]-domain->boxlo[2]);
+      binsize = pow(1.0*CUDA_CHUNK/natoms*vol,1.0/3.0);
+    } else {
+      double area = (domain->boxhi[0]-domain->boxlo[0]) * 
+	(domain->boxhi[1]-domain->boxlo[1]);
+      binsize = pow(1.0*CUDA_CHUNK/natoms*area,1.0/2.0);
+    }
+  } else binsize = 0.5 * neighbor->cutneighmax;
  if (binsize == 0.0) error->all("Atom sorting has bin size = 0.0");

  double bininv = 1.0/binsize;
--- a/src/atom.h
+++ b/src/atom.h
@ -114,8 +114,8 @@ class Atom : protected Pointers {
  ~Atom();

  void settings(class Atom *);
-  void create_avec(const char *, int, char **);
-  class AtomVec *new_avec(const char *, int, char **);
+  void create_avec(const char *, int, char **, char *suffix = NULL);
+  class AtomVec *new_avec(const char *, int, char **, char *, int &);
  void init();
  void setup();

@ -155,6 +155,9 @@ class Atom : protected Pointers {

  void *extract(char *);

+  inline int* get_map_array() {return map_array;};   // returns the map_array pointer itself, not a copy of the array
+  inline int get_map_size() {return map_tag_max+1;};   // returns map_tag_max+1; presumably the length of map_array - confirm
+
  bigint memory_usage();
  int memcheck(const char *);

--- a/src/atom_vec.h
+++ b/src/atom_vec.h
@ -38,6 +38,9 @@ class AtomVec : protected Pointers {
  int size_data_bonus;                 // number of values in Bonus line
  int xcol_data;                       // column (1-N) where x is in Atom line

+  int cudable;                         // 1 if atom style is CUDA-enabled
+  int *maxsend;                        // CUDA-specific variable
+
  AtomVec(class LAMMPS *, int, char **);
  virtual ~AtomVec() {}
  virtual void init();
--- a/src/atom_vec_atomic.h
+++ b/src/atom_vec_atomic.h
@ -27,22 +27,22 @@ namespace LAMMPS_NS {
 class AtomVecAtomic : public AtomVec {
 public:
  AtomVecAtomic(class LAMMPS *, int, char **);
-  ~AtomVecAtomic() {}
+  virtual ~AtomVecAtomic() {}
  void grow(int);
  void grow_reset();
  void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
  int pack_reverse(int, int, double *);
  void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
  int size_restart();
  int pack_restart(int, double *);
  int unpack_restart(double *);
@ -50,7 +50,7 @@ class AtomVecAtomic : public AtomVec {
  void data_atom(double *, int, char **);
  bigint memory_usage();

- private:
+ protected:
  int *tag,*type,*mask,*image;
  double **x,**v,**f;
 };
--- a/src/atom_vec_charge.h
+++ b/src/atom_vec_charge.h
@ -27,23 +27,24 @@ namespace LAMMPS_NS {
 class AtomVecCharge : public AtomVec {
 public:
  AtomVecCharge(class LAMMPS *, int, char **);
+  virtual ~AtomVecCharge() {}
  void grow(int);
  void grow_reset();
  void copy(int, int, int);
-  int pack_comm(int, int *, double *, int, int *);
-  int pack_comm_vel(int, int *, double *, int, int *);
-  void unpack_comm(int, int, double *);
-  void unpack_comm_vel(int, int, double *);
+  virtual int pack_comm(int, int *, double *, int, int *);
+  virtual int pack_comm_vel(int, int *, double *, int, int *);
+  virtual void unpack_comm(int, int, double *);
+  virtual void unpack_comm_vel(int, int, double *);
  int pack_reverse(int, int, double *);
  void unpack_reverse(int, int *, double *);
-  int pack_border(int, int *, double *, int, int *);
-  int pack_border_vel(int, int *, double *, int, int *);
+  virtual int pack_border(int, int *, double *, int, int *);
+  virtual int pack_border_vel(int, int *, double *, int, int *);
  int pack_border_hybrid(int, int *, double *);
-  void unpack_border(int, int, double *);
-  void unpack_border_vel(int, int, double *);
+  virtual void unpack_border(int, int, double *);
+  virtual void unpack_border_vel(int, int, double *);
  int unpack_border_hybrid(int, int, double *);
-  int pack_exchange(int, double *);
-  int unpack_exchange(double *);
+  virtual int pack_exchange(int, double *);
+  virtual int unpack_exchange(double *);
  int size_restart();
  int pack_restart(int, double *);
  int unpack_restart(double *);
@ -52,7 +53,7 @@ class AtomVecCharge : public AtomVec {
  int data_atom_hybrid(int, char **);
  bigint memory_usage();

- private:
+ protected:
  int *tag,*type,*mask,*image;
  double **x,**v,**f;
  double *q;
--- a/src/atom_vec_hybrid.cpp
+++ b/src/atom_vec_hybrid.cpp
@ -34,7 +34,7 @@ using namespace LAMMPS_NS;
 AtomVecHybrid::AtomVecHybrid(LAMMPS *lmp, int narg, char **arg) :
  AtomVec(lmp, narg, arg)
 {
-  int i,k;
+  int i,k,dummy;

  if (narg < 1) error->all("Illegal atom_style command");

@ -50,7 +50,7 @@ AtomVecHybrid::AtomVecHybrid(LAMMPS *lmp, int narg, char **arg) :
 	error->all("Atom style hybrid cannot use same atom style twice");
    if (strcmp(arg[i],"hybrid") == 0) 
      error->all("Atom style hybrid cannot have hybrid as an argument");
-    styles[i] = atom->new_avec(arg[i],0,NULL);
+    styles[i] = atom->new_avec(arg[i],0,NULL,NULL,dummy);
    keywords[i] = new char[strlen(arg[i])+1];
    strcpy(keywords[i],arg[i]);
  }
--- a/src/atom_vec_sphere.cpp
+++ b/src/atom_vec_sphere.cpp
@ -374,8 +374,8 @@ int AtomVecSphere::pack_comm_hybrid(int n, int *list, double *buf)
  m = 0;
  for (i = 0; i < n; i++) {
    j = list[i];
-    buf[m++] = radius[i];
-    buf[m++] = rmass[i];
+    buf[m++] = radius[j];
+    buf[m++] = rmass[j];
  }
  return m;
 }
--- a/src/comm.cpp
+++ b/src/comm.cpp
@ -400,7 +400,7 @@ void Comm::setup()
   other per-atom attributes may also be sent via pack/unpack routines
 ------------------------------------------------------------------------- */

-void Comm::forward_comm()
+void Comm::forward_comm(int dummy)
 {
  int n;
  MPI_Request request;
--- a/src/comm.h
+++ b/src/comm.h
@ -35,27 +35,27 @@ class Comm : protected Pointers {
  int ***grid2proc;                 // which proc owns i,j,k loc in 3d grid

  Comm(class LAMMPS *);
-  ~Comm();
+  virtual ~Comm();

-  void init();
-  void set_procs();                 // setup 3d grid of procs
-  void setup();                     // setup 3d communication pattern
-  void forward_comm();              // forward communication of atom coords
-  void reverse_comm();              // reverse communication of forces
-  void exchange();                  // move atoms to new procs
-  void borders();                   // setup list of atoms to communicate
+  virtual void init();
+  virtual void set_procs();                 // setup 3d grid of procs
+  virtual void setup();                     // setup 3d communication pattern
+  virtual void forward_comm(int dummy = 0); // forward communication of atom coords
+  virtual void reverse_comm();              // reverse communication of forces
+  virtual void exchange();                  // move atoms to new procs
+  virtual void borders();                   // setup list of atoms to communicate

-  void forward_comm_pair(class Pair *);        // forward comm from a Pair
-  void reverse_comm_pair(class Pair *);        // reverse comm from a Pair
-  void forward_comm_fix(class Fix *);          // forward comm from a Fix
-  void reverse_comm_fix(class Fix *);          // reverse comm from a Fix
-  void forward_comm_compute(class Compute *);  // forward comm from a Compute
-  void reverse_comm_compute(class Compute *);  // reverse comm from a Compute
+  virtual void forward_comm_pair(class Pair *);        // forward comm from a Pair
+  virtual void reverse_comm_pair(class Pair *);        // reverse comm from a Pair
+  virtual void forward_comm_fix(class Fix *);          // forward comm from a Fix
+  virtual void reverse_comm_fix(class Fix *);          // reverse comm from a Fix
+  virtual void forward_comm_compute(class Compute *);  // forward comm from a Compute
+  virtual void reverse_comm_compute(class Compute *);  // reverse comm from a Compute

-  void set(int, char **);           // set communication style
-  bigint memory_usage();
+  virtual void set(int, char **);           // set communication style
+  virtual bigint memory_usage();

- private:
+ protected:
  int style;                        // single vs multi-type comm
  int nswap;                        // # of swaps to perform
  int need[3];                      // procs I need atoms from in each dim
@ -87,18 +87,18 @@ class Comm : protected Pointers {
  int maxsend,maxrecv;              // current size of send/recv buffer
  int maxforward,maxreverse;        // max # of datums in forward/reverse comm

-  void procs2box();                 // map procs to 3d box
-  void cross(double, double, double,
+  virtual void procs2box();                 // map procs to 3d box
+  virtual void cross(double, double, double,
 	     double, double, double,
 	     double &, double &, double &);    // cross product
-  void grow_send(int,int);          // reallocate send buffer
-  void grow_recv(int);              // free/allocate recv buffer
-  void grow_list(int, int);         // reallocate one sendlist
-  void grow_swap(int);              // grow swap and multi arrays
-  void allocate_swap(int);          // allocate swap arrays
-  void allocate_multi(int);         // allocate multi arrays
-  void free_swap();                 // free swap arrays
-  void free_multi();                // free multi arrays
+  virtual void grow_send(int,int);          // reallocate send buffer
+  virtual void grow_recv(int);              // free/allocate recv buffer
+  virtual void grow_list(int, int);         // reallocate one sendlist
+  virtual void grow_swap(int);              // grow swap and multi arrays
+  virtual void allocate_swap(int);          // allocate swap arrays
+  virtual void allocate_multi(int);         // allocate multi arrays
+  virtual void free_swap();                 // free swap arrays
+  virtual void free_multi();                // free multi arrays
 };

 }
--- a/src/compute.cpp
+++ b/src/compute.cpp
@ -68,6 +68,7 @@ Compute::Compute(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)

  timeflag = 0;
  comm_forward = comm_reverse = 0;
+  cudable = 0;

  invoked_scalar = invoked_vector = invoked_array = -1;
  invoked_peratom = invoked_local = -1;
--- a/src/compute.h
+++ b/src/compute.h
@ -77,6 +77,8 @@ class Compute : protected Pointers {
  int comm_forward;   // size of forward communication (0 if none)
  int comm_reverse;   // size of reverse communication (0 if none)

+  int cudable;        // 1 if compute is CUDA-enabled
+
  Compute(class LAMMPS *, int, char **);
  virtual ~Compute();
  void modify_params(int, char **);
--- a/src/compute_pressure.h
+++ b/src/compute_pressure.h
@ -27,13 +27,13 @@ namespace LAMMPS_NS {
 class ComputePressure : public Compute {
 public:
  ComputePressure(class LAMMPS *, int, char **);
-  ~ComputePressure();
+  virtual ~ComputePressure();
  void init();
  double compute_scalar();
  void compute_vector();
  void reset_extra_compute_fix(char *);

- private:
+ protected:
  double boltz,nktv2p,inv_volume;
  int nvirial,dimension;
  double **vptr;
--- a/src/compute_temp_partial.h
+++ b/src/compute_temp_partial.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class ComputeTempPartial : public Compute {
 public:
  ComputeTempPartial(class LAMMPS *, int, char **);
-  ~ComputeTempPartial();
+  virtual ~ComputeTempPartial();
  void init();
  double compute_scalar();
  void compute_vector();
@ -39,7 +39,7 @@ class ComputeTempPartial : public Compute {
  void restore_bias_all();
  double memory_usage();

- private:
+ protected:
  int xflag,yflag,zflag;
  int fix_dof;
  double tfactor;
--- a/src/domain.h
+++ b/src/domain.h
@ -85,14 +85,14 @@ class Domain : protected Pointers {
  class Region **regions;                  // list of defined Regions

  Domain(class LAMMPS *);
-  ~Domain();
-  void init();
+  virtual ~Domain();
+  virtual void init();
  void set_initial_box();
-  void set_global_box();
-  void set_lamda_box();
-  void set_local_box();
-  void reset_box();
-  void pbc();
+  virtual void set_global_box();
+  virtual void set_lamda_box();
+  virtual void set_local_box();
+  virtual void reset_box();
+  virtual void pbc();
  void remap(double *, int &);
  void remap(double *);
  void remap_near(double *, double *);
@ -107,8 +107,8 @@ class Domain : protected Pointers {
  void set_boundary(int, char **);
  void print_box(const char *);

-  void lamda2x(int);
-  void x2lamda(int);
+  virtual void lamda2x(int);
+  virtual void x2lamda(int);
  void lamda2x(double *, double *);
  void x2lamda(double *, double *);
  void bbox(double *, double *, double *, double *);
--- a/src/fix.cpp
+++ b/src/fix.cpp
@ -58,6 +58,7 @@ Fix::Fix(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
  time_depend = 0;
  create_attribute = 0;
  restart_pbc = 0;
+  cudable_comm = 0;

  scalar_flag = vector_flag = array_flag = 0;
  peratom_flag = local_flag = 0;
--- a/src/fix.h
+++ b/src/fix.h
@ -41,6 +41,7 @@ class Fix : protected Pointers {
                                 //      setting when a new atom is created
  int restart_pbc;               // 1 if fix moves atoms (except integrate)
                                 //      so write_restart must remap to PBC
+  int cudable_comm;              // 1 if fix has CUDA-enabled communication

  int scalar_flag;               // 0/1 if compute_scalar() function exists
  int vector_flag;               // 0/1 if compute_vector() function exists
--- a/src/fix_adapt.cpp
+++ b/src/fix_adapt.cpp
@ -54,7 +54,7 @@ FixAdapt::FixAdapt(LAMMPS *lmp, int narg, char **arg) : Fix(lmp, narg, arg)
      nadapt++;
      iarg += 6;
    } else if (strcmp(arg[iarg],"kspace") == 0) {
-      if (iarg+6 > narg) error->all("Illegal fix adapt command");
+      if (iarg+2 > narg) error->all("Illegal fix adapt command");
      nadapt++;
      iarg += 2;
    } else if (strcmp(arg[iarg],"atom") == 0) {
--- a/src/fix_nve.h
+++ b/src/fix_nve.h
@ -27,13 +27,14 @@ namespace LAMMPS_NS {
 class FixNVE : public Fix {
 public:
  FixNVE(class LAMMPS *, int, char **);
+  virtual ~FixNVE() {}
  int setmask();
  virtual void init();
  virtual void initial_integrate(int);
  virtual void final_integrate();
-  void initial_integrate_respa(int, int, int);
-  void final_integrate_respa(int, int);
-  void reset_dt();
+  virtual void initial_integrate_respa(int, int, int);
+  virtual void final_integrate_respa(int, int);
+  virtual void reset_dt();

 protected:
  double dtv,dtf;
--- a/src/fix_rigid_nve.cpp
+++ b/src/fix_rigid_nve.cpp
@ -58,8 +58,8 @@ void FixRigidNVE::setup(int vflag)
  
  double mbody[3];
  for (int ibody = 0; ibody < nbody; ibody++) {
-    MathExtra::matvec_rows(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   angmom[ibody],mbody);
+    MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+				angmom[ibody],mbody);
    MathExtra::quatvec(quat[ibody],mbody,conjqm[ibody]);
    conjqm[ibody][0] *= 2.0;
    conjqm[ibody][1] *= 2.0;
@ -99,8 +99,8 @@ void FixRigidNVE::initial_integrate(int vflag)
    torque[ibody][1] *= tflag[ibody][1];
    torque[ibody][2] *= tflag[ibody][2];
    
-    MathExtra::matvec_rows(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   torque[ibody],tbody);
+    MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+				torque[ibody],tbody);
    MathExtra::quatvec(quat[ibody],tbody,fquat);
    
    conjqm[ibody][0] += dtf2 * fquat[0];
@ -123,8 +123,8 @@ void FixRigidNVE::initial_integrate(int vflag)
    MathExtra::q_to_exyz(quat[ibody],ex_space[ibody],ey_space[ibody],
 			 ez_space[ibody]);
    MathExtra::invquatvec(quat[ibody],conjqm[ibody],mbody);
-    MathExtra::matvec_cols(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   mbody,angmom[ibody]);
+    MathExtra::matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+		      mbody,angmom[ibody]);
    
    angmom[ibody][0] *= 0.5;
    angmom[ibody][1] *= 0.5;
@ -252,8 +252,8 @@ void FixRigidNVE::final_integrate()
    torque[ibody][1] *= tflag[ibody][1];
    torque[ibody][2] *= tflag[ibody][2];
    
-    MathExtra::matvec_rows(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   torque[ibody],tbody);
+    MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+				torque[ibody],tbody);
    MathExtra::quatvec(quat[ibody],tbody,fquat);
    
    conjqm[ibody][0] += dtf2 * fquat[0];
@ -262,8 +262,8 @@ void FixRigidNVE::final_integrate()
    conjqm[ibody][3] += dtf2 * fquat[3];
    
    MathExtra::invquatvec(quat[ibody],conjqm[ibody],mbody);
-    MathExtra::matvec_cols(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   mbody,angmom[ibody]);
+    MathExtra::matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+		      mbody,angmom[ibody]);
    
    angmom[ibody][0] *= 0.5;
    angmom[ibody][1] *= 0.5;
--- a/src/fix_rigid_nvt.cpp
+++ b/src/fix_rigid_nvt.cpp
@ -164,8 +164,8 @@ void FixRigidNVT::setup(int vflag)
  
  double mbody[3];
  for (int ibody = 0; ibody < nbody; ibody++) {
-    MathExtra::matvec_rows(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   angmom[ibody],mbody);
+    MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+				angmom[ibody],mbody);
    MathExtra::quatvec(quat[ibody],mbody,conjqm[ibody]);
    conjqm[ibody][0] *= 2.0;
    conjqm[ibody][1] *= 2.0;
@ -225,8 +225,8 @@ void FixRigidNVT::initial_integrate(int vflag)
    
    // step 1.3 - apply torque (body coords) to quaternion momentum

-    MathExtra::matvec_rows(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   torque[ibody],tbody);
+    MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+				torque[ibody],tbody);
    MathExtra::quatvec(quat[ibody],tbody,fquat);
    
    conjqm[ibody][0] += dtf2 * fquat[0];
@ -253,8 +253,8 @@ void FixRigidNVT::initial_integrate(int vflag)
    MathExtra::q_to_exyz(quat[ibody],ex_space[ibody],ey_space[ibody],
 			 ez_space[ibody]);
    MathExtra::invquatvec(quat[ibody],conjqm[ibody],mbody);
-    MathExtra::matvec_cols(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   mbody,angmom[ibody]);
+    MathExtra::matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+		      mbody,angmom[ibody]);
    
    angmom[ibody][0] *= 0.5;
    angmom[ibody][1] *= 0.5;
@ -398,8 +398,8 @@ void FixRigidNVT::final_integrate()
    
    // convert torque to the body frame 
    
-    MathExtra::matvec_rows(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   torque[ibody],tbody);
+    MathExtra::transpose_matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+				torque[ibody],tbody);
    
    // compute "force" for quaternion
    
@ -416,8 +416,8 @@ void FixRigidNVT::final_integrate()
    // then convert to the space-fixed frame
    
    MathExtra::invquatvec(quat[ibody],conjqm[ibody],mbody);
-    MathExtra::matvec_cols(ex_space[ibody],ey_space[ibody],ez_space[ibody],
-			   mbody,angmom[ibody]);
+    MathExtra::matvec(ex_space[ibody],ey_space[ibody],ez_space[ibody],
+		      mbody,angmom[ibody]);
    
    angmom[ibody][0] *= 0.5;
    angmom[ibody][1] *= 0.5;
--- a/src/fix_viscous.h
+++ b/src/fix_viscous.h
@ -27,7 +27,7 @@ namespace LAMMPS_NS {
 class FixViscous : public Fix {
 public:
  FixViscous(class LAMMPS *, int, char **);
-  ~FixViscous();
+  virtual ~FixViscous();
  int setmask();
  void init();
  void setup(int);
@ -36,7 +36,7 @@ class FixViscous : public Fix {
  void post_force_respa(int, int, int);
  void min_post_force(int);

- private:
+ protected:
  double *gamma;
  int nlevels_respa;
 };
--- a/src/fix_wall_lj126.h
+++ b/src/fix_wall_lj126.h
@ -34,6 +34,7 @@ class FixWallLJ126 : public FixWall {
  double coeff1[6],coeff2[6],coeff3[6],coeff4[6],offset[6];
 };

-#endif
 }
+
+#endif
 #endif
--- a/src/fix_wall_reflect.h
+++ b/src/fix_wall_reflect.h
@ -27,12 +27,12 @@ namespace LAMMPS_NS {
 class FixWallReflect : public Fix {
 public:
  FixWallReflect(class LAMMPS *, int, char **);
-  ~FixWallReflect();
+  virtual ~FixWallReflect();
  int setmask();
  void init();
  void post_integrate();

- private:
+ protected:
  int nwall;
  int wallwhich[6],wallstyle[6];
  double coord0[6];
--- a/src/force.cpp
+++ b/src/force.cpp
@ -117,23 +117,51 @@ void Force::init()
   create a pair style, called from input script or restart file
 ------------------------------------------------------------------------- */

-void Force::create_pair(const char *style)
+void Force::create_pair(const char *style, char *suffix)
 {
  delete [] pair_style;
  if (pair) delete pair;

-  pair = new_pair(style);
-  int n = strlen(style) + 1;
-  pair_style = new char[n];
-  strcpy(pair_style,style);
+  // try the suffixed style first, then the plain one
+  // sflag comes back 1 only if the suffixed style was the one created
+
+  int sflag;
+  pair = new_pair(style,suffix,sflag);
+
+  // store the name of the style actually created: "style/suffix" or "style"
+  // size the buffer from the strings themselves and format directly into it,
+  //   instead of going through a fixed 256-char scratch array
+
+  if (sflag) {
+    int n = strlen(style) + strlen(suffix) + 2;
+    pair_style = new char[n];
+    sprintf(pair_style,"%s/%s",style,suffix);
+  } else {
+    int n = strlen(style) + 1;
+    pair_style = new char[n];
+    strcpy(pair_style,style);
+  }
 }

 /* ----------------------------------------------------------------------
-   generate a pair class
+   generate a pair class, first with suffix appended
 ------------------------------------------------------------------------- */

-Pair *Force::new_pair(const char *style)
+Pair *Force::new_pair(const char *style, char *suffix, int &sflag)
 {
+  if (suffix && lmp->offaccel == 0) {
+    sflag = 1;
+    char estyle[256];
+    sprintf(estyle,"%s/%s",style,suffix);
+
+    if (0) return NULL;
+
+#define PAIR_CLASS
+#define PairStyle(key,Class) \
+    else if (strcmp(estyle,#key) == 0) return new Class(lmp);
+#include "style_pair.h"
+#undef PairStyle
+#undef PAIR_CLASS
+
+  }
+
+  sflag = 0;
+
  if (strcmp(style,"none") == 0) return NULL;

 #define PAIR_CLASS
@ -143,6 +171,7 @@ Pair *Force::new_pair(const char *style)
 #undef PAIR_CLASS

  else error->all("Invalid pair style");
+
  return NULL;
 }

--- a/src/force.h
+++ b/src/force.h
@ -64,8 +64,8 @@ class Force : protected Pointers {
  ~Force();
  void init();

-  void create_pair(const char *);
-  class Pair *new_pair(const char *);
+  void create_pair(const char *, char *suffix = NULL);
+  class Pair *new_pair(const char *, char *, int &);
  class Pair *pair_match(const char *, int);

  void create_bond(const char *);
--- a/src/group.cpp
+++ b/src/group.cpp
@ -142,6 +142,7 @@ void Group::assign(int narg, char **arg)
    
  // style = region
  // add to group if atom is in region
+  // init all regions via domain->init() to ensure region can perform match()

  if (strcmp(arg[1],"region") == 0) {

@ -149,6 +150,7 @@ void Group::assign(int narg, char **arg)
    
    int iregion = domain->find_region(arg[2]);
    if (iregion == -1) error->all("Group region ID does not exist");
+    domain->init();

    for (i = 0; i < nlocal; i++)
      if (domain->regions[iregion]->match(x[i][0],x[i][1],x[i][2]))
--- a/src/input.cpp
+++ b/src/input.cpp
@ -42,6 +42,7 @@
 #include "neighbor.h"
 #include "special.h"
 #include "variable.h"
+#include "accelerator.h"
 #include "error.h"
 #include "memory.h"

@ -417,6 +418,7 @@ int Input::execute_command()
  else if (!strcmp(command,"shell")) shell();
  else if (!strcmp(command,"variable")) variable_command();

+  else if (!strcmp(command,"accelerator")) accelerator();
  else if (!strcmp(command,"angle_coeff")) angle_coeff();
  else if (!strcmp(command,"angle_style")) angle_style();
  else if (!strcmp(command,"atom_modify")) atom_modify();
@ -801,6 +803,32 @@ void Input::variable_command()
   one function for each LAMMPS-specific input script command
 ------------------------------------------------------------------------- */

+void Input::accelerator()
+{
+  // accelerator settings must be made before the simulation box exists
+
+  if (domain->box_exist)
+    error->all("Accelerator command after simulation box is defined");
+  if (narg < 1) error->all("Illegal accelerator command");
+
+  // "off" and "on" only toggle lmp->offaccel, the flag that
+  //   Atom::new_avec() and Force::new_pair() test before trying a suffixed style
+  // handle them before the -a match test below:
+  //   the -a suffix is restricted to opt, gpu or cuda by the LAMMPS constructor,
+  //   so it can never equal "off" or "on" and the test would reject both
+
+  if (strcmp(arg[0],"off") == 0) {
+    if (narg != 1) error->all("Illegal accelerator command");
+    lmp->offaccel = 1;
+    return;
+  }
+
+  if (strcmp(arg[0],"on") == 0) {
+    if (narg != 1) error->all("Illegal accelerator command");
+    lmp->offaccel = 0;
+    return;
+  }
+
+  // any other keyword names a package and must match the -a switch
+  // asuffix is NULL if no -a switch was given
+
+  if (!lmp->asuffix || (strcmp(lmp->asuffix,arg[0]) != 0))
+    error->all("Accelerator command requires matching command-line -a switch");
+
+  // remaining args are passed to the package, only cuda accepts any
+
+  if (strcmp(arg[0],"cuda") == 0) lmp->cuda->accelerator(narg-1,&arg[1]);
+  else error->all("Illegal accelerator command");
+}
+
+/* ---------------------------------------------------------------------- */
+
 void Input::angle_coeff()
 {
  if (domain->box_exist == 0)
@ -837,7 +865,7 @@ void Input::atom_style()
  if (narg < 1) error->all("Illegal atom_style command");
  if (domain->box_exist) 
    error->all("Atom_style command after simulation box is defined");
-  atom->create_avec(arg[0],narg-1,&arg[1]);
+  atom->create_avec(arg[0],narg-1,&arg[1],lmp->asuffix);
 }

 /* ---------------------------------------------------------------------- */
@ -884,7 +912,7 @@ void Input::communicate()

 void Input::compute()
 {
-  modify->add_compute(narg,arg);
+  modify->add_compute(narg,arg,lmp->asuffix);
 }

 /* ---------------------------------------------------------------------- */
@ -962,7 +990,7 @@ void Input::dump_modify()

 void Input::fix()
 {
-  modify->add_fix(narg,arg);
+  modify->add_fix(narg,arg,lmp->asuffix);
 }

 /* ---------------------------------------------------------------------- */
@ -1132,7 +1160,7 @@ void Input::pair_style()
    force->pair->settings(narg-1,&arg[1]);
    return;
  }
-  force->create_pair(arg[0]);
+  force->create_pair(arg[0],lmp->asuffix);
  if (force->pair) force->pair->settings(narg-1,&arg[1]);
 }

@ -1191,7 +1219,7 @@ void Input::run_style()
 {
  if (domain->box_exist == 0)
    error->all("Run_style command before simulation box is defined");
-  update->create_integrate(narg,arg);
+  update->create_integrate(narg,arg,lmp->asuffix);
 }

 /* ---------------------------------------------------------------------- */
--- a/src/input.h
+++ b/src/input.h
@ -61,7 +61,8 @@ class Input : protected Pointers {
  void shell();
  void variable_command();

-  void angle_coeff();          // LAMMPS commands
+  void accelerator();          // LAMMPS commands
+  void angle_coeff();
  void angle_style();
  void atom_modify();
  void atom_style();
--- a/src/lammps.cpp
+++ b/src/lammps.cpp
@ -27,10 +27,13 @@
 #include "modify.h"
 #include "group.h"
 #include "output.h"
+#include "accelerator.h"
 #include "timer.h"

 using namespace LAMMPS_NS;

+enum{NOACCEL,OPT,GPU,USERCUDA};
+
 /* ----------------------------------------------------------------------
   start up LAMMPS
   allocate fundamental classes (memory, error, universe, input)
@ -54,14 +57,18 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
  int inflag = 0;
  int screenflag = 0;
  int logflag = 0;
+  accelerator = NOACCEL;
+  asuffix = NULL;
+  offaccel = 0;
+  cuda = NULL;
+
  int iarg = 1;

  while (iarg < narg) {
    if (strcmp(arg[iarg],"-partition") == 0 || 
 	strcmp(arg[iarg],"-p") == 0) {
      universe->existflag = 1;
-      if (iarg+2 > narg) 
-	error->universe_all("Invalid command-line argument");
+      if (iarg+2 > narg) error->universe_all("Invalid command-line argument");
      iarg++;
      while (iarg < narg && arg[iarg][0] != '-') {
 	universe->add_world(arg[iarg]);
@ -69,32 +76,37 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
      }
    } else if (strcmp(arg[iarg],"-in") == 0 || 
 	       strcmp(arg[iarg],"-i") == 0) {
-      if (iarg+2 > narg) 
-	error->universe_all("Invalid command-line argument");
+      if (iarg+2 > narg) error->universe_all("Invalid command-line argument");
      inflag = iarg + 1;
      iarg += 2;
    } else if (strcmp(arg[iarg],"-screen") == 0 || 
 	       strcmp(arg[iarg],"-s") == 0) {
-      if (iarg+2 > narg) 
-	error->universe_all("Invalid command-line argument");
+      if (iarg+2 > narg) error->universe_all("Invalid command-line argument");
      screenflag = iarg + 1;
      iarg += 2;
    } else if (strcmp(arg[iarg],"-log") == 0 || 
 	       strcmp(arg[iarg],"-l") == 0) {
-      if (iarg+2 > narg) 
-	error->universe_all("Invalid command-line argument");
+      if (iarg+2 > narg) error->universe_all("Invalid command-line argument");
      logflag = iarg + 1;
      iarg += 2;
    } else if (strcmp(arg[iarg],"-var") == 0 || 
 	       strcmp(arg[iarg],"-v") == 0) {
-      if (iarg+3 > narg) 
-	error->universe_all("Invalid command-line argument");
+      if (iarg+3 > narg) error->universe_all("Invalid command-line argument");
      iarg += 2;
      while (iarg < narg && arg[iarg][0] != '-') iarg++;
    } else if (strcmp(arg[iarg],"-echo") == 0 || 
 	       strcmp(arg[iarg],"-e") == 0) {
-      if (iarg+2 > narg) 
-	error->universe_all("Invalid command-line argument");
+      if (iarg+2 > narg) error->universe_all("Invalid command-line argument");
+      iarg += 2;
+    } else if (strcmp(arg[iarg],"-accel") == 0 || 
+	       strcmp(arg[iarg],"-a") == 0) {
+      if (iarg+2 > narg) error->universe_all("Invalid command-line argument");
+      if (strcmp(arg[iarg+1],"opt") == 0) accelerator = OPT;
+      else if (strcmp(arg[iarg+1],"gpu") == 0) accelerator = GPU;
+      else if (strcmp(arg[iarg+1],"cuda") == 0) accelerator = USERCUDA;
+      else error->universe_all("Invalid command-line argument");
+      asuffix = new char[8];
+      strcpy(asuffix,arg[iarg+1]);
      iarg += 2;
    } else error->universe_all("Invalid command-line argument");
  }
@ -265,6 +277,16 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
  if (mpisize != sizeof(bigint))
      error->all("MPI_LMP_BIGINT and bigint in lmptype.h are not compatible");

+  // check consistency of -a switch with installed packages
+  // for OPT and GPU, no problem if not installed
+  // for USER-CUDA, throw error if not installed
+
+  if (accelerator == USERCUDA) {
+    cuda = new Cuda(this);
+    if (!cuda->cuda_exists)
+      error->all("Command-line switch requires USER-CUDA package be installed");
+  }
+
  // allocate input class now that MPI is fully setup

  input = new Input(this,narg,arg);
@ -285,6 +307,7 @@ LAMMPS::LAMMPS(int narg, char **arg, MPI_Comm communicator)
 LAMMPS::~LAMMPS()
 {
  destroy();
+  if (accelerator == USERCUDA) delete cuda;

  if (universe->nworlds == 1) {
    if (logfile) fclose(logfile);
@ -296,6 +319,8 @@ LAMMPS::~LAMMPS()

  if (world != universe->uworld) MPI_Comm_free(&world);

+  delete [] asuffix;
+
  delete input;
  delete universe;
  delete error;
@ -305,17 +330,28 @@ LAMMPS::~LAMMPS()
 /* ----------------------------------------------------------------------
   allocate single instance of top-level classes
   fundamental classes are allocated in constructor
+   some classes have accelerator variants
 ------------------------------------------------------------------------- */

 void LAMMPS::create()
 {
  atom = new Atom(this);
-  neighbor = new Neighbor(this);
-  comm = new Comm(this);
-  domain = new Domain(this);
+
+  if (accelerator == USERCUDA) neighbor = new NeighborCuda(this);
+  else neighbor = new Neighbor(this);
+
+  if (accelerator == USERCUDA) comm = new CommCuda(this);
+  else comm = new Comm(this);
+
+  if (accelerator == USERCUDA) domain = new DomainCuda(this);
+  else domain = new Domain(this);
+
  group = new Group(this);
  force = new Force(this);    // must be after group, to create temperature
-  modify = new Modify(this);
+
+  if (accelerator == USERCUDA) modify = new ModifyCuda(this);
+  else modify = new Modify(this);
+
  output = new Output(this);  // must be after group, so "all" exists
                              // must be after modify so can create Computes
  update = new Update(this);  // must be after output, force, neighbor
@ -328,6 +364,8 @@ void LAMMPS::create()

 void LAMMPS::init()
 {
+  if (accelerator == USERCUDA) cuda->accelerator(0,NULL);
+ 
  update->init();
  force->init();         // pair must come after update due to minimizer
  domain->init();
--- a/src/lammps.h
+++ b/src/lammps.h
@ -42,6 +42,11 @@ class LAMMPS {
  FILE *screen;                  // screen output
  FILE *logfile;                 // logfile

+  int accelerator;               // accelerator flag
+  char *asuffix;                 // accelerator suffix
+  int offaccel;                  // 1 if accelerator flag currently disabled
+  class Cuda *cuda;              // CUDA accelerator class
+
  LAMMPS(int, char **, MPI_Comm);
  ~LAMMPS();
  void create();
--- a/src/math_extra.h
+++ b/src/math_extra.h
@ -63,8 +63,8 @@ namespace MathExtra {
  inline void transpose_matvec(const double *ex, const double *ey, 
 			       const double *ez, const double *v,
 			       double *ans);
-  inline void transpose_times_diag3(const double mat[3][3], const double*vec,
-                                    double ans[3][3]);
+  inline void transpose_diag3(const double mat[3][3], const double*vec,
+			      double ans[3][3]);
  inline void vecmat(const double *v, const double m[3][3], double *ans);
  inline void scalar_times3(const double f, double m[3][3]); 

@ -91,10 +91,6 @@ namespace MathExtra {
  inline void invquatvec(double *a, double *b, double *c);
  inline void axisangle_to_quat(const double *v, const double angle,
                                double *quat);
-  inline void matvec_rows(double *x, double *y, double *z, 
-			  double *b, double *c);
-  inline void matvec_cols(double *x, double *y, double *z,
-			  double *b, double *c);

  void angmom_to_omega(double *m, double *ex, double *ey, double *ez,
 		       double *idiag, double *w);
@ -418,8 +414,8 @@ void MathExtra::transpose_matvec(const double *ex, const double *ey,
   transposed matrix times diagonal matrix
 ------------------------------------------------------------------------- */

-void MathExtra::transpose_times_diag3(const double m[3][3],
-                                      const double *d, double ans[3][3])
+void MathExtra::transpose_diag3(const double m[3][3], const double *d, 
+				double ans[3][3])
 {
  ans[0][0] = m[0][0]*d[0];
  ans[0][1] = m[1][0]*d[1];
@ -562,30 +558,6 @@ void MathExtra::axisangle_to_quat(const double *v, const double angle,
  quat[3] = v[2]*sina;
 }

-/* ----------------------------------------------------------------------
-  matvec_rows: c = Ab, where rows of A are x, y, z
------------------------------------------------------------------------- */
-
-void MathExtra::matvec_rows(double *x, double *y, double *z, 
-			    double *b, double *c)
-{
-  c[0] = x[0]*b[0] + x[1]*b[1] + x[2]*b[2];
-  c[1] = y[0]*b[0] + y[1]*b[1] + y[2]*b[2];
-  c[2] = z[0]*b[0] + z[1]*b[1] + z[2]*b[2];
-}
-
-/* ----------------------------------------------------------------------
-  matvec_cols: c = Ab, where columns of A are x, y, z
------------------------------------------------------------------------- */
-
-void MathExtra::matvec_cols(double *x, double *y, double *z,
-			    double *b, double *c)
-{
-  c[0] = x[0]*b[0] + y[0]*b[1] + z[0]*b[2];
-  c[1] = x[1]*b[0] + y[1]*b[1] + z[1]*b[2];
-  c[2] = x[2]*b[0] + y[2]*b[1] + z[2]*b[2];
-}
-
 /* ----------------------------------------------------------------------
   Apply principal rotation generator about x to rotation matrix m
 ------------------------------------------------------------------------- */
--- a/src/modify.cpp
+++ b/src/modify.cpp
@ -274,7 +274,7 @@ void Modify::setup_pre_force(int vflag)
    for (int i = 0; i < n_pre_force; i++)
      fix[list_pre_force[i]]->setup_pre_force(vflag);
  else if (update->whichflag == 2)
-    for (int i = 0; i < n_pre_force; i++)
+    for (int i = 0; i < n_min_pre_force; i++)
      fix[list_min_pre_force[i]]->min_setup_pre_force(vflag);
 }

@ -591,7 +591,7 @@ int Modify::min_reset_ref()
   add a new fix or replace one with same ID
 ------------------------------------------------------------------------- */

-void Modify::add_fix(int narg, char **arg)
+void Modify::add_fix(int narg, char **arg, char *suffix)
 {
  if (domain->box_exist == 0) 
    error->all("Fix command before simulation box is defined");
@ -636,17 +636,39 @@ void Modify::add_fix(int narg, char **arg)
    }
  }

-  // create the Fix
+  // create the Fix, first with suffix appended

-  if (0) return;         // dummy line to enable else-if macro expansion
+  int success = 0;
+
+  if (suffix && lmp->offaccel == 0) {
+    char estyle[256];                                       // scratch buffer for "<style>/<suffix>"
+    snprintf(estyle,sizeof(estyle),"%s/%s",arg[2],suffix);  // bounded write: an overlong style name is truncated (and then matches no key) rather than overflowing estyle
+    success = 1;                                            // optimistic; reset to 0 by the trailing else when no suffixed style matches
+
+    if (0) return;

 #define FIX_CLASS
 #define FixStyle(key,Class) \
-  else if (strcmp(arg[2],#key) == 0) fix[ifix] = new Class(lmp,narg,arg);
+    else if (strcmp(estyle,#key) == 0) fix[ifix] = new Class(lmp,narg,arg);
 #include "style_fix.h"
+#undef FixStyle
 #undef FIX_CLASS

-  else error->all("Invalid fix style");
+    else success = 0;
+  }
+
+  if (!success) {
+    if (0) return;
+
+#define FIX_CLASS
+#define FixStyle(key,Class) \
+    else if (strcmp(arg[2],#key) == 0) fix[ifix] = new Class(lmp,narg,arg);
+#include "style_fix.h"
+#undef FixStyle
+#undef FIX_CLASS
+
+    else error->all("Invalid fix style");
+  }

  // set fix mask values and increment nfix (if new)

@ -740,7 +762,7 @@ int Modify::find_fix(const char *id)
   add a new compute
 ------------------------------------------------------------------------- */

-void Modify::add_compute(int narg, char **arg)
+void Modify::add_compute(int narg, char **arg, char *suffix)
 {
  if (narg < 3) error->all("Illegal compute command");

@ -758,18 +780,41 @@ void Modify::add_compute(int narg, char **arg)
      memory->srealloc(compute,maxcompute*sizeof(Compute *),"modify:compute");
  }

-  // create the Compute
+  // create the Compute, first with suffix appended

-  if (0) return;         // dummy line to enable else-if macro expansion
+  int success = 0;
+
+  if (suffix && lmp->offaccel == 0) {
+    char estyle[256];                                       // scratch buffer for "<style>/<suffix>"
+    snprintf(estyle,sizeof(estyle),"%s/%s",arg[2],suffix);  // bounded write: an overlong style name is truncated (and then matches no key) rather than overflowing estyle
+    success = 1;                                            // optimistic; reset to 0 by the trailing else when no suffixed style matches
+
+    if (0) return;

 #define COMPUTE_CLASS
 #define ComputeStyle(key,Class) \
-  else if (strcmp(arg[2],#key) == 0) \
-    compute[ncompute] = new Class(lmp,narg,arg);
+    else if (strcmp(estyle,#key) == 0) \
+      compute[ncompute] = new Class(lmp,narg,arg);
 #include "style_compute.h"
+#undef ComputeStyle
 #undef COMPUTE_CLASS

-  else error->all("Invalid compute style");
+    else success = 0;
+  }
+
+  if (!success) {
+    if (0) return;
+
+#define COMPUTE_CLASS
+#define ComputeStyle(key,Class) \
+    else if (strcmp(arg[2],#key) == 0) \
+      compute[ncompute] = new Class(lmp,narg,arg);
+#include "style_compute.h"
+#undef ComputeStyle
+#undef COMPUTE_CLASS
+
+    else error->all("Invalid compute style");
+  }

  ncompute++;
 }
--- a/src/modify.h
+++ b/src/modify.h
@ -40,22 +40,22 @@ class Modify : protected Pointers {
  class Compute **compute;

  Modify(class LAMMPS *);
-  ~Modify();
-  void init();
-  void setup(int);
-  void setup_pre_exchange();
-  void setup_pre_force(int);
-  void initial_integrate(int);
-  void post_integrate();
+  virtual ~Modify();
+  virtual void init();
+  virtual void setup(int);
+  virtual void setup_pre_exchange();
+  virtual void setup_pre_force(int);
+  virtual void initial_integrate(int);
+  virtual void post_integrate();
  void pre_decide();
-  void pre_exchange();
-  void pre_neighbor();
-  void pre_force(int);
-  void post_force(int);
-  void final_integrate();
-  void end_of_step();
-  double thermo_energy();
-  void post_run();
+  virtual void pre_exchange();
+  virtual void pre_neighbor();
+  virtual void pre_force(int);
+  virtual void post_force(int);
+  virtual void final_integrate();
+  virtual void end_of_step();
+  virtual double thermo_energy();
+  virtual void post_run();

  void setup_pre_force_respa(int, int);
  void initial_integrate_respa(int, int, int);
@ -79,12 +79,12 @@ class Modify : protected Pointers {
  double max_alpha(double *);
  int min_dof();

-  void add_fix(int, char **);
+  void add_fix(int, char **, char *suffix = NULL);
  void modify_fix(int, char **);
  void delete_fix(const char *);
  int find_fix(const char *);

-  void add_compute(int, char **);
+  void add_compute(int, char **, char *suffix = NULL);
  void modify_compute(int, char **);
  void delete_compute(char *);
  int find_compute(char *);
@ -98,7 +98,7 @@ class Modify : protected Pointers {

  bigint memory_usage();

- private:
+ protected:

  // lists of fixes to apply at different stages of timestep

--- a/Show More
+++ b/Show More