From 978269a4cedca192dac0ec6d2ad0de0e6e0085d4 Mon Sep 17 00:00:00 2001
From: sjplimp <sjplimp@f3b2605a-c512-4ea7-a41b-209d697bcdaa>
Date: Thu, 11 Sep 2014 16:47:30 +0000
Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12492
 f3b2605a-c512-4ea7-a41b-209d697bcdaa

---
 examples/gpu/README           | 45 +++++++++++++++--------------------
 examples/gpu/in.gpu.melt.2.5  |  4 +---
 examples/gpu/in.gpu.melt.5.0  |  4 +---
 examples/gpu/in.gpu.phosphate |  6 ++---
 examples/gpu/in.gpu.rhodo     |  6 ++---
 5 files changed, 25 insertions(+), 40 deletions(-)

diff --git a/examples/gpu/README b/examples/gpu/README
index 8fb8db00ab..9feddec662 100644
--- a/examples/gpu/README
+++ b/examples/gpu/README
@@ -1,35 +1,28 @@
-These are input scripts designed for use with the GPU package.
+These example scripts can be run with the GPU package, assuming
+you built LAMMPS with the package and the precision you want.
 
-To run them, you must first build LAMMPS with the GPU package
-installed, following the steps explained in Section 2.3 of
-doc/Section_start.html and lib/gpu/README.  An overview of building
-and running LAMMPS with the GPU package is given in Section 5.6 of
-doc/Section_accelerate.html.  Note that you can choose the precision
-at which computations are performed on the GPU in the build process.
+You can run any of the scripts as follows.  You can also reset the
+x,y,z variables in the command line to change the size of the problem.
 
-Note that lines such as this in each of the input scripts:
+With the GPU package on 1 GPU:
 
-package 	gpu force/neigh 0 1 1
+lmp_machine -sf gpu < in.gpu.melt.2.5
+mpirun -np 8 lmp_machine -sf gpu < in.gpu.phosphate
 
-are set for running on a compute node with 2 GPUs.  If you
-have a single GPU, you should comment out the line, since
-the default is 1 GPU per compute node.
+With the GPU package on 2 GPUs:
 
-The scripts can be run in the usual manner:
+mpirun -np 4 lmp_machine -sf gpu -pk gpu 2 tpa 8 < in.gpu.melt.5.0
+mpirun -np 12 lmp_machine -sf gpu -pk gpu 2 < in.gpu.rhodo
 
-lmp_g++ < in.gpu.melt.2.5
-lmp_g++ < in.gpu.melt.5.0
-lmp_g++ < in.gpu.phosphate
-lmp_g++ < in.gpu.rhodo
+CPU-only:
 
-mpirun -np 4 lmp_g++ < in.gpu.melt.2.5
-mpirun -np 4 lmp_g++ < in.gpu.melt.5.0
-mpirun -np 4 lmp_g++ < in.gpu.phosphate
-mpirun -np 4 lmp_g++ < in.gpu.rhodo
+lmp_machine < in.gpu.melt.2.5
+mpirun -np 4 lmp_machine < in.gpu.melt.5.0
+mpirun -np 8 lmp_machine < in.gpu.rhodo
 
-The first set of commmands will run a single MPI task using a single
-GPU (even if you have 2 GPUs).
+Note that with the GPU package you can run more MPI tasks per node
+than there are GPUs per node; the MPI tasks then share the GPUs.
 
-The second set of commands will run 4 MPI tasks, with 2 MPI tasks per
-GPU (if you have 2 GPUs), or 4 MPI tasks per GPU (if you have a single
-GPU).
+Also note that when running the in.gpu.melt.5.0 problem on the GPU,
+which has a long cutoff, the package gpu "tpa" setting should be > 1
+(e.g. 8) for best performance.
diff --git a/examples/gpu/in.gpu.melt.2.5 b/examples/gpu/in.gpu.melt.2.5
index be59cc1099..65a5ad8099 100644
--- a/examples/gpu/in.gpu.melt.2.5
+++ b/examples/gpu/in.gpu.melt.2.5
@@ -1,7 +1,5 @@
 # 3d Lennard-Jones melt
 
-package 	gpu 1
-
 variable	x index 2
 variable	y index 2
 variable	z index 2
@@ -21,7 +19,7 @@ mass		1 1.0
 
 velocity	all create 1.44 87287 loop geom
 
-pair_style	lj/cut/gpu 2.5
+pair_style	lj/cut 2.5
 pair_coeff	1 1 1.0 1.0 2.5
 
 neighbor	0.3 bin
diff --git a/examples/gpu/in.gpu.melt.5.0 b/examples/gpu/in.gpu.melt.5.0
index 00a65a8374..e22f38520d 100644
--- a/examples/gpu/in.gpu.melt.5.0
+++ b/examples/gpu/in.gpu.melt.5.0
@@ -1,7 +1,5 @@
 # 3d Lennard-Jones melt
 
-package 	gpu 1 tpa 8
-
 variable	x index 2
 variable	y index 2
 variable	z index 2
@@ -21,7 +19,7 @@ mass		1 1.0
 
 velocity	all create 1.44 87287 loop geom
 
-pair_style	lj/cut/gpu 5.0
+pair_style	lj/cut 5.0
 pair_coeff	1 1 1.0 1.0 5.0
 
 neighbor	0.3 bin
diff --git a/examples/gpu/in.gpu.phosphate b/examples/gpu/in.gpu.phosphate
index a9b9679ec0..d074650047 100644
--- a/examples/gpu/in.gpu.phosphate
+++ b/examples/gpu/in.gpu.phosphate
@@ -1,14 +1,13 @@
 # GI-System
 
 units metal
-package		gpu 1
 
 atom_style      charge 
 read_data 	data.phosphate
 
 replicate 	3 3 3
 
-pair_style      lj/cut/coul/long/gpu 15.0
+pair_style      lj/cut/coul/long 15.0
 
 pair_coeff 1 1  0.0 0.29
 pair_coeff 1 2  0.0 0.29
@@ -17,7 +16,7 @@ pair_coeff 2 2  0.0 0.29
 pair_coeff 2 3  0.004251 1.91988674
 pair_coeff 3 3  0.012185 2.91706967
 
-kspace_style    pppm/gpu 1e-5
+kspace_style    pppm 1e-5
 
 neighbor	2.0 bin
 
@@ -27,4 +26,3 @@ timestep 0.001
 
 fix 		1 all npt temp 400 400 0.01 iso 1000.0 1000.0 1.0
 run 		200
-unfix 		1
diff --git a/examples/gpu/in.gpu.rhodo b/examples/gpu/in.gpu.rhodo
index d8eaf46b39..dd25f27224 100644
--- a/examples/gpu/in.gpu.rhodo
+++ b/examples/gpu/in.gpu.rhodo
@@ -1,7 +1,5 @@
 # Rhodopsin model
 
-package 	gpu 1
-
 variable	x index 2
 variable	y index 2
 variable	z index 2
@@ -14,9 +12,9 @@ bond_style      harmonic
 angle_style     charmm 
 dihedral_style  charmm 
 improper_style  harmonic 
-pair_style      lj/charmm/coul/long/gpu 8.0 10.0 
+pair_style      lj/charmm/coul/long 8.0 10.0 
 pair_modify     mix arithmetic 
-kspace_style    pppm/gpu 1e-4 
+kspace_style    pppm 1e-4 
 
 read_data       data.rhodo