From 978269a4cedca192dac0ec6d2ad0de0e6e0085d4 Mon Sep 17 00:00:00 2001 From: sjplimp Date: Thu, 11 Sep 2014 16:47:30 +0000 Subject: [PATCH] git-svn-id: svn://svn.icms.temple.edu/lammps-ro/trunk@12492 f3b2605a-c512-4ea7-a41b-209d697bcdaa --- examples/gpu/README | 45 +++++++++++++++-------------------- examples/gpu/in.gpu.melt.2.5 | 4 +--- examples/gpu/in.gpu.melt.5.0 | 4 +--- examples/gpu/in.gpu.phosphate | 6 ++--- examples/gpu/in.gpu.rhodo | 6 ++--- 5 files changed, 25 insertions(+), 40 deletions(-) diff --git a/examples/gpu/README b/examples/gpu/README index 8fb8db00ab..9feddec662 100644 --- a/examples/gpu/README +++ b/examples/gpu/README @@ -1,35 +1,28 @@ -These are input scripts designed for use with the GPU package. +These example scripts can be run with the GPU package, assuming +you built LAMMPS with the package and the precision you want. -To run them, you must first build LAMMPS with the GPU package -installed, following the steps explained in Section 2.3 of -doc/Section_start.html and lib/gpu/README. An overview of building -and running LAMMPS with the GPU package is given in Section 5.6 of -doc/Section_accelerate.html. Note that you can choose the precision -at which computations are performed on the GPU in the build process. +You can run any of the scripts as follows. You can also reset the +x,y,z variables in the command line to change the size of the problem. -Note that lines such as this in each of the input scripts: +With the GPU package on 1 GPU: -package gpu force/neigh 0 1 1 +lmp_machine -sf gpu < in.gpu.melt.2.5 +mpirun -np 8 lmp_machine -sf gpu < in.gpu.phosphate -are set for running on a compute node with 2 GPUs. If you -have a single GPU, you should comment out the line, since -the default is 1 GPU per compute node. 
+With the GPU package on 2 GPUs: -The scripts can be run in the usual manner: +mpirun -np 4 lmp_machine -sf gpu -pk gpu 2 tpa 8 < in.gpu.melt.5.0 +mpirun -np 12 lmp_machine -sf gpu -pk gpu 2 < in.gpu.rhodo -lmp_g++ < in.gpu.melt.2.5 -lmp_g++ < in.gpu.melt.5.0 -lmp_g++ < in.gpu.phosphate -lmp_g++ < in.gpu.rhodo +CPU-only: -mpirun -np 4 lmp_g++ < in.gpu.melt.2.5 -mpirun -np 4 lmp_g++ < in.gpu.melt.5.0 -mpirun -np 4 lmp_g++ < in.gpu.phosphate -mpirun -np 4 lmp_g++ < in.gpu.rhodo +lmp_machine < in.gpu.melt.2.5 +mpirun -np 4 lmp_machine < in.gpu.melt.5.0 +mpirun -np 8 lmp_machine < in.gpu.rhodo -The first set of commmands will run a single MPI task using a single -GPU (even if you have 2 GPUs). +Note that with the GPU package you can have more MPI tasks +than the number of GPUs (both per node). -The second set of commands will run 4 MPI tasks, with 2 MPI tasks per -GPU (if you have 2 GPUs), or 4 MPI tasks per GPU (if you have a single -GPU). +Also note that when running the in.gpu.melt.5.0 problem on the GPU, +which has a long cutoff, the package gpu "tpa" setting should be > 1 +(e.g. 8) for best performance. 
diff --git a/examples/gpu/in.gpu.melt.2.5 b/examples/gpu/in.gpu.melt.2.5 index be59cc1099..65a5ad8099 100644 --- a/examples/gpu/in.gpu.melt.2.5 +++ b/examples/gpu/in.gpu.melt.2.5 @@ -1,7 +1,5 @@ # 3d Lennard-Jones melt -package gpu 1 - variable x index 2 variable y index 2 variable z index 2 @@ -21,7 +19,7 @@ mass 1 1.0 velocity all create 1.44 87287 loop geom -pair_style lj/cut/gpu 2.5 +pair_style lj/cut 2.5 pair_coeff 1 1 1.0 1.0 2.5 neighbor 0.3 bin diff --git a/examples/gpu/in.gpu.melt.5.0 b/examples/gpu/in.gpu.melt.5.0 index 00a65a8374..e22f38520d 100644 --- a/examples/gpu/in.gpu.melt.5.0 +++ b/examples/gpu/in.gpu.melt.5.0 @@ -1,7 +1,5 @@ # 3d Lennard-Jones melt -package gpu 1 tpa 8 - variable x index 2 variable y index 2 variable z index 2 @@ -21,7 +19,7 @@ mass 1 1.0 velocity all create 1.44 87287 loop geom -pair_style lj/cut/gpu 5.0 +pair_style lj/cut 5.0 pair_coeff 1 1 1.0 1.0 5.0 neighbor 0.3 bin diff --git a/examples/gpu/in.gpu.phosphate b/examples/gpu/in.gpu.phosphate index a9b9679ec0..d074650047 100644 --- a/examples/gpu/in.gpu.phosphate +++ b/examples/gpu/in.gpu.phosphate @@ -1,14 +1,13 @@ # GI-System units metal -package gpu 1 atom_style charge read_data data.phosphate replicate 3 3 3 -pair_style lj/cut/coul/long/gpu 15.0 +pair_style lj/cut/coul/long 15.0 pair_coeff 1 1 0.0 0.29 pair_coeff 1 2 0.0 0.29 @@ -17,7 +16,7 @@ pair_coeff 2 2 0.0 0.29 pair_coeff 2 3 0.004251 1.91988674 pair_coeff 3 3 0.012185 2.91706967 -kspace_style pppm/gpu 1e-5 +kspace_style pppm 1e-5 neighbor 2.0 bin @@ -27,4 +26,3 @@ timestep 0.001 fix 1 all npt temp 400 400 0.01 iso 1000.0 1000.0 1.0 run 200 -unfix 1 diff --git a/examples/gpu/in.gpu.rhodo b/examples/gpu/in.gpu.rhodo index d8eaf46b39..dd25f27224 100644 --- a/examples/gpu/in.gpu.rhodo +++ b/examples/gpu/in.gpu.rhodo @@ -1,7 +1,5 @@ # Rhodopsin model -package gpu 1 - variable x index 2 variable y index 2 variable z index 2 @@ -14,9 +12,9 @@ bond_style harmonic angle_style charmm dihedral_style charmm improper_style 
harmonic -pair_style lj/charmm/coul/long/gpu 8.0 10.0 +pair_style lj/charmm/coul/long 8.0 10.0 pair_modify mix arithmetic -kspace_style pppm/gpu 1e-4 +kspace_style pppm 1e-4 read_data data.rhodo