diff --git a/examples/kokkos/README b/examples/kokkos/README deleted file mode 100644 index 63859d8518..0000000000 --- a/examples/kokkos/README +++ /dev/null @@ -1,42 +0,0 @@ -The in.kokkos input script is a copy of the bench/in.lj script, -but can be run with the KOKKOS package, - -To run it, you must first build LAMMPS with the KOKKOS package -installed, following the steps explained in Section 2.3.4 of -doc/Section_start.html. An overview of building and running LAMMPS -with the KOKKOS package, for different compute-node hardware on your -machine, is given in Section 5.8 of doc/Section_accelerate.html. - -The example log files included in this directory are for a desktop box -with dual hex-core CPUs and 2 GPUs. - -Two executables were built in the following manner: - -make yes-kokkos -make g++ OMP=yes -> lmp_cpu -make cuda CUDA=yes -> lmp_cuda - -Then the following runs were made. The "->" means that the run -produced log.lammps which was then copied to the named log file. - -* MPI-only (non-KOKKOS) runs - -lmp_cpu < in.kokkos -> log.kokkos.date.mpionly.1 -mpirun -np 4 lmp_cpu < in.kokkos -> log.kokkos.date.mpionly.4 - -* OpenMP threaded runs on CPUs only - -lmp_cpu -k on t 1 -sf kk < in.kokkos.half -> log.kokkos.date.cpu.1 -lmp_cpu -k on t 4 -sf kk < in.kokkos -> log.kokkos.date.cpu.4 - -Note that in.kokkos.half was use for one of the runs, which uses the -package command to force the use of half neighbor lists which are -faster when running on just 1 thread. - -* GPU runs on 1 or 2 GPUs - -lmp_cuda -k on t 6 -sf kk < in.kokkos -> log.kokkos.date.gpu.1 -mpirun -np 2 lmp_cuda -k on t 6 -sf kk < in.kokkos -> log.kokkos.date.gpu.2 - -Note that this is a very small problem (32K atoms) to run -on 1 or 2 GPUs. diff --git a/examples/kokkos/in.kokkos b/examples/kokkos/in.kokkos deleted file mode 100644 index 01e12ef8a9..0000000000 --- a/examples/kokkos/in.kokkos +++ /dev/null @@ -1,30 +0,0 @@ -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable yy equal 20*$y -variable zz equal 20*$z - -units lj -atom_style atomic - -lattice fcc 0.8442 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -create_box 1 box -create_atoms 1 box -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 diff --git a/examples/kokkos/in.kokkos.half b/examples/kokkos/in.kokkos.half deleted file mode 100644 index 9847d18ef0..0000000000 --- a/examples/kokkos/in.kokkos.half +++ /dev/null @@ -1,32 +0,0 @@ -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable yy equal 20*$y -variable zz equal 20*$z - -package kokkos neigh half - -units lj -atom_style atomic - -lattice fcc 0.8442 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -create_box 1 box -create_atoms 1 box -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 diff --git a/examples/kokkos/log.kokkos.1Feb14.cpu.1 b/examples/kokkos/log.kokkos.1Feb14.cpu.1 deleted file mode 100644 index 76c5f5747a..0000000000 --- a/examples/kokkos/log.kokkos.1Feb14.cpu.1 +++ /dev/null @@ -1,68 +0,0 @@ -LAMMPS (27 May 2014) -KOKKOS mode is enabled (../lammps.cpp:468) - using 1 OpenMP thread(s) per MPI task -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable xx equal 20*1 -variable yy equal 20*$y -variable yy equal 20*1 -variable zz equal 20*$z -variable zz equal 20*1 - -package kokkos neigh half - -units lj -atom_style atomic - -lattice fcc 0.8442 -Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -region box block 0 20 0 ${yy} 0 ${zz} -region box block 0 20 0 20 0 ${zz} -region box block 0 20 0 20 0 20 -create_box 1 box -Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919) - 1 by 1 by 1 MPI processor grid -create_atoms 1 box -Created 32000 atoms -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 -Memory usage per processor = 7.79551 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 1.44 -6.7733681 0 -4.6134356 -5.0197073 - 100 0.7574531 -5.7585055 0 -4.6223613 0.20726105 -Loop time of 2.29105 on 1 procs (1 MPI x 1 OpenMP) for 100 steps with 32000 atoms - -Pair time (%) = 1.82425 (79.6249) -Neigh time (%) = 0.338632 (14.7806) -Comm time (%) = 0.0366232 (1.59853) -Outpt time (%) = 0.000144005 (0.00628553) -Other time (%) = 0.0914049 (3.98965) - -Nlocal: 32000 ave 32000 max 32000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 19657 ave 19657 max 19657 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 1.20283e+06 ave 1.20283e+06 max 1.20283e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 1202833 -Ave neighs/atom = 37.5885 -Neighbor list builds = 5 -Dangerous builds = 0 diff --git a/examples/kokkos/log.kokkos.1Feb14.cpu.4 b/examples/kokkos/log.kokkos.1Feb14.cpu.4 deleted file mode 100644 index 2b6001025b..0000000000 --- a/examples/kokkos/log.kokkos.1Feb14.cpu.4 +++ /dev/null @@ -1,68 +0,0 @@ -LAMMPS (27 May 2014) -KOKKOS mode is enabled (../lammps.cpp:468) - using 4 OpenMP thread(s) per MPI task -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable xx equal 20*1 -variable yy equal 20*$y -variable yy equal 20*1 -variable zz equal 20*$z -variable zz equal 20*1 - -units lj -atom_style atomic - -lattice fcc 0.8442 -Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -region box block 0 20 0 ${yy} 0 ${zz} -region box block 0 20 0 20 0 ${zz} -region box block 0 20 0 20 0 20 -create_box 1 box -Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919) - 1 by 1 by 1 MPI processor grid -create_atoms 1 box -Created 32000 atoms -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 -Memory usage per processor = 13.2888 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 1.44 -6.7733681 0 -4.6134356 -5.0197073 - 100 0.7574531 -5.7585055 0 -4.6223613 0.20726105 -Loop time of 0.983697 on 4 procs (1 MPI x 4 OpenMP) for 100 steps with 32000 atoms - -Pair time (%) = 0.767155 (77.9869) -Neigh time (%) = 0.14734 (14.9782) -Comm time (%) = 0.041466 (4.21532) -Outpt time (%) = 0.000172138 (0.0174991) -Other time (%) = 0.0275636 (2.80204) - -Nlocal: 32000 ave 32000 max 32000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 19657 ave 19657 max 19657 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 0 ave 0 max 0 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -FullNghs: 2.40567e+06 ave 2.40567e+06 max 2.40567e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2405666 -Ave neighs/atom = 75.1771 -Neighbor list builds = 5 -Dangerous builds = 0 diff --git a/examples/kokkos/log.kokkos.1Feb14.gpu.1 b/examples/kokkos/log.kokkos.1Feb14.gpu.1 deleted file mode 100644 index 8dd9caca4c..0000000000 --- a/examples/kokkos/log.kokkos.1Feb14.gpu.1 +++ /dev/null @@ -1,68 +0,0 @@ -LAMMPS (27 May 2014) -KOKKOS mode is enabled (../lammps.cpp:468) - using 6 OpenMP thread(s) per MPI task -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable xx equal 20*1 -variable yy equal 20*$y -variable yy equal 20*1 -variable zz equal 20*$z -variable zz equal 20*1 - -units lj -atom_style atomic - -lattice fcc 0.8442 -Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -region box block 0 20 0 ${yy} 0 ${zz} -region box block 0 20 0 20 0 ${zz} -region box block 0 20 0 20 0 20 -create_box 1 box -Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919) - 1 by 1 by 1 MPI processor grid -create_atoms 1 box -Created 32000 atoms -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 -Memory usage per processor = 16.9509 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 1.44 -6.7733681 0 -4.6134356 -5.0197073 - 100 0.7574531 -5.7585055 0 -4.6223613 0.20726105 -Loop time of 0.57192 on 6 procs (1 MPI x 6 OpenMP) for 100 steps with 32000 atoms - -Pair time (%) = 0.205416 (35.917) -Neigh time (%) = 0.112468 (19.665) -Comm time (%) = 0.174223 (30.4629) -Outpt time (%) = 0.000159025 (0.0278055) -Other time (%) = 0.0796535 (13.9274) - -Nlocal: 32000 ave 32000 max 32000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 19657 ave 19657 max 19657 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 0 ave 0 max 0 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -FullNghs: 2.40567e+06 ave 2.40567e+06 max 2.40567e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 2405666 -Ave neighs/atom = 75.1771 -Neighbor list builds = 5 -Dangerous builds = 0 diff --git a/examples/kokkos/log.kokkos.1Feb14.gpu.2 b/examples/kokkos/log.kokkos.1Feb14.gpu.2 deleted file mode 100644 index 938485a350..0000000000 --- a/examples/kokkos/log.kokkos.1Feb14.gpu.2 +++ /dev/null @@ -1,68 +0,0 @@ -LAMMPS (27 May 2014) -KOKKOS mode is enabled (../lammps.cpp:468) - using 6 OpenMP thread(s) per MPI task -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable xx equal 20*1 -variable yy equal 20*$y -variable yy equal 20*1 -variable zz equal 20*$z -variable zz equal 20*1 - -units lj -atom_style atomic - -lattice fcc 0.8442 -Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -region box block 0 20 0 ${yy} 0 ${zz} -region box block 0 20 0 20 0 ${zz} -region box block 0 20 0 20 0 20 -create_box 1 box -Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919) - 1 by 1 by 2 MPI processor grid -create_atoms 1 box -Created 32000 atoms -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 -Memory usage per processor = 8.95027 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 1.44 -6.7733681 0 -4.6134356 -5.0197073 - 100 0.7574531 -5.7585055 0 -4.6223613 0.20726105 -Loop time of 0.689608 on 12 procs (2 MPI x 6 OpenMP) for 100 steps with 32000 atoms - -Pair time (%) = 0.210953 (30.5903) -Neigh time (%) = 0.122991 (17.8349) -Comm time (%) = 0.25264 (36.6353) -Outpt time (%) = 0.000259042 (0.0375636) -Other time (%) = 0.102765 (14.9019) - -Nlocal: 16000 ave 16001 max 15999 min -Histogram: 1 0 0 0 0 0 0 0 0 1 -Nghost: 13632.5 ave 13635 max 13630 min -Histogram: 1 0 0 0 0 0 0 0 0 1 -Neighs: 0 ave 0 max 0 min -Histogram: 2 0 0 0 0 0 0 0 0 0 -FullNghs: 1.20283e+06 ave 1.20347e+06 max 1.2022e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 1 - -Total # of neighbors = 2405666 -Ave neighs/atom = 75.1771 -Neighbor list builds = 5 -Dangerous builds = 0 diff --git a/examples/kokkos/log.kokkos.1Feb14.mpionly.1 b/examples/kokkos/log.kokkos.1Feb14.mpionly.1 deleted file mode 100644 index d7763feb76..0000000000 --- a/examples/kokkos/log.kokkos.1Feb14.mpionly.1 +++ /dev/null @@ -1,65 +0,0 @@ -LAMMPS (27 May 2014) - using 1 OpenMP thread(s) per MPI task -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable xx equal 20*1 -variable yy equal 20*$y -variable yy equal 20*1 -variable zz equal 20*$z -variable zz equal 20*1 - -units lj -atom_style atomic - -lattice fcc 0.8442 -Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -region box block 0 20 0 ${yy} 0 ${zz} -region box block 0 20 0 20 0 ${zz} -region box block 0 20 0 20 0 20 -create_box 1 box -Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919) - 1 by 1 by 1 MPI processor grid -create_atoms 1 box -Created 32000 atoms -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 -Memory usage per processor = 8.21387 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 1.44 -6.7733681 0 -4.6134356 -5.0197073 - 100 0.7574531 -5.7585055 0 -4.6223613 0.20726105 -Loop time of 2.57975 on 1 procs (1 MPI x 1 OpenMP) for 100 steps with 32000 atoms - -Pair time (%) = 2.20959 (85.6512) -Neigh time (%) = 0.269136 (10.4326) -Comm time (%) = 0.0252256 (0.977833) -Outpt time (%) = 0.000126123 (0.00488898) -Other time (%) = 0.0756752 (2.93343) - -Nlocal: 32000 ave 32000 max 32000 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Nghost: 19657 ave 19657 max 19657 min -Histogram: 1 0 0 0 0 0 0 0 0 0 -Neighs: 1.20283e+06 ave 1.20283e+06 max 1.20283e+06 min -Histogram: 1 0 0 0 0 0 0 0 0 0 - -Total # of neighbors = 1202833 -Ave neighs/atom = 37.5885 -Neighbor list builds = 5 -Dangerous builds = 0 diff --git a/examples/kokkos/log.kokkos.1Feb14.mpionly.4 b/examples/kokkos/log.kokkos.1Feb14.mpionly.4 deleted file mode 100644 index 1838aafd09..0000000000 --- a/examples/kokkos/log.kokkos.1Feb14.mpionly.4 +++ /dev/null @@ -1,65 +0,0 @@ -LAMMPS (27 May 2014) - using 1 OpenMP thread(s) per MPI task -# 3d Lennard-Jones melt - -variable x index 1 -variable y index 1 -variable z index 1 - -variable xx equal 20*$x -variable xx equal 20*1 -variable yy equal 20*$y -variable yy equal 20*1 -variable zz equal 20*$z -variable zz equal 20*1 - -units lj -atom_style atomic - -lattice fcc 0.8442 -Lattice spacing in x,y,z = 1.6796 1.6796 1.6796 -region box block 0 ${xx} 0 ${yy} 0 ${zz} -region box block 0 20 0 ${yy} 0 ${zz} -region box block 0 20 0 20 0 ${zz} -region box block 0 20 0 20 0 20 -create_box 1 box -Created orthogonal box = (0 0 0) to (33.5919 33.5919 33.5919) - 1 by 2 by 2 MPI processor grid -create_atoms 1 box -Created 32000 atoms -mass 1 1.0 - -velocity all create 1.44 87287 loop geom - -pair_style lj/cut 2.5 -pair_coeff 1 1 1.0 1.0 2.5 - -neighbor 0.3 bin -neigh_modify delay 0 every 20 check no - -fix 1 all nve - -run 100 -Memory usage per processor = 4.09506 Mbytes -Step Temp E_pair E_mol TotEng Press - 0 1.44 -6.7733681 0 -4.6134356 -5.0197073 - 100 0.7574531 -5.7585055 0 -4.6223613 0.20726105 -Loop time of 0.709072 on 4 procs (4 MPI x 1 OpenMP) for 100 steps with 32000 atoms - -Pair time (%) = 0.574495 (81.0206) -Neigh time (%) = 0.0709588 (10.0073) -Comm time (%) = 0.0474771 (6.69567) -Outpt time (%) = 6.62804e-05 (0.00934748) -Other time (%) = 0.0160753 (2.26708) - -Nlocal: 8000 ave 8037 max 7964 min -Histogram: 2 0 0 0 0 0 0 0 1 1 -Nghost: 9007.5 ave 9050 max 8968 min -Histogram: 1 1 0 0 0 0 0 1 0 1 -Neighs: 300708 ave 305113 max 297203 min -Histogram: 1 0 0 1 1 0 0 0 0 1 - -Total # of neighbors = 1202833 -Ave neighs/atom = 37.5885 -Neighbor list builds = 5 -Dangerous builds = 0