Compare commits

...

25 Commits

Author SHA1 Message Date
f6c76e04b8 patch 16Mar18 2018-03-19 08:26:58 -06:00
3befd4b603 Merge pull request #843 from akohlmey/whitespace-cleanup
Whitespace cleanup for stable release
2018-03-16 14:44:30 -06:00
e9ac8ba01e cleanup embedded or trailing tabs 2018-03-16 13:21:54 -04:00
59dbb49cf9 remove trailing whitespace 2018-03-16 12:37:27 -04:00
ee862d8bf5 replace leading tabs 2018-03-16 12:34:33 -04:00
fc3de22c17 Merge pull request #841 from stanmoore1/compiler_warnings
Fix compiler warnings
2018-03-16 09:26:59 -06:00
ab914a9220 Merge pull request #840 from akohlmey/collected-small-fixes
Collected small fixes for stable release
2018-03-16 09:25:59 -06:00
7c300eebd5 Merge pull request #837 from akohlmey/reaxff-bugfix-from-scm
reaxff corrected bond order bugfix
2018-03-16 09:25:38 -06:00
94a923191a more whitespace cleanup 2018-03-15 22:02:02 -04:00
7d2ada9d80 whitespace cleanup 2018-03-15 21:57:45 -04:00
15a9600569 Fix compiler warnings 2018-03-14 13:27:03 -06:00
d62534665f correct potential out-of-bounds memory access 2018-03-14 12:11:58 -04:00
d00908ea3e whitespace cleanup 2018-03-13 23:02:55 -04:00
6965307250 print warning when "compress yes" is ignored with delete_atoms 2018-03-13 22:58:39 -04:00
d9c6278844 Merge pull request #838 from zozo123/replaced-strcmpi-with-strncmpi-to-limit-number-of-chars-compared
Tools/Matlab: Allow to read LAMMPS output fields
2018-03-12 16:39:35 -06:00
821b18641d kokkos version of reaxff corrected bond order bugfix from Tomáš Trnka trnka@scm.com posted on lammps-users 2018-03-12 16:58:03 -04:00
ce4ffe5933 Merge pull request #833 from stanmoore1/kk_update_2.6
Update Kokkos library to v2.6.00
2018-03-12 13:51:33 -06:00
9c3296aad2 Tools/Matlab: Allow to read LAMMPS output fields
Some output fields have attributes attached on the same
line, e.g. "ITEM: BOX BOUNDS pp pp pp". This patch replaces all
the strcmpi calls with strncmpi in order to limit the number of
characters compared with LAMMPS outputs.

Signed-off-by: Yossi Eliaz <eliaz123@gmail.com>
2018-03-12 13:45:13 -05:00
b2c8c40204 reaxff corrected bond order bugfix from Tomáš Trnka trnka@scm.com posted on lammps-users 2018-03-12 12:15:37 -04:00
25c46593ee protect OpenMP header include with ifdefs 2018-03-12 11:56:54 -04:00
35abbab966 Merge pull request #835 from junghans/fix_python
lammps.py: inconsistent use of tabs and spaces in indentation
2018-03-09 08:42:15 -07:00
d358e886c5 Merge pull request #834 from akohlmey/new-reax-logs
provide new reference outputs for various reaxff examples
2018-03-09 08:41:44 -07:00
62d446668c lammps.py: inconsistent use of tabs and spaces in indentation 2018-03-08 16:23:44 -07:00
fcfbdb13ab provide new reference outputs for various reaxff examples 2018-03-08 18:10:28 -05:00
39786b1740 Update Kokkos library to r2.6.00 2018-03-08 10:57:08 -07:00
1191 changed files with 29158 additions and 23076 deletions

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="8 Mar 2018 version">
<META NAME="docnumber" CONTENT="16 Mar 2018 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
<H1></H1>
LAMMPS Documentation :c,h3
8 Mar 2018 version :c,h4
16 Mar 2018 version :c,h4
Version info: :h4

View File

@ -1,70 +0,0 @@
LAMMPS (5 Oct 2016)
# REAX potential for Nitroamines system
# .....
units real
atom_style charge
read_data data.AB
orthogonal box = (0 0 0) to (25 25 25)
1 by 2 by 2 MPI processor grid
reading atoms ...
104 atoms
pair_style reax/c lmp_control
pair_coeff * * ffield.reax.AB H B N
Reading potential file ffield.reax.AB with DATE: 2011-02-18
neighbor 2 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
fix 3 all temp/berendsen 500.0 500.0 100.0
timestep 0.25
#dump 1 all atom 30 dump.reax.ab
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 12.622 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -8505.1816 0 -8505.1816 -673.36566
3000 496.56561 -8405.3755 0 -8252.9182 472.58916
Loop time of 7.23109 on 4 procs for 3000 steps with 104 atoms
Performance: 8.961 ns/day, 2.678 hours/ns, 414.875 timesteps/s
99.4% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 5.705 | 5.7262 | 5.7504 | 0.7 | 79.19
Neigh | 0.14367 | 0.15976 | 0.16805 | 2.4 | 2.21
Comm | 0.053353 | 0.077311 | 0.097821 | 5.7 | 1.07
Output | 1.812e-05 | 1.9848e-05 | 2.408e-05 | 0.1 | 0.00
Modify | 1.2559 | 1.2647 | 1.2818 | 0.9 | 17.49
Other | | 0.003126 | | | 0.04
Nlocal: 26 ave 35 max 13 min
Histogram: 1 0 0 0 0 1 0 0 1 1
Nghost: 421 ave 450 max 377 min
Histogram: 1 0 0 0 0 1 0 0 1 1
Neighs: 847.25 ave 1149 max 444 min
Histogram: 1 0 0 0 1 0 0 0 1 1
Total # of neighbors = 3389
Ave neighs/atom = 32.5865
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:07

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for Nitroamines system
# .....
@ -28,43 +29,53 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 18.4119 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 19.3 | 19.3 | 19.3 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -8505.1816 0 -8505.1816 -673.36566
3000 499.30579 -8405.1387 0 -8251.8401 -94.844317
Loop time of 12.5114 on 1 procs for 3000 steps with 104 atoms
3000 478.18595 -8398.4168 0 -8251.6025 1452.6935
Loop time of 14.3573 on 1 procs for 3000 steps with 104 atoms
Performance: 5.179 ns/day, 4.634 hours/ns, 239.782 timesteps/s
99.3% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 4.513 ns/day, 5.318 hours/ns, 208.952 timesteps/s
96.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 11.137 | 11.137 | 11.137 | 0.0 | 89.01
Neigh | 0.29816 | 0.29816 | 0.29816 | 0.0 | 2.38
Comm | 0.016993 | 0.016993 | 0.016993 | 0.0 | 0.14
Output | 1.1921e-05 | 1.1921e-05 | 1.1921e-05 | 0.0 | 0.00
Modify | 1.0552 | 1.0552 | 1.0552 | 0.0 | 8.43
Other | | 0.004142 | | | 0.03
Pair | 12.709 | 12.709 | 12.709 | 0.0 | 88.52
Neigh | 0.36804 | 0.36804 | 0.36804 | 0.0 | 2.56
Comm | 0.022419 | 0.022419 | 0.022419 | 0.0 | 0.16
Output | 2.8133e-05 | 2.8133e-05 | 2.8133e-05 | 0.0 | 0.00
Modify | 1.2513 | 1.2513 | 1.2513 | 0.0 | 8.72
Other | | 0.006263 | | | 0.04
Nlocal: 104 ave 104 max 104 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 694 ave 694 max 694 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 2927 ave 2927 max 2927 min
Neighs: 2866 ave 2866 max 2866 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 2927
Ave neighs/atom = 28.1442
Total # of neighbors = 2866
Ave neighs/atom = 27.5577
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:12
Total wall time: 0:00:14

View File

@ -0,0 +1,81 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for Nitroamines system
# .....
units real
atom_style charge
read_data data.AB
orthogonal box = (0 0 0) to (25 25 25)
1 by 2 by 2 MPI processor grid
reading atoms ...
104 atoms
pair_style reax/c lmp_control
pair_coeff * * ffield.reax.AB H B N
Reading potential file ffield.reax.AB with DATE: 2011-02-18
neighbor 2 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
fix 3 all temp/berendsen 500.0 500.0 100.0
timestep 0.25
#dump 1 all atom 30 dump.reax.ab
run 3000
Neighbor list info ...
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 12.38 | 13.22 | 13.64 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -8505.1816 0 -8505.1816 -673.36566
3000 555.17702 -8426.5541 0 -8256.1017 219.26856
Loop time of 9.03521 on 4 procs for 3000 steps with 104 atoms
Performance: 7.172 ns/day, 3.346 hours/ns, 332.034 timesteps/s
94.6% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 7.0347 | 7.0652 | 7.1049 | 1.0 | 78.20
Neigh | 0.18481 | 0.20727 | 0.22108 | 3.0 | 2.29
Comm | 0.075175 | 0.11496 | 0.14517 | 7.4 | 1.27
Output | 2.2888e-05 | 2.569e-05 | 3.1948e-05 | 0.0 | 0.00
Modify | 1.6286 | 1.6421 | 1.6649 | 1.1 | 18.17
Other | | 0.005646 | | | 0.06
Nlocal: 26 ave 35 max 13 min
Histogram: 1 0 0 0 0 1 0 0 1 1
Nghost: 420.25 ave 454 max 370 min
Histogram: 1 0 0 0 0 1 0 0 1 1
Neighs: 862.5 ave 1178 max 444 min
Histogram: 1 0 0 0 1 0 0 0 1 1
Total # of neighbors = 3450
Ave neighs/atom = 33.1731
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:09

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for AuO system
# .....
@ -28,30 +29,40 @@ timestep 0.25
run 100
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 4 5
Memory usage per processor = 144.382 Mbytes
binsize = 6, bins = 5 4 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 157.6 | 157.6 | 157.6 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -72201.743 0 -72201.743 -166.1947
100 69.043346 -72076.31 0 -71878.943 22702.308
Loop time of 17.7559 on 1 procs for 100 steps with 960 atoms
0 0 -72201.743 0 -72201.743 -166.19482
100 69.043331 -72076.309 0 -71878.942 22702.89
Loop time of 18.4369 on 1 procs for 100 steps with 960 atoms
Performance: 0.122 ns/day, 197.288 hours/ns, 5.632 timesteps/s
99.8% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 0.117 ns/day, 204.854 hours/ns, 5.424 timesteps/s
98.7% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 15.102 | 15.102 | 15.102 | 0.0 | 85.05
Neigh | 0.49358 | 0.49358 | 0.49358 | 0.0 | 2.78
Comm | 0.0067561 | 0.0067561 | 0.0067561 | 0.0 | 0.04
Output | 1.502e-05 | 1.502e-05 | 1.502e-05 | 0.0 | 0.00
Modify | 2.1525 | 2.1525 | 2.1525 | 0.0 | 12.12
Other | | 0.001267 | | | 0.01
Pair | 15.373 | 15.373 | 15.373 | 0.0 | 83.38
Neigh | 0.58774 | 0.58774 | 0.58774 | 0.0 | 3.19
Comm | 0.0079026 | 0.0079026 | 0.0079026 | 0.0 | 0.04
Output | 3.171e-05 | 3.171e-05 | 3.171e-05 | 0.0 | 0.00
Modify | 2.4665 | 2.4665 | 2.4665 | 0.0 | 13.38
Other | | 0.001366 | | | 0.01
Nlocal: 960 ave 960 max 960 min
Histogram: 1 0 0 0 0 0 0 0 0 0

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for AuO system
# .....
@ -28,30 +29,40 @@ timestep 0.25
run 100
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 4 5
Memory usage per processor = 80.1039 Mbytes
binsize = 6, bins = 5 4 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 87.17 | 87.17 | 87.17 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -72201.743 0 -72201.743 -166.20356
100 69.043372 -72076.31 0 -71878.943 22701.855
Loop time of 7.66838 on 4 procs for 100 steps with 960 atoms
0 0 -72201.743 0 -72201.743 -166.2027
100 69.043379 -72076.31 0 -71878.943 22701.771
Loop time of 8.44797 on 4 procs for 100 steps with 960 atoms
Performance: 0.282 ns/day, 85.204 hours/ns, 13.041 timesteps/s
99.7% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 0.256 ns/day, 93.866 hours/ns, 11.837 timesteps/s
96.5% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 6.7833 | 6.7864 | 6.7951 | 0.2 | 88.50
Neigh | 0.2412 | 0.24206 | 0.24396 | 0.2 | 3.16
Comm | 0.010402 | 0.019419 | 0.022561 | 3.7 | 0.25
Output | 2.0981e-05 | 2.3007e-05 | 2.9087e-05 | 0.1 | 0.00
Modify | 0.61733 | 0.61964 | 0.62064 | 0.2 | 8.08
Other | | 0.0007888 | | | 0.01
Pair | 7.3702 | 7.3757 | 7.3879 | 0.3 | 87.31
Neigh | 0.28875 | 0.29449 | 0.29747 | 0.6 | 3.49
Comm | 0.015008 | 0.027055 | 0.032681 | 4.3 | 0.32
Output | 2.4319e-05 | 2.8551e-05 | 3.8624e-05 | 0.0 | 0.00
Modify | 0.74721 | 0.74985 | 0.75539 | 0.4 | 8.88
Other | | 0.0008975 | | | 0.01
Nlocal: 240 ave 240 max 240 min
Histogram: 4 0 0 0 0 0 0 0 0 0
@ -67,4 +78,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:07
Total wall time: 0:00:08

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for CHO system
# .....
@ -28,30 +29,40 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 17.7936 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 18.68 | 18.68 | 18.68 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -10226.557 0 -10226.557 -106.09789
3000 548.72503 -10170.457 0 -10000.349 34.314945
Loop time of 11.5678 on 1 procs for 3000 steps with 105 atoms
0 0 -10226.557 0 -10226.557 -106.09755
3000 548.5116 -10170.389 0 -10000.348 40.372297
Loop time of 12.6046 on 1 procs for 3000 steps with 105 atoms
Performance: 5.602 ns/day, 4.284 hours/ns, 259.340 timesteps/s
99.3% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 5.141 ns/day, 4.668 hours/ns, 238.008 timesteps/s
98.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 10.111 | 10.111 | 10.111 | 0.0 | 87.41
Neigh | 0.27992 | 0.27992 | 0.27992 | 0.0 | 2.42
Comm | 0.01603 | 0.01603 | 0.01603 | 0.0 | 0.14
Output | 1.2159e-05 | 1.2159e-05 | 1.2159e-05 | 0.0 | 0.00
Modify | 1.1563 | 1.1563 | 1.1563 | 0.0 | 10.00
Other | | 0.004084 | | | 0.04
Pair | 10.931 | 10.931 | 10.931 | 0.0 | 86.72
Neigh | 0.33107 | 0.33107 | 0.33107 | 0.0 | 2.63
Comm | 0.017975 | 0.017975 | 0.017975 | 0.0 | 0.14
Output | 2.0742e-05 | 2.0742e-05 | 2.0742e-05 | 0.0 | 0.00
Modify | 1.3197 | 1.3197 | 1.3197 | 0.0 | 10.47
Other | | 0.005059 | | | 0.04
Nlocal: 105 ave 105 max 105 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -67,4 +78,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:11
Total wall time: 0:00:12

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for CHO system
# .....
@ -28,30 +29,40 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 12.9938 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 11.75 | 12.85 | 13.81 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -10226.557 0 -10226.557 -106.0974
3000 547.91377 -10170.194 0 -10000.338 61.118402
Loop time of 6.51546 on 4 procs for 3000 steps with 105 atoms
0 0 -10226.557 0 -10226.557 -106.09745
3000 548.30567 -10170.323 0 -10000.346 47.794514
Loop time of 7.42367 on 4 procs for 3000 steps with 105 atoms
Performance: 9.946 ns/day, 2.413 hours/ns, 460.443 timesteps/s
99.1% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 8.729 ns/day, 2.750 hours/ns, 404.113 timesteps/s
97.7% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.9869 | 5.0615 | 5.1246 | 2.3 | 77.68
Neigh | 0.12213 | 0.14723 | 0.17304 | 5.5 | 2.26
Comm | 0.05189 | 0.11582 | 0.18932 | 15.4 | 1.78
Output | 1.812e-05 | 2.0564e-05 | 2.5988e-05 | 0.1 | 0.00
Modify | 1.1626 | 1.1878 | 1.2122 | 1.9 | 18.23
Other | | 0.003059 | | | 0.05
Pair | 5.3058 | 5.4086 | 5.4922 | 3.1 | 72.86
Neigh | 0.14791 | 0.17866 | 0.2106 | 6.5 | 2.41
Comm | 0.080185 | 0.16666 | 0.26933 | 17.7 | 2.24
Output | 2.5988e-05 | 2.8491e-05 | 3.4571e-05 | 0.0 | 0.00
Modify | 1.6364 | 1.6658 | 1.6941 | 2.0 | 22.44
Other | | 0.003964 | | | 0.05
Nlocal: 26.25 ave 45 max 6 min
Histogram: 1 0 1 0 0 0 0 0 1 1
@ -67,4 +78,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:06
Total wall time: 0:00:07

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for Nitroamines system
# .....
@ -29,13 +30,23 @@ thermo 1
dump 4 all xyz 5000 dumpnpt.xyz
run 10
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 28 27 17
Memory usage per processor = 440.212 Mbytes
binsize = 6, bins = 28 27 17
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes
Step Temp E_pair TotEng Press
0 0 -808525.04 -808525.04 58194.694
1 4.9935726 -808803.89 -808546.69 58205.825
@ -48,20 +59,20 @@ Step Temp E_pair TotEng Press
8 320.17692 -826387.27 -809896.43 58886.877
9 404.17073 -831129.48 -810312.5 59064.551
10 497.02486 -836425.19 -810825.72 59260.714
Loop time of 20.3094 on 1 procs for 10 steps with 17280 atoms
Loop time of 21.5054 on 1 procs for 10 steps with 17280 atoms
Performance: 0.009 ns/day, 2820.746 hours/ns, 0.492 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 0.008 ns/day, 2986.857 hours/ns, 0.465 timesteps/s
98.8% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 18.124 | 18.124 | 18.124 | 0.0 | 89.24
Neigh | 0.072459 | 0.072459 | 0.072459 | 0.0 | 0.36
Comm | 0.00077629 | 0.00077629 | 0.00077629 | 0.0 | 0.00
Output | 0.00075412 | 0.00075412 | 0.00075412 | 0.0 | 0.00
Modify | 2.1109 | 2.1109 | 2.1109 | 0.0 | 10.39
Other | | 0.0005426 | | | 0.00
Pair | 19.008 | 19.008 | 19.008 | 0.0 | 88.39
Neigh | 0.084401 | 0.084401 | 0.084401 | 0.0 | 0.39
Comm | 0.00080419 | 0.00080419 | 0.00080419 | 0.0 | 0.00
Output | 0.00095367 | 0.00095367 | 0.00095367 | 0.0 | 0.00
Modify | 2.4109 | 2.4109 | 2.4109 | 0.0 | 11.21
Other | | 0.0004592 | | | 0.00
Nlocal: 17280 ave 17280 max 17280 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -85,7 +96,7 @@ timestep 0.2
#dump 6 all custom 5000 dumpidtype.dat id type x y z
run 10
Memory usage per processor = 440.212 Mbytes
Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes
Step Temp E_pair TotEng Press
10 497.02486 -836425.19 -810825.72 59260.714
11 601.65141 -841814.22 -810825.91 59489.422
@ -98,20 +109,20 @@ Step Temp E_pair TotEng Press
18 1623.072 -894534.04 -810937.04 61739.541
19 1812.1865 -904337.99 -811000.57 62200.561
20 2011.5899 -915379.19 -811771.41 63361.151
Loop time of 20.3051 on 1 procs for 10 steps with 17280 atoms
Loop time of 21.362 on 1 procs for 10 steps with 17280 atoms
Performance: 0.009 ns/day, 2820.155 hours/ns, 0.492 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 0.008 ns/day, 2966.945 hours/ns, 0.468 timesteps/s
98.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 18.008 | 18.008 | 18.008 | 0.0 | 88.69
Neigh | 0.069963 | 0.069963 | 0.069963 | 0.0 | 0.34
Comm | 0.00077033 | 0.00077033 | 0.00077033 | 0.0 | 0.00
Output | 0.00077224 | 0.00077224 | 0.00077224 | 0.0 | 0.00
Modify | 2.225 | 2.225 | 2.225 | 0.0 | 10.96
Other | | 0.0005276 | | | 0.00
Pair | 18.793 | 18.793 | 18.793 | 0.0 | 87.97
Neigh | 0.077047 | 0.077047 | 0.077047 | 0.0 | 0.36
Comm | 0.00080276 | 0.00080276 | 0.00080276 | 0.0 | 0.00
Output | 0.0010097 | 0.0010097 | 0.0010097 | 0.0 | 0.00
Modify | 2.4897 | 2.4897 | 2.4897 | 0.0 | 11.65
Other | | 0.0004568 | | | 0.00
Nlocal: 17280 ave 17280 max 17280 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -127,4 +138,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:45
Total wall time: 0:00:47

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for Nitroamines system
# .....
@ -29,13 +30,23 @@ thermo 1
dump 4 all xyz 5000 dumpnpt.xyz
run 10
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 28 27 17
Memory usage per processor = 140.018 Mbytes
binsize = 6, bins = 28 27 17
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes
Step Temp E_pair TotEng Press
0 0 -808525.04 -808525.04 58194.694
1 4.9935726 -808803.89 -808546.69 58205.825
@ -48,20 +59,20 @@ Step Temp E_pair TotEng Press
8 320.17692 -826387.27 -809896.43 58886.877
9 404.17073 -831129.48 -810312.5 59064.551
10 497.02486 -836425.19 -810825.72 59260.714
Loop time of 5.47494 on 4 procs for 10 steps with 17280 atoms
Loop time of 6.02109 on 4 procs for 10 steps with 17280 atoms
Performance: 0.032 ns/day, 760.408 hours/ns, 1.827 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 0.029 ns/day, 836.262 hours/ns, 1.661 timesteps/s
99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.5958 | 4.7748 | 4.8852 | 5.4 | 87.21
Neigh | 0.021961 | 0.022104 | 0.022431 | 0.1 | 0.40
Comm | 0.0077388 | 0.11804 | 0.29694 | 34.2 | 2.16
Output | 0.00047708 | 0.00051123 | 0.0005939 | 0.2 | 0.01
Modify | 0.55906 | 0.55927 | 0.55946 | 0.0 | 10.22
Other | | 0.0002034 | | | 0.00
Pair | 4.9482 | 5.1186 | 5.3113 | 7.4 | 85.01
Neigh | 0.024811 | 0.025702 | 0.027556 | 0.7 | 0.43
Comm | 0.0027421 | 0.19541 | 0.36565 | 38.1 | 3.25
Output | 0.00053239 | 0.00057119 | 0.00067186 | 0.0 | 0.01
Modify | 0.67876 | 0.68059 | 0.68165 | 0.1 | 11.30
Other | | 0.0001779 | | | 0.00
Nlocal: 4320 ave 4320 max 4320 min
Histogram: 4 0 0 0 0 0 0 0 0 0
@ -85,7 +96,7 @@ timestep 0.2
#dump 6 all custom 5000 dumpidtype.dat id type x y z
run 10
Memory usage per processor = 140.018 Mbytes
Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes
Step Temp E_pair TotEng Press
10 497.02486 -836425.19 -810825.72 59260.714
11 601.65141 -841814.22 -810825.91 59489.422
@ -98,20 +109,20 @@ Step Temp E_pair TotEng Press
18 1623.072 -894534.04 -810937.04 61739.541
19 1812.1865 -904337.99 -811000.57 62200.561
20 2011.5899 -915379.19 -811771.41 63361.151
Loop time of 5.49026 on 4 procs for 10 steps with 17280 atoms
Loop time of 6.08805 on 4 procs for 10 steps with 17280 atoms
Performance: 0.031 ns/day, 762.536 hours/ns, 1.821 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 0.028 ns/day, 845.563 hours/ns, 1.643 timesteps/s
99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.5657 | 4.7603 | 4.8596 | 5.4 | 86.70
Neigh | 0.021023 | 0.021468 | 0.022176 | 0.3 | 0.39
Comm | 0.016467 | 0.1157 | 0.31031 | 34.7 | 2.11
Output | 0.00047684 | 0.00050694 | 0.00059295 | 0.2 | 0.01
Modify | 0.59135 | 0.59207 | 0.59251 | 0.1 | 10.78
Other | | 0.0001938 | | | 0.00
Pair | 4.9124 | 5.1008 | 5.3405 | 8.3 | 83.78
Neigh | 0.023652 | 0.024473 | 0.025996 | 0.6 | 0.40
Comm | 0.0020971 | 0.24171 | 0.43023 | 38.0 | 3.97
Output | 0.00056076 | 0.00060701 | 0.00072312 | 0.0 | 0.01
Modify | 0.71869 | 0.72023 | 0.72107 | 0.1 | 11.83
Other | | 0.0001827 | | | 0.00
Nlocal: 4320 ave 4320 max 4320 min
Histogram: 4 0 0 0 0 0 0 0 0 0
@ -127,4 +138,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:12
Total wall time: 0:00:13

View File

@ -1,6 +1,12 @@
# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS
# See README for more info
variable x index 2
variable y index 2
variable z index 2
variable t index 100
units real
atom_style charge
atom_modify sort 100 0.0 # optional
@ -24,7 +30,7 @@ timestep 0.1
thermo_style custom step temp pe press evdwl ecoul vol
thermo_modify norm yes
thermo 100
thermo 10
velocity all create 300.0 41279 loop geom

View File

@ -0,0 +1,115 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS
# See README for more info
variable x index 2
variable y index 2
variable z index 2
variable t index 100
units real
atom_style charge
atom_modify sort 100 0.0 # optional
dimension 3
boundary p p p
box tilt large
read_data data.hns-equil
triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0)
1 by 1 by 1 MPI processor grid
reading atoms ...
304 atoms
reading velocities ...
304 velocities
replicate $x $y $z bbox
replicate 2 $y $z bbox
replicate 2 2 $z bbox
replicate 2 2 2 bbox
triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0)
1 by 1 by 1 MPI processor grid
2432 atoms
Time spent = 0.000789404 secs
pair_style reax/c NULL
pair_coeff * * ffield.reax.hns C H O N
compute reax all pair reax/c
neighbor 1.0 bin
neigh_modify every 20 delay 0 check no
timestep 0.1
thermo_style custom step temp pe press evdwl ecoul vol
thermo_modify norm yes
thermo 10
velocity all create 300.0 41279 loop geom
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c
run 100
Neighbor list info ...
update every 20 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 11
ghost atom cutoff = 11
binsize = 5.5, bins = 10 5 6
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 262.4 | 262.4 | 262.4 Mbytes
Step Temp PotEng Press E_vdwl E_coul Volume
0 300 -113.27833 437.52103 -111.57687 -1.7014647 27418.867
10 299.87174 -113.27778 2033.6337 -111.57645 -1.7013325 27418.867
20 300.81718 -113.28046 4817.5889 -111.57931 -1.7011463 27418.867
30 301.8622 -113.28323 8303.0039 -111.58237 -1.7008608 27418.867
40 302.4646 -113.28493 10519.459 -111.58446 -1.700467 27418.867
50 300.79064 -113.27989 10402.291 -111.57987 -1.7000218 27418.867
60 296.11534 -113.26599 7929.1348 -111.5664 -1.6995929 27418.867
70 291.73354 -113.25289 5071.5459 -111.5537 -1.6991916 27418.867
80 292.189 -113.25399 5667.0962 -111.55519 -1.6987993 27418.867
90 298.40792 -113.27253 7513.3806 -111.57409 -1.6984403 27418.867
100 303.58246 -113.28809 10017.879 -111.58991 -1.698177 27418.867
Loop time of 59.5461 on 1 procs for 100 steps with 2432 atoms
Performance: 0.015 ns/day, 1654.060 hours/ns, 1.679 timesteps/s
97.0% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 49.922 | 49.922 | 49.922 | 0.0 | 83.84
Neigh | 0.53154 | 0.53154 | 0.53154 | 0.0 | 0.89
Comm | 0.011399 | 0.011399 | 0.011399 | 0.0 | 0.02
Output | 0.00064397 | 0.00064397 | 0.00064397 | 0.0 | 0.00
Modify | 9.0782 | 9.0782 | 9.0782 | 0.0 | 15.25
Other | | 0.002116 | | | 0.00
Nlocal: 2432 ave 2432 max 2432 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 10687 ave 10687 max 10687 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 823977 ave 823977 max 823977 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 823977
Ave neighs/atom = 338.806
Neighbor list builds = 5
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:01:00

View File

@ -0,0 +1,115 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS
# See README for more info
variable x index 2
variable y index 2
variable z index 2
variable t index 100
units real
atom_style charge
atom_modify sort 100 0.0 # optional
dimension 3
boundary p p p
box tilt large
read_data data.hns-equil
triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0)
2 by 1 by 2 MPI processor grid
reading atoms ...
304 atoms
reading velocities ...
304 velocities
replicate $x $y $z bbox
replicate 2 $y $z bbox
replicate 2 2 $z bbox
replicate 2 2 2 bbox
triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0)
2 by 1 by 2 MPI processor grid
2432 atoms
Time spent = 0.000398397 secs
pair_style reax/c NULL
pair_coeff * * ffield.reax.hns C H O N
compute reax all pair reax/c
neighbor 1.0 bin
neigh_modify every 20 delay 0 check no
timestep 0.1
thermo_style custom step temp pe press evdwl ecoul vol
thermo_modify norm yes
thermo 10
velocity all create 300.0 41279 loop geom
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c
run 100
Neighbor list info ...
update every 20 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 11
ghost atom cutoff = 11
binsize = 5.5, bins = 10 5 6
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 126.6 | 126.6 | 126.6 Mbytes
Step Temp PotEng Press E_vdwl E_coul Volume
0 300 -113.27833 437.52112 -111.57687 -1.7014647 27418.867
10 299.87174 -113.27778 2033.632 -111.57645 -1.7013325 27418.867
20 300.81719 -113.28046 4817.5761 -111.57931 -1.7011463 27418.867
30 301.8622 -113.28323 8302.9767 -111.58237 -1.7008609 27418.867
40 302.4646 -113.28493 10519.481 -111.58446 -1.700467 27418.867
50 300.79064 -113.27989 10402.312 -111.57987 -1.7000217 27418.867
60 296.11534 -113.26599 7929.1393 -111.5664 -1.6995929 27418.867
70 291.73354 -113.25289 5071.5368 -111.5537 -1.6991916 27418.867
80 292.18901 -113.25399 5667.1118 -111.55519 -1.6987993 27418.867
90 298.40793 -113.27253 7513.4029 -111.57409 -1.6984403 27418.867
100 303.58247 -113.28809 10017.892 -111.58991 -1.698177 27418.867
Loop time of 21.3933 on 4 procs for 100 steps with 2432 atoms
Performance: 0.040 ns/day, 594.257 hours/ns, 4.674 timesteps/s
97.6% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 14.863 | 16.367 | 18.027 | 28.6 | 76.51
Neigh | 0.23943 | 0.2422 | 0.24658 | 0.6 | 1.13
Comm | 0.024331 | 1.6845 | 3.189 | 89.2 | 7.87
Output | 0.00051165 | 0.00056899 | 0.00068665 | 0.0 | 0.00
Modify | 3.0933 | 3.0969 | 3.0999 | 0.1 | 14.48
Other | | 0.001784 | | | 0.01
Nlocal: 608 ave 608 max 608 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Nghost: 5738.25 ave 5742 max 5734 min
Histogram: 1 1 0 0 0 0 0 0 0 2
Neighs: 231544 ave 231625 max 231466 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Total # of neighbors = 926176
Ave neighs/atom = 380.829
Neighbor list builds = 5
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:21

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for high energy CHON systems
# .....
@ -28,43 +29,53 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 18.1116 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 19 | 19 | 19 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -10197.932 0 -10197.932 38.347492
3000 510.85923 -10091.694 0 -9933.3253 1668.5084
Loop time of 18.9088 on 1 procs for 3000 steps with 105 atoms
3000 510.63767 -10091.537 0 -9933.2374 1144.545
Loop time of 21.2931 on 1 procs for 3000 steps with 105 atoms
Performance: 3.427 ns/day, 7.003 hours/ns, 158.657 timesteps/s
99.5% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 3.043 ns/day, 7.886 hours/ns, 140.891 timesteps/s
97.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 17.724 | 17.724 | 17.724 | 0.0 | 93.73
Neigh | 0.27457 | 0.27457 | 0.27457 | 0.0 | 1.45
Comm | 0.015814 | 0.015814 | 0.015814 | 0.0 | 0.08
Output | 1.1921e-05 | 1.1921e-05 | 1.1921e-05 | 0.0 | 0.00
Modify | 0.89014 | 0.89014 | 0.89014 | 0.0 | 4.71
Other | | 0.004246 | | | 0.02
Pair | 19.887 | 19.887 | 19.887 | 0.0 | 93.40
Neigh | 0.33143 | 0.33143 | 0.33143 | 0.0 | 1.56
Comm | 0.02079 | 0.02079 | 0.02079 | 0.0 | 0.10
Output | 2.5272e-05 | 2.5272e-05 | 2.5272e-05 | 0.0 | 0.00
Modify | 1.0478 | 1.0478 | 1.0478 | 0.0 | 4.92
Other | | 0.006125 | | | 0.03
Nlocal: 105 ave 105 max 105 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 645 ave 645 max 645 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3061 ave 3061 max 3061 min
Neighs: 3063 ave 3063 max 3063 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 3061
Ave neighs/atom = 29.1524
Total # of neighbors = 3063
Ave neighs/atom = 29.1714
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:19
Total wall time: 0:00:21

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for high energy CHON systems
# .....
@ -28,43 +29,53 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 12.2102 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 12.14 | 13.04 | 13.9 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -10197.932 0 -10197.932 38.347492
3000 504.05354 -10089.494 0 -9933.2351 868.32505
Loop time of 9.70759 on 4 procs for 3000 steps with 105 atoms
3000 509.89257 -10091.36 0 -9933.2916 1406.1215
Loop time of 10.8858 on 4 procs for 3000 steps with 105 atoms
Performance: 6.675 ns/day, 3.595 hours/ns, 309.037 timesteps/s
99.2% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 5.953 ns/day, 4.032 hours/ns, 275.588 timesteps/s
98.1% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 8.4621 | 8.5307 | 8.6001 | 1.9 | 87.88
Neigh | 0.12583 | 0.14931 | 0.17341 | 4.5 | 1.54
Comm | 0.053017 | 0.12311 | 0.19244 | 16.2 | 1.27
Output | 1.9073e-05 | 2.0802e-05 | 2.408e-05 | 0.0 | 0.00
Modify | 0.87638 | 0.9012 | 0.92557 | 1.9 | 9.28
Other | | 0.003213 | | | 0.03
Pair | 9.3081 | 9.4054 | 9.4994 | 2.6 | 86.40
Neigh | 0.15541 | 0.18258 | 0.2099 | 4.7 | 1.68
Comm | 0.070516 | 0.16621 | 0.26541 | 19.7 | 1.53
Output | 2.2173e-05 | 2.5153e-05 | 3.3855e-05 | 0.0 | 0.00
Modify | 1.0979 | 1.1272 | 1.1568 | 2.1 | 10.35
Other | | 0.004379 | | | 0.04
Nlocal: 26.25 ave 46 max 8 min
Histogram: 1 0 0 1 0 1 0 0 0 1
Nghost: 399.5 ave 512 max 288 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Neighs: 1010.75 ave 1818 max 420 min
Neighs: 1011.25 ave 1819 max 420 min
Histogram: 1 0 1 1 0 0 0 0 0 1
Total # of neighbors = 4043
Ave neighs/atom = 38.5048
Total # of neighbors = 4045
Ave neighs/atom = 38.5238
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:10
Total wall time: 0:00:11

View File

@ -1,70 +0,0 @@
LAMMPS (5 Oct 2016)
# REAX potential for VOH system
# .....
units real
atom_style charge
read_data data.VOH
orthogonal box = (0 0 0) to (25 25 25)
1 by 2 by 2 MPI processor grid
reading atoms ...
100 atoms
pair_style reax/c lmp_control
pair_coeff * * ffield.reax.V_O_C_H H C O V
Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18
neighbor 2 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
fix 3 all temp/berendsen 500.0 500.0 100.0
timestep 0.25
#dump 1 all atom 30 dump.reax.voh
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 12.1769 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -10246.825 0 -10246.825 42.256092
3000 518.1493 -10196.234 0 -10043.328 -334.5971
Loop time of 5.59178 on 4 procs for 3000 steps with 100 atoms
Performance: 11.588 ns/day, 2.071 hours/ns, 536.502 timesteps/s
99.1% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.2807 | 4.3532 | 4.398 | 2.1 | 77.85
Neigh | 0.12328 | 0.14561 | 0.16815 | 4.2 | 2.60
Comm | 0.051619 | 0.097282 | 0.1697 | 14.1 | 1.74
Output | 1.7881e-05 | 1.9372e-05 | 2.3842e-05 | 0.1 | 0.00
Modify | 0.9701 | 0.99258 | 1.0148 | 1.6 | 17.75
Other | | 0.003097 | | | 0.06
Nlocal: 25 ave 38 max 11 min
Histogram: 1 0 0 0 1 0 1 0 0 1
Nghost: 368.25 ave 449 max 283 min
Histogram: 1 0 0 0 1 0 1 0 0 1
Neighs: 1084.5 ave 1793 max 418 min
Histogram: 1 0 0 1 0 0 1 0 0 1
Total # of neighbors = 4338
Ave neighs/atom = 43.38
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:05

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for VOH system
# .....
@ -28,43 +29,53 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 16.9211 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 17.79 | 17.79 | 17.79 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -10246.825 0 -10246.825 42.256089
3000 479.39686 -10186.225 0 -10044.755 -454.82798
Loop time of 10.4348 on 1 procs for 3000 steps with 100 atoms
3000 476.73301 -10185.256 0 -10044.572 -694.70737
Loop time of 11.0577 on 1 procs for 3000 steps with 100 atoms
Performance: 6.210 ns/day, 3.865 hours/ns, 287.499 timesteps/s
99.2% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 5.860 ns/day, 4.095 hours/ns, 271.304 timesteps/s
98.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 9.2216 | 9.2216 | 9.2216 | 0.0 | 88.37
Neigh | 0.2757 | 0.2757 | 0.2757 | 0.0 | 2.64
Comm | 0.015626 | 0.015626 | 0.015626 | 0.0 | 0.15
Output | 1.1921e-05 | 1.1921e-05 | 1.1921e-05 | 0.0 | 0.00
Modify | 0.91782 | 0.91782 | 0.91782 | 0.0 | 8.80
Other | | 0.004039 | | | 0.04
Pair | 9.6785 | 9.6785 | 9.6785 | 0.0 | 87.53
Neigh | 0.32599 | 0.32599 | 0.32599 | 0.0 | 2.95
Comm | 0.017231 | 0.017231 | 0.017231 | 0.0 | 0.16
Output | 2.5511e-05 | 2.5511e-05 | 2.5511e-05 | 0.0 | 0.00
Modify | 1.0311 | 1.0311 | 1.0311 | 0.0 | 9.32
Other | | 0.004857 | | | 0.04
Nlocal: 100 ave 100 max 100 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 598 ave 598 max 598 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3384 ave 3384 max 3384 min
Neighs: 3390 ave 3390 max 3390 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 3384
Ave neighs/atom = 33.84
Total # of neighbors = 3390
Ave neighs/atom = 33.9
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:10
Total wall time: 0:00:11

View File

@ -0,0 +1,81 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for VOH system
# .....
units real
atom_style charge
read_data data.VOH
orthogonal box = (0 0 0) to (25 25 25)
1 by 2 by 2 MPI processor grid
reading atoms ...
100 atoms
pair_style reax/c lmp_control
pair_coeff * * ffield.reax.V_O_C_H H C O V
Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18
neighbor 2 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
fix 3 all temp/berendsen 500.0 500.0 100.0
timestep 0.25
#dump 1 all atom 30 dump.reax.voh
run 3000
Neighbor list info ...
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 11.21 | 12.52 | 13.64 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -10246.825 0 -10246.825 42.256092
3000 489.67803 -10188.866 0 -10044.362 -553.7513
Loop time of 6.49847 on 4 procs for 3000 steps with 100 atoms
Performance: 9.972 ns/day, 2.407 hours/ns, 461.647 timesteps/s
97.7% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.7412 | 4.8453 | 4.9104 | 2.9 | 74.56
Neigh | 0.1468 | 0.17834 | 0.20151 | 4.7 | 2.74
Comm | 0.071841 | 0.14037 | 0.24502 | 17.2 | 2.16
Output | 2.1219e-05 | 2.408e-05 | 3.1948e-05 | 0.0 | 0.00
Modify | 1.3072 | 1.3308 | 1.3627 | 1.7 | 20.48
Other | | 0.003713 | | | 0.06
Nlocal: 25 ave 38 max 11 min
Histogram: 1 0 0 0 1 0 1 0 0 1
Nghost: 369.75 ave 453 max 283 min
Histogram: 1 0 0 0 1 1 0 0 0 1
Neighs: 1082.25 ave 1788 max 417 min
Histogram: 1 0 1 0 0 0 1 0 0 1
Total # of neighbors = 4329
Ave neighs/atom = 43.29
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:06

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for ZnOH2 system
# .....
@ -28,43 +29,53 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 17.485 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 18.36 | 18.36 | 18.36 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -7900.2668 0 -7900.2668 60.076093
3000 522.42599 -7928.9641 0 -7767.0098 -755.28778
Loop time of 6.38119 on 1 procs for 3000 steps with 105 atoms
3000 535.58577 -7934.7287 0 -7768.6948 -475.46237
Loop time of 7.29784 on 1 procs for 3000 steps with 105 atoms
Performance: 10.155 ns/day, 2.363 hours/ns, 470.132 timesteps/s
99.0% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 8.879 ns/day, 2.703 hours/ns, 411.081 timesteps/s
97.3% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 5.2711 | 5.2711 | 5.2711 | 0.0 | 82.60
Neigh | 0.30669 | 0.30669 | 0.30669 | 0.0 | 4.81
Comm | 0.015599 | 0.015599 | 0.015599 | 0.0 | 0.24
Output | 1.0967e-05 | 1.0967e-05 | 1.0967e-05 | 0.0 | 0.00
Modify | 0.78376 | 0.78376 | 0.78376 | 0.0 | 12.28
Other | | 0.004036 | | | 0.06
Pair | 5.9988 | 5.9988 | 5.9988 | 0.0 | 82.20
Neigh | 0.37455 | 0.37455 | 0.37455 | 0.0 | 5.13
Comm | 0.019186 | 0.019186 | 0.019186 | 0.0 | 0.26
Output | 2.4557e-05 | 2.4557e-05 | 2.4557e-05 | 0.0 | 0.00
Modify | 0.89915 | 0.89915 | 0.89915 | 0.0 | 12.32
Other | | 0.006108 | | | 0.08
Nlocal: 105 ave 105 max 105 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 649 ave 649 max 649 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3956 ave 3956 max 3956 min
Neighs: 3971 ave 3971 max 3971 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 3956
Ave neighs/atom = 37.6762
Total # of neighbors = 3971
Ave neighs/atom = 37.819
Neighbor list builds = 300
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:06
Total wall time: 0:00:07

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# REAX potential for ZnOH2 system
# .....
@ -28,40 +29,50 @@ timestep 0.25
run 3000
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 5 5 5
Memory usage per processor = 12.0066 Mbytes
binsize = 6, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 11.28 | 12.77 | 14.21 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0 -7900.2668 0 -7900.2668 60.076093
3000 536.8256 -7935.1437 0 -7768.7255 -479.27959
Loop time of 3.77632 on 4 procs for 3000 steps with 105 atoms
3000 538.25796 -7935.6159 0 -7768.7536 -525.47078
Loop time of 4.48824 on 4 procs for 3000 steps with 105 atoms
Performance: 17.160 ns/day, 1.399 hours/ns, 794.423 timesteps/s
99.0% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 14.438 ns/day, 1.662 hours/ns, 668.414 timesteps/s
97.2% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.7337 | 2.7808 | 2.8316 | 2.5 | 73.64
Neigh | 0.13455 | 0.16558 | 0.19493 | 5.3 | 4.38
Comm | 0.046741 | 0.099375 | 0.14663 | 13.6 | 2.63
Output | 1.7881e-05 | 2.0027e-05 | 2.408e-05 | 0.1 | 0.00
Modify | 0.69792 | 0.7275 | 0.75887 | 2.5 | 19.26
Other | | 0.003084 | | | 0.08
Pair | 3.1031 | 3.1698 | 3.2378 | 3.3 | 70.62
Neigh | 0.16642 | 0.20502 | 0.25003 | 6.6 | 4.57
Comm | 0.074932 | 0.14224 | 0.21025 | 15.6 | 3.17
Output | 0.00011349 | 0.00011736 | 0.00012231 | 0.0 | 0.00
Modify | 0.92089 | 0.96736 | 1.0083 | 3.2 | 21.55
Other | | 0.003731 | | | 0.08
Nlocal: 26.25 ave 45 max 15 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Nghost: 399 ave 509 max 295 min
Histogram: 1 0 0 0 2 0 0 0 0 1
Neighs: 1150 ave 2061 max 701 min
Neighs: 1151.5 ave 2066 max 701 min
Histogram: 1 2 0 0 0 0 0 0 0 1
Total # of neighbors = 4600
Ave neighs/atom = 43.8095
Total # of neighbors = 4606
Ave neighs/atom = 43.8667
Neighbor list builds = 300
Dangerous builds not checked

View File

@ -1,4 +1,5 @@
LAMMPS (23 Oct 2017)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
#ci-reax potential for CH systems with tabulated ZBL correction
atom_style charge
units real
@ -31,6 +32,7 @@ fix 2 all temp/berendsen 500.0 500.0 100.0
#dump 1 all atom 30 dump.ci-reax.lammpstrj
run 3000
WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392)
Neighbor list info ...
update every 1 steps, delay 10 steps, check yes
max neighbors/atom: 2000, page size: 100000
@ -52,20 +54,20 @@ Per MPI rank memory allocation (min/avg/max) = 43.46 | 43.46 | 43.46 Mbytes
Step Temp E_pair E_mol TotEng Press
0 508.42043 -28736.654 0 -28260.785 1678.3276
3000 480.41333 -28707.835 0 -28258.181 -3150.0762
Loop time of 21.5509 on 1 procs for 3000 steps with 315 atoms
Loop time of 45.3959 on 1 procs for 3000 steps with 315 atoms
Performance: 3.007 ns/day, 7.982 hours/ns, 139.205 timesteps/s
100.0% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 1.427 ns/day, 16.813 hours/ns, 66.085 timesteps/s
96.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 21.315 | 21.315 | 21.315 | 0.0 | 98.91
Neigh | 0.17846 | 0.17846 | 0.17846 | 0.0 | 0.83
Comm | 0.028676 | 0.028676 | 0.028676 | 0.0 | 0.13
Output | 2.6941e-05 | 2.6941e-05 | 2.6941e-05 | 0.0 | 0.00
Modify | 0.018969 | 0.018969 | 0.018969 | 0.0 | 0.09
Other | | 0.009438 | | | 0.04
Pair | 44.955 | 44.955 | 44.955 | 0.0 | 99.03
Neigh | 0.29903 | 0.29903 | 0.29903 | 0.0 | 0.66
Comm | 0.056547 | 0.056547 | 0.056547 | 0.0 | 0.12
Output | 4.8399e-05 | 4.8399e-05 | 4.8399e-05 | 0.0 | 0.00
Modify | 0.058722 | 0.058722 | 0.058722 | 0.0 | 0.13
Other | | 0.02632 | | | 0.06
Nlocal: 315 ave 315 max 315 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -81,4 +83,4 @@ Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:21
Total wall time: 0:00:45

View File

@ -0,0 +1,86 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
#ci-reax potential for CH systems with tabulated ZBL correction
atom_style charge
units real
read_data CH4.dat
orthogonal box = (0 0 0) to (20 20 20)
1 by 2 by 2 MPI processor grid
reading atoms ...
315 atoms
reading velocities ...
315 velocities
pair_style hybrid/overlay reax/c control checkqeq no table linear 11000
pair_coeff * * reax/c ffield.ci-reax.CH C H
Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20
pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF
WARNING: 2 of 10000 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:481)
pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF
WARNING: 2 of 11000 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:481)
pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF
WARNING: 2 of 6000 force values in table are inconsistent with -dE/dr.
Should only be flagged at inflection points (../pair_table.cpp:481)
timestep 0.25
fix 1 all nve
fix 2 all temp/berendsen 500.0 500.0 100.0
#dump 1 all atom 30 dump.ci-reax.lammpstrj
run 3000
WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392)
Neighbor list info ...
update every 1 steps, delay 10 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 9.5
ghost atom cutoff = 9.5
binsize = 4.75, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) pair table, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 24.48 | 25.61 | 27.27 Mbytes
Step Temp E_pair E_mol TotEng Press
0 508.42043 -28736.654 0 -28260.785 1678.3276
3000 480.41333 -28707.835 0 -28258.181 -3150.0762
Loop time of 24.7034 on 4 procs for 3000 steps with 315 atoms
Performance: 2.623 ns/day, 9.149 hours/ns, 121.441 timesteps/s
95.8% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 18.945 | 21.367 | 24.046 | 39.3 | 86.49
Neigh | 0.1456 | 0.15254 | 0.16101 | 1.6 | 0.62
Comm | 0.39168 | 3.0859 | 5.5185 | 103.9 | 12.49
Output | 3.5763e-05 | 4.065e-05 | 5.2452e-05 | 0.0 | 0.00
Modify | 0.05831 | 0.068811 | 0.077666 | 2.9 | 0.28
Other | | 0.0292 | | | 0.12
Nlocal: 78.75 ave 96 max 65 min
Histogram: 2 0 0 0 0 0 0 1 0 1
Nghost: 1233 ave 1348 max 1116 min
Histogram: 1 0 1 0 0 0 0 1 0 1
Neighs: 9467.25 ave 12150 max 7160 min
Histogram: 1 1 0 0 0 0 0 1 0 1
Total # of neighbors = 37869
Ave neighs/atom = 120.219
Neighbor list builds = 37
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:24

View File

@ -1,101 +0,0 @@
LAMMPS (5 Oct 2016)
# ReaxFF potential for RDX system
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 1 by 1 MPI processor grid
reading atoms ...
21 atoms
# reax args: hbcut hbnewflag tripflag precision
pair_style reax 6.0 1 1 1.0e-6
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
pair_coeff * * ffield.reax 1 2 3 4
compute reax all pair reax
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
1 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 3 3 3
Memory usage per processor = 2.95105 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1885.1268 -1885.1268 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79971 0 168.88435
10 1281.7558 -1989.1322 -1912.7188 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.020231 3.1070522 -77.710916 0 14.963568 -5.8082204 843.41939 -180.17724 0 107.5115
20 516.83079 -1941.677 -1910.8655 -12525.41 -2801.8626 7.4107974 0.073134188 0 81.986982 0.2281551 -57.494871 0 30.656735 -10.102557 877.78696 -158.93385 0 88.574158
30 467.2641 -1940.978 -1913.1215 -35957.487 -2755.021 -6.9179959 0.049322439 0 78.853175 0.13604392 -51.653634 0 19.862872 -9.7098575 853.79334 -151.232 0 80.861768
40 647.45541 -1951.1994 -1912.6006 -5883.7147 -2798.3556 17.334807 0.15102863 0 63.23512 0.18070931 -54.598962 0 17.325008 -12.052277 883.01667 -164.21335 0 96.777422
50 716.38057 -1949.4749 -1906.767 5473.2085 -2800.931 9.2056917 0.15413274 0 85.371449 3.2986106 -78.253597 0 34.861773 -8.5531236 882.01435 -193.85275 0 117.2096
60 1175.2707 -1975.9611 -1905.8959 -1939.4971 -2726.5816 -11.651982 0.24296788 0 48.320663 7.1799636 -75.363641 0 16.520132 -4.8869463 844.754 -194.23296 0 119.73837
70 1156.7 -1975.3486 -1906.3905 24628.344 -2880.5223 25.652478 0.26894312 0 83.724884 7.1049303 -68.700942 0 24.750744 -8.6338218 911.20067 -183.4058 0 113.21158
80 840.23687 -1955.4768 -1905.3851 -17731.383 -2755.7295 -8.0168306 0.13867962 0 86.14748 2.2387306 -76.945841 0 23.595858 -7.2609645 853.6346 -167.88289 0 94.603895
90 365.79169 -1926.406 -1904.5989 898.37155 -2842.183 47.368211 0.23109 0 92.288131 0.38031313 -61.361483 0 18.476377 -12.255472 900.24202 -186.48056 0 116.88831
100 801.32078 -1953.4177 -1905.646 -2417.5518 -2802.7244 4.6676973 0.18046558 0 76.730114 5.4177372 -77.102556 0 24.997234 -7.7554179 898.67306 -196.8912 0 120.38952
Loop time of 0.512828 on 1 procs for 100 steps with 21 atoms
Performance: 16.848 ns/day, 1.425 hours/ns, 194.997 timesteps/s
99.4% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.51126 | 0.51126 | 0.51126 | 0.0 | 99.69
Neigh | 0.00071597 | 0.00071597 | 0.00071597 | 0.0 | 0.14
Comm | 0.00040317 | 0.00040317 | 0.00040317 | 0.0 | 0.08
Output | 0.00027037 | 0.00027037 | 0.00027037 | 0.0 | 0.05
Modify | 7.2241e-05 | 7.2241e-05 | 7.2241e-05 | 0.0 | 0.01
Other | | 0.000108 | | | 0.02
Nlocal: 21 ave 21 max 21 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 546 ave 546 max 546 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 1106 ave 1106 max 1106 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 1106
Ave neighs/atom = 52.6667
Neighbor list builds = 10
Dangerous builds not checked
Total wall time: 0:00:00

View File

@ -1,101 +0,0 @@
LAMMPS (5 Oct 2016)
# ReaxFF potential for RDX system
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 2 by 2 MPI processor grid
reading atoms ...
21 atoms
# reax args: hbcut hbnewflag tripflag precision
pair_style reax 6.0 1 1 1.0e-6
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
pair_coeff * * ffield.reax 1 2 3 4
compute reax all pair reax
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
1 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 3 3 3
Memory usage per processor = 3.0718 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1885.1268 -1885.1268 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79972 0 168.8843
10 1281.7558 -1989.1322 -1912.7188 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.020231 3.1070523 -77.710916 0 14.963568 -5.8082204 843.41939 -180.17725 0 107.51148
20 516.8308 -1941.677 -1910.8655 -12525.411 -2801.8626 7.4107973 0.07313419 0 81.986982 0.2281551 -57.494871 0 30.656735 -10.102557 877.78696 -158.93385 0 88.574155
30 467.26411 -1940.978 -1913.1215 -35957.487 -2755.021 -6.9179966 0.049322437 0 78.853175 0.13604391 -51.653634 0 19.862872 -9.7098574 853.79333 -151.232 0 80.861765
40 647.45584 -1951.1994 -1912.6006 -5883.7102 -2798.3557 17.334812 0.15102857 0 63.235124 0.18070914 -54.598951 0 17.325006 -12.052278 883.01674 -164.21335 0 96.777418
50 716.38108 -1949.4679 -1906.76 5473.1803 -2800.9311 9.2057064 0.15413272 0 85.371443 3.2986124 -78.253597 0 34.861778 -8.5531235 882.01441 -193.85213 0 117.21596
60 1175.2703 -1975.9632 -1905.898 -1939.6676 -2726.5815 -11.652032 0.24296779 0 48.320636 7.1799647 -75.363643 0 16.520124 -4.8869416 844.75396 -194.25563 0 119.75889
70 1156.7016 -1975.3469 -1906.3887 24628.125 -2880.5225 25.65252 0.26894309 0 83.724869 7.1048931 -68.700978 0 24.750754 -8.6338341 911.20067 -183.41947 0 113.22722
80 840.3323 -1955.4867 -1905.3893 -17732.956 -2755.7336 -8.0168615 0.13869303 0 86.143454 2.2388975 -76.946365 0 23.594977 -7.2608903 853.63682 -167.88599 0 94.604168
90 365.75853 -1926.4192 -1904.6141 902.29004 -2842.1715 47.360077 0.23110905 0 92.28805 0.38040356 -61.364192 0 18.473252 -12.253964 900.23128 -186.47889 0 116.88518
100 801.64661 -1953.4392 -1905.6481 -2464.5533 -2802.6922 4.6510183 0.18048786 0 76.715675 5.41849 -77.102069 0 24.987058 -7.7531389 898.65974 -196.87724 0 120.37303
Loop time of 0.405054 on 4 procs for 100 steps with 21 atoms
Performance: 21.331 ns/day, 1.125 hours/ns, 246.881 timesteps/s
96.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.16194 | 0.24674 | 0.40012 | 18.4 | 60.92
Neigh | 7.3671e-05 | 0.00024015 | 0.00053477 | 1.1 | 0.06
Comm | 0.0037704 | 0.1575 | 0.24247 | 23.1 | 38.88
Output | 0.00037122 | 0.00040913 | 0.0004406 | 0.1 | 0.10
Modify | 4.22e-05 | 6.175e-05 | 8.3685e-05 | 0.2 | 0.02
Other | | 0.0001087 | | | 0.03
Nlocal: 5.25 ave 15 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Nghost: 355.5 ave 432 max 282 min
Histogram: 1 0 0 0 1 1 0 0 0 1
Neighs: 301.25 ave 827 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Total # of neighbors = 1205
Ave neighs/atom = 57.381
Neighbor list builds = 10
Dangerous builds not checked
Total wall time: 0:00:00

View File

@ -1,104 +0,0 @@
LAMMPS (5 Oct 2016)
# ReaxFF potential for RDX system
# this run is equivalent to reax/in.reax.rdx
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 1 by 1 MPI processor grid
reading atoms ...
21 atoms
pair_style reax/c control.reax_c.rdx
pair_coeff * * ffield.reax C H O N
Reading potential file ffield.reax with DATE: 2010-02-19
compute reax all pair reax/c
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all atom 10 dump.reaxc.rdx
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 3 3 3
Memory usage per processor = 14.4462 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1884.3081 -1884.3081 27186.181 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79931 0 168.88396
10 1288.6116 -1989.6644 -1912.8422 -19456.353 -2734.6769 -15.607221 0.2017796 0 54.629557 3.125229 -77.7067 0 14.933901 -5.8108541 843.92073 -180.43321 0 107.75935
20 538.95819 -1942.7037 -1910.5731 -10725.639 -2803.7394 7.9078269 0.07792668 0 81.610053 0.22951941 -57.557107 0 30.331207 -10.178049 878.99009 -159.68914 0 89.313379
30 463.09535 -1933.5765 -1905.9686 -33255.546 -2749.859 -8.0154745 0.02762893 0 81.627395 0.11972413 -50.262293 0 20.820303 -9.6327015 851.88715 -149.49499 0 79.205727
40 885.49171 -1958.9125 -1906.1229 -4814.6856 -2795.644 9.150669 0.13747498 0 70.947982 0.24360485 -57.862663 0 19.076496 -11.141218 873.73893 -159.99393 0 92.434096
50 861.16578 -1954.4599 -1903.1205 -1896.7713 -2784.845 3.8270515 0.15793266 0 79.851823 3.3492142 -78.06613 0 32.629016 -7.956541 872.81838 -190.98567 0 114.75995
60 1167.7852 -1971.8429 -1902.224 -3482.7305 -2705.863 -17.12171 0.22749077 0 44.507654 7.8560745 -74.788955 0 16.256483 -4.6046431 835.8304 -188.33691 0 114.19413
70 1439.9966 -1989.3024 -1903.4553 23845.651 -2890.7895 31.958845 0.26671721 0 85.758695 3.1803544 -71.002903 0 24.357134 -10.31131 905.86775 -175.38471 0 106.79648
80 502.39438 -1930.7544 -1900.8035 -20356.316 -2703.8115 -18.662467 0.11286011 0 99.804201 2.0329024 -76.171317 0 19.237028 -6.2786907 826.47451 -166.03125 0 92.539398
90 749.08499 -1946.9838 -1902.3262 17798.51 -2863.7576 42.068717 0.2433807 0 96.181613 0.96184887 -69.955448 0 24.615302 -11.582765 903.68818 -190.13843 0 120.69141
100 1109.6968 -1968.5874 -1902.4315 -4490.1018 -2755.8965 -7.1231014 0.21757699 0 61.806018 7.0827673 -75.645345 0 20.114997 -6.2371964 863.5635 -198.56976 0 122.09961
Loop time of 0.362895 on 1 procs for 100 steps with 21 atoms
Performance: 23.809 ns/day, 1.008 hours/ns, 275.562 timesteps/s
100.0% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.34367 | 0.34367 | 0.34367 | 0.0 | 94.70
Neigh | 0.0078354 | 0.0078354 | 0.0078354 | 0.0 | 2.16
Comm | 0.00043559 | 0.00043559 | 0.00043559 | 0.0 | 0.12
Output | 0.00019908 | 0.00019908 | 0.00019908 | 0.0 | 0.05
Modify | 0.010645 | 0.010645 | 0.010645 | 0.0 | 2.93
Other | | 0.0001094 | | | 0.03
Nlocal: 21 ave 21 max 21 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 546 ave 546 max 546 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 1096 ave 1096 max 1096 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 1096
Ave neighs/atom = 52.1905
Neighbor list builds = 10
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:00

View File

@ -1,104 +0,0 @@
LAMMPS (5 Oct 2016)
# ReaxFF potential for RDX system
# this run is equivalent to reax/in.reax.rdx
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 2 by 2 MPI processor grid
reading atoms ...
21 atoms
pair_style reax/c control.reax_c.rdx
pair_coeff * * ffield.reax C H O N
Reading potential file ffield.reax with DATE: 2010-02-19
compute reax all pair reax/c
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all atom 10 dump.reaxc.rdx
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
2 neighbor list requests
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 3 3 3
Memory usage per processor = 12.531 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1884.3081 -1884.3081 27186.18 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79953 0 168.88418
10 1288.6115 -1989.6644 -1912.8422 -19456.354 -2734.6769 -15.60722 0.2017796 0 54.629558 3.1252288 -77.7067 0 14.933901 -5.8108542 843.92073 -180.43321 0 107.75934
20 538.95831 -1942.7037 -1910.5731 -10725.671 -2803.7395 7.9078306 0.077926651 0 81.610051 0.22951926 -57.557099 0 30.331204 -10.178049 878.99014 -159.69268 0 89.316921
30 463.09502 -1933.5765 -1905.9685 -33255.512 -2749.8591 -8.015455 0.027628766 0 81.6274 0.11972393 -50.262275 0 20.820315 -9.6327041 851.88722 -149.49498 0 79.205714
40 885.49378 -1958.9125 -1906.1228 -4814.644 -2795.644 9.1506485 0.13747497 0 70.948 0.24360511 -57.862677 0 19.076502 -11.141216 873.73898 -159.99393 0 92.43409
50 861.16297 -1954.4602 -1903.1209 -1896.8002 -2784.8451 3.8270162 0.157933 0 79.851673 3.3492148 -78.066132 0 32.628944 -7.9565368 872.81852 -190.98572 0 114.76001
60 1167.7835 -1971.8433 -1902.2245 -3482.8296 -2705.8635 -17.121613 0.2274909 0 44.507674 7.85602 -74.788998 0 16.256483 -4.6046575 835.83058 -188.33691 0 114.19414
70 1439.9939 -1989.3026 -1903.4556 23846.042 -2890.7893 31.958672 0.26671708 0 85.758381 3.1804035 -71.002944 0 24.357195 -10.311284 905.8679 -175.38487 0 106.79661
80 502.39535 -1930.7548 -1900.8039 -20356.194 -2703.8126 -18.662209 0.11286005 0 99.803849 2.0329206 -76.171278 0 19.23716 -6.2787147 826.47505 -166.03123 0 92.539386
90 749.07874 -1946.9841 -1902.3269 17798.394 -2863.7576 42.068612 0.24338059 0 96.181423 0.96185061 -69.95542 0 24.615344 -11.582758 903.68812 -190.13826 0 120.69124
100 1109.6904 -1968.5879 -1902.4323 -4490.0667 -2755.8991 -7.1224194 0.21757691 0 61.805857 7.0827218 -75.645383 0 20.115437 -6.23727 863.56487 -198.56975 0 122.09963
Loop time of 0.293673 on 4 procs for 100 steps with 21 atoms
Performance: 29.420 ns/day, 0.816 hours/ns, 340.514 timesteps/s
99.1% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.24143 | 0.24223 | 0.24409 | 0.2 | 82.48
Neigh | 0.003767 | 0.0049117 | 0.0061524 | 1.2 | 1.67
Comm | 0.0030656 | 0.0048578 | 0.0057402 | 1.5 | 1.65
Output | 0.00033545 | 0.00036347 | 0.00038052 | 0.1 | 0.12
Modify | 0.039885 | 0.041207 | 0.042435 | 0.4 | 14.03
Other | | 0.0001001 | | | 0.03
Nlocal: 5.25 ave 15 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Nghost: 355.5 ave 432 max 282 min
Histogram: 1 0 0 0 1 1 0 0 0 1
Neighs: 298.75 ave 822 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Total # of neighbors = 1195
Ave neighs/atom = 56.9048
Neighbor list builds = 10
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:00

View File

@ -0,0 +1,107 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for RDX system
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 1 by 1 MPI processor grid
reading atoms ...
21 atoms
# reax args: hbcut hbnewflag tripflag precision
pair_style reax 6.0 1 1 1.0e-6
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
pair_coeff * * ffield.reax 1 2 3 4
compute reax all pair reax
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25, bins = 3 3 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair reax, perpetual
attributes: half, newton off
pair build: half/bin/newtoff
stencil: half/bin/3d/newtoff
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.278 | 3.278 | 3.278 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1885.1269 -1885.1269 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79973 0 168.8842
10 1281.7558 -1989.1322 -1912.7188 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.02023 3.1070523 -77.710916 0 14.963568 -5.8082203 843.41939 -180.17724 0 107.5115
20 516.83079 -1941.677 -1910.8655 -12525.412 -2801.8626 7.410797 0.073134186 0 81.986983 0.2281551 -57.494871 0 30.656735 -10.102557 877.78695 -158.93385 0 88.574159
30 467.26411 -1940.978 -1913.1215 -35957.489 -2755.021 -6.9179958 0.049322453 0 78.853173 0.13604393 -51.653635 0 19.862871 -9.7098575 853.79334 -151.232 0 80.86177
40 647.45528 -1951.1994 -1912.6006 -5883.713 -2798.3556 17.334814 0.15102862 0 63.235117 0.18070924 -54.598957 0 17.325007 -12.052278 883.0167 -164.21335 0 96.777424
50 716.38088 -1949.4735 -1906.7656 5473.1969 -2800.9309 9.2056861 0.15413274 0 85.371466 3.2986127 -78.253597 0 34.861774 -8.553123 882.01431 -193.85254 0 117.21068
60 1175.2705 -1975.961 -1905.8958 -1939.4966 -2726.5816 -11.651996 0.24296786 0 48.320654 7.1799691 -75.363638 0 16.520127 -4.8869441 844.75401 -194.23297 0 119.73841
70 1156.701 -1975.3497 -1906.3916 24628.304 -2880.5225 25.652501 0.26894311 0 83.724852 7.1049152 -68.70096 0 24.750735 -8.6338267 911.20079 -183.40562 0 113.21047
80 840.23677 -1955.4769 -1905.3851 -17731.334 -2755.7299 -8.0167723 0.1386797 0 86.147417 2.2387319 -76.945843 0 23.595869 -7.260968 853.63487 -167.88288 0 94.603961
90 365.79122 -1926.4061 -1904.599 898.38479 -2842.1832 47.368107 0.23109002 0 92.288071 0.38031213 -61.361485 0 18.476336 -12.25546 900.24233 -186.48046 0 116.88827
100 801.32158 -1953.418 -1905.6462 -2417.6887 -2802.7247 4.6676477 0.18046575 0 76.729987 5.4177322 -77.102566 0 24.997175 -7.7554074 898.67337 -196.89114 0 120.38946
Loop time of 0.463306 on 1 procs for 100 steps with 21 atoms
Performance: 18.649 ns/day, 1.287 hours/ns, 215.840 timesteps/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.46143 | 0.46143 | 0.46143 | 0.0 | 99.60
Neigh | 0.00087953 | 0.00087953 | 0.00087953 | 0.0 | 0.19
Comm | 0.00042653 | 0.00042653 | 0.00042653 | 0.0 | 0.09
Output | 0.00034237 | 0.00034237 | 0.00034237 | 0.0 | 0.07
Modify | 0.00010109 | 0.00010109 | 0.00010109 | 0.0 | 0.02
Other | | 0.000124 | | | 0.03
Nlocal: 21 ave 21 max 21 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 546 ave 546 max 546 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 1106 ave 1106 max 1106 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 1106
Ave neighs/atom = 52.6667
Neighbor list builds = 10
Dangerous builds not checked
Total wall time: 0:00:00

View File

@ -0,0 +1,107 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for RDX system
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 2 by 2 MPI processor grid
reading atoms ...
21 atoms
# reax args: hbcut hbnewflag tripflag precision
pair_style reax 6.0 1 1 1.0e-6
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
pair_coeff * * ffield.reax 1 2 3 4
compute reax all pair reax
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25, bins = 3 3 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair reax, perpetual
attributes: half, newton off
pair build: half/bin/newtoff
stencil: half/bin/3d/newtoff
bin: standard
Per MPI rank memory allocation (min/avg/max) = 3.262 | 3.36 | 3.647 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1885.1268 -1885.1268 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79972 0 168.88428
10 1281.7558 -1989.1322 -1912.7187 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.020231 3.1070523 -77.710916 0 14.963568 -5.8082203 843.41939 -180.17724 0 107.51152
20 516.83079 -1941.677 -1910.8655 -12525.412 -2801.8626 7.410797 0.073134187 0 81.986983 0.2281551 -57.494871 0 30.656735 -10.102557 877.78695 -158.93385 0 88.574168
30 467.26411 -1940.978 -1913.1215 -35957.489 -2755.021 -6.9179959 0.049322449 0 78.853173 0.13604392 -51.653635 0 19.862871 -9.7098575 853.79334 -151.232 0 80.861765
40 647.45479 -1951.1995 -1912.6007 -5883.7199 -2798.3556 17.334805 0.15102868 0 63.235116 0.18070946 -54.59897 0 17.32501 -12.052277 883.0166 -164.21339 0 96.777473
50 716.37927 -1949.466 -1906.7582 5473.2486 -2800.9309 9.2056758 0.15413278 0 85.37143 3.2986099 -78.253596 0 34.861773 -8.5531243 882.01424 -193.85223 0 117.21791
60 1175.2698 -1975.9612 -1905.896 -1939.5206 -2726.5818 -11.651942 0.24296793 0 48.320679 7.1799538 -75.36365 0 16.520134 -4.8869515 844.75405 -194.23289 0 119.7383
70 1156.6963 -1975.3494 -1906.3915 24628.423 -2880.5221 25.65242 0.26894312 0 83.724787 7.1049615 -68.700925 0 24.750729 -8.6338123 911.2006 -183.40591 0 113.21091
80 840.238 -1955.4788 -1905.387 -17731.371 -2755.7301 -8.0167357 0.13868007 0 86.147246 2.2387405 -76.945868 0 23.595868 -7.2609697 853.6349 -167.88312 0 94.602512
90 365.78645 -1926.4072 -1904.6004 898.36945 -2842.1831 47.368307 0.23108998 0 92.288039 0.38031101 -61.361464 0 18.476388 -12.255481 900.24216 -186.48066 0 116.88716
100 801.31322 -1953.4165 -1905.6452 -2417.2041 -2802.7247 4.6678077 0.18046498 0 76.730367 5.4176812 -77.102592 0 24.9973 -7.7554425 898.6732 -196.89097 0 120.39043
Loop time of 0.404551 on 4 procs for 100 steps with 21 atoms
Performance: 21.357 ns/day, 1.124 hours/ns, 247.188 timesteps/s
97.4% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.2191 | 0.28038 | 0.39839 | 13.2 | 69.31
Neigh | 5.8651e-05 | 0.00025928 | 0.00062203 | 0.0 | 0.06
Comm | 0.0046599 | 0.12307 | 0.1845 | 19.9 | 30.42
Output | 0.00055337 | 0.00062728 | 0.00071192 | 0.0 | 0.16
Modify | 5.3167e-05 | 7.844e-05 | 0.00010109 | 0.0 | 0.02
Other | | 0.0001363 | | | 0.03
Nlocal: 5.25 ave 15 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Nghost: 355.5 ave 432 max 282 min
Histogram: 1 0 0 0 1 1 0 0 0 1
Neighs: 301.25 ave 827 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Total # of neighbors = 1205
Ave neighs/atom = 57.381
Neighbor list builds = 10
Dangerous builds not checked
Total wall time: 0:00:00

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for TATB system
units real
@ -12,7 +13,7 @@ read_data data.tatb
# reax args: hbcut hbnewflag tripflag precision
pair_style reax 6.0 1 1 1.0e-6
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
pair_coeff * * ffield.reax 1 2 3 4
compute reax all pair reax
@ -54,34 +55,39 @@ fix 2 all reax/bonds 25 bonds.reax.tatb
run 25
Neighbor list info ...
1 neighbor list requests
update every 5 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 5 4 3
Memory usage per processor = 6.61277 Mbytes
binsize = 6.25, bins = 5 4 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair reax, perpetual
attributes: half, newton off
pair build: half/bin/newtoff
stencil: half/bin/3d/newtoff
bin: standard
Per MPI rank memory allocation (min/avg/max) = 7.764 | 7.764 | 7.764 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -44767.08 -44767.08 7294.6353 -61120.591 486.4378 4.7236377 0 1568.024 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6378 0 6391.0231
5 0.63682807 -44767.737 -44767.01 8391.5966 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.2727 0 6388.8127
10 2.4306957 -44769.41 -44766.635 11717.369 -61113.142 487.89093 4.7227063 0 1567.2936 20.705084 -274.37509 -1560.8546 252.87219 -655.43578 18850.19 -8731.0713 0 6381.7946
15 5.0590478 -44772.63 -44766.854 17125.033 -61103.34 489.28007 4.7214008 0 1566.4744 20.590604 -268.28963 -1566.5961 252.97781 -654.93836 18835.335 -8719.3112 0 6370.4665
20 8.0678579 -44775.923 -44766.713 24620.824 -61088.791 490.42346 4.7193467 0 1565.5541 20.415031 -260.38512 -1574.1001 253.39805 -654.26837 18815.312 -8703.3104 0 6355.1097
25 10.975539 -44777.231 -44764.701 34381.278 -61068.889 490.53149 4.7164093 0 1566.5715 20.169755 -251.2311 -1582.8552 253.88696 -653.46042 18790.855 -8683.8362 0 6336.3099
Loop time of 7.48375 on 1 procs for 25 steps with 384 atoms
5 0.63682806 -44767.737 -44767.01 8391.5964 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.2728 0 6388.8127
10 2.4306958 -44769.409 -44766.634 11717.376 -61113.142 487.89093 4.7227063 0 1567.2936 20.705084 -274.37509 -1560.8546 252.87219 -655.43578 18850.19 -8731.0693 0 6381.7942
15 5.0590493 -44772.631 -44766.855 17125.067 -61103.34 489.28007 4.7214008 0 1566.4744 20.590604 -268.28962 -1566.5961 252.97781 -654.93836 18835.335 -8719.3013 0 6370.4551
20 8.067859 -44775.936 -44766.725 24620.627 -61088.791 490.42346 4.7193467 0 1565.5541 20.415031 -260.38512 -1574.1001 253.39805 -654.26837 18815.312 -8703.3748 0 6355.1614
25 10.975538 -44777.233 -44764.702 34381.173 -61068.889 490.53149 4.7164093 0 1566.5715 20.169755 -251.23109 -1582.8552 253.88696 -653.46042 18790.855 -8683.8691 0 6336.3409
Loop time of 7.80129 on 1 procs for 25 steps with 384 atoms
Performance: 0.018 ns/day, 1330.444 hours/ns, 3.341 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 0.017 ns/day, 1386.896 hours/ns, 3.205 timesteps/s
99.5% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 7.4284 | 7.4284 | 7.4284 | 0.0 | 99.26
Neigh | 0.051549 | 0.051549 | 0.051549 | 0.0 | 0.69
Comm | 0.0021887 | 0.0021887 | 0.0021887 | 0.0 | 0.03
Output | 0.00025821 | 0.00025821 | 0.00025821 | 0.0 | 0.00
Modify | 0.00099206 | 0.00099206 | 0.00099206 | 0.0 | 0.01
Other | | 0.0003154 | | | 0.00
Pair | 7.7384 | 7.7384 | 7.7384 | 0.0 | 99.19
Neigh | 0.058615 | 0.058615 | 0.058615 | 0.0 | 0.75
Comm | 0.0022428 | 0.0022428 | 0.0022428 | 0.0 | 0.03
Output | 0.00033212 | 0.00033212 | 0.00033212 | 0.0 | 0.00
Modify | 0.0013618 | 0.0013618 | 0.0013618 | 0.0 | 0.02
Other | | 0.0003309 | | | 0.00
Nlocal: 384 ave 384 max 384 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -94,4 +100,4 @@ Total # of neighbors = 286828
Ave neighs/atom = 746.948
Neighbor list builds = 5
Dangerous builds not checked
Total wall time: 0:00:07
Total wall time: 0:00:08

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for TATB system
units real
@ -12,7 +13,7 @@ read_data data.tatb
# reax args: hbcut hbnewflag tripflag precision
pair_style reax 6.0 1 1 1.0e-6
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
pair_coeff * * ffield.reax 1 2 3 4
compute reax all pair reax
@ -54,34 +55,39 @@ fix 2 all reax/bonds 25 bonds.reax.tatb
run 25
Neighbor list info ...
1 neighbor list requests
update every 5 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 5 4 3
Memory usage per processor = 4.03843 Mbytes
binsize = 6.25, bins = 5 4 3
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair reax, perpetual
attributes: half, newton off
pair build: half/bin/newtoff
stencil: half/bin/3d/newtoff
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.402 | 4.402 | 4.402 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -44767.08 -44767.08 7294.6353 -61120.591 486.4378 4.7236377 0 1568.024 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6378 0 6391.0231
5 0.63682726 -44767.816 -44767.089 8391.165 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.3995 0 6388.86
10 2.4306905 -44769.408 -44766.633 11717.247 -61113.142 487.89094 4.7227063 0 1567.2936 20.705084 -274.3751 -1560.8546 252.87219 -655.43578 18850.19 -8731.0965 0 6381.8216
15 5.0590422 -44772.626 -44766.85 17124.943 -61103.34 489.2801 4.7214008 0 1566.4744 20.590604 -268.28963 -1566.5961 252.97781 -654.93836 18835.335 -8719.3383 0 6370.4973
20 8.0678512 -44775.934 -44766.723 24620.531 -61088.791 490.42349 4.7193467 0 1565.5541 20.415031 -260.38513 -1574.1001 253.39804 -654.26837 18815.312 -8703.4033 0 6355.1921
25 10.97553 -44777.231 -44764.701 34381.242 -61068.889 490.53154 4.7164093 0 1566.5715 20.169755 -251.23111 -1582.8552 253.88696 -653.46042 18790.855 -8683.8451 0 6336.3185
Loop time of 3.27945 on 4 procs for 25 steps with 384 atoms
5 0.63682727 -44767.816 -44767.089 8391.1708 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.3973 0 6388.8581
10 2.4306941 -44769.405 -44766.63 11717.306 -61113.142 487.89094 4.7227063 0 1567.2936 20.705084 -274.3751 -1560.8546 252.87219 -655.43578 18850.19 -8731.08 0 6381.8083
15 5.0590444 -44772.6 -44766.824 17125.207 -61103.34 489.28008 4.7214008 0 1566.4744 20.590604 -268.28963 -1566.5961 252.97781 -654.93836 18835.335 -8719.2653 0 6370.4505
20 8.0678523 -44775.983 -44766.772 24620.114 -61088.791 490.42348 4.7193467 0 1565.5541 20.415031 -260.38513 -1574.1001 253.39804 -654.26837 18815.312 -8703.5228 0 6355.2629
25 10.975532 -44777.234 -44764.704 34381.065 -61068.889 490.53151 4.7164093 0 1566.5715 20.169755 -251.23111 -1582.8552 253.88696 -653.46042 18790.855 -8683.898 0 6336.3682
Loop time of 3.74388 on 4 procs for 25 steps with 384 atoms
Performance: 0.041 ns/day, 583.013 hours/ns, 7.623 timesteps/s
99.8% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 0.036 ns/day, 665.579 hours/ns, 6.678 timesteps/s
98.7% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.0329 | 3.1456 | 3.2612 | 5.2 | 95.92
Neigh | 0.011087 | 0.011261 | 0.011608 | 0.2 | 0.34
Comm | 0.0057111 | 0.12121 | 0.23398 | 26.2 | 3.70
Output | 0.00039172 | 0.0005855 | 0.00080633 | 0.6 | 0.02
Modify | 0.00035787 | 0.00059456 | 0.00082469 | 0.7 | 0.02
Other | | 0.0002265 | | | 0.01
Pair | 3.478 | 3.6025 | 3.7215 | 4.8 | 96.22
Neigh | 0.012731 | 0.01299 | 0.013174 | 0.2 | 0.35
Comm | 0.0073411 | 0.12653 | 0.25119 | 25.4 | 3.38
Output | 0.00050354 | 0.00081849 | 0.0011628 | 0.0 | 0.02
Modify | 0.00049281 | 0.00082356 | 0.001157 | 0.0 | 0.02
Other | | 0.0002663 | | | 0.01
Nlocal: 96 ave 96 max 96 min
Histogram: 4 0 0 0 0 0 0 0 0 0

View File

@ -0,0 +1,115 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for RDX system
# this run is equivalent to reax/in.reax.rdx
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 1 by 1 MPI processor grid
reading atoms ...
21 atoms
pair_style reax/c control.reax_c.rdx
pair_coeff * * ffield.reax C H O N
Reading potential file ffield.reax with DATE: 2010-02-19
compute reax all pair reax/c
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all atom 10 dump.reaxc.rdx
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 15.28 | 15.28 | 15.28 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1884.3081 -1884.3081 27186.181 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79937 0 168.88402
10 1288.6114 -1989.6644 -1912.8422 -19456.35 -2734.6769 -15.607219 0.20177961 0 54.629556 3.1252294 -77.7067 0 14.933901 -5.8108541 843.92074 -180.43322 0 107.75935
20 538.95849 -1942.7037 -1910.5731 -10725.658 -2803.7395 7.9078331 0.077926702 0 81.610043 0.22951937 -57.557104 0 30.331203 -10.178049 878.99015 -159.69092 0 89.315159
30 463.09542 -1933.5765 -1905.9685 -33255.507 -2749.8591 -8.0154628 0.027628767 0 81.627403 0.11972403 -50.262284 0 20.82032 -9.6327022 851.88722 -149.495 0 79.205731
40 885.49449 -1958.9126 -1906.1228 -4814.7123 -2795.644 9.1506221 0.1374749 0 70.948046 0.24360579 -57.8627 0 19.076515 -11.141211 873.73892 -159.9939 0 92.434059
50 861.1646 -1954.4599 -1903.1206 -1896.7387 -2784.8446 3.8269113 0.1579328 0 79.851775 3.3492107 -78.066127 0 32.628975 -7.9565255 872.81826 -190.98565 0 114.75994
60 1167.785 -1971.8432 -1902.2243 -3482.6975 -2705.8638 -17.121582 0.22749067 0 44.507705 7.856069 -74.788959 0 16.256519 -4.6046602 835.8308 -188.33691 0 114.19414
70 1439.9947 -1989.3024 -1903.4554 23845.067 -2890.7896 31.958874 0.26671735 0 85.758608 3.1803486 -71.002907 0 24.357106 -10.311315 905.86799 -175.38482 0 106.79659
80 502.40024 -1930.7547 -1900.8035 -20356.557 -2703.8096 -18.663105 0.11286226 0 99.803799 2.0329394 -76.171387 0 19.236609 -6.2786041 826.47358 -166.03157 0 92.539694
90 749.09267 -1946.9834 -1902.3254 17798.812 -2863.7586 42.068927 0.24338042 0 96.18195 0.96181754 -69.955528 0 24.61541 -11.58277 903.68895 -190.13838 0 120.69139
100 1109.7046 -1968.5875 -1902.4311 -4490.6736 -2755.8953 -7.1235173 0.21757663 0 61.806405 7.0825933 -75.645487 0 20.114745 -6.2371664 863.56285 -198.56939 0 122.09923
Loop time of 0.395195 on 1 procs for 100 steps with 21 atoms
Performance: 21.863 ns/day, 1.098 hours/ns, 253.039 timesteps/s
99.3% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.3722 | 0.3722 | 0.3722 | 0.0 | 94.18
Neigh | 0.0098455 | 0.0098455 | 0.0098455 | 0.0 | 2.49
Comm | 0.00047445 | 0.00047445 | 0.00047445 | 0.0 | 0.12
Output | 0.00034022 | 0.00034022 | 0.00034022 | 0.0 | 0.09
Modify | 0.012187 | 0.012187 | 0.012187 | 0.0 | 3.08
Other | | 0.0001521 | | | 0.04
Nlocal: 21 ave 21 max 21 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 546 ave 546 max 546 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 1096 ave 1096 max 1096 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 1096
Ave neighs/atom = 52.1905
Neighbor list builds = 10
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:00

View File

@ -0,0 +1,115 @@
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for RDX system
# this run is equivalent to reax/in.reax.rdx
units real
atom_style charge
read_data data.rdx
orthogonal box = (35 35 35) to (48 48 48)
1 by 2 by 2 MPI processor grid
reading atoms ...
21 atoms
pair_style reax/c control.reax_c.rdx
pair_coeff * * ffield.reax C H O N
Reading potential file ffield.reax with DATE: 2010-02-19
compute reax all pair reax/c
variable eb equal c_reax[1]
variable ea equal c_reax[2]
variable elp equal c_reax[3]
variable emol equal c_reax[4]
variable ev equal c_reax[5]
variable epen equal c_reax[6]
variable ecoa equal c_reax[7]
variable ehb equal c_reax[8]
variable et equal c_reax[9]
variable eco equal c_reax[10]
variable ew equal c_reax[11]
variable ep equal c_reax[12]
variable efi equal c_reax[13]
variable eqeq equal c_reax[14]
neighbor 2.5 bin
neigh_modify every 10 delay 0 check no
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
thermo 10
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
timestep 1.0
#dump 1 all atom 10 dump.reaxc.rdx
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 2 pad 3
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
#dump_modify 3 pad 3
run 100
Neighbor list info ...
update every 10 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25, bins = 3 3 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 10.37 | 11.76 | 13.34 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -1884.3081 -1884.3081 27186.178 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79988 0 168.88453
10 1288.6115 -1989.6644 -1912.8422 -19456.354 -2734.6769 -15.60722 0.2017796 0 54.629558 3.1252286 -77.7067 0 14.933902 -5.8108544 843.92073 -180.43321 0 107.75934
20 538.95818 -1942.7037 -1910.5731 -10725.623 -2803.7394 7.9078307 0.077926702 0 81.61005 0.22951942 -57.557107 0 30.331206 -10.178049 878.9901 -159.68951 0 89.313749
30 463.09514 -1933.5765 -1905.9685 -33255.525 -2749.859 -8.0154737 0.027628797 0 81.627408 0.11972402 -50.262283 0 20.82031 -9.6327021 851.88715 -149.49499 0 79.205724
40 885.49412 -1958.9125 -1906.1227 -4814.6606 -2795.6439 9.150622 0.13747487 0 70.948029 0.24360517 -57.862679 0 19.076509 -11.141214 873.7389 -159.99392 0 92.434078
50 861.16393 -1954.46 -1903.1207 -1896.7323 -2784.8449 3.8270197 0.1579328 0 79.851743 3.3492115 -78.066132 0 32.628992 -7.9565379 872.81841 -190.98568 0 114.75996
60 1167.7846 -1971.8432 -1902.2243 -3482.8111 -2705.8633 -17.121657 0.2274907 0 44.507681 7.8560366 -74.788989 0 16.256493 -4.6046537 835.8305 -188.33687 0 114.1941
70 1439.9942 -1989.3023 -1903.4554 23845.444 -2890.7894 31.958784 0.26671721 0 85.758586 3.1803655 -71.002918 0 24.357158 -10.311304 905.86792 -175.38481 0 106.79657
80 502.3975 -1930.7546 -1900.8036 -20356.439 -2703.8105 -18.662812 0.11286123 0 99.80391 2.0329293 -76.171334 0 19.236803 -6.2786439 826.47397 -166.03141 0 92.539551
90 749.09048 -1946.9837 -1902.3258 17798.718 -2863.7582 42.068719 0.24338057 0 96.181773 0.96183581 -69.955529 0 24.615414 -11.582758 903.68862 -190.1384 0 120.69139
100 1109.6999 -1968.5875 -1902.4314 -4490.3728 -2755.8964 -7.1231468 0.21757685 0 61.806149 7.0826648 -75.645428 0 20.115002 -6.2371958 863.56343 -198.56957 0 122.09942
Loop time of 0.329552 on 4 procs for 100 steps with 21 atoms
Performance: 26.217 ns/day, 0.915 hours/ns, 303.443 timesteps/s
96.9% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.26372 | 0.26499 | 0.26754 | 0.3 | 80.41
Neigh | 0.0045478 | 0.0062494 | 0.0076699 | 1.5 | 1.90
Comm | 0.0041637 | 0.0064691 | 0.0080271 | 1.8 | 1.96
Output | 0.00054169 | 0.00056636 | 0.00060368 | 0.0 | 0.17
Modify | 0.049433 | 0.051134 | 0.05311 | 0.6 | 15.52
Other | | 0.000141 | | | 0.04
Nlocal: 5.25 ave 15 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Nghost: 355.5 ave 432 max 282 min
Histogram: 1 0 0 0 1 1 0 0 0 1
Neighs: 298.75 ave 822 max 0 min
Histogram: 1 0 2 0 0 0 0 0 0 1
Total # of neighbors = 1195
Ave neighs/atom = 56.9048
Neighbor list builds = 10
Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:00

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for TATB system
# this run is equivalent to reax/in.reax.tatb,
@ -56,34 +57,44 @@ fix 3 all reax/c/species 1 5 5 species.tatb
run 25
Neighbor list info ...
2 neighbor list requests
update every 5 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 5 4 3
Memory usage per processor = 155.82 Mbytes
binsize = 6.25, bins = 5 4 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 176.7 | 176.7 | 176.7 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -44760.998 -44760.998 7827.7879 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6394 0 6391.0274
5 0.61603942 -44761.698 -44760.994 8934.6281 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.193 0 6388.6691
10 2.3525551 -44763.227 -44760.541 12288.607 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50358 -1560.7569 252.85309 -655.44063 18850.391 -8730.9688 0 6381.7066
15 4.9013326 -44766.36 -44760.764 17717.015 -61103.434 489.14721 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.237 0 6370.4033
20 7.829471 -44769.686 -44760.747 25205.558 -61089.006 490.21313 4.719302 0 1571.7022 20.420943 -260.85565 -1573.7378 253.3539 -654.31623 18816.07 -8703.5091 0 6355.2604
25 10.697926 -44772.904 -44760.691 34232.793 -61069.308 490.25886 4.7163736 0 1570.7397 20.181346 -251.91377 -1582.3261 253.82253 -653.53184 18791.975 -8684.3608 0 6336.8416
Loop time of 4.34725 on 1 procs for 25 steps with 384 atoms
0 0 -44760.998 -44760.998 7827.7874 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6395 0 6391.0275
5 0.61603968 -44761.698 -44760.994 8934.6347 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.1911 0 6388.6671
10 2.3525551 -44763.227 -44760.541 12288.583 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50357 -1560.7569 252.85309 -655.44063 18850.391 -8730.9768 0 6381.7146
15 4.9013279 -44766.36 -44760.764 17717.01 -61103.434 489.14722 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.2375 0 6370.4038
20 7.8294645 -44769.686 -44760.747 25205.624 -61089.006 490.21314 4.719302 0 1571.7022 20.420943 -260.85564 -1573.7378 253.3539 -654.31623 18816.07 -8703.4889 0 6355.2402
25 10.697904 -44772.904 -44760.691 34232.965 -61069.308 490.25888 4.7163736 0 1570.7397 20.181346 -251.91377 -1582.3261 253.82253 -653.53184 18791.975 -8684.3125 0 6336.7934
Loop time of 4.72562 on 1 procs for 25 steps with 384 atoms
Performance: 0.031 ns/day, 772.845 hours/ns, 5.751 timesteps/s
99.8% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 0.029 ns/day, 840.110 hours/ns, 5.290 timesteps/s
99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 3.5264 | 3.5264 | 3.5264 | 0.0 | 81.12
Neigh | 0.40335 | 0.40335 | 0.40335 | 0.0 | 9.28
Comm | 0.0021031 | 0.0021031 | 0.0021031 | 0.0 | 0.05
Output | 0.00019765 | 0.00019765 | 0.00019765 | 0.0 | 0.00
Modify | 0.41479 | 0.41479 | 0.41479 | 0.0 | 9.54
Other | | 0.0004084 | | | 0.01
Pair | 3.775 | 3.775 | 3.775 | 0.0 | 79.88
Neigh | 0.47047 | 0.47047 | 0.47047 | 0.0 | 9.96
Comm | 0.0025151 | 0.0025151 | 0.0025151 | 0.0 | 0.05
Output | 0.0003159 | 0.0003159 | 0.0003159 | 0.0 | 0.01
Modify | 0.47676 | 0.47676 | 0.47676 | 0.0 | 10.09
Other | | 0.0005293 | | | 0.01
Nlocal: 384 ave 384 max 384 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -99,4 +110,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:04
Total wall time: 0:00:05

View File

@ -1,4 +1,5 @@
LAMMPS (5 Oct 2016)
LAMMPS (8 Mar 2018)
using 1 OpenMP thread(s) per MPI task
# ReaxFF potential for TATB system
# this run is equivalent to reax/in.reax.tatb,
@ -56,34 +57,44 @@ fix 3 all reax/c/species 1 5 5 species.tatb
run 25
Neighbor list info ...
2 neighbor list requests
update every 5 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12.5
ghost atom cutoff = 12.5
binsize = 6.25 -> bins = 5 4 3
Memory usage per processor = 105.386 Mbytes
binsize = 6.25, bins = 5 4 3
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair reax/c, perpetual
attributes: half, newton off, ghost
pair build: half/bin/newtoff/ghost
stencil: half/ghost/bin/3d/newtoff
bin: standard
(2) fix qeq/reax, perpetual, copy from (1)
attributes: half, newton off, ghost
pair build: copy
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 118 | 118 | 118 Mbytes
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
0 0 -44760.998 -44760.998 7827.7867 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6397 0 6391.0277
5 0.61603967 -44761.698 -44760.994 8934.6339 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.1905 0 6388.6665
10 2.3525545 -44763.227 -44760.541 12288.586 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50357 -1560.7569 252.85309 -655.44063 18850.391 -8730.9762 0 6381.714
15 4.9013281 -44766.36 -44760.764 17716.982 -61103.434 489.14722 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.2476 0 6370.4138
20 7.8294637 -44769.686 -44760.747 25205.512 -61089.006 490.21314 4.719302 0 1571.7022 20.420943 -260.85565 -1573.7378 253.3539 -654.31623 18816.07 -8703.518 0 6355.2692
25 10.697905 -44772.904 -44760.691 34232.815 -61069.308 490.25887 4.7163736 0 1570.7397 20.181346 -251.91377 -1582.3261 253.82253 -653.53184 18791.975 -8684.3481 0 6336.829
Loop time of 2.60733 on 4 procs for 25 steps with 384 atoms
0 0 -44760.998 -44760.998 7827.7866 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6398 0 6391.0277
5 0.61603968 -44761.698 -44760.994 8934.6335 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.1906 0 6388.6666
10 2.3525544 -44763.227 -44760.541 12288.587 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50357 -1560.7569 252.85309 -655.44063 18850.391 -8730.9764 0 6381.7141
15 4.9013311 -44766.36 -44760.764 17716.955 -61103.434 489.14721 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.2558 0 6370.4221
20 7.8294715 -44769.686 -44760.747 25205.613 -61089.006 490.21314 4.7193021 0 1571.7022 20.420943 -260.85564 -1573.7378 253.3539 -654.31623 18816.07 -8703.4906 0 6355.2419
25 10.697924 -44772.904 -44760.691 34232.794 -61069.308 490.25886 4.7163736 0 1570.7397 20.181347 -251.91376 -1582.3261 253.82253 -653.53183 18791.975 -8684.3641 0 6336.8449
Loop time of 2.84068 on 4 procs for 25 steps with 384 atoms
Performance: 0.052 ns/day, 463.526 hours/ns, 9.588 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
Performance: 0.048 ns/day, 505.009 hours/ns, 8.801 timesteps/s
98.4% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 2.1835 | 2.1843 | 2.1854 | 0.0 | 83.77
Neigh | 0.22091 | 0.22364 | 0.22821 | 0.6 | 8.58
Comm | 0.005677 | 0.0069622 | 0.0078082 | 1.0 | 0.27
Output | 0.00036621 | 0.0028675 | 0.0037034 | 2.7 | 0.11
Modify | 0.18736 | 0.18921 | 0.19102 | 0.4 | 7.26
Other | | 0.0003636 | | | 0.01
Pair | 2.3253 | 2.328 | 2.3305 | 0.2 | 81.95
Neigh | 0.2589 | 0.26458 | 0.26897 | 0.7 | 9.31
Comm | 0.0094428 | 0.012062 | 0.014872 | 2.3 | 0.42
Output | 0.00043392 | 0.0042209 | 0.0054941 | 3.4 | 0.15
Modify | 0.22563 | 0.23134 | 0.23579 | 0.8 | 8.14
Other | | 0.0005122 | | | 0.02
Nlocal: 96 ave 96 max 96 min
Histogram: 4 0 0 0 0 0 0 0 0 0
@ -99,4 +110,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:02
Total wall time: 0:00:03

View File

@ -1,5 +1,49 @@
# Change Log
## [2.6.00](https://github.com/kokkos/kokkos/tree/2.6.00) (2018-03-07)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.5.00...2.6.00)
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6**
**Implemented enhancements:**
- Support NVIDIA Volta microarchitecture [\#1466](https://github.com/kokkos/kokkos/issues/1466)
- Kokkos - Define empty functions when profiling disabled [\#1424](https://github.com/kokkos/kokkos/issues/1424)
- Don't use \_\_constant\_\_ cache for lock arrays, enable once per run update instead of once per call [\#1385](https://github.com/kokkos/kokkos/issues/1385)
- task dag enhancement. [\#1354](https://github.com/kokkos/kokkos/issues/1354)
- Cuda task team collectives and stack size [\#1353](https://github.com/kokkos/kokkos/issues/1353)
- Replace View operator acceptance of more than rank integers with 'access' function [\#1333](https://github.com/kokkos/kokkos/issues/1333)
- Interoperability: Do not shut down backend execution space runtimes upon calling finalize. [\#1305](https://github.com/kokkos/kokkos/issues/1305)
- shmem\_size for LayoutStride [\#1291](https://github.com/kokkos/kokkos/issues/1291)
- Kokkos::resize performs poorly on 1D Views [\#1270](https://github.com/kokkos/kokkos/issues/1270)
- stride\(\) is inconsistent with dimension\(\), extent\(\), etc. [\#1214](https://github.com/kokkos/kokkos/issues/1214)
- Kokkos::sort defaults to std::sort on host [\#1208](https://github.com/kokkos/kokkos/issues/1208)
- DynamicView with host size grow [\#1206](https://github.com/kokkos/kokkos/issues/1206)
- Unmanaged View with Anonymous Memory Space [\#1175](https://github.com/kokkos/kokkos/issues/1175)
- Sort subset of Kokkos::DynamicView [\#1160](https://github.com/kokkos/kokkos/issues/1160)
- MDRange policy doesn't support lambda reductions [\#1054](https://github.com/kokkos/kokkos/issues/1054)
- Add ability to set hook on Kokkos::finalize [\#714](https://github.com/kokkos/kokkos/issues/714)
- Atomics with Serial Backend - Default should be Disable? [\#549](https://github.com/kokkos/kokkos/issues/549)
- KOKKOS\_ENABLE\_DEPRECATED\_CODE [\#1359](https://github.com/kokkos/kokkos/issues/1359)
**Fixed bugs:**
- cuda\_internal\_maximum\_warp\_count returns 8, but I believe it should return 16 for P100 [\#1269](https://github.com/kokkos/kokkos/issues/1269)
- Cuda: level 1 scratch memory bug \(reported by Stan Moore\) [\#1434](https://github.com/kokkos/kokkos/issues/1434)
- MDRangePolicy Reduction requires value\_type typedef in Functor [\#1379](https://github.com/kokkos/kokkos/issues/1379)
- Kokkos DeepCopy between empty views fails [\#1369](https://github.com/kokkos/kokkos/issues/1369)
- Several issues with new CMake build infrastructure \(reported by Eric Phipps\) [\#1365](https://github.com/kokkos/kokkos/issues/1365)
- deep\_copy between rank-1 host/device views of differing layouts without UVM no longer works \(reported by Eric Phipps\) [\#1363](https://github.com/kokkos/kokkos/issues/1363)
- Profiling can't be disabled in CMake, and a parallel\_for is missing for tasks \(reported by Kyungjoo Kim\) [\#1349](https://github.com/kokkos/kokkos/issues/1349)
- get\_work\_partition int overflow \(reported by berryj5\) [\#1327](https://github.com/kokkos/kokkos/issues/1327)
- Kokkos::deep\_copy must fence even if the two views are the same [\#1303](https://github.com/kokkos/kokkos/issues/1303)
- CudaUVMSpace::allocate/deallocate must fence [\#1302](https://github.com/kokkos/kokkos/issues/1302)
- ViewResize on CUDA fails in Debug because of too many resources requested [\#1299](https://github.com/kokkos/kokkos/issues/1299)
- Cuda 9 and intrepid2 calls from Panzer. [\#1183](https://github.com/kokkos/kokkos/issues/1183)
- Slowdown due to tracking\_enabled\(\) in 2.04.00 \(found by Albany app\) [\#1016](https://github.com/kokkos/kokkos/issues/1016)
- Bounds checking fails with zero-span Views \(reported by Stan Moore\) [\#1411](https://github.com/kokkos/kokkos/issues/1411)
## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00)

View File

@ -7,7 +7,7 @@ ELSE()
ENDIF()
IF(NOT KOKKOS_HAS_TRILINOS)
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
# Define Project Name if this is a standalone build
IF(NOT DEFINED ${PROJECT_NAME})
@ -37,9 +37,19 @@ IF(NOT KOKKOS_HAS_TRILINOS)
COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings
WORKING_DIRECTORY "${Kokkos_BINARY_DIR}"
OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out
RESULT_VARIABLE res
RESULT_VARIABLE GEN_SETTINGS_RESULT
)
if (GEN_SETTINGS_RESULT)
message(FATAL_ERROR "Kokkos settings generation failed:\n"
"${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings")
endif()
include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake)
string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}")
string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}")
string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}")
list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "")
list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "")
list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_NAMES "")
set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})
#------------ NOW BUILD ------------------------------------------------------

View File

@ -34,7 +34,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -19,7 +19,7 @@ snapshot Kokkos from github.com/kokkos to Trilinos.
3) Snapshot the current commit in the Kokkos clone into the Trilinos clone.
This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}:
${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
${KOKKOS}/scripts/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
4) Verify the snapshot commit happened as expected
cd ${TRILINOS}/packages/kokkos

View File

@ -36,7 +36,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -9,8 +9,8 @@ KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61
# ARM: ARMv80,ARMv81,ARMv8-ThunderX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
# AMD-CPUS: AMDAVX,Ryzen,Epyc
@ -21,7 +21,7 @@ KOKKOS_DEBUG ?= "no"
KOKKOS_USE_TPLS ?= ""
# Options: c++11,c++1z
KOKKOS_CXX_STANDARD ?= "c++11"
# Options: aggressive_vectorization,disable_profiling
# Options: aggressive_vectorization,disable_profiling,disable_deprecated_code
KOKKOS_OPTIONS ?= ""
# Default settings specific options.
@ -48,6 +48,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg)
@ -93,7 +94,7 @@ KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VE
KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI)
KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l))
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l))
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin)
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
@ -229,12 +230,16 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ma
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell53)
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61)
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@ -249,6 +254,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
@ -267,7 +274,8 @@ endif
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80)
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81)
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc))
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2) | bc))
# IBM based.
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
@ -316,6 +324,9 @@ endif
# Generating the list of Flags.
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
KOKKOS_TPL_INCLUDE_DIRS =
KOKKOS_TPL_LIBRARY_DIRS =
KOKKOS_TPL_LIBRARY_NAMES =
KOKKOS_CXXFLAGS =
ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
@ -323,7 +334,9 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
endif
KOKKOS_LIBS = -ldl
KOKKOS_TPL_LIBRARY_NAMES += dl
KOKKOS_LDFLAGS = -L$(shell pwd)
KOKKOS_LINK_FLAGS =
KOKKOS_SRC =
KOKKOS_HEADERS =
@ -437,21 +450,32 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
ifneq ($(HWLOC_PATH),)
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib
endif
KOKKOS_LIBS += -lhwloc
KOKKOS_TPL_LIBRARY_NAMES += hwloc
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HWLOC")
endif
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
KOKKOS_LIBS += -lrt
KOKKOS_TPL_LIBRARY_NAMES += rt
endif
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
ifneq ($(MEMKIND_PATH),)
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib
endif
KOKKOS_LIBS += -lmemkind -lnuma
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HBWSPACE")
endif
@ -459,6 +483,10 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
endif
ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
endif
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
@ -560,6 +588,24 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2")
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
KOKKOS_CXXFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99
KOKKOS_LDFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
@ -754,10 +800,11 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
KOKKOS_CXXFLAGS += -x cuda
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
KOKKOS_CXXFLAGS += -x cuda
else
$(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
@ -805,6 +852,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
endif
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
@ -850,6 +907,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
KOKKOS_TPL_LIBRARY_NAMES += hc_am m
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
@ -880,13 +938,17 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
KOKKOS_LIBS += -lcudart -lcuda
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
ifneq ($(CUDA_PATH),)
KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
endif
endif
KOKKOS_LIBS += -lcudart -lcuda
KOKKOS_TPL_LIBRARY_NAMES += cudart cuda
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
@ -911,20 +973,27 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
endif
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
KOKKOS_LINK_FLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
KOKKOS_LIBS += -lpthread
KOKKOS_TPL_LIBRARY_NAMES += pthread
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
ifneq ($(QTHREADS_PATH),)
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include
KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64
endif
KOKKOS_LIBS += -lqthread
KOKKOS_TPL_LIBRARY_NAMES += qthread
endif
# Explicitly set the GCC Toolchain for Clang.
@ -940,11 +1009,6 @@ ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
endif
# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning.
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1)
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC))
endif
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
# device to avoid a link warning.
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)

View File

@ -1,87 +1,101 @@
Kokkos implements a programming model in C++ for writing performance portable
Kokkos Core implements a programming model in C++ for writing performance portable
applications targeting all major HPC platforms. For that purpose it provides
abstractions for both parallel execution of code and data management.
Kokkos is designed to target complex node architectures with N-level memory
hierarchies and multiple types of execution resources. It currently can use
OpenMP, Pthreads and CUDA as backend programming models.
Kokkos is licensed under standard 3-clause BSD terms of use. For specifics
see the LICENSE file contained in the repository or distribution.
Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem,
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
The core developers of Kokkos are Carter Edwards and Christian Trott
at the Computer Science Research Institute of the Sandia National
Laboratories.
# Learning about Kokkos
The KokkosP interface and associated tools are developed by the Application
Performance Team and Kokkos core developers at Sandia National Laboratories.
A programming guide can be found on the Wiki; the API reference is under development.
To learn more about Kokkos consider watching one of our presentations:
GTC 2015:
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
and feedback is greatly appreciated.
For non-public questions send an email to
crtrott(at)sandia.gov
A separate repository with extensive tutorial material can be found under
https://github.com/kokkos/kokkos-tutorials.
If you have a patch to contribute please feel free to issue a pull request against
the develop branch. For major contributions it is better to contact us first
for guidance.
Furthermore, the 'example/tutorial' directory provides step by step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make'
in the 'example/tutorial' directory. This will build all examples in the
subfolders. To change the build options refer to the Programming Guide
in the compilation section.
For questions please send an email to
kokkos-users@software.sandia.gov
To learn more about Kokkos consider watching one of our presentations:
* GTC 2015:
- http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
- http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
For non-public questions send an email to
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
============================================================================
====Requirements============================================================
============================================================================
# Contributing to Kokkos
Primary tested compilers on X86 are:
GCC 4.8.4
GCC 4.9.3
GCC 5.1.0
GCC 5.3.0
GCC 6.1.0
Intel 15.0.2
Intel 16.0.1
Intel 17.1.043
Intel 17.4.196
Intel 18.0.128
Clang 3.5.2
Clang 3.6.1
Clang 3.7.1
Clang 3.8.1
Clang 3.9.0
Clang 4.0.0
Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44)
PGI 17.10
NVCC 7.0 for CUDA (with gcc 4.8.4)
NVCC 7.5 for CUDA (with gcc 4.8.4)
NVCC 8.0.44 for CUDA (with gcc 5.3.0)
We are open and try to encourage contributions from external developers.
To do so please first open an issue describing the contribution and then issue
a pull request against the develop branch. For larger features it may be good
to get guidance from the core development team first through the github issue.
Primary tested compilers on Power 8 are:
GCC 5.4.0 (OpenMP,Serial)
IBM XL 13.1.5 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
NVCC 8.0.44 for CUDA (with gcc 5.4.0)
NVCC 9.0.103 for CUDA (with gcc 6.3.0)
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
This means that contributing to Kokkos allows anyone else to use your contributions
not just for public purposes but also for closed source commercial projects.
For specifics see the LICENSE file contained in the repository or distribution.
Primary tested compilers on Intel KNL are:
GCC 6.2.0
Intel 16.4.258 (with gcc 4.7.2)
Intel 17.2.174 (with gcc 4.9.3)
Intel 18.0.128 (with gcc 4.9.3)
# Requirements
Other compilers working:
X86:
Cygwin 2.1.0 64bit with gcc 4.9.3
### Primary tested compilers on X86 are:
* GCC 4.8.4
* GCC 4.9.3
* GCC 5.1.0
* GCC 5.3.0
* GCC 6.1.0
* Intel 15.0.2
* Intel 16.0.1
* Intel 17.1.043
* Intel 17.4.196
* Intel 18.0.128
* Clang 3.6.1
* Clang 3.7.1
* Clang 3.8.1
* Clang 3.9.0
* Clang 4.0.0
* Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44)
* Clang 6.0.0 for CUDA (CUDA Toolkit 9.1)
* PGI 17.10
* NVCC 7.0 for CUDA (with gcc 4.8.4)
* NVCC 7.5 for CUDA (with gcc 4.8.4)
* NVCC 8.0.44 for CUDA (with gcc 5.3.0)
* NVCC 9.1 for CUDA (with gcc 6.1.0)
Known non-working combinations:
Power8:
Pthreads backend
### Primary tested compilers on Power 8 are:
* GCC 5.4.0 (OpenMP,Serial)
* IBM XL 13.1.6 (OpenMP, Serial)
* NVCC 8.0.44 for CUDA (with gcc 5.4.0)
* NVCC 9.0.103 for CUDA (with gcc 6.3.0 and XL 13.1.6)
### Primary tested compilers on Intel KNL are:
* GCC 6.2.0
* Intel 16.4.258 (with gcc 4.7.2)
* Intel 17.2.174 (with gcc 4.9.3)
* Intel 18.0.128 (with gcc 4.9.3)
### Primary tested compilers on ARM
* GCC 6.1.0
### Other compilers working:
* X86:
- Cygwin 2.1.0 64bit with gcc 4.9.3
### Known non-working combinations:
* Power8:
- Pthreads backend
* ARM
- Pthreads backend
Primary tested compilers are passing in release mode
@ -97,20 +111,7 @@ NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitiali
Other compilers are tested occasionally, in particular when pushing from develop to
master branch, without -Werror and only for a select set of backends.
============================================================================
====Getting started=========================================================
============================================================================
In the 'example/tutorial' directory you will find step by step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make'
in the 'example/tutorial' directory. This will build all examples in the
subfolders. To change the build options refer to the Programming Guide
in the compilation section.
============================================================================
====Running Unit Tests======================================================
============================================================================
# Running Unit Tests
To run the unit tests create a build directory and run the following commands
@ -121,30 +122,35 @@ make test
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====Install the library=====================================================
============================================================================
# Installing the library
To install Kokkos as a library create a build directory and run the following
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
make lib
make kokkoslib
make install
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
changing the device type for which to build.
============================================================================
====CMakeFiles==============================================================
============================================================================
Note that in many cases it is preferable to build Kokkos inline with an
application. The main reason is that you may otherwise need many different
configurations of Kokkos installed depending on the required compile time
features an application needs. For example there is only one default
execution space, which means you need different installations to have OpenMP
or Pthreads as the default space. Also for the CUDA backend there are certain
choices, such as allowing relocatable device code, which must be made at
installation time. Building Kokkos inline uses largely the same process
as compiling an application against an installed Kokkos library. See for
example benchmarks/bytes_and_flops/Makefile which can be used with an installed
library and for an inline build.
The CMake files contained in this repository require Tribits and are used
for integration with Trilinos. They do not currently support a standalone
CMake build.
### CMake
===========================================================================
====Kokkos and CUDA UVM====================================================
===========================================================================
Kokkos supports being built as part of CMake applications. An example can
be found in example/cmake_build.
# Kokkos and CUDA UVM
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
Allocations made with that space are accessible from host and device.
@ -154,25 +160,16 @@ In either case UVM comes with a number of restrictions:
running. This will lead to segfaults. To avoid that you either need to
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
Furthermore in multi socket multi GPU machines, UVM defaults to using
zero copy allocations for technical reasons related to using multiple
Furthermore in multi socket multi GPU machines without NVLINK, UVM defaults
to using zero copy allocations for technical reasons related to using multiple
GPUs from the same process. If an executable doesn't do that (e.g. each
MPI rank of an application uses a single GPU [can be the same GPU for
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
This will enforce proper UVM allocations, but can lead to errors if
more than a single GPU is used by a single process.
===========================================================================
====Contributing===========================================================
===========================================================================
Contributions to Kokkos are welcome. In order to do so, please open an issue
where a feature request or bug can be discussed. Then issue a pull request
with your contribution. Pull requests must be issued against the develop branch.
===========================================================================
====Citing Kokkos==========================================================
===========================================================================
# Citing Kokkos
If you publish work which mentions Kokkos, please cite the following paper:

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -1530,7 +1530,7 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,1,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0()))
if(idx<static_cast<IndexType>(a.extent(0)))
a(idx) = Rand::draw(gen,range);
}
rand_pool.free_state(gen);
@ -1555,8 +1555,8 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,2,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
a(idx,k) = Rand::draw(gen,range);
}
}
@ -1583,9 +1583,9 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,3,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
a(idx,k,l) = Rand::draw(gen,range);
}
}
@ -1611,10 +1611,10 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,4, IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
a(idx,k,l,m) = Rand::draw(gen,range);
}
}
@ -1640,11 +1640,11 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,5,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
a(idx,k,l,m,n) = Rand::draw(gen,range);
}
}
@ -1670,12 +1670,12 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,6,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
a(idx,k,l,m,n,o) = Rand::draw(gen,range);
}
}
@ -1701,13 +1701,13 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,7,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
a(idx,k,l,m,n,o,p) = Rand::draw(gen,range);
}
}
@ -1733,14 +1733,14 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,8,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
for(IndexType q=0;q<static_cast<IndexType>(a.dimension_7());q++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
for(IndexType q=0;q<static_cast<IndexType>(a.extent(7));q++)
a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range);
}
}
@ -1765,7 +1765,7 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,1,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0()))
if(idx<static_cast<IndexType>(a.extent(0)))
a(idx) = Rand::draw(gen,begin,end);
}
rand_pool.free_state(gen);
@ -1790,8 +1790,8 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,2,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
a(idx,k) = Rand::draw(gen,begin,end);
}
}
@ -1818,9 +1818,9 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,3,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
a(idx,k,l) = Rand::draw(gen,begin,end);
}
}
@ -1846,10 +1846,10 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,4,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
a(idx,k,l,m) = Rand::draw(gen,begin,end);
}
}
@ -1875,11 +1875,11 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,5,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())){
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_1());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_2());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_3());n++)
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_4());o++)
if(idx<static_cast<IndexType>(a.extent(0))){
for(IndexType l=0;l<static_cast<IndexType>(a.extent(1));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(2));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(3));n++)
for(IndexType o=0;o<static_cast<IndexType>(a.extent(4));o++)
a(idx,l,m,n,o) = Rand::draw(gen,begin,end);
}
}
@ -1905,12 +1905,12 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,6,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end);
}
}
@ -1937,13 +1937,13 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,7,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end);
}
}
@ -1969,14 +1969,14 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,8,IndexType>{
typename RandomPool::generator_type gen = rand_pool.get_state();
for(IndexType j=0;j<loops;j++) {
const IndexType idx = i*loops+j;
if(idx<static_cast<IndexType>(a.dimension_0())) {
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
for(IndexType q=0;q<static_cast<IndexType>(a.dimension_7());q++)
if(idx<static_cast<IndexType>(a.extent(0))) {
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
for(IndexType q=0;q<static_cast<IndexType>(a.extent(7));q++)
a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end);
}
}
@ -1988,14 +1988,14 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,8,IndexType>{
template<class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) {
int64_t LDA = a.dimension_0();
int64_t LDA = a.extent(0);
if(LDA>0)
parallel_for((LDA+127)/128,Impl::fill_random_functor_range<ViewType,RandomPool,128,ViewType::Rank,IndexType>(a,g,range));
}
template<class ViewType, class RandomPool, class IndexType = int64_t>
void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) {
int64_t LDA = a.dimension_0();
int64_t LDA = a.extent(0);
if(LDA>0)
parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end<ViewType,RandomPool,128,ViewType::Rank,IndexType>(a,g,begin,end));
}

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -120,7 +120,6 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
// printf("copy: dst(%i) src(%i)\n",i+dst_offset,i);
copy_op::copy(dst_values,i+dst_offset,src_values,i);
}
};
@ -151,20 +150,22 @@ public:
DstViewType dst_values ;
perm_view_type sort_order ;
src_view_type src_values ;
int src_offset ;
copy_permute_functor( DstViewType const & dst_values_
, PermuteViewType const & sort_order_
, SrcViewType const & src_values_
, int const & src_offset_
)
: dst_values( dst_values_ )
, sort_order( sort_order_ )
, src_values( src_values_ )
, src_offset( src_offset_ )
{}
KOKKOS_INLINE_FUNCTION
void operator() (const int& i) const {
// printf("copy_permute: dst(%i) src(%i)\n",i,sort_order(i));
copy_op::copy(dst_values,i,src_values,sort_order(i));
copy_op::copy(dst_values,i,src_values,src_offset+sort_order(i));
}
};
@ -259,19 +260,21 @@ public:
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
void create_permute_vector() {
const size_t len = range_end - range_begin ;
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_count_tag> (0,len),*this);
Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
Kokkos::parallel_for ("Kokkos::Sort::BinCount",Kokkos::RangePolicy<execution_space,bin_count_tag> (0,len),*this);
Kokkos::parallel_scan("Kokkos::Sort::BinOffset",Kokkos::RangePolicy<execution_space,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
Kokkos::deep_copy(bin_count_atomic,0);
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_binning_tag> (0,len),*this);
Kokkos::parallel_for ("Kokkos::Sort::BinBinning",Kokkos::RangePolicy<execution_space,bin_binning_tag> (0,len),*this);
if(sort_within_bins)
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
Kokkos::parallel_for ("Kokkos::Sort::BinSort",Kokkos::RangePolicy<execution_space,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
}
// Sort a view with respect ot the first dimension using the permutation array
// Sort a subset of a view with respect to the first dimension using the permutation array
template<class ValuesViewType>
void sort( ValuesViewType const & values)
void sort( ValuesViewType const & values
, int values_range_begin
, int values_range_end) const
{
typedef
Kokkos::View< typename ValuesViewType::data_type,
@ -280,6 +283,10 @@ public:
scratch_view_type ;
const size_t len = range_end - range_begin ;
const size_t values_len = values_range_end - values_range_begin ;
if (len != values_len) {
Kokkos::abort("BinSort::sort: values range length != permutation vector length");
}
scratch_view_type
sorted_values("Scratch",
@ -297,19 +304,25 @@ public:
, offset_type /* PermuteViewType */
, ValuesViewType /* SrcViewType */
>
functor( sorted_values , sort_order , values );
functor( sorted_values , sort_order , values, values_range_begin - range_begin );
parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor);
parallel_for("Kokkos::Sort::CopyPermute", Kokkos::RangePolicy<execution_space>(0,len),functor);
}
{
copy_functor< ValuesViewType , scratch_view_type >
functor( values , range_begin , sorted_values );
parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor);
parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy<execution_space>(0,len),functor);
}
}
template<class ValuesViewType>
void sort( ValuesViewType const & values ) const
{
this->sort( values, 0, /*values.extent(0)*/ range_end - range_begin );
}
// Get the permutation vector
KOKKOS_INLINE_FUNCTION
offset_type get_permute_vector() const { return sort_order;}
@ -327,7 +340,7 @@ public:
KOKKOS_INLINE_FUNCTION
void operator() (const bin_count_tag& tag, const int& i) const {
const int j = range_begin + i ;
bin_count_atomic(bin_op.bin(keys,j))++;
bin_count_atomic(bin_op.bin(keys, j))++;
}
KOKKOS_INLINE_FUNCTION
@ -512,7 +525,7 @@ void sort( ViewType const & view , bool const always_use_kokkos_sort = false)
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.extent(0)),
parallel_reduce("Kokkos::Sort::FindExtent",Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.extent(0)),
Impl::min_max_functor<ViewType>(view),reducer);
if(result.min_val == result.max_val) return;
BinSort<ViewType, CompType> bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true);
@ -532,7 +545,7 @@ void sort( ViewType view
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
parallel_reduce( range_policy( begin , end )
parallel_reduce("Kokkos::Sort::FindExtent", range_policy( begin , end )
, Impl::min_max_functor<ViewType>(view),reducer );
if(result.min_val == result.max_val) return;
@ -541,8 +554,9 @@ void sort( ViewType view
bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true);
bin_sort.create_permute_vector();
bin_sort.sort(view);
bin_sort.sort(view,begin,end);
}
}
#endif

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -61,14 +61,9 @@ class cuda : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
}
static void TearDownTestCase()
{
Kokkos::Cuda::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -60,25 +60,10 @@ protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
int threads_count = 0;
#pragma omp parallel
{
#pragma omp atomic
++threads_count;
}
if (threads_count > 3) {
threads_count /= 2;
}
Kokkos::OpenMP::initialize( threads_count );
Kokkos::OpenMP::print_configuration( std::cout );
}
static void TearDownTestCase()
{
Kokkos::OpenMP::finalize();
}
};

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -62,13 +62,9 @@ protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
Kokkos::HostSpace::execution_space::initialize();
Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) );
}
static void TearDownTestCase()
{
Kokkos::Experimental::ROCm::finalize();
Kokkos::HostSpace::execution_space::finalize();
}
};

View File

@ -34,7 +34,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -62,13 +62,10 @@ class serial : public ::testing::Test {
protected:
static void SetUpTestCase()
{
std::cout << std::setprecision (5) << std::scientific;
Kokkos::Serial::initialize ();
}
static void TearDownTestCase ()
{
Kokkos::Serial::finalize ();
}
};

View File

@ -34,7 +34,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -171,10 +171,10 @@ void test_3D_sort(unsigned int n) {
double sum_after = 0.0;
unsigned int sort_fails = 0;
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
Kokkos::parallel_reduce(keys.extent(0),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
int bin_1d = 1;
while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2;
while( bin_1d*bin_1d*bin_1d*4< (int) keys.extent(0) ) bin_1d*=2;
int bin_max[3] = {bin_1d,bin_1d,bin_1d};
typename KeyViewType::value_type min[3] = {0,0,0};
typename KeyViewType::value_type max[3] = {100,100,100};
@ -186,8 +186,8 @@ void test_3D_sort(unsigned int n) {
Sorter.create_permute_vector();
Sorter.template sort< KeyViewType >(keys);
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
Kokkos::parallel_reduce(keys.extent(0),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
Kokkos::parallel_reduce(keys.extent(0)-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
double ratio = sum_before/sum_after;
double epsilon = 1e-10;
@ -205,24 +205,13 @@ void test_3D_sort(unsigned int n) {
template<class ExecutionSpace, typename KeyType>
void test_dynamic_view_sort(unsigned int n )
{
typedef typename ExecutionSpace::memory_space memory_space ;
typedef Kokkos::Experimental::DynamicView<KeyType*,ExecutionSpace> KeyDynamicViewType;
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
const size_t upper_bound = 2 * n ;
const size_t min_chunk_size = 1024;
const size_t total_alloc_size = n * sizeof(KeyType) * 1.2 ;
const size_t superblock_size = std::min(total_alloc_size, size_t(1000000));
typename KeyDynamicViewType::memory_pool
pool( memory_space()
, n * sizeof(KeyType) * 1.2
, 500 /* min block size in bytes */
, 30000 /* max block size in bytes */
, superblock_size
);
KeyDynamicViewType keys("Keys",pool,upper_bound);
KeyDynamicViewType keys("Keys", min_chunk_size, upper_bound);
keys.resize_serial(n);
@ -230,13 +219,15 @@ void test_dynamic_view_sort(unsigned int n )
// Test sorting array with all numbers equal
Kokkos::deep_copy(keys_view,KeyType(1));
Kokkos::Experimental::deep_copy(keys,keys_view);
Kokkos::deep_copy(keys,keys_view);
Kokkos::sort(keys, 0 /* begin */ , n /* end */ );
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
Kokkos::Experimental::deep_copy(keys,keys_view);
ExecutionSpace::fence();
Kokkos::deep_copy(keys,keys_view);
//ExecutionSpace::fence();
double sum_before = 0.0;
double sum_after = 0.0;
@ -246,7 +237,9 @@ void test_dynamic_view_sort(unsigned int n )
Kokkos::sort(keys, 0 /* begin */ , n /* end */ );
Kokkos::Experimental::deep_copy( keys_view , keys );
ExecutionSpace::fence(); // Need this fence to prevent BusError with Cuda
Kokkos::deep_copy( keys_view , keys );
//ExecutionSpace::fence();
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys_view),sum_after);
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys_view),sort_fails);
@ -269,6 +262,74 @@ void test_dynamic_view_sort(unsigned int n )
//----------------------------------------------------------------------------
template<class ExecutionSpace>
void test_issue_1160()
{
Kokkos::View<int*, ExecutionSpace> element_("element", 10);
Kokkos::View<double*, ExecutionSpace> x_("x", 10);
Kokkos::View<double*, ExecutionSpace> v_("y", 10);
auto h_element = Kokkos::create_mirror_view(element_);
auto h_x = Kokkos::create_mirror_view(x_);
auto h_v = Kokkos::create_mirror_view(v_);
h_element(0) = 9;
h_element(1) = 8;
h_element(2) = 7;
h_element(3) = 6;
h_element(4) = 5;
h_element(5) = 4;
h_element(6) = 3;
h_element(7) = 2;
h_element(8) = 1;
h_element(9) = 0;
for (int i = 0; i < 10; ++i) {
h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i));
}
Kokkos::deep_copy(element_, h_element);
Kokkos::deep_copy(x_, h_x);
Kokkos::deep_copy(v_, h_v);
typedef decltype(element_) KeyViewType;
typedef Kokkos::BinOp1D< KeyViewType > BinOp;
int begin = 3;
int end = 8;
auto max = h_element(begin);
auto min = h_element(end - 1);
BinOp binner(end - begin, min, max);
Kokkos::BinSort<KeyViewType , BinOp > Sorter(element_,begin,end,binner,false);
Sorter.create_permute_vector();
Sorter.sort(element_,begin,end);
Sorter.sort(x_,begin,end);
Sorter.sort(v_,begin,end);
Kokkos::deep_copy(h_element, element_);
Kokkos::deep_copy(h_x, x_);
Kokkos::deep_copy(h_v, v_);
ASSERT_EQ(h_element(0), 9);
ASSERT_EQ(h_element(1), 8);
ASSERT_EQ(h_element(2), 7);
ASSERT_EQ(h_element(3), 2);
ASSERT_EQ(h_element(4), 3);
ASSERT_EQ(h_element(5), 4);
ASSERT_EQ(h_element(6), 5);
ASSERT_EQ(h_element(7), 6);
ASSERT_EQ(h_element(8), 1);
ASSERT_EQ(h_element(9), 0);
for (int i = 0; i < 10; ++i) {
ASSERT_EQ(h_element(i), int(h_x.access(i, 0)));
ASSERT_EQ(h_element(i), int(h_v.access(i, 0)));
}
}
//----------------------------------------------------------------------------
template<class ExecutionSpace, typename KeyType>
void test_sort(unsigned int N)
{
@ -278,6 +339,7 @@ void test_sort(unsigned int N)
test_3D_sort<ExecutionSpace,KeyType>(N);
test_dynamic_view_sort<ExecutionSpace,KeyType>(N*N);
#endif
test_issue_1160<ExecutionSpace>();
}
}

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -63,25 +63,10 @@ protected:
static void SetUpTestCase()
{
std::cout << std::setprecision(5) << std::scientific;
unsigned num_threads = 4;
if (Kokkos::hwloc::available()) {
num_threads = Kokkos::hwloc::get_available_numa_count()
* Kokkos::hwloc::get_available_cores_per_numa()
// * Kokkos::hwloc::get_available_threads_per_core()
;
}
std::cout << "Threads: " << num_threads << std::endl;
Kokkos::Threads::initialize( num_threads );
}
static void TearDownTestCase()
{
Kokkos::Threads::finalize();
}
};

View File

@ -35,16 +35,20 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <gtest/gtest.h>
#include <Kokkos_Core.hpp>
int main(int argc, char *argv[]) {
Kokkos::initialize(argc,argv);
::testing::InitGoogleTest(&argc,argv);
return RUN_ALL_TESTS();
int result = RUN_ALL_TESTS();
Kokkos::finalize();
return result;
}

View File

@ -10,7 +10,7 @@ default: build
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
EXE = ${EXE_NAME}.cuda
KOKKOS_CUDA_OPTIONS = "enable_lambda"
else

View File

@ -3,7 +3,7 @@
# BytesAndFlops
cd build/bytes_and_flops
USE_CUDA=`grep "_CUDA 1" KokkosCore_config.h | wc -l`
USE_CUDA=`grep "_CUDA" KokkosCore_config.h | wc -l`
if [[ ${USE_CUDA} > 0 ]]; then
BAF_EXE=bytes_and_flops.cuda
@ -41,4 +41,4 @@ cd ../..
echo "MiniFE: ${FE_PERF_1} ${FE_PERF_2}"
PERF_RESULT=`echo "${BAF_PERF_1} ${BAF_PERF_2} ${MD_PERF_1} ${MD_PERF_2} ${FE_PERF_1} ${FE_PERF_2}" | awk '{print ($1+$2+$3+$4+$5+$6)/6}'`
echo "Total Result: " ${PERF_RESULT}
echo "Total Result: " ${PERF_RESULT}

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -2,7 +2,7 @@
# FindHWLOC
# ----------
#
# Try to find HWLOC.
# Try to find HWLOC, based on KOKKOS_HWLOC_DIR
#
# The following variables are defined:
#
@ -10,8 +10,8 @@
# HWLOC_INCLUDE_DIR - HWLOC include directory
# HWLOC_LIBRARIES - Libraries needed to use HWLOC
find_path(HWLOC_INCLUDE_DIR hwloc.h)
find_library(HWLOC_LIBRARIES hwloc)
find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include")
find_library(HWLOC_LIBRARIES hwloc PATHS "${KOKKOS_HWLOC_DIR}/lib")
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(HWLOC DEFAULT_MSG

View File

@ -1,7 +1,3 @@
# kokkos_generated_settings.cmake includes the kokkos library itself in KOKKOS_LIBS
# which we do not want to use for the cmake builds so clean this up
string(REGEX REPLACE "-lkokkos" "" KOKKOS_LIBS ${KOKKOS_LIBS})
############################ Detect if submodule ###############################
#
# With thanks to StackOverflow:
@ -73,6 +69,19 @@ IF(KOKKOS_SEPARATE_LIBS)
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
)
target_include_directories(
kokkoscore
PUBLIC
${KOKKOS_TPL_INCLUDE_DIRS}
)
foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES)
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
target_link_libraries(kokkoscore PUBLIC ${LIB_${lib}})
endforeach()
target_link_libraries(kokkoscore PUBLIC "${KOKKOS_LINK_FLAGS}")
# Install the kokkoscore library
INSTALL (TARGETS kokkoscore
EXPORT KokkosTargets
@ -81,12 +90,6 @@ IF(KOKKOS_SEPARATE_LIBS)
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
)
TARGET_LINK_LIBRARIES(
kokkoscore
${KOKKOS_LD_FLAGS}
${KOKKOS_EXTRA_LIBS_LIST}
)
# kokkoscontainers
if (DEFINED KOKKOS_CONTAINERS_SRCS)
ADD_LIBRARY(
@ -144,12 +147,19 @@ ELSE()
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
)
TARGET_LINK_LIBRARIES(
target_include_directories(
kokkos
${KOKKOS_LD_FLAGS}
${KOKKOS_EXTRA_LIBS_LIST}
PUBLIC
${KOKKOS_TPL_INCLUDE_DIRS}
)
foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES)
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
target_link_libraries(kokkos PUBLIC ${LIB_${lib}})
endforeach()
target_link_libraries(kokkos PUBLIC "${KOKKOS_LINK_FLAGS}")
# Install the kokkos library
INSTALL (TARGETS kokkos
EXPORT KokkosTargets

View File

@ -25,11 +25,12 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
Cuda_LDG_Intrinsic
Debug
Debug_DualView_Modify_Check
Debug_Bounds_Checkt
Debug_Bounds_Check
Compiler_Warnings
Profiling
Profiling_Load_Print
Aggressive_Vectorization
Deprecated_Code
)
#-------------------------------------------------------------------------------
@ -263,7 +264,8 @@ set(KOKKOS_ENABLE_PROFILING ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT} CACHE BO
set_kokkos_default_default(PROFILING_LOAD_PRINT OFF)
set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT} CACHE BOOL "Enable profile load print.")
set_kokkos_default_default(DEPRECATED_CODE ON)
set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.")
#-------------------------------------------------------------------------------

View File

@ -14,6 +14,13 @@
#-------------------------------------------------------------------------------
# Ensure that KOKKOS_ARCH is in the ARCH_LIST
if (KOKKOS_ARCH MATCHES ",")
message("-- Detected a comma in: KOKKOS_ARCH=${KOKKOS_ARCH}")
message("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow")
message("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)")
string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}")
message("-- Commas were changed to semicolons, now KOKKOS_ARCH=${KOKKOS_ARCH}")
endif()
foreach(arch ${KOKKOS_ARCH})
list(FIND KOKKOS_ARCH_LIST ${arch} indx)
if (indx EQUAL -1)
@ -23,14 +30,13 @@ foreach(arch ${KOKKOS_ARCH})
endforeach()
# KOKKOS_SETTINGS uses KOKKOS_ARCH
string(REPLACE ";" "," KOKKOS_ARCH "${KOKKOS_ARCH}")
set(KOKKOS_ARCH ${KOKKOS_ARCH})
string(REPLACE ";" "," KOKKOS_GMAKE_ARCH "${KOKKOS_ARCH}")
# From Makefile.kokkos: Options: yes,no
if(${KOKKOS_ENABLE_DEBUG})
set(KOKKOS_DEBUG yes)
set(KOKKOS_GMAKE_DEBUG yes)
else()
set(KOKKOS_DEBUG no)
set(KOKKOS_GMAKE_DEBUG no)
endif()
#------------------------------- KOKKOS_DEVICES --------------------------------
@ -43,10 +49,10 @@ foreach(devopt ${KOKKOS_DEVICES_LIST})
endif ()
endforeach()
# List needs to be comma-delmitted
string(REPLACE ";" "," KOKKOS_DEVICES "${KOKKOS_DEVICESl}")
string(REPLACE ";" "," KOKKOS_GMAKE_DEVICES "${KOKKOS_DEVICESl}")
#------------------------------- KOKKOS_OPTIONS --------------------------------
# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling
# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling,disable_deprecated_code
#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print
set(KOKKOS_OPTIONSl)
@ -57,7 +63,10 @@ if(${KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION})
list(APPEND KOKKOS_OPTIONSl aggressive_vectorization)
endif()
if(NOT ${KOKKOS_ENABLE_PROFILING})
list(APPEND KOKKOS_OPTIONSl disable_vectorization)
list(APPEND KOKKOS_OPTIONSl disable_profiling)
endif()
if(NOT ${KOKKOS_ENABLE_DEPRECATED_CODE})
list(APPEND KOKKOS_OPTIONSl disable_deprecated_code)
endif()
if(NOT ${KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK})
list(APPEND KOKKOS_OPTIONSl disable_dualview_modify_check)
@ -66,7 +75,7 @@ if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT})
list(APPEND KOKKOS_OPTIONSl enable_profile_load_print)
endif()
# List needs to be comma-delimitted
string(REPLACE ";" "," KOKKOS_OPTIONS "${KOKKOS_OPTIONSl}")
string(REPLACE ";" "," KOKKOS_GMAKE_OPTIONS "${KOKKOS_OPTIONSl}")
#------------------------------- KOKKOS_USE_TPLS -------------------------------
@ -78,19 +87,19 @@ foreach(tplopt ${KOKKOS_USE_TPLS_LIST})
endif ()
endforeach()
# List needs to be comma-delimitted
string(REPLACE ";" "," KOKKOS_USE_TPLS "${KOKKOS_USE_TPLSl}")
string(REPLACE ";" "," KOKKOS_GMAKE_USE_TPLS "${KOKKOS_USE_TPLSl}")
#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
# Construct the Makefile options
set(KOKKOS_CUDA_OPTIONS)
set(KOKKOS_CUDA_OPTIONSl)
foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST})
if (${KOKKOS_ENABLE_CUDA_${cudaopt}})
list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}})
endif ()
endforeach()
# List needs to be comma-delmitted
string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
string(REPLACE ";" "," KOKKOS_GMAKE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
#------------------------------- PATH VARIABLES --------------------------------
# Want makefile to use same executables specified which means modifying
@ -100,10 +109,10 @@ string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
set(KOKKOS_INTERNAL_PATHS)
set(addpathl)
foreach(kvar "CUDA;QTHREADS;${KOKKOS_USE_TPLS_LIST}")
foreach(kvar IN LISTS KOKKOS_USE_TPLS_LIST ITEMS CUDA QTHREADS)
if(${KOKKOS_ENABLE_${kvar}})
if(DEFINED KOKKOS_${kvar}_DIR)
set(KOKKOS_INTERNAL_PATHS "${KOKKOS_INTERNAL_PATHS} ${kvar}_PATH=${KOKKOS_${kvar}_DIR}")
set(KOKKOS_INTERNAL_PATHS ${KOKKOS_INTERNAL_PATHS} "${kvar}_PATH=${KOKKOS_${kvar}_DIR}")
if(IS_DIRECTORY ${KOKKOS_${kvar}_DIR}/bin)
list(APPEND addpathl ${KOKKOS_${kvar}_DIR}/bin)
endif()
@ -124,10 +133,9 @@ set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFI
# Form of KOKKOS_foo=$KOKKOS_foo
foreach(kvar ARCH;DEVICES;DEBUG;OPTIONS;CUDA_OPTIONS;USE_TPLS)
set(KOKKOS_VAR KOKKOS_${kvar})
if(DEFINED KOKKOS_${kvar})
if (NOT "${${KOKKOS_VAR}}" STREQUAL "")
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_VAR}=${${KOKKOS_VAR}})
if(DEFINED KOKKOS_GMAKE_${kvar})
if (NOT "${KOKKOS_GMAKE_${kvar}}" STREQUAL "")
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_${kvar}=${KOKKOS_GMAKE_${kvar}})
endif()
endif()
endforeach()
@ -147,7 +155,7 @@ if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "")
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS})
endif()
if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} PATH=${KOKKOS_INTERNAL_ADDTOPATH}:\${PATH})
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"")
endif()
# Final form that gets passed to make
@ -185,7 +193,7 @@ if(KOKKOS_CMAKE_VERBOSE)
message(STATUS "")
message(STATUS "Architectures:")
message(STATUS " ${KOKKOS_ARCH}")
message(STATUS " ${KOKKOS_GMAKE_ARCH}")
message(STATUS "")
message(STATUS "Enabled options")
@ -194,43 +202,14 @@ if(KOKKOS_CMAKE_VERBOSE)
message(STATUS " KOKKOS_SEPARATE_LIBS")
endif()
if(KOKKOS_ENABLE_HWLOC)
message(STATUS " KOKKOS_ENABLE_HWLOC")
endif()
if(KOKKOS_ENABLE_MEMKIND)
message(STATUS " KOKKOS_ENABLE_MEMKIND")
endif()
if(KOKKOS_ENABLE_DEBUG)
message(STATUS " KOKKOS_ENABLE_DEBUG")
endif()
if(KOKKOS_ENABLE_PROFILING)
message(STATUS " KOKKOS_ENABLE_PROFILING")
endif()
if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
message(STATUS " KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION")
endif()
foreach(opt IN LISTS KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST)
string(TOUPPER ${opt} OPT)
if (KOKKOS_ENABLE_${OPT})
message(STATUS " KOKKOS_ENABLE_${OPT}")
endif()
endforeach()
if(KOKKOS_ENABLE_CUDA)
if(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC)
message(STATUS " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
endif()
if(KOKKOS_ENABLE_CUDA_UVM)
message(STATUS " KOKKOS_ENABLE_CUDA_UVM")
endif()
if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
message(STATUS " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
endif()
if(KOKKOS_ENABLE_CUDA_LAMBDA)
message(STATUS " KOKKOS_ENABLE_CUDA_LAMBDA")
endif()
if(KOKKOS_CUDA_DIR)
message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}")
endif()

View File

@ -3,7 +3,7 @@ INCLUDE(CTest)
cmake_policy(SET CMP0054 NEW)
MESSAGE(WARNING "The project name is: ${PROJECT_NAME}")
MESSAGE(STATUS "The project name is: ${PROJECT_NAME}")
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
SET(${PROJECT_NAME}_ENABLE_OpenMP OFF)
@ -84,9 +84,6 @@ ENDFUNCTION()
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
message(STATUS "ProjectName: " ${PROJECT_NAME})
message(STATUS "Tests: " ${${PROJECT_NAME}_ENABLE_TESTS})
IF(${${PROJECT_NAME}_ENABLE_TESTS})
FOREACH(TEST_DIR ${ARGN})
ADD_SUBDIRECTORY(${TEST_DIR})
@ -95,13 +92,11 @@ MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
ENDMACRO()
MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES)
FOREACH(EXAMPLE_DIR ${ARGN})
ADD_SUBDIRECTORY(${EXAMPLE_DIR})
ENDFOREACH()
ENDIF()
ENDMACRO()

View File

@ -1,190 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build with Intel compiler
INTEL=ON
# Build for MIC architecture:
# INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2"
# Location for MPI to use in examples:
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuation only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuation:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,186 +0,0 @@
#!/bin/sh
#
# Configure a Kokkos-only Trilinos build cross-compiled for the
# Intel Xeon Phi (MIC) architecture.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile'; the old
# 'MakeFile*' glob never matched it on a case-sensitive filesystem.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation (date-stamped so repeated installs do not collide):
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/$(date +%F)"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture (empty string disables the CUDA section below):
CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build for MIC architecture:
INTEL_XEON_PHI=ON
# Build with HWLOC at location (empty string disables HWLOC):
HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2"
# Location for MPI to use in examples (empty string disables MPI):
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuration only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuration:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
# Use the Intel compilers for plain Intel and Xeon Phi builds.
# (Two test commands joined by '||' instead of the obsolescent '-o' operator.)
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Echo the full command line first so the exact configuration is
# captured in the build log.
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,293 +0,0 @@
#!/bin/sh
#
# Configure a Trilinos build (Kokkos, Tpetra, Stokhos, TrilinosCouplings)
# for one of several development platforms selected below.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
# Source and installation directories (install is date-stamped):
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
#-----------------------------------------------------------------------------
USE_CUDA_ARCH=
USE_THREAD=
USE_OPENMP=
USE_INTEL=
USE_XEON_PHI=
HWLOC_BASE_DIR=
MPI_BASE_DIR=
BLAS_LIB_DIR=
LAPACK_LIB_DIR=
# Platform selector: '[ 1 ]' always tests true and '[ ]' always tests
# false, so exactly one branch is active; move the '1' to pick another.
if [ 1 ] ; then
# Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu
USE_CUDA_ARCH="35"
USE_OPENMP=ON
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
elif [ ] ; then
# Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu
USE_CUDA_ARCH="35"
USE_THREAD=ON
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
elif [ ] ; then
# Platform 'kokkos-dev' with Xeon Phi and hwloc
USE_OPENMP=ON
USE_INTEL=ON
USE_XEON_PHI=ON
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106"
elif [ ] ; then
# Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu
USE_CUDA_ARCH="20"
USE_OPENMP=ON
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
MPI_BASE_DIR="/home/sems/common/openmpi/current"
elif [ ] ; then
# Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu
USE_CUDA_ARCH="20"
USE_THREAD=ON
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
MPI_BASE_DIR="/home/sems/common/openmpi/current"
fi
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure command line options:
CMAKE_CONFIGURE=""
CMAKE_CXX_FLAGS=""
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Feature toggle: '[ 1 ]' enables the section, '[ ]' would disable it.
if [ 1 ] ; then
# Configure for Tpetra/Kokkos:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON"
# NOTE(review): the OpenMP wrapper node is selected unconditionally,
# even when USE_OPENMP is not set — confirm this is intended.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode"
# Separate flags with a space; the old code concatenated the new flag
# directly onto any existing flags.
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -DKOKKOS_FAST_COMPILE"
if [ -n "${USE_CUDA_ARCH}" ] ; then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON"
fi
fi
if [ 1 ] ; then
# Configure for Stokhos:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON"
fi
if [ 1 ] ; then
# Configure for TrilinosCouplings:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON"
# Use POSIX '=' (the bash-only '==' fails under /bin/sh on e.g. dash,
# silently skipping the bounds-check option).
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
fi
#-----------------------------------------------------------------------------
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# MPI configuration only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Kokkos use pthread configuration:
if [ "${USE_THREAD}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Kokkos use OpenMP configuration:
if [ "${USE_OPENMP}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Hardware locality configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${USE_CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
# KOKKOS_HAVE_CUDA_ARCH expects the arch times ten (e.g. 350 for sm_35).
CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}"
if [ "${USE_OPENMP}" = "ON" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
fi
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
# Use the Intel compilers for plain Intel and Xeon Phi builds.
# (Two test commands joined by '||' instead of the obsolescent '-o' operator.)
if [ "${USE_INTEL}" = "ON" ] || [ "${USE_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
# Cross-compile for Intel Xeon Phi:
if [ "${USE_XEON_PHI}" = "ON" ] ;
then
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
if [ -n "${CMAKE_CXX_FLAGS}" ] ; then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'"
fi
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile'; the old
# 'MakeFile*' glob never matched it on a case-sensitive filesystem.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
# Echo the full command line first so the exact configuration is
# captured in the build log.
echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}"
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,88 +0,0 @@
#!/bin/sh
#
# Configure a Kokkos-only Trilinos build for the IBM Blue Gene/Q,
# using the MPI GNU compiler wrappers and the OpenMP backend.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
# to build:
# build on bgq-b[1-12]
# module load sierra-devel
# run this configure file
# make
# to run:
# ssh bgq-login
# cd /scratch/username/...
# export OMP_PROC_BIND and XLSMPOPTS environment variables
# run with srun
# Note: hwloc does not work to get or set cpubindings on bgq.
# Use the openmp backend and the openmp environment variables.
#
# Only the mpi wrappers seem to be setup for cross-compile,
# so it is important that this configure enables MPI and uses mpigcc wrappers.
#
# Force CMake to re-evaluate build options.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile'; the old
# 'MakeFile*' glob never matched it on a case-sensitive filesystem.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="../Trilinos"
# Location for installation (date-stamped so repeated installs do not collide):
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/$(date +%F)"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
# The mpigcc/mpig++ wrappers are the only toolchain set up for
# cross-compiling on this machine (see note above).
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2"
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
# Pthread backend disabled: OpenMP is the only usable backend on bgq.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Echo the full command line first so the exact configuration is
# captured in the build log.
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,216 +0,0 @@
#!/bin/sh
#
# Configure a Kokkos-only Trilinos build on the 'kokkos-dev' machine
# with CUDA (sm_35), OpenMP, Pthreads, hwloc and MPI.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile'; the old
# 'MakeFile*' glob never matched it on a case-sensitive filesystem.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation (date-stamped so repeated installs do not collide):
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/$(date +%F)"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
#CMAKE_BUILD_TYPE=DEBUG
#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
#-----------------------------------------------------------------------------
# Build for CUDA architecture (empty string disables the CUDA section below):
#CUDA_ARCH=""
#CUDA_ARCH="20"
#CUDA_ARCH="30"
CUDA_ARCH="35"
# Build with OpenMP
OPENMP=ON
PTHREADS=ON
# Build host code with Intel compiler:
INTEL=OFF
# Build for MIC architecture:
INTEL_XEON_PHI=OFF
# Build with HWLOC at location (empty string disables HWLOC):
#HWLOC_BASE_DIR=""
#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
# Location for MPI to use in examples (empty string disables MPI):
#MPI_BASE_DIR=""
#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3"
#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/"
#-----------------------------------------------------------------------------
# MPI configuration only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration:
if [ "${PTHREADS}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# OpenMP configuration:
if [ "${OPENMP}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
if [ "${OPENMP}" = "ON" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
fi
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
# Use the Intel compilers for plain Intel and Xeon Phi builds.
# (Two test commands joined by '||' instead of the obsolescent '-o' operator.)
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Echo the full command line first so the exact configuration is
# captured in the build log.
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,204 +0,0 @@
#!/bin/sh
#
# Configure a Kokkos-only Trilinos build on the 'kokkos-nvidia' machine
# with CUDA (sm_20), OpenMP, hwloc and MPI.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
#
# Force CMake to re-evaluate build options.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile'; the old
# 'MakeFile*' glob never matched it on a case-sensitive filesystem.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation (date-stamped so repeated installs do not collide):
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/$(date +%F)"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture (empty string disables the CUDA section below):
# CUDA_ARCH=""
CUDA_ARCH="20"
# CUDA_ARCH="30"
# CUDA_ARCH="35"
# Build with OpenMP
OPENMP=ON
# Build host code with Intel compiler:
# INTEL=ON
# Build for MIC architecture:
# INTEL_XEON_PHI=ON
# Build with HWLOC at location (empty string disables HWLOC):
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
# Location for MPI to use in examples (empty string disables MPI):
MPI_BASE_DIR="/home/sems/common/openmpi/current"
#-----------------------------------------------------------------------------
# MPI configuration only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuration:
if [ "${OPENMP}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
if [ "${OPENMP}" = "ON" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
fi
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
# Use the Intel compilers for plain Intel and Xeon Phi builds.
# (Two test commands joined by '||' instead of the obsolescent '-o' operator.)
if [ "${INTEL}" = "ON" ] || [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Echo the full command line first so the exact configuration is
# captured in the build log.
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,190 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
# Additional command-line arguments given to this script will be
# passed directly to CMake.
#
# Capture extra arguments so they can be forwarded to cmake below
# (the header above promises this; previously they were silently dropped).
EXTRA_ARGS="$@"
#
# Force CMake to re-evaluate build options.
# NOTE: the generated makefile is named "Makefile"; the old pattern
# "MakeFile*" never matched it on a case-sensitive filesystem.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#-----------------------------------------------------------------------------
# Incrementally construct cmake configure options:
CMAKE_CONFIGURE=""
#-----------------------------------------------------------------------------
# Location of Trilinos source tree:
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
# Location for installation:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`"
#-----------------------------------------------------------------------------
# General build options.
# Use a variable so options can be propagated to CUDA compiler.
CMAKE_VERBOSE_MAKEFILE=OFF
CMAKE_BUILD_TYPE=RELEASE
# CMAKE_BUILD_TYPE=DEBUG
#-----------------------------------------------------------------------------
# Build for CUDA architecture:
# CUDA_ARCH=""
# CUDA_ARCH="20"
# CUDA_ARCH="30"
CUDA_ARCH="35"
# Build host code with Intel compiler:
INTEL=ON
# Build for MIC architecture:
# INTEL_XEON_PHI=ON
# Build with HWLOC at location:
HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2"
# Location for MPI to use in examples:
MPI_BASE_DIR=""
#-----------------------------------------------------------------------------
# MPI configuration only used for examples:
#
# Must have the MPI_BASE_DIR so that the
# include path can be passed to the Cuda compiler
if [ -n "${MPI_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
else
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
fi
#-----------------------------------------------------------------------------
# Pthread configuration:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP configuration:
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Configure packages for kokkos-only:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
#-----------------------------------------------------------------------------
# Hardware locality cmake configuration:
if [ -n "${HWLOC_BASE_DIR}" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
fi
#-----------------------------------------------------------------------------
# Cuda cmake configuration:
if [ -n "${CUDA_ARCH}" ] ;
then
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
# this is different than the standard CMAKE_CXX_FLAGS syntax.
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
then
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
else
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
fi
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
fi
#-----------------------------------------------------------------------------
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
fi
#-----------------------------------------------------------------------------
# Cross-compile for Intel Xeon Phi:
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
then
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
# Cannot cross-compile fortran compatibility checks on the MIC:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
# Tell cmake the answers to compile-and-execute tests
# to prevent cmake from executing a cross-compiled program.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
fi
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
#-----------------------------------------------------------------------------
# Forward any extra user-supplied arguments, as documented in the header.
echo "cmake ${CMAKE_CONFIGURE} ${EXTRA_ARGS} ${CMAKE_PROJECT_DIR}"
cmake ${CMAKE_CONFIGURE} ${EXTRA_ARGS} ${CMAKE_PROJECT_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,140 +0,0 @@
#!/bin/bash
#
# This script uses CUDA, OpenMP, and MPI.
#
# Before invoking this script, set the OMPI_CXX environment variable
# to point to nvcc_wrapper, wherever it happens to live. (If you use
# an MPI implementation other than OpenMPI, set the corresponding
# environment variable instead.)
#
rm -f CMakeCache.txt;
rm -rf CMakeFiles
# Quote "$@" so that extra arguments survive expansion intact
# (note: arguments containing embedded whitespace are still split when
# $EXTRA_ARGS is expanded unquoted below).
EXTRA_ARGS="$@"
MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5"
CUDA_PATH="/opt/nvidia/cuda/6.5.14"
#
# As long as there are any .cu files in Trilinos, we'll need to set
# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and
# lets nvcc_wrapper handle them as .cpp files, then we won't need to
# set CUDA_NVCC_FLAGS. As it is, given that we need to set
# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as
# nvcc_wrapper passes to nvcc.
#
CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM"
cmake \
-D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \
-D CMAKE_BUILD_TYPE:STRING=DEBUG \
-D CMAKE_CXX_FLAGS:STRING="-g -Wall" \
-D CMAKE_C_FLAGS:STRING="-g -Wall" \
-D CMAKE_FORTRAN_FLAGS:STRING="" \
-D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \
-D Trilinos_ENABLE_Triutils=OFF \
-D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \
-D Trilinos_ENABLE_DEBUG:BOOL=OFF \
-D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \
-D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \
-D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \
-D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \
-D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \
-D BUILD_SHARED_LIBS:BOOL=OFF \
-D DART_TESTING_TIMEOUT:STRING=600 \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
\
\
-D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
-D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
-D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
-D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
-D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \
-D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \
-D MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \
\
\
-D Trilinos_ENABLE_CXX11:BOOL=OFF \
-D TPL_ENABLE_MPI:BOOL=ON \
-D Trilinos_ENABLE_OpenMP:BOOL=ON \
-D Trilinos_ENABLE_ThreadPool:BOOL=ON \
\
\
-D TPL_ENABLE_CUDA:BOOL=ON \
-D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \
-D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \
-D TPL_ENABLE_Thrust:BOOL=OFF \
-D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \
-D TPL_ENABLE_CUSPARSE:BOOL=OFF \
-D TPL_ENABLE_Cusp:BOOL=OFF \
-D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \
-D CUDA_VERBOSE_BUILD:BOOL=OFF \
-D CUDA_NVCC_FLAGS:STRING="${CUDA_NVCC_FLAGS}" \
\
\
-D TPL_ENABLE_HWLOC=OFF \
-D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \
-D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \
-D TPL_ENABLE_BinUtils=OFF \
-D TPL_ENABLE_BLAS:STRING=ON \
-D TPL_ENABLE_LAPACK:STRING=ON \
-D TPL_ENABLE_MKL:STRING=OFF \
-D TPL_ENABLE_HWLOC:STRING=OFF \
-D TPL_ENABLE_GTEST:STRING=ON \
-D TPL_ENABLE_SuperLU=ON \
-D TPL_ENABLE_BLAS=ON \
-D TPL_ENABLE_LAPACK=ON \
-D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \
-D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \
\
\
-D Trilinos_Enable_Kokkos:BOOL=ON \
-D Trilinos_ENABLE_KokkosCore:BOOL=ON \
-D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \
-D Trilinos_ENABLE_KokkosContainers:BOOL=ON \
-D Trilinos_ENABLE_TpetraKernels:BOOL=ON \
-D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \
-D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \
-D Trilinos_ENABLE_KokkosExample:BOOL=ON \
-D Kokkos_ENABLE_EXAMPLES:BOOL=ON \
-D Kokkos_ENABLE_TESTS:BOOL=OFF \
-D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \
-D TpetraClassic_ENABLE_OpenMPNode=OFF \
-D TpetraClassic_ENABLE_TPINode=OFF \
-D TpetraClassic_ENABLE_MKL=OFF \
-D Kokkos_ENABLE_Cuda_UVM=ON \
\
\
-D Trilinos_ENABLE_Teuchos:BOOL=ON \
-D Teuchos_ENABLE_COMPLEX:BOOL=OFF \
\
\
-D Trilinos_ENABLE_Tpetra:BOOL=ON \
-D Tpetra_ENABLE_KokkosCore=ON \
-D Tpetra_ENABLE_Kokkos_DistObject=OFF \
-D Tpetra_ENABLE_Kokkos_Refactor=ON \
-D Tpetra_ENABLE_TESTS=ON \
-D Tpetra_ENABLE_EXAMPLES=ON \
-D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \
\
\
-D Trilinos_ENABLE_Belos=OFF \
-D Trilinos_ENABLE_Amesos=OFF \
-D Trilinos_ENABLE_Amesos2=OFF \
-D Trilinos_ENABLE_Ifpack=OFF \
-D Trilinos_ENABLE_Ifpack2=OFF \
-D Trilinos_ENABLE_Epetra=OFF \
-D Trilinos_ENABLE_EpetraExt=OFF \
-D Trilinos_ENABLE_Zoltan=OFF \
-D Trilinos_ENABLE_Zoltan2=OFF \
-D Trilinos_ENABLE_MueLu=OFF \
-D Belos_ENABLE_TESTS=ON \
-D Belos_ENABLE_EXAMPLES=ON \
-D MueLu_ENABLE_TESTS=ON \
-D MueLu_ENABLE_EXAMPLES=ON \
-D Ifpack2_ENABLE_TESTS=ON \
-D Ifpack2_ENABLE_EXAMPLES=ON \
$EXTRA_ARGS \
${HOME}/Trilinos

View File

@ -1,148 +0,0 @@
// -------------------------------------------------------------------------------- //
The following steps are for workstations/servers with the SEMS environment installed.
// -------------------------------------------------------------------------------- //
Summary:
- Step 1: Rigorous testing of Kokkos' develop branch for each backend (Serial, OpenMP, Threads, Cuda) with all supported compilers.
- Step 2: Snapshot Kokkos' develop branch into current Trilinos develop branch.
- Step 3: Build and test Trilinos with combinations of compilers, types, backends.
- Step 4: Promote Kokkos develop branch to master if the snapshot does not cause any new tests to fail; else track/fix causes of new failures.
- Step 5: Snapshot Kokkos tagged master branch into Trilinos and push Trilinos.
// -------------------------------------------------------------------------------- //
// -------------------------------------------------------------------------------- //
Step 1:
1.1. Update kokkos develop branch (NOT a fork)
(From kokkos directory):
git fetch --all
git checkout develop
git reset --hard origin/develop
1.2. Create a testing directory - here the directory is created within the kokkos directory
mkdir testing
cd testing
1.3. Run the test_all_sandia script; various compiler and build-list options can be specified
../config/test_all_sandia
1.4 Clean repository of untracked files
cd ../
git clean -df
// -------------------------------------------------------------------------------- //
Step 2:
2.1 Update Trilinos develop branch
(From Trilinos directory):
git checkout develop
git fetch --all
git reset --hard origin/develop
git clean -df
2.2 Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files
module load python/2.7.9
python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
// -------------------------------------------------------------------------------- //
Step 3:
3.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard are provided in kokkos/config/trilinos-integration
Usually it's a good idea to run those scripts via nohup.
You can run all four at the same time, use separate directories for each.
3.2. Compare the failed test output between the pristine and the updated runs; investigate and fix problems if new tests fail after the Kokkos snapshot
// -------------------------------------------------------------------------------- //
Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on Github
4.1. Generate Changelog (You need a github API token)
Close all Open issues with "InDevelop" tag on github
(Not from kokkos directory)
github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
(Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md)
(Make desired changes to CHANGELOG.md to enhance clarity)
(Commit and push the CHANGELOG to develop)
4.2 Merge develop into Master
- DO NOT fast-forward the merge!!!!
(From kokkos directory):
git checkout master
git fetch --all
# Ensure we are on the current origin/master
git reset --hard origin/master
git merge --no-ff origin/develop
4.3. Update the tag in kokkos/config/master_history.txt
Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
Tag format: #.#.##
# Prepend master_history.txt with
# tag: #.#.##
# date: mm/dd/yyyy
# master: sha1
# develop: sha1
# -----------------------
git commit --amend -a
git tag -a #.#.##
tag: #.#.##
date: mm/dd/yyyy
master: sha1
develop: sha1
4.4. Do NOT push yet
// -------------------------------------------------------------------------------- //
Step 5:
5.1. Make sure Trilinos is up-to-date - chances are other changes have been committed since the integration testing process began. If a substantial change has occurred that may be affected by the snapshot the testing procedure may need to be repeated
(From Trilinos directory):
git checkout develop
git fetch --all
git reset --hard origin/develop
git clean -df
5.2. Snapshot Kokkos master branch into Trilinos
(From kokkos directory):
git fetch --all
git checkout tags/#.#.##
git clean -df
python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
5.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3)
The modules are listed in kokkos/config/trilinos-integration/checkin-test
Run checkin-test, forward dependencies and optional dependencies must be enabled
If push failed because someone else clearly broke something, push manually.
If push failed for unclear reasons, investigate, fix, and potentially start over from step 2 after resetting your local kokkos/master branch
Step 6: Push Kokkos to master
git push --follow-tags origin master
// -------------------------------------------------------------------------------- //

View File

@ -1,110 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
#   Cuda, OpenMP, Threads, Qthreads, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/bin/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# Qthreads
QTHREADS_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREADS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_INCLUDE_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_LIBRARY_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# ("Makefile*" fixed: CMake writes "Makefile"; the old "MakeFile*"
# pattern never matched on a case-sensitive filesystem.)
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}

View File

@ -1,104 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
#   Cuda, OpenMP, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/bin/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF so tribits doesn't automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# ("Makefile*" fixed: CMake writes "Makefile"; the old "MakeFile*"
# pattern never matched on a case-sensitive filesystem.)
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------
View File

@ -1,88 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
#   Cuda
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
#
# The 'nvcc-wrapper' module should load a script that matches
# kokkos/bin/nvcc_wrapper
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
#-----------------------------------------------------------------------------
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# ("Makefile*" fixed: CMake writes "Makefile"; the old "MakeFile*"
# pattern never matched on a case-sensitive filesystem.)
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,84 +0,0 @@
#!/bin/sh
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
#   C++11, OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF so tribits doesn't automatically activate
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# ("Makefile*" fixed: CMake writes "Makefile"; the old "MakeFile*"
# pattern never matched on a case-sensitive filesystem.)
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,78 +0,0 @@
#!/bin/sh
#
# Trilinos/Kokkos configure script: gcc, DEBUG build with bounds checking.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# <none>
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
# Install into a date-stamped directory so successive builds don't collide.
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile' (capital M only);
# the old 'MakeFile*' glob matched nothing on case-sensitive filesystems.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,89 +0,0 @@
#!/bin/sh
#
# Trilinos/Kokkos configure script: Intel + OpenMP + CUDA (via nvcc_wrapper),
# RELEASE build.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Intel, OpenMP, Cuda
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
# Install into a date-stamped directory so successive builds don't collide.
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
# nvcc_wrapper forwards host-compiler flags to icpc and CUDA flags to nvcc.
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile' (capital M only);
# the old 'MakeFile*' glob matched nothing on case-sensitive filesystems.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,84 +0,0 @@
#!/bin/sh
#
# Trilinos/Kokkos configure script: Intel + OpenMP, RELEASE build.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Intel, OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 intel/13.SP1.1.106
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
# Install into a date-stamped directory so successive builds don't collide.
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Pthread explicitly OFF
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile' (capital M only);
# the old 'MakeFile*' glob matched nothing on case-sensitive filesystems.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,77 +0,0 @@
#!/bin/sh
#
# Trilinos/Kokkos configure script: gcc + OpenMP, RELEASE build.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# OpenMP
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
# Install into a date-stamped directory so successive builds don't collide.
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# OpenMP
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile' (capital M only);
# the old 'MakeFile*' glob matched nothing on case-sensitive filesystems.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,87 +0,0 @@
#!/bin/sh
#
# Trilinos/Kokkos configure script: gcc + Pthreads + hwloc, RELEASE build.
#
# Copy this script, put it outside the Trilinos source directory, and
# build there.
#
#-----------------------------------------------------------------------------
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
#
# Threads, hwloc
#
# module loaded on 'kokkos-dev.sandia.gov' for this build
#
# module load cmake/2.8.11.2 gcc/4.8.3
#
#-----------------------------------------------------------------------------
# Source and installation directories:
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
# Install into a date-stamped directory so successive builds don't collide.
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/$(date +%F)
CMAKE_CONFIGURE=""
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
#-----------------------------------------------------------------------------
# Debug/optimized
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
#-----------------------------------------------------------------------------
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
#-----------------------------------------------------------------------------
# Configure for Kokkos subpackages and tests:
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
#-----------------------------------------------------------------------------
# Hardware locality configuration:
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
#-----------------------------------------------------------------------------
# Pthread
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
#-----------------------------------------------------------------------------
# C++11
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
#-----------------------------------------------------------------------------
#
# Remove CMake output files to force reconfigure from scratch.
# NOTE: CMake's Unix Makefiles generator writes 'Makefile' (capital M only);
# the old 'MakeFile*' glob matched nothing on case-sensitive filesystems.
#
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile*
#
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
#-----------------------------------------------------------------------------

View File

@ -1,340 +0,0 @@
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building legacy C or C++ code with CUDA enabled.
# The script remedies some differences between the interface of NVCC
# and that of the host compiler, in particular for linking.
# It also means that a legacy code doesn't need separate .cu files;
# it can just use .cpp files.
#
# Default settings: change those according to your machine. For
# example, you may have have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overwritten
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
default_arch="sm_35"
#default_arch="sm_50"
#
# The default C++ compiler.
# NVCC_WRAPPER_DEFAULT_COMPILER env var overrides; g++ is the fallback.
#
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
#host_compiler="icpc"
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
#
# Internal variables
#
# C++ files
cpp_files=""
# Host compiler arguments
xcompiler_args=""
# Cuda (NVCC) only arguments
cuda_args=""
# Arguments for both NVCC and Host compiler
shared_args=""
# Argument -c
compile_arg=""
# Argument -o <obj>
output_arg=""
# Linker arguments
xlinker_args=""
# Object files passable to NVCC
object_files=""
# Link objects for the host linker only
object_files_xlinker=""
# Shared libraries with version numbers are not handled correctly by NVCC
shared_versioned_libraries_host=""
shared_versioned_libraries=""
# Does the User set the architecture
arch_set=0
# Does the user overwrite the host compiler
ccbin_set=0
#Error code of compilation
error_code=0
# Do a dry run without actually compiling
dry_run=0
# Skip NVCC compilation and use host compiler directly
host_only=0
host_only_args=""
# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0
# Mark first host compiler argument
first_xcompiler_arg=1
temp_dir=${TMPDIR:-/tmp}
# Check if we have an optimization argument already
optimization_applied=0
# Check if we have -std=c++X or --std=c++X already
stdcxx_applied=0
# Run nvcc a second time to generate dependencies if needed
depfile_separate=0
depfile_output_arg=""
depfile_target_arg=""
#echo "Arguments: $# $@"
# Classify every command-line argument into one of the buckets above.
# Case patterns are matched top to bottom, so more specific patterns
# (e.g. -Xcompiler) must appear before the catch-all '*' arm.
while [ $# -gt 0 ]
do
case $1 in
#show the executed command
--show|--nvcc-wrapper-show)
dry_run=1
;;
#run host compilation only
--host-only)
host_only=1
;;
#replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
--replace-pragma-ident)
replace_pragma_ident=1
;;
#handle source files to be compiled as cuda files
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
cpp_files="$cpp_files $1"
;;
# Ensure we only have one optimization flag because NVCC doesn't allow muliple
-O*)
if [ $optimization_applied -eq 1 ]; then
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
else
shared_args="$shared_args $1"
optimization_applied=1
fi
;;
#Handle shared args (valid for both nvcc and the host compiler)
-D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
shared_args="$shared_args $1"
;;
#Handle compilation argument
-c)
compile_arg="$1"
;;
#Handle output argument
-o)
output_arg="$output_arg $1 $2"
shift
;;
# Handle depfile arguments. We map them to a separate call to nvcc.
-MD|-MMD)
depfile_separate=1
host_only_args="$host_only_args $1"
;;
-MF)
depfile_output_arg="-o $2"
host_only_args="$host_only_args $1 $2"
shift
;;
-MT)
depfile_target_arg="$1 $2"
host_only_args="$host_only_args $1 $2"
shift
;;
#Handle known nvcc args
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args
--expt-extended-lambda|--expt-relaxed-constexpr)
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument
-rdc|-maxrregcount|--default-stream)
cuda_args="$cuda_args $1 $2"
shift
;;
#Handle c++11
--std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z)
if [ $stdcxx_applied -eq 1 ]; then
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting"
else
shared_args="$shared_args $1"
stdcxx_applied=1
fi
;;
#strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-std=c++98|--std=c++98)
;;
#strip of pedantic because it produces endless warnings about #LINE added by the preprocessor
-pedantic|-Wpedantic|-ansi)
;;
#strip of -Woverloaded-virtual to avoid "cc1: warning: command line option -Woverloaded-virtual is valid for C++/ObjC++ but not for C"
-Woverloaded-virtual)
;;
#strip -Xcompiler because we add it
# Host-compiler args are accumulated comma-separated so they can be
# passed to nvcc as a single '-Xcompiler a,b,c' list later.
-Xcompiler)
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$2"
fi
shift
;;
#strip of "-x cu" because we add that
-x)
if [[ $2 != "cu" ]]; then
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="-x,$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,-x,$2"
fi
fi
shift
;;
#Handle -ccbin (if its not set we can set it to a default value)
-ccbin)
cuda_args="$cuda_args $1 $2"
ccbin_set=1
host_compiler=$2
shift
;;
#Handle -arch argument (if its not set use a default
-arch*)
cuda_args="$cuda_args $1"
arch_set=1
;;
#Handle -Xcudafe argument
-Xcudafe)
cuda_args="$cuda_args -Xcudafe $2"
shift
;;
#Handle args that should be sent to the linker
# ${1:4:${#1}} strips the leading '-Wl,' (first 4 chars) from the arg.
# NOTE(review): host_linker_args is never initialized above; bash expands
# the unset variable to empty on first use, so this works — confirm intent.
-Wl*)
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
host_linker_args="$host_linker_args ${1:4:${#1}}"
;;
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
*.a|*.so|*.o|*.obj)
object_files="$object_files $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
@*|*.dylib)
object_files="$object_files -Xlinker $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle shared libraries with *.so.* names which nvcc can't do.
*.so.*)
shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
;;
#All other args are sent to the host compiler
*)
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args=$1
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$1"
fi
;;
esac
shift
done
#Add default host compiler if necessary
if [ $ccbin_set -ne 1 ]; then
cuda_args="$cuda_args -ccbin $host_compiler"
fi
#Add architecture command
if [ $arch_set -ne 1 ]; then
cuda_args="$cuda_args -arch=$default_arch"
fi
#Compose compilation command
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
if [ $first_xcompiler_arg -eq 0 ]; then
nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
fi
#Compose host only command
host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
# Rewrite offending sources into $temp_dir copies and compile those instead.
if [ $replace_pragma_ident -eq 1 ]; then
cpp_files2=""
for file in $cpp_files
do
var=`grep pragma ${file} | grep ident | grep "#"`
if [ "${#var}" -gt 0 ]
then
sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file
cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file"
else
cpp_files2="$cpp_files2 $file"
fi
done
cpp_files=$cpp_files2
#echo $cpp_files
fi
# With sources present, compile everything as CUDA ('-x cu') and pass
# object files through -Xlinker; with no sources this is a link-only call.
if [ "$cpp_files" ]; then
nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
else
nvcc_command="$nvcc_command $object_files"
fi
if [ "$cpp_files" ]; then
host_command="$host_command $object_files $cpp_files"
else
host_command="$host_command $object_files"
fi
if [ $depfile_separate -eq 1 ]; then
# run nvcc a second time to generate dependencies (without compiling)
nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg"
else
nvcc_depfile_command=""
fi
nvcc_command="$nvcc_command $compile_arg $output_arg"
#Print command for dryrun
if [ $dry_run -eq 1 ]; then
if [ $host_only -eq 1 ]; then
echo $host_command
elif [ -n "$nvcc_depfile_command" ]; then
echo $nvcc_command "&&" $nvcc_depfile_command
else
echo $nvcc_command
fi
exit 0
fi
#Run compilation command
if [ $host_only -eq 1 ]; then
$host_command
elif [ -n "$nvcc_depfile_command" ]; then
$nvcc_command && $nvcc_depfile_command
else
$nvcc_command
fi
error_code=$?
#Report error code
# Propagate the compiler's exit status so build systems see failures.
exit $error_code

View File

@ -14,25 +14,52 @@ PROCESSOR=`uname -p`
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
MACHINE=white
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
module load git
fi
if [[ "$HOSTNAME" =~ .*bowman.* ]]; then
MACHINE=bowman
elif [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name
module load git
fi
if [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name
if [[ "$PROCESSOR" = "aarch64" ]]; then
MACHINE=sullivan
module load git
fi
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
fi
if [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
if [[ "$MACHINE" = "" ]]; then
MACHINE=shepard
elif [[ "$HOSTNAME" =~ apollo ]]; then
module load git
fi
fi
if [[ "$HOSTNAME" =~ apollo ]]; then
MACHINE=apollo
elif [[ "$HOSTNAME" =~ sullivan ]]; then
module load git
fi
if [[ "$HOSTNAME" =~ sullivan ]]; then
MACHINE=sullivan
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
MACHINE=sems
else
module load git
fi
if [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
if [[ "$MACHINE" = "" ]]; then
MACHINE=sems
module load sems-git
fi
fi
if [[ "$MACHINE" = "" ]]; then
echo "Unrecognized machine" >&2
exit 1
fi
echo "Running on machine: $MACHINE"
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
@ -45,7 +72,8 @@ GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
PGI_WARNING_FLAGS=""
# Default. Machine specific can override.
@ -142,6 +170,18 @@ else
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
fi
UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null`
if ! [ -z "$UNCOMMITTED" ]; then
echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :"
echo "$UNCOMMITTED"
echo ""
fi
GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline`
echo "Repository Status: " ${GITSTATUS}
echo ""
echo ""
#
# Machine specific config.
#
@ -149,7 +189,7 @@ fi
if [ "$MACHINE" = "sems" ]; then
source /projects/sems/modulefiles/utils/sems-modules-init.sh
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
@ -178,9 +218,9 @@ if [ "$MACHINE" = "sems" ]; then
"clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
fi
elif [ "$MACHINE" = "white" ]; then
@ -191,14 +231,14 @@ elif [ "$MACHINE" = "white" ]; then
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6-BETA"
CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6"
# Don't do pthread on white.
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
"ibm/13.1.6 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
"cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
@ -281,7 +321,7 @@ elif [ "$MACHINE" = "apollo" ]; then
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44"
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.0.69"
NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
@ -294,13 +334,13 @@ elif [ "$MACHINE" = "apollo" ]; then
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
"clang/4.0.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
"cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
"cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
else
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"clang/4.0.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
@ -311,13 +351,11 @@ elif [ "$MACHINE" = "apollo" ]; then
"intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
fi
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=SNB,Kepler35"
ARCH_FLAG="--arch=SNB,Volta70"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
@ -700,17 +738,19 @@ wait_summarize_and_exit() {
echo $passed_test $(cat $PASSED_DIR/$passed_test)
done
echo "#######################################################"
echo "FAILED TESTS"
echo "#######################################################"
local failed_test
local -i rv=0
for failed_test in $(\ls -1 $FAILED_DIR | sort)
do
echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
rv=$rv+1
done
if [ "$(ls -A $FAILED_DIR)" ]; then
echo "#######################################################"
echo "FAILED TESTS"
echo "#######################################################"
local failed_test
for failed_test in $(\ls -1 $FAILED_DIR | sort)
do
echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
rv=$rv+1
done
fi
exit $rv
}

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -64,8 +64,8 @@ struct InitViewFunctor {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
for (unsigned j = 0; j < _inview.extent(1); ++j) {
for (unsigned k = 0; k < _inview.extent(2); ++k) {
_inview(i,j,k) = i/2 -j*j + k/3;
}
}
@ -84,8 +84,8 @@ struct InitViewFunctor {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
for (unsigned j = 0; j < _inview.extent(1); ++j) {
for (unsigned k = 0; k < _inview.extent(2); ++k) {
_outview(i) += _inview(i,j,k) ;
}
}
@ -104,8 +104,8 @@ struct InitStrideViewFunctor {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
for (unsigned j = 0; j < _inview.extent(1); ++j) {
for (unsigned k = 0; k < _inview.extent(2); ++k) {
_inview(i,j,k) = i/2 -j*j + k/3;
}
}
@ -123,8 +123,8 @@ struct InitViewRank7Functor {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
for (unsigned j = 0; j < _inview.extent(1); ++j) {
for (unsigned k = 0; k < _inview.extent(2); ++k) {
_inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3;
}
}
@ -143,8 +143,8 @@ struct InitDynRankViewFunctor {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
for (unsigned j = 0; j < _inview.extent(1); ++j) {
for (unsigned k = 0; k < _inview.extent(2); ++k) {
_inview(i,j,k) = i/2 -j*j + k/3;
}
}
@ -163,8 +163,8 @@ struct InitDynRankViewFunctor {
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
for (unsigned j = 0; j < _inview.extent(1); ++j) {
for (unsigned k = 0; k < _inview.extent(2); ++k) {
_outview(i) += _inview(i,j,k) ;
}
}

View File

@ -34,7 +34,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -76,7 +76,7 @@ struct generate_ids
generate_ids( local_id_view & ids)
: local_2_global(ids)
{
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
Kokkos::parallel_for(local_2_global.extent(0), *this);
}
@ -116,7 +116,7 @@ struct fill_map
fill_map( global_id_view gIds, local_id_view lIds)
: global_2_local(gIds) , local_2_global(lIds)
{
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
Kokkos::parallel_for(local_2_global.extent(0), *this);
}
KOKKOS_INLINE_FUNCTION
@ -143,7 +143,7 @@ struct find_test
find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
: global_2_local(gIds) , local_2_global(lIds)
{
Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors);
Kokkos::parallel_reduce(local_2_global.extent(0), *this, num_errors);
}
KOKKOS_INLINE_FUNCTION

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -34,7 +34,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -147,7 +147,7 @@ public:
if (m_last_block_mask) {
//clear the unused bits in the last block
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
raw_deep_copy( m_blocks.data() + (m_blocks.extent(0) -1u), &m_last_block_mask, sizeof(unsigned));
}
}
@ -212,7 +212,7 @@ public:
KOKKOS_FORCEINLINE_FUNCTION
unsigned max_hint() const
{
return m_blocks.dimension_0();
return m_blocks.extent(0);
}
/// find a bit set to 1 near the hint
@ -221,10 +221,10 @@ public:
KOKKOS_INLINE_FUNCTION
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
{
const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
const unsigned block_idx = (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
const unsigned offset = hint & block_mask;
unsigned block = volatile_load(&m_blocks[ block_idx ]);
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1)) ? block : block & m_last_block_mask ;
return find_any_helper(block_idx, offset, block, scan_direction);
}
@ -238,7 +238,7 @@ public:
const unsigned block_idx = hint >> block_shift;
const unsigned offset = hint & block_mask;
unsigned block = volatile_load(&m_blocks[ block_idx ]);
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1) ) ? ~block : ~block & m_last_block_mask ;
return find_any_helper(block_idx, offset, block, scan_direction);
}
@ -281,8 +281,8 @@ private:
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
{
block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;
block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1;
block_idx = block_idx < static_cast<long long>(m_blocks.extent(0)) ? block_idx : 0;
return static_cast<unsigned>(block_idx)*block_size + offset;
}
@ -407,7 +407,7 @@ void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0));
}
template <typename DstDevice, typename SrcDevice>
@ -418,7 +418,7 @@ void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0));
}
template <typename DstDevice, typename SrcDevice>
@ -429,7 +429,7 @@ void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
}
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0));
}
} // namespace Kokkos

View File

@ -35,7 +35,7 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
@ -262,14 +262,14 @@ public:
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
{
if ( int(d_view.rank) != int(h_view.rank) ||
d_view.dimension_0() != h_view.dimension_0() ||
d_view.dimension_1() != h_view.dimension_1() ||
d_view.dimension_2() != h_view.dimension_2() ||
d_view.dimension_3() != h_view.dimension_3() ||
d_view.dimension_4() != h_view.dimension_4() ||
d_view.dimension_5() != h_view.dimension_5() ||
d_view.dimension_6() != h_view.dimension_6() ||
d_view.dimension_7() != h_view.dimension_7() ||
d_view.extent(0) != h_view.extent(0) ||
d_view.extent(1) != h_view.extent(1) ||
d_view.extent(2) != h_view.extent(2) ||
d_view.extent(3) != h_view.extent(3) ||
d_view.extent(4) != h_view.extent(4) ||
d_view.extent(5) != h_view.extent(5) ||
d_view.extent(6) != h_view.extent(6) ||
d_view.extent(7) != h_view.extent(7) ||
d_view.stride_0() != h_view.stride_0() ||
d_view.stride_1() != h_view.stride_1() ||
d_view.stride_2() != h_view.stride_2() ||
@ -503,6 +503,18 @@ public:
/* Realloc on Device */
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
const bool sizeMismatch = ( h_view.extent(0) != n0 ) ||
( h_view.extent(1) != n1 ) ||
( h_view.extent(2) != n2 ) ||
( h_view.extent(3) != n3 ) ||
( h_view.extent(4) != n4 ) ||
( h_view.extent(5) != n5 ) ||
( h_view.extent(6) != n6 ) ||
( h_view.extent(7) != n7 );
if ( sizeMismatch )
::Kokkos::resize(h_view,n0,n1,n2,n3,n4,n5,n6,n7);
t_host temp_view = create_mirror_view( d_view );
/* Remap on Host */
@ -510,6 +522,8 @@ public:
h_view = temp_view;
d_view = create_mirror_view( typename t_dev::execution_space(), h_view );
/* Mark Host copy as modified */
modified_host() = modified_host()+1;
}
@ -530,22 +544,34 @@ public:
d_view.stride(stride_);
}
template< typename iType >
KOKKOS_INLINE_FUNCTION constexpr
typename std::enable_if< std::is_integral<iType>::value , size_t >::type
extent( const iType & r ) const
{ return d_view.extent(r); }
template< typename iType >
KOKKOS_INLINE_FUNCTION constexpr
typename std::enable_if< std::is_integral<iType>::value , int >::type
extent_int( const iType & r ) const
{ return static_cast<int>(d_view.extent(r)); }
/* \brief return size of dimension 0 */
size_t dimension_0() const {return d_view.dimension_0();}
size_t dimension_0() const {return d_view.extent(0);}
/* \brief return size of dimension 1 */
size_t dimension_1() const {return d_view.dimension_1();}
size_t dimension_1() const {return d_view.extent(1);}
/* \brief return size of dimension 2 */
size_t dimension_2() const {return d_view.dimension_2();}
size_t dimension_2() const {return d_view.extent(2);}
/* \brief return size of dimension 3 */
size_t dimension_3() const {return d_view.dimension_3();}
size_t dimension_3() const {return d_view.extent(3);}
/* \brief return size of dimension 4 */
size_t dimension_4() const {return d_view.dimension_4();}
size_t dimension_4() const {return d_view.extent(4);}
/* \brief return size of dimension 5 */
size_t dimension_5() const {return d_view.dimension_5();}
size_t dimension_5() const {return d_view.extent(5);}
/* \brief return size of dimension 6 */
size_t dimension_6() const {return d_view.dimension_6();}
size_t dimension_6() const {return d_view.extent(6);}
/* \brief return size of dimension 7 */
size_t dimension_7() const {return d_view.dimension_7();}
size_t dimension_7() const {return d_view.extent(7);}
//@}
};

View File

@ -35,16 +35,16 @@
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/// \file Kokkos_DynRankView.hpp
/// \brief Declaration and definition of Kokkos::Experimental::DynRankView.
/// \brief Declaration and definition of Kokkos::DynRankView.
///
/// This header file declares and defines Kokkos::Experimental::DynRankView and its
/// This header file declares and defines Kokkos::DynRankView and its
/// related nonmember functions.
#ifndef KOKKOS_DYNRANKVIEW_HPP
@ -55,7 +55,6 @@
#include <type_traits>
namespace Kokkos {
namespace Experimental {
template< typename DataType , class ... Properties >
class DynRankView; //forward declare
@ -156,7 +155,7 @@ struct DynRankDimTraits {
// Extra overload to match that for specialize types
template <typename Traits, typename ... P>
KOKKOS_INLINE_FUNCTION
static typename std::enable_if< (std::is_same<typename Traits::array_layout , Kokkos::LayoutRight>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutLeft>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutStride>::value) , typename Traits::array_layout >::type createLayout( const ViewCtorProp<P...>& prop, const typename Traits::array_layout& layout )
static typename std::enable_if< (std::is_same<typename Traits::array_layout , Kokkos::LayoutRight>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutLeft>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutStride>::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp<P...>& prop, const typename Traits::array_layout& layout )
{
return createLayout( layout );
}
@ -318,7 +317,6 @@ void dyn_rank_view_verify_operator_bounds
struct ViewToDynRankViewTag {};
} // namespace Impl
} // namespace Experimental
namespace Impl {
@ -348,7 +346,7 @@ class ViewMapping< DstTraits , SrcTraits ,
)
)
)
) , Kokkos::Experimental::Impl::ViewToDynRankViewTag >::type >
) , Kokkos::Impl::ViewToDynRankViewTag >::type >
{
private:
@ -375,7 +373,7 @@ public:
template < typename DT , typename ... DP , typename ST , typename ... SP >
KOKKOS_INLINE_FUNCTION
static void assign( Kokkos::Experimental::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src )
static void assign( Kokkos::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src )
{
static_assert( is_assignable_value_type
, "View assignment must have same value type or const = non-const" );
@ -395,8 +393,6 @@ public:
} //end Impl
namespace Experimental {
/* \class DynRankView
* \brief Container that creates a Kokkos view with rank determined at runtime.
* Essentially this is a rank 7 view
@ -415,7 +411,7 @@ namespace Experimental {
template< class > struct is_dyn_rank_view : public std::false_type {};
template< class D, class ... P >
struct is_dyn_rank_view< Kokkos::Experimental::DynRankView<D,P...> > : public std::true_type {};
struct is_dyn_rank_view< Kokkos::DynRankView<D,P...> > : public std::true_type {};
template< typename DataType , class ... Properties >
@ -425,7 +421,7 @@ class DynRankView : public ViewTraits< DataType , Properties ... >
private:
template < class , class ... > friend class DynRankView ;
template < class , class ... > friend class Impl::ViewMapping ;
template < class , class ... > friend class Kokkos::Impl::ViewMapping ;
public:
typedef ViewTraits< DataType , Properties ... > drvtraits ;
@ -437,7 +433,7 @@ public:
private:
typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
track_type m_track ;
map_type m_map ;
@ -601,7 +597,7 @@ private:
// rank of the calling operator - included as first argument in ARG
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ;
Kokkos::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ;
#else
@ -778,6 +774,140 @@ public:
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
}
// Rank 0
KOKKOS_INLINE_FUNCTION
reference_type access() const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) )
return implementation_map().reference();
//return m_map.reference(0,0,0,0,0,0,0);
}
// Rank 1
// Rank 1 parenthesis
template< typename iType >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
access(const iType & i0 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) )
return m_map.reference(i0);
}
template< typename iType >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< !(std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
access(const iType & i0 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) )
return m_map.reference(i0,0,0,0,0,0,0);
}
// Rank 2
template< typename iType0 , typename iType1 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) )
return m_map.reference(i0,i1);
}
template< typename iType0 , typename iType1 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) )
return m_map.reference(i0,i1,0,0,0,0,0);
}
// Rank 3
template< typename iType0 , typename iType1 , typename iType2 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) )
return m_map.reference(i0,i1,i2);
}
template< typename iType0 , typename iType1 , typename iType2 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) )
return m_map.reference(i0,i1,i2,0,0,0,0);
}
// Rank 4
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) )
return m_map.reference(i0,i1,i2,i3);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) )
return m_map.reference(i0,i1,i2,i3,0,0,0);
}
// Rank 5
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) )
return m_map.reference(i0,i1,i2,i3,i4);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) )
return m_map.reference(i0,i1,i2,i3,i4,0,0);
}
// Rank 6
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) )
return m_map.reference(i0,i1,i2,i3,i4,i5);
}
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) )
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
}
// Rank 7
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 >
KOKKOS_INLINE_FUNCTION
typename std::enable_if< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), reference_type>::type
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
{
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) )
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
}
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
//----------------------------------------
@ -830,7 +960,6 @@ public:
return *this;
}
// Experimental
// Copy/Assign View to DynRankView
template< class RT , class ... RP >
KOKKOS_INLINE_FUNCTION
@ -840,7 +969,7 @@ public:
, m_rank( rhs.Rank )
{
typedef typename View<RT,RP...>::traits SrcTraits ;
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ;
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ;
static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
Mapping::assign( *this , rhs );
}
@ -850,7 +979,7 @@ public:
DynRankView & operator = ( const View<RT,RP...> & rhs )
{
typedef typename View<RT,RP...>::traits SrcTraits ;
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ;
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ;
static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" );
Mapping::assign( *this , rhs );
return *this ;
@ -872,8 +1001,8 @@ public:
// unused arg_layout dimensions must be set to ~size_t(0) so that rank deduction can properly take place
template< class ... P >
explicit inline
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer
, typename traits::array_layout
>::type const & arg_layout
)
@ -882,11 +1011,11 @@ public:
, m_rank( Impl::DynRankDimTraits<typename traits::specialize>::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) )
{
// Append layout and spaces if not input
typedef Impl::ViewCtorProp< P ... > alloc_prop_input ;
typedef Kokkos::Impl::ViewCtorProp< P ... > alloc_prop_input ;
// use 'std::integral_constant<unsigned,I>' for non-types
// to avoid duplicate class error.
typedef Impl::ViewCtorProp
typedef Kokkos::Impl::ViewCtorProp
< P ...
, typename std::conditional
< alloc_prop_input::has_label
@ -931,7 +1060,7 @@ public:
#endif
//------------------------------------------------------------
Kokkos::Experimental::Impl::SharedAllocationRecord<> *
Kokkos::Impl::SharedAllocationRecord<> *
record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::template createLayout<traits, P...>(arg_prop, arg_layout) );
//------------------------------------------------------------
@ -950,8 +1079,8 @@ public:
// Wrappers
template< class ... P >
explicit KOKKOS_INLINE_FUNCTION
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer
, typename traits::array_layout
>::type const & arg_layout
)
@ -972,8 +1101,8 @@ public:
// Simple dimension-only layout
template< class ... P >
explicit inline
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer
, size_t
>::type const arg_N0 = ~size_t(0)
, const size_t arg_N1 = ~size_t(0)
@ -992,8 +1121,8 @@ public:
template< class ... P >
explicit KOKKOS_INLINE_FUNCTION
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
, typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer
, size_t
>::type const arg_N0 = ~size_t(0)
, const size_t arg_N1 = ~size_t(0)
@ -1015,10 +1144,10 @@ public:
explicit inline
DynRankView( const Label & arg_label
, typename std::enable_if<
Kokkos::Experimental::Impl::is_view_label<Label>::value ,
Kokkos::Impl::is_view_label<Label>::value ,
typename traits::array_layout >::type const & arg_layout
)
: DynRankView( Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout )
: DynRankView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout )
{}
// Allocate label and layout, must disambiguate from subview constructor
@ -1026,7 +1155,7 @@ public:
explicit inline
DynRankView( const Label & arg_label
, typename std::enable_if<
Kokkos::Experimental::Impl::is_view_label<Label>::value ,
Kokkos::Impl::is_view_label<Label>::value ,
const size_t >::type arg_N0 = ~size_t(0)
, const size_t arg_N1 = ~size_t(0)
, const size_t arg_N2 = ~size_t(0)
@ -1036,7 +1165,7 @@ public:
, const size_t arg_N6 = ~size_t(0)
, const size_t arg_N7 = ~size_t(0)
)
: DynRankView( Impl::ViewCtorProp< std::string >( arg_label )
: DynRankView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label )
, typename traits::array_layout
( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
)
@ -1048,7 +1177,8 @@ public:
DynRankView( const ViewAllocateWithoutInitializing & arg_prop
, const typename traits::array_layout & arg_layout
)
: DynRankView( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing )
: DynRankView( Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing )
, Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout)
)
{}
@ -1064,7 +1194,7 @@ public:
, const size_t arg_N6 = ~size_t(0)
, const size_t arg_N7 = ~size_t(0)
)
: DynRankView(Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
: DynRankView(Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
{}
//----------------------------------------
@ -1097,14 +1227,14 @@ public:
, const size_t arg_N6 = ~size_t(0)
, const size_t arg_N7 = ~size_t(0)
)
: DynRankView( Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
{}
explicit KOKKOS_INLINE_FUNCTION
DynRankView( pointer_type arg_ptr
, typename traits::array_layout & arg_layout
)
: DynRankView( Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_layout )
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_layout )
{}
@ -1140,7 +1270,7 @@ public:
explicit KOKKOS_INLINE_FUNCTION
DynRankView( const typename traits::execution_space::scratch_memory_space & arg_space
, const typename traits::array_layout & arg_layout )
: DynRankView( Impl::ViewCtorProp<pointer_type>(
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(
reinterpret_cast<pointer_type>(
arg_space.get_shmem( map_type::memory_span(
Impl::DynRankDimTraits<typename traits::specialize>::createLayout( arg_layout ) //is this correct?
@ -1159,7 +1289,7 @@ public:
, const size_t arg_N6 = ~size_t(0)
, const size_t arg_N7 = ~size_t(0) )
: DynRankView( Impl::ViewCtorProp<pointer_type>(
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(
reinterpret_cast<pointer_type>(
arg_space.get_shmem(
map_type::memory_span(
@ -1190,7 +1320,6 @@ namespace Impl {
struct DynRankSubviewTag {};
} // namespace Impl
} // namespace Experimental
namespace Impl {
@ -1207,7 +1336,7 @@ struct ViewMapping
std::is_same< typename SrcTraits::array_layout
, Kokkos::LayoutStride >::value
)
), Kokkos::Experimental::Impl::DynRankSubviewTag >::type
), Kokkos::Impl::DynRankSubviewTag >::type
, SrcTraits
, Args ... >
{
@ -1279,11 +1408,11 @@ public:
};
typedef Kokkos::Experimental::DynRankView< value_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > ret_type;
typedef Kokkos::DynRankView< value_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > ret_type;
template < typename T , class ... P >
KOKKOS_INLINE_FUNCTION
static ret_type subview( const unsigned src_rank , Kokkos::Experimental::DynRankView< T , P...> const & src
static ret_type subview( const unsigned src_rank , Kokkos::DynRankView< T , P...> const & src
, Args ... args )
{
@ -1351,20 +1480,19 @@ public:
} // end Impl
namespace Experimental {
template< class V , class ... Args >
using Subdynrankview = typename Kokkos::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , V , Args... >::ret_type ;
using Subdynrankview = typename Kokkos::Impl::ViewMapping< Kokkos::Impl::DynRankSubviewTag , V , Args... >::ret_type ;
template< class D , class ... P , class ...Args >
KOKKOS_INLINE_FUNCTION
Subdynrankview< ViewTraits<D******* , P...> , Args... >
subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args...args)
subdynrankview( const Kokkos::DynRankView< D , P... > &src , Args...args)
{
if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args
{ Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); }
typedef Kokkos::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
typedef Kokkos::Impl::ViewMapping< Kokkos::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
return metafcn::subview( src.rank() , src , args... );
}
@ -1373,16 +1501,14 @@ subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args.
template< class D , class ... P , class ...Args >
KOKKOS_INLINE_FUNCTION
Subdynrankview< ViewTraits<D******* , P...> , Args... >
subview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args...args)
subview( const Kokkos::DynRankView< D , P... > &src , Args...args)
{
return subdynrankview( src , args... );
}
} // namespace Experimental
} // namespace Kokkos
namespace Kokkos {
namespace Experimental {
// overload == and !=
template< class LT , class ... LP , class RT , class ... RP >
@ -1422,13 +1548,11 @@ bool operator != ( const DynRankView<LT,LP...> & lhs ,
return ! ( operator==(lhs,rhs) );
}
} //end Experimental
} //end Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
template< class OutputView , typename Enable = void >
@ -1455,7 +1579,7 @@ struct DynRankViewFill {
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) {
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) {
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) {
output(i0,i1,i2,i3,i4,i5,i6) = input ;
output.access(i0,i1,i2,i3,i4,i5,i6) = input ;
}}}}}}
}
@ -1498,14 +1622,14 @@ struct DynRankViewRemap {
DynRankViewRemap( const OutputView & arg_out , const InputView & arg_in )
: output( arg_out ), input( arg_in )
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
, n0( std::min( (size_t)arg_out.extent(0) , (size_t)arg_in.extent(0) ) )
, n1( std::min( (size_t)arg_out.extent(1) , (size_t)arg_in.extent(1) ) )
, n2( std::min( (size_t)arg_out.extent(2) , (size_t)arg_in.extent(2) ) )
, n3( std::min( (size_t)arg_out.extent(3) , (size_t)arg_in.extent(3) ) )
, n4( std::min( (size_t)arg_out.extent(4) , (size_t)arg_in.extent(4) ) )
, n5( std::min( (size_t)arg_out.extent(5) , (size_t)arg_in.extent(5) ) )
, n6( std::min( (size_t)arg_out.extent(6) , (size_t)arg_in.extent(6) ) )
, n7( std::min( (size_t)arg_out.extent(7) , (size_t)arg_in.extent(7) ) )
{
typedef Kokkos::RangePolicy< ExecSpace > Policy ;
const Kokkos::Impl::ParallelFor< DynRankViewRemap , Policy > closure( *this , Policy( 0 , n0 ) );
@ -1521,18 +1645,16 @@ struct DynRankViewRemap {
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) {
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) {
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) {
output(i0,i1,i2,i3,i4,i5,i6) = input(i0,i1,i2,i3,i4,i5,i6);
output.access(i0,i1,i2,i3,i4,i5,i6) = input.access(i0,i1,i2,i3,i4,i5,i6);
}}}}}}
}
};
} /* namespace Impl */
} /* namespace Experimental */
} /* namespace Kokkos */
namespace Kokkos {
namespace Experimental {
/** \brief Deep copy a value from Host memory into a view. */
template< class DT , class ... DP >
@ -1549,7 +1671,7 @@ void deep_copy
typename ViewTraits<DT,DP...>::value_type >::value
, "deep_copy requires non-const type" );
Kokkos::Experimental::Impl::DynRankViewFill< DynRankView<DT,DP...> >( dst , value );
Kokkos::Impl::DynRankViewFill< DynRankView<DT,DP...> >( dst , value );
}
/** \brief Deep copy into a value in Host memory from a view. */
@ -1585,7 +1707,7 @@ void deep_copy
std::is_same< typename DstType::traits::specialize , void >::value &&
std::is_same< typename SrcType::traits::specialize , void >::value
&&
( Kokkos::Experimental::is_dyn_rank_view<DstType>::value || Kokkos::Experimental::is_dyn_rank_view<SrcType>::value)
( Kokkos::is_dyn_rank_view<DstType>::value || Kokkos::is_dyn_rank_view<SrcType>::value)
)>::type * = 0 )
{
static_assert(
@ -1641,14 +1763,15 @@ void deep_copy
dst.span_is_contiguous() &&
src.span_is_contiguous() &&
dst.span() == src.span() &&
dst.dimension_0() == src.dimension_0() &&
dst.dimension_1() == src.dimension_1() &&
dst.dimension_2() == src.dimension_2() &&
dst.dimension_3() == src.dimension_3() &&
dst.dimension_4() == src.dimension_4() &&
dst.dimension_5() == src.dimension_5() &&
dst.dimension_6() == src.dimension_6() &&
dst.dimension_7() == src.dimension_7() ) {
dst.extent(0) == src.extent(0) &&
dst.extent(1) == src.extent(1) &&
dst.extent(2) == src.extent(2) &&
dst.extent(3) == src.extent(3) &&
dst.extent(4) == src.extent(4) &&
dst.extent(5) == src.extent(5) &&
dst.extent(6) == src.extent(6) &&
dst.extent(7) == src.extent(7) ) {
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
@ -1673,14 +1796,14 @@ void deep_copy
dst.span_is_contiguous() &&
src.span_is_contiguous() &&
dst.span() == src.span() &&
dst.dimension_0() == src.dimension_0() &&
dst.dimension_1() == src.dimension_1() &&
dst.dimension_2() == src.dimension_2() &&
dst.dimension_3() == src.dimension_3() &&
dst.dimension_4() == src.dimension_4() &&
dst.dimension_5() == src.dimension_5() &&
dst.dimension_6() == src.dimension_6() &&
dst.dimension_7() == src.dimension_7() &&
dst.extent(0) == src.extent(0) &&
dst.extent(1) == src.extent(1) &&
dst.extent(2) == src.extent(2) &&
dst.extent(3) == src.extent(3) &&
dst.extent(4) == src.extent(4) &&
dst.extent(5) == src.extent(5) &&
dst.extent(6) == src.extent(6) &&
dst.extent(7) == src.extent(7) &&
dst.stride_0() == src.stride_0() &&
dst.stride_1() == src.stride_1() &&
dst.stride_2() == src.stride_2() &&
@ -1697,11 +1820,11 @@ void deep_copy
}
else if ( DstExecCanAccessSrc ) {
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
Kokkos::Experimental::Impl::DynRankViewRemap< dst_type , src_type >( dst , src );
Kokkos::Impl::DynRankViewRemap< dst_type , src_type >( dst , src );
}
else if ( SrcExecCanAccessDst ) {
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
Kokkos::Experimental::Impl::DynRankViewRemap< dst_type , src_type , src_execution_space >( dst , src );
Kokkos::Impl::DynRankViewRemap< dst_type , src_type , src_execution_space >( dst , src );
}
else {
Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
@ -1709,7 +1832,6 @@ void deep_copy
}
}
} //end Experimental
} //end Kokkos
@ -1717,8 +1839,6 @@ void deep_copy
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
namespace Impl {
@ -1726,7 +1846,7 @@ namespace Impl {
template<class Space, class T, class ... P>
struct MirrorDRViewType {
// The incoming view_type
typedef typename Kokkos::Experimental::DynRankView<T,P...> src_view_type;
typedef typename Kokkos::DynRankView<T,P...> src_view_type;
// The memory space for the mirror view
typedef typename Space::memory_space memory_space;
// Check whether it is the same memory space
@ -1736,7 +1856,7 @@ struct MirrorDRViewType {
// The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
typedef typename src_view_type::non_const_data_type data_type;
// The destination view type if it is not the same memory space
typedef Kokkos::Experimental::DynRankView<data_type,array_layout,Space> dest_view_type;
typedef Kokkos::DynRankView<data_type,array_layout,Space> dest_view_type;
// If it is the same memory_space return the existsing view_type
// This will also keep the unmanaged trait if necessary
typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type;
@ -1745,7 +1865,7 @@ struct MirrorDRViewType {
template<class Space, class T, class ... P>
struct MirrorDRVType {
// The incoming view_type
typedef typename Kokkos::Experimental::DynRankView<T,P...> src_view_type;
typedef typename Kokkos::DynRankView<T,P...> src_view_type;
// The memory space for the mirror view
typedef typename Space::memory_space memory_space;
// Check whether it is the same memory space
@ -1755,12 +1875,11 @@ struct MirrorDRVType {
// The data type (we probably want it non-const since otherwise we can't even deep_copy to it.
typedef typename src_view_type::non_const_data_type data_type;
// The destination view type if it is not the same memory space
typedef Kokkos::Experimental::DynRankView<data_type,array_layout,Space> view_type;
typedef Kokkos::DynRankView<data_type,array_layout,Space> view_type;
};
}
template< class T , class ... P >
inline
typename DynRankView<T,P...>::HostMirror
@ -1799,7 +1918,7 @@ create_mirror( const DynRankView<T,P...> & src
// Create a mirror in a new space (specialization for different space)
template<class Space, class T, class ... P>
typename Impl::MirrorDRVType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src) {
typename Impl::MirrorDRVType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::DynRankView<T,P...> & src) {
return typename Impl::MirrorDRVType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) );
}
@ -1836,13 +1955,13 @@ create_mirror_view( const DynRankView<T,P...> & src
)>::type * = 0
)
{
return Kokkos::Experimental::create_mirror( src );
return Kokkos::create_mirror( src );
}
// Create a mirror view in a new space (specialization for same space)
template<class Space, class T, class ... P>
typename Impl::MirrorDRViewType<Space,T,P ...>::view_type
create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src
create_mirror_view(const Space& , const Kokkos::DynRankView<T,P...> & src
, typename std::enable_if<Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
return src;
}
@ -1850,12 +1969,11 @@ create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...
// Create a mirror view in a new space (specialization for different space)
template<class Space, class T, class ... P>
typename Impl::MirrorDRViewType<Space,T,P ...>::view_type
create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src
create_mirror_view(const Space& , const Kokkos::DynRankView<T,P...> & src
, typename std::enable_if<!Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
return typename Impl::MirrorDRViewType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) );
}
} //end Experimental
} //end Kokkos
@ -1863,7 +1981,6 @@ create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
/** \brief Resize a view with copying old data to new data at the corresponding indices. */
template< class T , class ... P >
inline
@ -1877,13 +1994,13 @@ void resize( DynRankView<T,P...> & v ,
const size_t n6 = ~size_t(0) ,
const size_t n7 = ~size_t(0) )
{
typedef DynRankView<T,P...> drview_type ;
typedef DynRankView<T,P...> drview_type ;
static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
drview_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6 );
Kokkos::Experimental::Impl::DynRankViewRemap< drview_type , drview_type >( v_resized, v );
Kokkos::Impl::DynRankViewRemap< drview_type , drview_type >( v_resized, v );
v = v_resized ;
}
@ -1911,25 +2028,7 @@ void realloc( DynRankView<T,P...> & v ,
v = drview_type( label, n0, n1, n2, n3, n4, n5, n6 );
}
} //end Experimental
} //end Kokkos
using Kokkos::Experimental::is_dyn_rank_view ;
namespace Kokkos {
template< typename D , class ... P >
using DynRankView = Kokkos::Experimental::DynRankView< D , P... > ;
using Kokkos::Experimental::deep_copy ;
using Kokkos::Experimental::create_mirror ;
using Kokkos::Experimental::create_mirror_view ;
using Kokkos::Experimental::subdynrankview ;
using Kokkos::Experimental::subview ;
using Kokkos::Experimental::resize ;
using Kokkos::Experimental::realloc ;
} //end Kokkos
#endif

Some files were not shown because too many files have changed in this diff Show More