Compare commits
25 Commits
patch_8Mar
...
patch_16Ma
| Author | SHA1 | Date | |
|---|---|---|---|
| f6c76e04b8 | |||
| 3befd4b603 | |||
| e9ac8ba01e | |||
| 59dbb49cf9 | |||
| ee862d8bf5 | |||
| fc3de22c17 | |||
| ab914a9220 | |||
| 7c300eebd5 | |||
| 94a923191a | |||
| 7d2ada9d80 | |||
| 15a9600569 | |||
| d62534665f | |||
| d00908ea3e | |||
| 6965307250 | |||
| d9c6278844 | |||
| 821b18641d | |||
| ce4ffe5933 | |||
| 9c3296aad2 | |||
| b2c8c40204 | |||
| 25c46593ee | |||
| 35abbab966 | |||
| d358e886c5 | |||
| 62d446668c | |||
| fcfbdb13ab | |||
| 39786b1740 |
@ -1,7 +1,7 @@
|
||||
<!-- HTML_ONLY -->
|
||||
<HEAD>
|
||||
<TITLE>LAMMPS Users Manual</TITLE>
|
||||
<META NAME="docnumber" CONTENT="8 Mar 2018 version">
|
||||
<META NAME="docnumber" CONTENT="16 Mar 2018 version">
|
||||
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
|
||||
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
|
||||
</HEAD>
|
||||
@ -21,7 +21,7 @@
|
||||
<H1></H1>
|
||||
|
||||
LAMMPS Documentation :c,h3
|
||||
8 Mar 2018 version :c,h4
|
||||
16 Mar 2018 version :c,h4
|
||||
|
||||
Version info: :h4
|
||||
|
||||
|
||||
@ -1,70 +0,0 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
# REAX potential for Nitroamines system
|
||||
# .....
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.AB
|
||||
orthogonal box = (0 0 0) to (25 25 25)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
104 atoms
|
||||
|
||||
pair_style reax/c lmp_control
|
||||
pair_coeff * * ffield.reax.AB H B N
|
||||
Reading potential file ffield.reax.AB with DATE: 2011-02-18
|
||||
|
||||
neighbor 2 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
|
||||
fix 3 all temp/berendsen 500.0 500.0 100.0
|
||||
|
||||
timestep 0.25
|
||||
|
||||
#dump 1 all atom 30 dump.reax.ab
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 12.622 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -8505.1816 0 -8505.1816 -673.36566
|
||||
3000 496.56561 -8405.3755 0 -8252.9182 472.58916
|
||||
Loop time of 7.23109 on 4 procs for 3000 steps with 104 atoms
|
||||
|
||||
Performance: 8.961 ns/day, 2.678 hours/ns, 414.875 timesteps/s
|
||||
99.4% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 5.705 | 5.7262 | 5.7504 | 0.7 | 79.19
|
||||
Neigh | 0.14367 | 0.15976 | 0.16805 | 2.4 | 2.21
|
||||
Comm | 0.053353 | 0.077311 | 0.097821 | 5.7 | 1.07
|
||||
Output | 1.812e-05 | 1.9848e-05 | 2.408e-05 | 0.1 | 0.00
|
||||
Modify | 1.2559 | 1.2647 | 1.2818 | 0.9 | 17.49
|
||||
Other | | 0.003126 | | | 0.04
|
||||
|
||||
Nlocal: 26 ave 35 max 13 min
|
||||
Histogram: 1 0 0 0 0 1 0 0 1 1
|
||||
Nghost: 421 ave 450 max 377 min
|
||||
Histogram: 1 0 0 0 0 1 0 0 1 1
|
||||
Neighs: 847.25 ave 1149 max 444 min
|
||||
Histogram: 1 0 0 0 1 0 0 0 1 1
|
||||
|
||||
Total # of neighbors = 3389
|
||||
Ave neighs/atom = 32.5865
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:07
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for Nitroamines system
|
||||
# .....
|
||||
|
||||
@ -28,43 +29,53 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 18.4119 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 19.3 | 19.3 | 19.3 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -8505.1816 0 -8505.1816 -673.36566
|
||||
3000 499.30579 -8405.1387 0 -8251.8401 -94.844317
|
||||
Loop time of 12.5114 on 1 procs for 3000 steps with 104 atoms
|
||||
3000 478.18595 -8398.4168 0 -8251.6025 1452.6935
|
||||
Loop time of 14.3573 on 1 procs for 3000 steps with 104 atoms
|
||||
|
||||
Performance: 5.179 ns/day, 4.634 hours/ns, 239.782 timesteps/s
|
||||
99.3% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 4.513 ns/day, 5.318 hours/ns, 208.952 timesteps/s
|
||||
96.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 11.137 | 11.137 | 11.137 | 0.0 | 89.01
|
||||
Neigh | 0.29816 | 0.29816 | 0.29816 | 0.0 | 2.38
|
||||
Comm | 0.016993 | 0.016993 | 0.016993 | 0.0 | 0.14
|
||||
Output | 1.1921e-05 | 1.1921e-05 | 1.1921e-05 | 0.0 | 0.00
|
||||
Modify | 1.0552 | 1.0552 | 1.0552 | 0.0 | 8.43
|
||||
Other | | 0.004142 | | | 0.03
|
||||
Pair | 12.709 | 12.709 | 12.709 | 0.0 | 88.52
|
||||
Neigh | 0.36804 | 0.36804 | 0.36804 | 0.0 | 2.56
|
||||
Comm | 0.022419 | 0.022419 | 0.022419 | 0.0 | 0.16
|
||||
Output | 2.8133e-05 | 2.8133e-05 | 2.8133e-05 | 0.0 | 0.00
|
||||
Modify | 1.2513 | 1.2513 | 1.2513 | 0.0 | 8.72
|
||||
Other | | 0.006263 | | | 0.04
|
||||
|
||||
Nlocal: 104 ave 104 max 104 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 694 ave 694 max 694 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 2927 ave 2927 max 2927 min
|
||||
Neighs: 2866 ave 2866 max 2866 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 2927
|
||||
Ave neighs/atom = 28.1442
|
||||
Total # of neighbors = 2866
|
||||
Ave neighs/atom = 27.5577
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:12
|
||||
Total wall time: 0:00:14
|
||||
81
examples/reax/AB/log.8Mar18.AB.g++.4
Normal file
81
examples/reax/AB/log.8Mar18.AB.g++.4
Normal file
@ -0,0 +1,81 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for Nitroamines system
|
||||
# .....
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.AB
|
||||
orthogonal box = (0 0 0) to (25 25 25)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
104 atoms
|
||||
|
||||
pair_style reax/c lmp_control
|
||||
pair_coeff * * ffield.reax.AB H B N
|
||||
Reading potential file ffield.reax.AB with DATE: 2011-02-18
|
||||
|
||||
neighbor 2 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
|
||||
fix 3 all temp/berendsen 500.0 500.0 100.0
|
||||
|
||||
timestep 0.25
|
||||
|
||||
#dump 1 all atom 30 dump.reax.ab
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 12.38 | 13.22 | 13.64 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -8505.1816 0 -8505.1816 -673.36566
|
||||
3000 555.17702 -8426.5541 0 -8256.1017 219.26856
|
||||
Loop time of 9.03521 on 4 procs for 3000 steps with 104 atoms
|
||||
|
||||
Performance: 7.172 ns/day, 3.346 hours/ns, 332.034 timesteps/s
|
||||
94.6% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 7.0347 | 7.0652 | 7.1049 | 1.0 | 78.20
|
||||
Neigh | 0.18481 | 0.20727 | 0.22108 | 3.0 | 2.29
|
||||
Comm | 0.075175 | 0.11496 | 0.14517 | 7.4 | 1.27
|
||||
Output | 2.2888e-05 | 2.569e-05 | 3.1948e-05 | 0.0 | 0.00
|
||||
Modify | 1.6286 | 1.6421 | 1.6649 | 1.1 | 18.17
|
||||
Other | | 0.005646 | | | 0.06
|
||||
|
||||
Nlocal: 26 ave 35 max 13 min
|
||||
Histogram: 1 0 0 0 0 1 0 0 1 1
|
||||
Nghost: 420.25 ave 454 max 370 min
|
||||
Histogram: 1 0 0 0 0 1 0 0 1 1
|
||||
Neighs: 862.5 ave 1178 max 444 min
|
||||
Histogram: 1 0 0 0 1 0 0 0 1 1
|
||||
|
||||
Total # of neighbors = 3450
|
||||
Ave neighs/atom = 33.1731
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:09
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for AuO system
|
||||
# .....
|
||||
|
||||
@ -28,30 +29,40 @@ timestep 0.25
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 4 5
|
||||
Memory usage per processor = 144.382 Mbytes
|
||||
binsize = 6, bins = 5 4 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 157.6 | 157.6 | 157.6 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -72201.743 0 -72201.743 -166.1947
|
||||
100 69.043346 -72076.31 0 -71878.943 22702.308
|
||||
Loop time of 17.7559 on 1 procs for 100 steps with 960 atoms
|
||||
0 0 -72201.743 0 -72201.743 -166.19482
|
||||
100 69.043331 -72076.309 0 -71878.942 22702.89
|
||||
Loop time of 18.4369 on 1 procs for 100 steps with 960 atoms
|
||||
|
||||
Performance: 0.122 ns/day, 197.288 hours/ns, 5.632 timesteps/s
|
||||
99.8% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 0.117 ns/day, 204.854 hours/ns, 5.424 timesteps/s
|
||||
98.7% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 15.102 | 15.102 | 15.102 | 0.0 | 85.05
|
||||
Neigh | 0.49358 | 0.49358 | 0.49358 | 0.0 | 2.78
|
||||
Comm | 0.0067561 | 0.0067561 | 0.0067561 | 0.0 | 0.04
|
||||
Output | 1.502e-05 | 1.502e-05 | 1.502e-05 | 0.0 | 0.00
|
||||
Modify | 2.1525 | 2.1525 | 2.1525 | 0.0 | 12.12
|
||||
Other | | 0.001267 | | | 0.01
|
||||
Pair | 15.373 | 15.373 | 15.373 | 0.0 | 83.38
|
||||
Neigh | 0.58774 | 0.58774 | 0.58774 | 0.0 | 3.19
|
||||
Comm | 0.0079026 | 0.0079026 | 0.0079026 | 0.0 | 0.04
|
||||
Output | 3.171e-05 | 3.171e-05 | 3.171e-05 | 0.0 | 0.00
|
||||
Modify | 2.4665 | 2.4665 | 2.4665 | 0.0 | 13.38
|
||||
Other | | 0.001366 | | | 0.01
|
||||
|
||||
Nlocal: 960 ave 960 max 960 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for AuO system
|
||||
# .....
|
||||
|
||||
@ -28,30 +29,40 @@ timestep 0.25
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 4 5
|
||||
Memory usage per processor = 80.1039 Mbytes
|
||||
binsize = 6, bins = 5 4 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 87.17 | 87.17 | 87.17 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -72201.743 0 -72201.743 -166.20356
|
||||
100 69.043372 -72076.31 0 -71878.943 22701.855
|
||||
Loop time of 7.66838 on 4 procs for 100 steps with 960 atoms
|
||||
0 0 -72201.743 0 -72201.743 -166.2027
|
||||
100 69.043379 -72076.31 0 -71878.943 22701.771
|
||||
Loop time of 8.44797 on 4 procs for 100 steps with 960 atoms
|
||||
|
||||
Performance: 0.282 ns/day, 85.204 hours/ns, 13.041 timesteps/s
|
||||
99.7% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 0.256 ns/day, 93.866 hours/ns, 11.837 timesteps/s
|
||||
96.5% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 6.7833 | 6.7864 | 6.7951 | 0.2 | 88.50
|
||||
Neigh | 0.2412 | 0.24206 | 0.24396 | 0.2 | 3.16
|
||||
Comm | 0.010402 | 0.019419 | 0.022561 | 3.7 | 0.25
|
||||
Output | 2.0981e-05 | 2.3007e-05 | 2.9087e-05 | 0.1 | 0.00
|
||||
Modify | 0.61733 | 0.61964 | 0.62064 | 0.2 | 8.08
|
||||
Other | | 0.0007888 | | | 0.01
|
||||
Pair | 7.3702 | 7.3757 | 7.3879 | 0.3 | 87.31
|
||||
Neigh | 0.28875 | 0.29449 | 0.29747 | 0.6 | 3.49
|
||||
Comm | 0.015008 | 0.027055 | 0.032681 | 4.3 | 0.32
|
||||
Output | 2.4319e-05 | 2.8551e-05 | 3.8624e-05 | 0.0 | 0.00
|
||||
Modify | 0.74721 | 0.74985 | 0.75539 | 0.4 | 8.88
|
||||
Other | | 0.0008975 | | | 0.01
|
||||
|
||||
Nlocal: 240 ave 240 max 240 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
@ -67,4 +78,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:07
|
||||
Total wall time: 0:00:08
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for CHO system
|
||||
# .....
|
||||
|
||||
@ -28,30 +29,40 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 17.7936 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 18.68 | 18.68 | 18.68 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -10226.557 0 -10226.557 -106.09789
|
||||
3000 548.72503 -10170.457 0 -10000.349 34.314945
|
||||
Loop time of 11.5678 on 1 procs for 3000 steps with 105 atoms
|
||||
0 0 -10226.557 0 -10226.557 -106.09755
|
||||
3000 548.5116 -10170.389 0 -10000.348 40.372297
|
||||
Loop time of 12.6046 on 1 procs for 3000 steps with 105 atoms
|
||||
|
||||
Performance: 5.602 ns/day, 4.284 hours/ns, 259.340 timesteps/s
|
||||
99.3% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 5.141 ns/day, 4.668 hours/ns, 238.008 timesteps/s
|
||||
98.9% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 10.111 | 10.111 | 10.111 | 0.0 | 87.41
|
||||
Neigh | 0.27992 | 0.27992 | 0.27992 | 0.0 | 2.42
|
||||
Comm | 0.01603 | 0.01603 | 0.01603 | 0.0 | 0.14
|
||||
Output | 1.2159e-05 | 1.2159e-05 | 1.2159e-05 | 0.0 | 0.00
|
||||
Modify | 1.1563 | 1.1563 | 1.1563 | 0.0 | 10.00
|
||||
Other | | 0.004084 | | | 0.04
|
||||
Pair | 10.931 | 10.931 | 10.931 | 0.0 | 86.72
|
||||
Neigh | 0.33107 | 0.33107 | 0.33107 | 0.0 | 2.63
|
||||
Comm | 0.017975 | 0.017975 | 0.017975 | 0.0 | 0.14
|
||||
Output | 2.0742e-05 | 2.0742e-05 | 2.0742e-05 | 0.0 | 0.00
|
||||
Modify | 1.3197 | 1.3197 | 1.3197 | 0.0 | 10.47
|
||||
Other | | 0.005059 | | | 0.04
|
||||
|
||||
Nlocal: 105 ave 105 max 105 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -67,4 +78,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:11
|
||||
Total wall time: 0:00:12
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for CHO system
|
||||
# .....
|
||||
|
||||
@ -28,30 +29,40 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 12.9938 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 11.75 | 12.85 | 13.81 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -10226.557 0 -10226.557 -106.0974
|
||||
3000 547.91377 -10170.194 0 -10000.338 61.118402
|
||||
Loop time of 6.51546 on 4 procs for 3000 steps with 105 atoms
|
||||
0 0 -10226.557 0 -10226.557 -106.09745
|
||||
3000 548.30567 -10170.323 0 -10000.346 47.794514
|
||||
Loop time of 7.42367 on 4 procs for 3000 steps with 105 atoms
|
||||
|
||||
Performance: 9.946 ns/day, 2.413 hours/ns, 460.443 timesteps/s
|
||||
99.1% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 8.729 ns/day, 2.750 hours/ns, 404.113 timesteps/s
|
||||
97.7% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 4.9869 | 5.0615 | 5.1246 | 2.3 | 77.68
|
||||
Neigh | 0.12213 | 0.14723 | 0.17304 | 5.5 | 2.26
|
||||
Comm | 0.05189 | 0.11582 | 0.18932 | 15.4 | 1.78
|
||||
Output | 1.812e-05 | 2.0564e-05 | 2.5988e-05 | 0.1 | 0.00
|
||||
Modify | 1.1626 | 1.1878 | 1.2122 | 1.9 | 18.23
|
||||
Other | | 0.003059 | | | 0.05
|
||||
Pair | 5.3058 | 5.4086 | 5.4922 | 3.1 | 72.86
|
||||
Neigh | 0.14791 | 0.17866 | 0.2106 | 6.5 | 2.41
|
||||
Comm | 0.080185 | 0.16666 | 0.26933 | 17.7 | 2.24
|
||||
Output | 2.5988e-05 | 2.8491e-05 | 3.4571e-05 | 0.0 | 0.00
|
||||
Modify | 1.6364 | 1.6658 | 1.6941 | 2.0 | 22.44
|
||||
Other | | 0.003964 | | | 0.05
|
||||
|
||||
Nlocal: 26.25 ave 45 max 6 min
|
||||
Histogram: 1 0 1 0 0 0 0 0 1 1
|
||||
@ -67,4 +78,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:06
|
||||
Total wall time: 0:00:07
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for Nitroamines system
|
||||
# .....
|
||||
|
||||
@ -29,13 +30,23 @@ thermo 1
|
||||
dump 4 all xyz 5000 dumpnpt.xyz
|
||||
run 10
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 28 27 17
|
||||
Memory usage per processor = 440.212 Mbytes
|
||||
binsize = 6, bins = 28 27 17
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes
|
||||
Step Temp E_pair TotEng Press
|
||||
0 0 -808525.04 -808525.04 58194.694
|
||||
1 4.9935726 -808803.89 -808546.69 58205.825
|
||||
@ -48,20 +59,20 @@ Step Temp E_pair TotEng Press
|
||||
8 320.17692 -826387.27 -809896.43 58886.877
|
||||
9 404.17073 -831129.48 -810312.5 59064.551
|
||||
10 497.02486 -836425.19 -810825.72 59260.714
|
||||
Loop time of 20.3094 on 1 procs for 10 steps with 17280 atoms
|
||||
Loop time of 21.5054 on 1 procs for 10 steps with 17280 atoms
|
||||
|
||||
Performance: 0.009 ns/day, 2820.746 hours/ns, 0.492 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 0.008 ns/day, 2986.857 hours/ns, 0.465 timesteps/s
|
||||
98.8% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 18.124 | 18.124 | 18.124 | 0.0 | 89.24
|
||||
Neigh | 0.072459 | 0.072459 | 0.072459 | 0.0 | 0.36
|
||||
Comm | 0.00077629 | 0.00077629 | 0.00077629 | 0.0 | 0.00
|
||||
Output | 0.00075412 | 0.00075412 | 0.00075412 | 0.0 | 0.00
|
||||
Modify | 2.1109 | 2.1109 | 2.1109 | 0.0 | 10.39
|
||||
Other | | 0.0005426 | | | 0.00
|
||||
Pair | 19.008 | 19.008 | 19.008 | 0.0 | 88.39
|
||||
Neigh | 0.084401 | 0.084401 | 0.084401 | 0.0 | 0.39
|
||||
Comm | 0.00080419 | 0.00080419 | 0.00080419 | 0.0 | 0.00
|
||||
Output | 0.00095367 | 0.00095367 | 0.00095367 | 0.0 | 0.00
|
||||
Modify | 2.4109 | 2.4109 | 2.4109 | 0.0 | 11.21
|
||||
Other | | 0.0004592 | | | 0.00
|
||||
|
||||
Nlocal: 17280 ave 17280 max 17280 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -85,7 +96,7 @@ timestep 0.2
|
||||
#dump 6 all custom 5000 dumpidtype.dat id type x y z
|
||||
|
||||
run 10
|
||||
Memory usage per processor = 440.212 Mbytes
|
||||
Per MPI rank memory allocation (min/avg/max) = 470 | 470 | 470 Mbytes
|
||||
Step Temp E_pair TotEng Press
|
||||
10 497.02486 -836425.19 -810825.72 59260.714
|
||||
11 601.65141 -841814.22 -810825.91 59489.422
|
||||
@ -98,20 +109,20 @@ Step Temp E_pair TotEng Press
|
||||
18 1623.072 -894534.04 -810937.04 61739.541
|
||||
19 1812.1865 -904337.99 -811000.57 62200.561
|
||||
20 2011.5899 -915379.19 -811771.41 63361.151
|
||||
Loop time of 20.3051 on 1 procs for 10 steps with 17280 atoms
|
||||
Loop time of 21.362 on 1 procs for 10 steps with 17280 atoms
|
||||
|
||||
Performance: 0.009 ns/day, 2820.155 hours/ns, 0.492 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 0.008 ns/day, 2966.945 hours/ns, 0.468 timesteps/s
|
||||
98.9% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 18.008 | 18.008 | 18.008 | 0.0 | 88.69
|
||||
Neigh | 0.069963 | 0.069963 | 0.069963 | 0.0 | 0.34
|
||||
Comm | 0.00077033 | 0.00077033 | 0.00077033 | 0.0 | 0.00
|
||||
Output | 0.00077224 | 0.00077224 | 0.00077224 | 0.0 | 0.00
|
||||
Modify | 2.225 | 2.225 | 2.225 | 0.0 | 10.96
|
||||
Other | | 0.0005276 | | | 0.00
|
||||
Pair | 18.793 | 18.793 | 18.793 | 0.0 | 87.97
|
||||
Neigh | 0.077047 | 0.077047 | 0.077047 | 0.0 | 0.36
|
||||
Comm | 0.00080276 | 0.00080276 | 0.00080276 | 0.0 | 0.00
|
||||
Output | 0.0010097 | 0.0010097 | 0.0010097 | 0.0 | 0.00
|
||||
Modify | 2.4897 | 2.4897 | 2.4897 | 0.0 | 11.65
|
||||
Other | | 0.0004568 | | | 0.00
|
||||
|
||||
Nlocal: 17280 ave 17280 max 17280 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -127,4 +138,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:45
|
||||
Total wall time: 0:00:47
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for Nitroamines system
|
||||
# .....
|
||||
|
||||
@ -29,13 +30,23 @@ thermo 1
|
||||
dump 4 all xyz 5000 dumpnpt.xyz
|
||||
run 10
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 28 27 17
|
||||
Memory usage per processor = 140.018 Mbytes
|
||||
binsize = 6, bins = 28 27 17
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes
|
||||
Step Temp E_pair TotEng Press
|
||||
0 0 -808525.04 -808525.04 58194.694
|
||||
1 4.9935726 -808803.89 -808546.69 58205.825
|
||||
@ -48,20 +59,20 @@ Step Temp E_pair TotEng Press
|
||||
8 320.17692 -826387.27 -809896.43 58886.877
|
||||
9 404.17073 -831129.48 -810312.5 59064.551
|
||||
10 497.02486 -836425.19 -810825.72 59260.714
|
||||
Loop time of 5.47494 on 4 procs for 10 steps with 17280 atoms
|
||||
Loop time of 6.02109 on 4 procs for 10 steps with 17280 atoms
|
||||
|
||||
Performance: 0.032 ns/day, 760.408 hours/ns, 1.827 timesteps/s
|
||||
99.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 0.029 ns/day, 836.262 hours/ns, 1.661 timesteps/s
|
||||
99.0% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 4.5958 | 4.7748 | 4.8852 | 5.4 | 87.21
|
||||
Neigh | 0.021961 | 0.022104 | 0.022431 | 0.1 | 0.40
|
||||
Comm | 0.0077388 | 0.11804 | 0.29694 | 34.2 | 2.16
|
||||
Output | 0.00047708 | 0.00051123 | 0.0005939 | 0.2 | 0.01
|
||||
Modify | 0.55906 | 0.55927 | 0.55946 | 0.0 | 10.22
|
||||
Other | | 0.0002034 | | | 0.00
|
||||
Pair | 4.9482 | 5.1186 | 5.3113 | 7.4 | 85.01
|
||||
Neigh | 0.024811 | 0.025702 | 0.027556 | 0.7 | 0.43
|
||||
Comm | 0.0027421 | 0.19541 | 0.36565 | 38.1 | 3.25
|
||||
Output | 0.00053239 | 0.00057119 | 0.00067186 | 0.0 | 0.01
|
||||
Modify | 0.67876 | 0.68059 | 0.68165 | 0.1 | 11.30
|
||||
Other | | 0.0001779 | | | 0.00
|
||||
|
||||
Nlocal: 4320 ave 4320 max 4320 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
@ -85,7 +96,7 @@ timestep 0.2
|
||||
#dump 6 all custom 5000 dumpidtype.dat id type x y z
|
||||
|
||||
run 10
|
||||
Memory usage per processor = 140.018 Mbytes
|
||||
Per MPI rank memory allocation (min/avg/max) = 149.3 | 149.3 | 149.3 Mbytes
|
||||
Step Temp E_pair TotEng Press
|
||||
10 497.02486 -836425.19 -810825.72 59260.714
|
||||
11 601.65141 -841814.22 -810825.91 59489.422
|
||||
@ -98,20 +109,20 @@ Step Temp E_pair TotEng Press
|
||||
18 1623.072 -894534.04 -810937.04 61739.541
|
||||
19 1812.1865 -904337.99 -811000.57 62200.561
|
||||
20 2011.5899 -915379.19 -811771.41 63361.151
|
||||
Loop time of 5.49026 on 4 procs for 10 steps with 17280 atoms
|
||||
Loop time of 6.08805 on 4 procs for 10 steps with 17280 atoms
|
||||
|
||||
Performance: 0.031 ns/day, 762.536 hours/ns, 1.821 timesteps/s
|
||||
99.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 0.028 ns/day, 845.563 hours/ns, 1.643 timesteps/s
|
||||
99.2% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 4.5657 | 4.7603 | 4.8596 | 5.4 | 86.70
|
||||
Neigh | 0.021023 | 0.021468 | 0.022176 | 0.3 | 0.39
|
||||
Comm | 0.016467 | 0.1157 | 0.31031 | 34.7 | 2.11
|
||||
Output | 0.00047684 | 0.00050694 | 0.00059295 | 0.2 | 0.01
|
||||
Modify | 0.59135 | 0.59207 | 0.59251 | 0.1 | 10.78
|
||||
Other | | 0.0001938 | | | 0.00
|
||||
Pair | 4.9124 | 5.1008 | 5.3405 | 8.3 | 83.78
|
||||
Neigh | 0.023652 | 0.024473 | 0.025996 | 0.6 | 0.40
|
||||
Comm | 0.0020971 | 0.24171 | 0.43023 | 38.0 | 3.97
|
||||
Output | 0.00056076 | 0.00060701 | 0.00072312 | 0.0 | 0.01
|
||||
Modify | 0.71869 | 0.72023 | 0.72107 | 0.1 | 11.83
|
||||
Other | | 0.0001827 | | | 0.00
|
||||
|
||||
Nlocal: 4320 ave 4320 max 4320 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
@ -127,4 +138,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:12
|
||||
Total wall time: 0:00:13
|
||||
@ -1,6 +1,12 @@
|
||||
# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS
|
||||
# See README for more info
|
||||
|
||||
variable x index 2
|
||||
variable y index 2
|
||||
variable z index 2
|
||||
variable t index 100
|
||||
|
||||
|
||||
units real
|
||||
atom_style charge
|
||||
atom_modify sort 100 0.0 # optional
|
||||
@ -24,7 +30,7 @@ timestep 0.1
|
||||
|
||||
thermo_style custom step temp pe press evdwl ecoul vol
|
||||
thermo_modify norm yes
|
||||
thermo 100
|
||||
thermo 10
|
||||
|
||||
velocity all create 300.0 41279 loop geom
|
||||
|
||||
|
||||
115
examples/reax/HNS/log.8Mar18.reaxc.hns.g++.1
Normal file
115
examples/reax/HNS/log.8Mar18.reaxc.hns.g++.1
Normal file
@ -0,0 +1,115 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS
|
||||
# See README for more info
|
||||
|
||||
variable x index 2
|
||||
variable y index 2
|
||||
variable z index 2
|
||||
variable t index 100
|
||||
|
||||
|
||||
units real
|
||||
atom_style charge
|
||||
atom_modify sort 100 0.0 # optional
|
||||
dimension 3
|
||||
boundary p p p
|
||||
box tilt large
|
||||
|
||||
read_data data.hns-equil
|
||||
triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
304 atoms
|
||||
reading velocities ...
|
||||
304 velocities
|
||||
replicate $x $y $z bbox
|
||||
replicate 2 $y $z bbox
|
||||
replicate 2 2 $z bbox
|
||||
replicate 2 2 2 bbox
|
||||
triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
2432 atoms
|
||||
Time spent = 0.000789404 secs
|
||||
|
||||
|
||||
pair_style reax/c NULL
|
||||
pair_coeff * * ffield.reax.hns C H O N
|
||||
|
||||
compute reax all pair reax/c
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 20 delay 0 check no
|
||||
|
||||
timestep 0.1
|
||||
|
||||
thermo_style custom step temp pe press evdwl ecoul vol
|
||||
thermo_modify norm yes
|
||||
thermo 10
|
||||
|
||||
velocity all create 300.0 41279 loop geom
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 20 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 11
|
||||
ghost atom cutoff = 11
|
||||
binsize = 5.5, bins = 10 5 6
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 262.4 | 262.4 | 262.4 Mbytes
|
||||
Step Temp PotEng Press E_vdwl E_coul Volume
|
||||
0 300 -113.27833 437.52103 -111.57687 -1.7014647 27418.867
|
||||
10 299.87174 -113.27778 2033.6337 -111.57645 -1.7013325 27418.867
|
||||
20 300.81718 -113.28046 4817.5889 -111.57931 -1.7011463 27418.867
|
||||
30 301.8622 -113.28323 8303.0039 -111.58237 -1.7008608 27418.867
|
||||
40 302.4646 -113.28493 10519.459 -111.58446 -1.700467 27418.867
|
||||
50 300.79064 -113.27989 10402.291 -111.57987 -1.7000218 27418.867
|
||||
60 296.11534 -113.26599 7929.1348 -111.5664 -1.6995929 27418.867
|
||||
70 291.73354 -113.25289 5071.5459 -111.5537 -1.6991916 27418.867
|
||||
80 292.189 -113.25399 5667.0962 -111.55519 -1.6987993 27418.867
|
||||
90 298.40792 -113.27253 7513.3806 -111.57409 -1.6984403 27418.867
|
||||
100 303.58246 -113.28809 10017.879 -111.58991 -1.698177 27418.867
|
||||
Loop time of 59.5461 on 1 procs for 100 steps with 2432 atoms
|
||||
|
||||
Performance: 0.015 ns/day, 1654.060 hours/ns, 1.679 timesteps/s
|
||||
97.0% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 49.922 | 49.922 | 49.922 | 0.0 | 83.84
|
||||
Neigh | 0.53154 | 0.53154 | 0.53154 | 0.0 | 0.89
|
||||
Comm | 0.011399 | 0.011399 | 0.011399 | 0.0 | 0.02
|
||||
Output | 0.00064397 | 0.00064397 | 0.00064397 | 0.0 | 0.00
|
||||
Modify | 9.0782 | 9.0782 | 9.0782 | 0.0 | 15.25
|
||||
Other | | 0.002116 | | | 0.00
|
||||
|
||||
Nlocal: 2432 ave 2432 max 2432 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 10687 ave 10687 max 10687 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 823977 ave 823977 max 823977 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 823977
|
||||
Ave neighs/atom = 338.806
|
||||
Neighbor list builds = 5
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:01:00
|
||||
115
examples/reax/HNS/log.8Mar18.reaxc.hns.g++.4
Normal file
115
examples/reax/HNS/log.8Mar18.reaxc.hns.g++.4
Normal file
@ -0,0 +1,115 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# Pure HNS crystal, ReaxFF tests for benchmarking LAMMPS
|
||||
# See README for more info
|
||||
|
||||
variable x index 2
|
||||
variable y index 2
|
||||
variable z index 2
|
||||
variable t index 100
|
||||
|
||||
|
||||
units real
|
||||
atom_style charge
|
||||
atom_modify sort 100 0.0 # optional
|
||||
dimension 3
|
||||
boundary p p p
|
||||
box tilt large
|
||||
|
||||
read_data data.hns-equil
|
||||
triclinic box = (0 0 0) to (22.326 11.1412 13.779) with tilt (0 -5.02603 0)
|
||||
2 by 1 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
304 atoms
|
||||
reading velocities ...
|
||||
304 velocities
|
||||
replicate $x $y $z bbox
|
||||
replicate 2 $y $z bbox
|
||||
replicate 2 2 $z bbox
|
||||
replicate 2 2 2 bbox
|
||||
triclinic box = (0 0 0) to (44.652 22.2824 27.5579) with tilt (0 -10.0521 0)
|
||||
2 by 1 by 2 MPI processor grid
|
||||
2432 atoms
|
||||
Time spent = 0.000398397 secs
|
||||
|
||||
|
||||
pair_style reax/c NULL
|
||||
pair_coeff * * ffield.reax.hns C H O N
|
||||
|
||||
compute reax all pair reax/c
|
||||
|
||||
neighbor 1.0 bin
|
||||
neigh_modify every 20 delay 0 check no
|
||||
|
||||
timestep 0.1
|
||||
|
||||
thermo_style custom step temp pe press evdwl ecoul vol
|
||||
thermo_modify norm yes
|
||||
thermo 10
|
||||
|
||||
velocity all create 300.0 41279 loop geom
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 20 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 11
|
||||
ghost atom cutoff = 11
|
||||
binsize = 5.5, bins = 10 5 6
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 126.6 | 126.6 | 126.6 Mbytes
|
||||
Step Temp PotEng Press E_vdwl E_coul Volume
|
||||
0 300 -113.27833 437.52112 -111.57687 -1.7014647 27418.867
|
||||
10 299.87174 -113.27778 2033.632 -111.57645 -1.7013325 27418.867
|
||||
20 300.81719 -113.28046 4817.5761 -111.57931 -1.7011463 27418.867
|
||||
30 301.8622 -113.28323 8302.9767 -111.58237 -1.7008609 27418.867
|
||||
40 302.4646 -113.28493 10519.481 -111.58446 -1.700467 27418.867
|
||||
50 300.79064 -113.27989 10402.312 -111.57987 -1.7000217 27418.867
|
||||
60 296.11534 -113.26599 7929.1393 -111.5664 -1.6995929 27418.867
|
||||
70 291.73354 -113.25289 5071.5368 -111.5537 -1.6991916 27418.867
|
||||
80 292.18901 -113.25399 5667.1118 -111.55519 -1.6987993 27418.867
|
||||
90 298.40793 -113.27253 7513.4029 -111.57409 -1.6984403 27418.867
|
||||
100 303.58247 -113.28809 10017.892 -111.58991 -1.698177 27418.867
|
||||
Loop time of 21.3933 on 4 procs for 100 steps with 2432 atoms
|
||||
|
||||
Performance: 0.040 ns/day, 594.257 hours/ns, 4.674 timesteps/s
|
||||
97.6% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 14.863 | 16.367 | 18.027 | 28.6 | 76.51
|
||||
Neigh | 0.23943 | 0.2422 | 0.24658 | 0.6 | 1.13
|
||||
Comm | 0.024331 | 1.6845 | 3.189 | 89.2 | 7.87
|
||||
Output | 0.00051165 | 0.00056899 | 0.00068665 | 0.0 | 0.00
|
||||
Modify | 3.0933 | 3.0969 | 3.0999 | 0.1 | 14.48
|
||||
Other | | 0.001784 | | | 0.01
|
||||
|
||||
Nlocal: 608 ave 608 max 608 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 5738.25 ave 5742 max 5734 min
|
||||
Histogram: 1 1 0 0 0 0 0 0 0 2
|
||||
Neighs: 231544 ave 231625 max 231466 min
|
||||
Histogram: 2 0 0 0 0 0 0 0 0 2
|
||||
|
||||
Total # of neighbors = 926176
|
||||
Ave neighs/atom = 380.829
|
||||
Neighbor list builds = 5
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:21
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for high energy CHON systems
|
||||
# .....
|
||||
|
||||
@ -28,43 +29,53 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 18.1116 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 19 | 19 | 19 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -10197.932 0 -10197.932 38.347492
|
||||
3000 510.85923 -10091.694 0 -9933.3253 1668.5084
|
||||
Loop time of 18.9088 on 1 procs for 3000 steps with 105 atoms
|
||||
3000 510.63767 -10091.537 0 -9933.2374 1144.545
|
||||
Loop time of 21.2931 on 1 procs for 3000 steps with 105 atoms
|
||||
|
||||
Performance: 3.427 ns/day, 7.003 hours/ns, 158.657 timesteps/s
|
||||
99.5% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 3.043 ns/day, 7.886 hours/ns, 140.891 timesteps/s
|
||||
97.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 17.724 | 17.724 | 17.724 | 0.0 | 93.73
|
||||
Neigh | 0.27457 | 0.27457 | 0.27457 | 0.0 | 1.45
|
||||
Comm | 0.015814 | 0.015814 | 0.015814 | 0.0 | 0.08
|
||||
Output | 1.1921e-05 | 1.1921e-05 | 1.1921e-05 | 0.0 | 0.00
|
||||
Modify | 0.89014 | 0.89014 | 0.89014 | 0.0 | 4.71
|
||||
Other | | 0.004246 | | | 0.02
|
||||
Pair | 19.887 | 19.887 | 19.887 | 0.0 | 93.40
|
||||
Neigh | 0.33143 | 0.33143 | 0.33143 | 0.0 | 1.56
|
||||
Comm | 0.02079 | 0.02079 | 0.02079 | 0.0 | 0.10
|
||||
Output | 2.5272e-05 | 2.5272e-05 | 2.5272e-05 | 0.0 | 0.00
|
||||
Modify | 1.0478 | 1.0478 | 1.0478 | 0.0 | 4.92
|
||||
Other | | 0.006125 | | | 0.03
|
||||
|
||||
Nlocal: 105 ave 105 max 105 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 645 ave 645 max 645 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3061 ave 3061 max 3061 min
|
||||
Neighs: 3063 ave 3063 max 3063 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 3061
|
||||
Ave neighs/atom = 29.1524
|
||||
Total # of neighbors = 3063
|
||||
Ave neighs/atom = 29.1714
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:19
|
||||
Total wall time: 0:00:21
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for high energy CHON systems
|
||||
# .....
|
||||
|
||||
@ -28,43 +29,53 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 12.2102 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 12.14 | 13.04 | 13.9 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -10197.932 0 -10197.932 38.347492
|
||||
3000 504.05354 -10089.494 0 -9933.2351 868.32505
|
||||
Loop time of 9.70759 on 4 procs for 3000 steps with 105 atoms
|
||||
3000 509.89257 -10091.36 0 -9933.2916 1406.1215
|
||||
Loop time of 10.8858 on 4 procs for 3000 steps with 105 atoms
|
||||
|
||||
Performance: 6.675 ns/day, 3.595 hours/ns, 309.037 timesteps/s
|
||||
99.2% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 5.953 ns/day, 4.032 hours/ns, 275.588 timesteps/s
|
||||
98.1% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 8.4621 | 8.5307 | 8.6001 | 1.9 | 87.88
|
||||
Neigh | 0.12583 | 0.14931 | 0.17341 | 4.5 | 1.54
|
||||
Comm | 0.053017 | 0.12311 | 0.19244 | 16.2 | 1.27
|
||||
Output | 1.9073e-05 | 2.0802e-05 | 2.408e-05 | 0.0 | 0.00
|
||||
Modify | 0.87638 | 0.9012 | 0.92557 | 1.9 | 9.28
|
||||
Other | | 0.003213 | | | 0.03
|
||||
Pair | 9.3081 | 9.4054 | 9.4994 | 2.6 | 86.40
|
||||
Neigh | 0.15541 | 0.18258 | 0.2099 | 4.7 | 1.68
|
||||
Comm | 0.070516 | 0.16621 | 0.26541 | 19.7 | 1.53
|
||||
Output | 2.2173e-05 | 2.5153e-05 | 3.3855e-05 | 0.0 | 0.00
|
||||
Modify | 1.0979 | 1.1272 | 1.1568 | 2.1 | 10.35
|
||||
Other | | 0.004379 | | | 0.04
|
||||
|
||||
Nlocal: 26.25 ave 46 max 8 min
|
||||
Histogram: 1 0 0 1 0 1 0 0 0 1
|
||||
Nghost: 399.5 ave 512 max 288 min
|
||||
Histogram: 1 0 0 1 0 0 1 0 0 1
|
||||
Neighs: 1010.75 ave 1818 max 420 min
|
||||
Neighs: 1011.25 ave 1819 max 420 min
|
||||
Histogram: 1 0 1 1 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 4043
|
||||
Ave neighs/atom = 38.5048
|
||||
Total # of neighbors = 4045
|
||||
Ave neighs/atom = 38.5238
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:10
|
||||
Total wall time: 0:00:11
|
||||
@ -1,70 +0,0 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
# REAX potential for VOH system
|
||||
# .....
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.VOH
|
||||
orthogonal box = (0 0 0) to (25 25 25)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
100 atoms
|
||||
|
||||
pair_style reax/c lmp_control
|
||||
pair_coeff * * ffield.reax.V_O_C_H H C O V
|
||||
Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18
|
||||
|
||||
neighbor 2 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
|
||||
fix 3 all temp/berendsen 500.0 500.0 100.0
|
||||
|
||||
timestep 0.25
|
||||
|
||||
#dump 1 all atom 30 dump.reax.voh
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 12.1769 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -10246.825 0 -10246.825 42.256092
|
||||
3000 518.1493 -10196.234 0 -10043.328 -334.5971
|
||||
Loop time of 5.59178 on 4 procs for 3000 steps with 100 atoms
|
||||
|
||||
Performance: 11.588 ns/day, 2.071 hours/ns, 536.502 timesteps/s
|
||||
99.1% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 4.2807 | 4.3532 | 4.398 | 2.1 | 77.85
|
||||
Neigh | 0.12328 | 0.14561 | 0.16815 | 4.2 | 2.60
|
||||
Comm | 0.051619 | 0.097282 | 0.1697 | 14.1 | 1.74
|
||||
Output | 1.7881e-05 | 1.9372e-05 | 2.3842e-05 | 0.1 | 0.00
|
||||
Modify | 0.9701 | 0.99258 | 1.0148 | 1.6 | 17.75
|
||||
Other | | 0.003097 | | | 0.06
|
||||
|
||||
Nlocal: 25 ave 38 max 11 min
|
||||
Histogram: 1 0 0 0 1 0 1 0 0 1
|
||||
Nghost: 368.25 ave 449 max 283 min
|
||||
Histogram: 1 0 0 0 1 0 1 0 0 1
|
||||
Neighs: 1084.5 ave 1793 max 418 min
|
||||
Histogram: 1 0 0 1 0 0 1 0 0 1
|
||||
|
||||
Total # of neighbors = 4338
|
||||
Ave neighs/atom = 43.38
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:05
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for VOH system
|
||||
# .....
|
||||
|
||||
@ -28,43 +29,53 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 16.9211 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 17.79 | 17.79 | 17.79 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -10246.825 0 -10246.825 42.256089
|
||||
3000 479.39686 -10186.225 0 -10044.755 -454.82798
|
||||
Loop time of 10.4348 on 1 procs for 3000 steps with 100 atoms
|
||||
3000 476.73301 -10185.256 0 -10044.572 -694.70737
|
||||
Loop time of 11.0577 on 1 procs for 3000 steps with 100 atoms
|
||||
|
||||
Performance: 6.210 ns/day, 3.865 hours/ns, 287.499 timesteps/s
|
||||
99.2% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 5.860 ns/day, 4.095 hours/ns, 271.304 timesteps/s
|
||||
98.9% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 9.2216 | 9.2216 | 9.2216 | 0.0 | 88.37
|
||||
Neigh | 0.2757 | 0.2757 | 0.2757 | 0.0 | 2.64
|
||||
Comm | 0.015626 | 0.015626 | 0.015626 | 0.0 | 0.15
|
||||
Output | 1.1921e-05 | 1.1921e-05 | 1.1921e-05 | 0.0 | 0.00
|
||||
Modify | 0.91782 | 0.91782 | 0.91782 | 0.0 | 8.80
|
||||
Other | | 0.004039 | | | 0.04
|
||||
Pair | 9.6785 | 9.6785 | 9.6785 | 0.0 | 87.53
|
||||
Neigh | 0.32599 | 0.32599 | 0.32599 | 0.0 | 2.95
|
||||
Comm | 0.017231 | 0.017231 | 0.017231 | 0.0 | 0.16
|
||||
Output | 2.5511e-05 | 2.5511e-05 | 2.5511e-05 | 0.0 | 0.00
|
||||
Modify | 1.0311 | 1.0311 | 1.0311 | 0.0 | 9.32
|
||||
Other | | 0.004857 | | | 0.04
|
||||
|
||||
Nlocal: 100 ave 100 max 100 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 598 ave 598 max 598 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3384 ave 3384 max 3384 min
|
||||
Neighs: 3390 ave 3390 max 3390 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 3384
|
||||
Ave neighs/atom = 33.84
|
||||
Total # of neighbors = 3390
|
||||
Ave neighs/atom = 33.9
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:10
|
||||
Total wall time: 0:00:11
|
||||
81
examples/reax/VOH/log.8Mar18.VOH.g++.4
Normal file
81
examples/reax/VOH/log.8Mar18.VOH.g++.4
Normal file
@ -0,0 +1,81 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for VOH system
|
||||
# .....
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.VOH
|
||||
orthogonal box = (0 0 0) to (25 25 25)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
100 atoms
|
||||
|
||||
pair_style reax/c lmp_control
|
||||
pair_coeff * * ffield.reax.V_O_C_H H C O V
|
||||
Reading potential file ffield.reax.V_O_C_H with DATE: 2011-02-18
|
||||
|
||||
neighbor 2 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1e-6 param.qeq
|
||||
fix 3 all temp/berendsen 500.0 500.0 100.0
|
||||
|
||||
timestep 0.25
|
||||
|
||||
#dump 1 all atom 30 dump.reax.voh
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 11.21 | 12.52 | 13.64 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -10246.825 0 -10246.825 42.256092
|
||||
3000 489.67803 -10188.866 0 -10044.362 -553.7513
|
||||
Loop time of 6.49847 on 4 procs for 3000 steps with 100 atoms
|
||||
|
||||
Performance: 9.972 ns/day, 2.407 hours/ns, 461.647 timesteps/s
|
||||
97.7% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 4.7412 | 4.8453 | 4.9104 | 2.9 | 74.56
|
||||
Neigh | 0.1468 | 0.17834 | 0.20151 | 4.7 | 2.74
|
||||
Comm | 0.071841 | 0.14037 | 0.24502 | 17.2 | 2.16
|
||||
Output | 2.1219e-05 | 2.408e-05 | 3.1948e-05 | 0.0 | 0.00
|
||||
Modify | 1.3072 | 1.3308 | 1.3627 | 1.7 | 20.48
|
||||
Other | | 0.003713 | | | 0.06
|
||||
|
||||
Nlocal: 25 ave 38 max 11 min
|
||||
Histogram: 1 0 0 0 1 0 1 0 0 1
|
||||
Nghost: 369.75 ave 453 max 283 min
|
||||
Histogram: 1 0 0 0 1 1 0 0 0 1
|
||||
Neighs: 1082.25 ave 1788 max 417 min
|
||||
Histogram: 1 0 1 0 0 0 1 0 0 1
|
||||
|
||||
Total # of neighbors = 4329
|
||||
Ave neighs/atom = 43.29
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:06
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for ZnOH2 system
|
||||
# .....
|
||||
|
||||
@ -28,43 +29,53 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 17.485 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 18.36 | 18.36 | 18.36 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -7900.2668 0 -7900.2668 60.076093
|
||||
3000 522.42599 -7928.9641 0 -7767.0098 -755.28778
|
||||
Loop time of 6.38119 on 1 procs for 3000 steps with 105 atoms
|
||||
3000 535.58577 -7934.7287 0 -7768.6948 -475.46237
|
||||
Loop time of 7.29784 on 1 procs for 3000 steps with 105 atoms
|
||||
|
||||
Performance: 10.155 ns/day, 2.363 hours/ns, 470.132 timesteps/s
|
||||
99.0% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 8.879 ns/day, 2.703 hours/ns, 411.081 timesteps/s
|
||||
97.3% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 5.2711 | 5.2711 | 5.2711 | 0.0 | 82.60
|
||||
Neigh | 0.30669 | 0.30669 | 0.30669 | 0.0 | 4.81
|
||||
Comm | 0.015599 | 0.015599 | 0.015599 | 0.0 | 0.24
|
||||
Output | 1.0967e-05 | 1.0967e-05 | 1.0967e-05 | 0.0 | 0.00
|
||||
Modify | 0.78376 | 0.78376 | 0.78376 | 0.0 | 12.28
|
||||
Other | | 0.004036 | | | 0.06
|
||||
Pair | 5.9988 | 5.9988 | 5.9988 | 0.0 | 82.20
|
||||
Neigh | 0.37455 | 0.37455 | 0.37455 | 0.0 | 5.13
|
||||
Comm | 0.019186 | 0.019186 | 0.019186 | 0.0 | 0.26
|
||||
Output | 2.4557e-05 | 2.4557e-05 | 2.4557e-05 | 0.0 | 0.00
|
||||
Modify | 0.89915 | 0.89915 | 0.89915 | 0.0 | 12.32
|
||||
Other | | 0.006108 | | | 0.08
|
||||
|
||||
Nlocal: 105 ave 105 max 105 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 649 ave 649 max 649 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3956 ave 3956 max 3956 min
|
||||
Neighs: 3971 ave 3971 max 3971 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 3956
|
||||
Ave neighs/atom = 37.6762
|
||||
Total # of neighbors = 3971
|
||||
Ave neighs/atom = 37.819
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:06
|
||||
Total wall time: 0:00:07
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# REAX potential for ZnOH2 system
|
||||
# .....
|
||||
|
||||
@ -28,40 +29,50 @@ timestep 0.25
|
||||
|
||||
run 3000
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12
|
||||
ghost atom cutoff = 12
|
||||
binsize = 6 -> bins = 5 5 5
|
||||
Memory usage per processor = 12.0066 Mbytes
|
||||
binsize = 6, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 11.28 | 12.77 | 14.21 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 0 -7900.2668 0 -7900.2668 60.076093
|
||||
3000 536.8256 -7935.1437 0 -7768.7255 -479.27959
|
||||
Loop time of 3.77632 on 4 procs for 3000 steps with 105 atoms
|
||||
3000 538.25796 -7935.6159 0 -7768.7536 -525.47078
|
||||
Loop time of 4.48824 on 4 procs for 3000 steps with 105 atoms
|
||||
|
||||
Performance: 17.160 ns/day, 1.399 hours/ns, 794.423 timesteps/s
|
||||
99.0% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 14.438 ns/day, 1.662 hours/ns, 668.414 timesteps/s
|
||||
97.2% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 2.7337 | 2.7808 | 2.8316 | 2.5 | 73.64
|
||||
Neigh | 0.13455 | 0.16558 | 0.19493 | 5.3 | 4.38
|
||||
Comm | 0.046741 | 0.099375 | 0.14663 | 13.6 | 2.63
|
||||
Output | 1.7881e-05 | 2.0027e-05 | 2.408e-05 | 0.1 | 0.00
|
||||
Modify | 0.69792 | 0.7275 | 0.75887 | 2.5 | 19.26
|
||||
Other | | 0.003084 | | | 0.08
|
||||
Pair | 3.1031 | 3.1698 | 3.2378 | 3.3 | 70.62
|
||||
Neigh | 0.16642 | 0.20502 | 0.25003 | 6.6 | 4.57
|
||||
Comm | 0.074932 | 0.14224 | 0.21025 | 15.6 | 3.17
|
||||
Output | 0.00011349 | 0.00011736 | 0.00012231 | 0.0 | 0.00
|
||||
Modify | 0.92089 | 0.96736 | 1.0083 | 3.2 | 21.55
|
||||
Other | | 0.003731 | | | 0.08
|
||||
|
||||
Nlocal: 26.25 ave 45 max 15 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
Nghost: 399 ave 509 max 295 min
|
||||
Histogram: 1 0 0 0 2 0 0 0 0 1
|
||||
Neighs: 1150 ave 2061 max 701 min
|
||||
Neighs: 1151.5 ave 2066 max 701 min
|
||||
Histogram: 1 2 0 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 4600
|
||||
Ave neighs/atom = 43.8095
|
||||
Total # of neighbors = 4606
|
||||
Ave neighs/atom = 43.8667
|
||||
Neighbor list builds = 300
|
||||
Dangerous builds not checked
|
||||
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (23 Oct 2017)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
#ci-reax potential for CH systems with tabulated ZBL correction
|
||||
atom_style charge
|
||||
units real
|
||||
@ -31,6 +32,7 @@ fix 2 all temp/berendsen 500.0 500.0 100.0
|
||||
#dump 1 all atom 30 dump.ci-reax.lammpstrj
|
||||
|
||||
run 3000
|
||||
WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392)
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 10 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
@ -52,20 +54,20 @@ Per MPI rank memory allocation (min/avg/max) = 43.46 | 43.46 | 43.46 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 508.42043 -28736.654 0 -28260.785 1678.3276
|
||||
3000 480.41333 -28707.835 0 -28258.181 -3150.0762
|
||||
Loop time of 21.5509 on 1 procs for 3000 steps with 315 atoms
|
||||
Loop time of 45.3959 on 1 procs for 3000 steps with 315 atoms
|
||||
|
||||
Performance: 3.007 ns/day, 7.982 hours/ns, 139.205 timesteps/s
|
||||
100.0% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 1.427 ns/day, 16.813 hours/ns, 66.085 timesteps/s
|
||||
96.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 21.315 | 21.315 | 21.315 | 0.0 | 98.91
|
||||
Neigh | 0.17846 | 0.17846 | 0.17846 | 0.0 | 0.83
|
||||
Comm | 0.028676 | 0.028676 | 0.028676 | 0.0 | 0.13
|
||||
Output | 2.6941e-05 | 2.6941e-05 | 2.6941e-05 | 0.0 | 0.00
|
||||
Modify | 0.018969 | 0.018969 | 0.018969 | 0.0 | 0.09
|
||||
Other | | 0.009438 | | | 0.04
|
||||
Pair | 44.955 | 44.955 | 44.955 | 0.0 | 99.03
|
||||
Neigh | 0.29903 | 0.29903 | 0.29903 | 0.0 | 0.66
|
||||
Comm | 0.056547 | 0.056547 | 0.056547 | 0.0 | 0.12
|
||||
Output | 4.8399e-05 | 4.8399e-05 | 4.8399e-05 | 0.0 | 0.00
|
||||
Modify | 0.058722 | 0.058722 | 0.058722 | 0.0 | 0.13
|
||||
Other | | 0.02632 | | | 0.06
|
||||
|
||||
Nlocal: 315 ave 315 max 315 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -81,4 +83,4 @@ Dangerous builds = 0
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:21
|
||||
Total wall time: 0:00:45
|
||||
86
examples/reax/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4
Normal file
86
examples/reax/ci-reaxFF/log.8Mar18.ci-reax.CH.g++.4
Normal file
@ -0,0 +1,86 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
#ci-reax potential for CH systems with tabulated ZBL correction
|
||||
atom_style charge
|
||||
units real
|
||||
|
||||
read_data CH4.dat
|
||||
orthogonal box = (0 0 0) to (20 20 20)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
315 atoms
|
||||
reading velocities ...
|
||||
315 velocities
|
||||
|
||||
pair_style hybrid/overlay reax/c control checkqeq no table linear 11000
|
||||
pair_coeff * * reax/c ffield.ci-reax.CH C H
|
||||
Reading potential file ffield.ci-reax.CH with DATE: 2017-11-20
|
||||
pair_coeff 1 1 table ci-reaxFF_ZBL.dat CC_cireaxFF
|
||||
WARNING: 2 of 10000 force values in table are inconsistent with -dE/dr.
|
||||
Should only be flagged at inflection points (../pair_table.cpp:481)
|
||||
pair_coeff 1 2 table ci-reaxFF_ZBL.dat CH_cireaxFF
|
||||
WARNING: 2 of 11000 force values in table are inconsistent with -dE/dr.
|
||||
Should only be flagged at inflection points (../pair_table.cpp:481)
|
||||
pair_coeff 2 2 table ci-reaxFF_ZBL.dat HH_cireaxFF
|
||||
WARNING: 2 of 6000 force values in table are inconsistent with -dE/dr.
|
||||
Should only be flagged at inflection points (../pair_table.cpp:481)
|
||||
|
||||
timestep 0.25
|
||||
fix 1 all nve
|
||||
fix 2 all temp/berendsen 500.0 500.0 100.0
|
||||
|
||||
#dump 1 all atom 30 dump.ci-reax.lammpstrj
|
||||
|
||||
run 3000
|
||||
WARNING: Total cutoff < 2*bond cutoff. May need to use an increased neighbor list skin. (../pair_reaxc.cpp:392)
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 10 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 9.5
|
||||
ghost atom cutoff = 9.5
|
||||
binsize = 4.75, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) pair table, perpetual
|
||||
attributes: half, newton on
|
||||
pair build: half/bin/atomonly/newton
|
||||
stencil: half/bin/3d/newton
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 24.48 | 25.61 | 27.27 Mbytes
|
||||
Step Temp E_pair E_mol TotEng Press
|
||||
0 508.42043 -28736.654 0 -28260.785 1678.3276
|
||||
3000 480.41333 -28707.835 0 -28258.181 -3150.0762
|
||||
Loop time of 24.7034 on 4 procs for 3000 steps with 315 atoms
|
||||
|
||||
Performance: 2.623 ns/day, 9.149 hours/ns, 121.441 timesteps/s
|
||||
95.8% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 18.945 | 21.367 | 24.046 | 39.3 | 86.49
|
||||
Neigh | 0.1456 | 0.15254 | 0.16101 | 1.6 | 0.62
|
||||
Comm | 0.39168 | 3.0859 | 5.5185 | 103.9 | 12.49
|
||||
Output | 3.5763e-05 | 4.065e-05 | 5.2452e-05 | 0.0 | 0.00
|
||||
Modify | 0.05831 | 0.068811 | 0.077666 | 2.9 | 0.28
|
||||
Other | | 0.0292 | | | 0.12
|
||||
|
||||
Nlocal: 78.75 ave 96 max 65 min
|
||||
Histogram: 2 0 0 0 0 0 0 1 0 1
|
||||
Nghost: 1233 ave 1348 max 1116 min
|
||||
Histogram: 1 0 1 0 0 0 0 1 0 1
|
||||
Neighs: 9467.25 ave 12150 max 7160 min
|
||||
Histogram: 1 1 0 0 0 0 0 1 0 1
|
||||
|
||||
Total # of neighbors = 37869
|
||||
Ave neighs/atom = 120.219
|
||||
Neighbor list builds = 37
|
||||
Dangerous builds = 0
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:24
|
||||
@ -1,101 +0,0 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
# ReaxFF potential for RDX system
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
# reax args: hbcut hbnewflag tripflag precision
|
||||
|
||||
pair_style reax 6.0 1 1 1.0e-6
|
||||
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
|
||||
pair_coeff * * ffield.reax 1 2 3 4
|
||||
|
||||
compute reax all pair reax
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
1 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 3 3 3
|
||||
Memory usage per processor = 2.95105 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1885.1268 -1885.1268 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79971 0 168.88435
|
||||
10 1281.7558 -1989.1322 -1912.7188 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.020231 3.1070522 -77.710916 0 14.963568 -5.8082204 843.41939 -180.17724 0 107.5115
|
||||
20 516.83079 -1941.677 -1910.8655 -12525.41 -2801.8626 7.4107974 0.073134188 0 81.986982 0.2281551 -57.494871 0 30.656735 -10.102557 877.78696 -158.93385 0 88.574158
|
||||
30 467.2641 -1940.978 -1913.1215 -35957.487 -2755.021 -6.9179959 0.049322439 0 78.853175 0.13604392 -51.653634 0 19.862872 -9.7098575 853.79334 -151.232 0 80.861768
|
||||
40 647.45541 -1951.1994 -1912.6006 -5883.7147 -2798.3556 17.334807 0.15102863 0 63.23512 0.18070931 -54.598962 0 17.325008 -12.052277 883.01667 -164.21335 0 96.777422
|
||||
50 716.38057 -1949.4749 -1906.767 5473.2085 -2800.931 9.2056917 0.15413274 0 85.371449 3.2986106 -78.253597 0 34.861773 -8.5531236 882.01435 -193.85275 0 117.2096
|
||||
60 1175.2707 -1975.9611 -1905.8959 -1939.4971 -2726.5816 -11.651982 0.24296788 0 48.320663 7.1799636 -75.363641 0 16.520132 -4.8869463 844.754 -194.23296 0 119.73837
|
||||
70 1156.7 -1975.3486 -1906.3905 24628.344 -2880.5223 25.652478 0.26894312 0 83.724884 7.1049303 -68.700942 0 24.750744 -8.6338218 911.20067 -183.4058 0 113.21158
|
||||
80 840.23687 -1955.4768 -1905.3851 -17731.383 -2755.7295 -8.0168306 0.13867962 0 86.14748 2.2387306 -76.945841 0 23.595858 -7.2609645 853.6346 -167.88289 0 94.603895
|
||||
90 365.79169 -1926.406 -1904.5989 898.37155 -2842.183 47.368211 0.23109 0 92.288131 0.38031313 -61.361483 0 18.476377 -12.255472 900.24202 -186.48056 0 116.88831
|
||||
100 801.32078 -1953.4177 -1905.646 -2417.5518 -2802.7244 4.6676973 0.18046558 0 76.730114 5.4177372 -77.102556 0 24.997234 -7.7554179 898.67306 -196.8912 0 120.38952
|
||||
Loop time of 0.512828 on 1 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 16.848 ns/day, 1.425 hours/ns, 194.997 timesteps/s
|
||||
99.4% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.51126 | 0.51126 | 0.51126 | 0.0 | 99.69
|
||||
Neigh | 0.00071597 | 0.00071597 | 0.00071597 | 0.0 | 0.14
|
||||
Comm | 0.00040317 | 0.00040317 | 0.00040317 | 0.0 | 0.08
|
||||
Output | 0.00027037 | 0.00027037 | 0.00027037 | 0.0 | 0.05
|
||||
Modify | 7.2241e-05 | 7.2241e-05 | 7.2241e-05 | 0.0 | 0.01
|
||||
Other | | 0.000108 | | | 0.02
|
||||
|
||||
Nlocal: 21 ave 21 max 21 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 546 ave 546 max 546 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 1106 ave 1106 max 1106 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 1106
|
||||
Ave neighs/atom = 52.6667
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
Total wall time: 0:00:00
|
||||
@ -1,101 +0,0 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
# ReaxFF potential for RDX system
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
# reax args: hbcut hbnewflag tripflag precision
|
||||
|
||||
pair_style reax 6.0 1 1 1.0e-6
|
||||
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
|
||||
pair_coeff * * ffield.reax 1 2 3 4
|
||||
|
||||
compute reax all pair reax
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
1 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 3 3 3
|
||||
Memory usage per processor = 3.0718 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1885.1268 -1885.1268 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79972 0 168.8843
|
||||
10 1281.7558 -1989.1322 -1912.7188 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.020231 3.1070523 -77.710916 0 14.963568 -5.8082204 843.41939 -180.17725 0 107.51148
|
||||
20 516.8308 -1941.677 -1910.8655 -12525.411 -2801.8626 7.4107973 0.07313419 0 81.986982 0.2281551 -57.494871 0 30.656735 -10.102557 877.78696 -158.93385 0 88.574155
|
||||
30 467.26411 -1940.978 -1913.1215 -35957.487 -2755.021 -6.9179966 0.049322437 0 78.853175 0.13604391 -51.653634 0 19.862872 -9.7098574 853.79333 -151.232 0 80.861765
|
||||
40 647.45584 -1951.1994 -1912.6006 -5883.7102 -2798.3557 17.334812 0.15102857 0 63.235124 0.18070914 -54.598951 0 17.325006 -12.052278 883.01674 -164.21335 0 96.777418
|
||||
50 716.38108 -1949.4679 -1906.76 5473.1803 -2800.9311 9.2057064 0.15413272 0 85.371443 3.2986124 -78.253597 0 34.861778 -8.5531235 882.01441 -193.85213 0 117.21596
|
||||
60 1175.2703 -1975.9632 -1905.898 -1939.6676 -2726.5815 -11.652032 0.24296779 0 48.320636 7.1799647 -75.363643 0 16.520124 -4.8869416 844.75396 -194.25563 0 119.75889
|
||||
70 1156.7016 -1975.3469 -1906.3887 24628.125 -2880.5225 25.65252 0.26894309 0 83.724869 7.1048931 -68.700978 0 24.750754 -8.6338341 911.20067 -183.41947 0 113.22722
|
||||
80 840.3323 -1955.4867 -1905.3893 -17732.956 -2755.7336 -8.0168615 0.13869303 0 86.143454 2.2388975 -76.946365 0 23.594977 -7.2608903 853.63682 -167.88599 0 94.604168
|
||||
90 365.75853 -1926.4192 -1904.6141 902.29004 -2842.1715 47.360077 0.23110905 0 92.28805 0.38040356 -61.364192 0 18.473252 -12.253964 900.23128 -186.47889 0 116.88518
|
||||
100 801.64661 -1953.4392 -1905.6481 -2464.5533 -2802.6922 4.6510183 0.18048786 0 76.715675 5.41849 -77.102069 0 24.987058 -7.7531389 898.65974 -196.87724 0 120.37303
|
||||
Loop time of 0.405054 on 4 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 21.331 ns/day, 1.125 hours/ns, 246.881 timesteps/s
|
||||
96.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.16194 | 0.24674 | 0.40012 | 18.4 | 60.92
|
||||
Neigh | 7.3671e-05 | 0.00024015 | 0.00053477 | 1.1 | 0.06
|
||||
Comm | 0.0037704 | 0.1575 | 0.24247 | 23.1 | 38.88
|
||||
Output | 0.00037122 | 0.00040913 | 0.0004406 | 0.1 | 0.10
|
||||
Modify | 4.22e-05 | 6.175e-05 | 8.3685e-05 | 0.2 | 0.02
|
||||
Other | | 0.0001087 | | | 0.03
|
||||
|
||||
Nlocal: 5.25 ave 15 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
Nghost: 355.5 ave 432 max 282 min
|
||||
Histogram: 1 0 0 0 1 1 0 0 0 1
|
||||
Neighs: 301.25 ave 827 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 1205
|
||||
Ave neighs/atom = 57.381
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
Total wall time: 0:00:00
|
||||
@ -1,104 +0,0 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
# ReaxFF potential for RDX system
|
||||
# this run is equivalent to reax/in.reax.rdx
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
pair_style reax/c control.reax_c.rdx
|
||||
pair_coeff * * ffield.reax C H O N
|
||||
Reading potential file ffield.reax with DATE: 2010-02-19
|
||||
|
||||
compute reax all pair reax/c
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all atom 10 dump.reaxc.rdx
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 3 3 3
|
||||
Memory usage per processor = 14.4462 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1884.3081 -1884.3081 27186.181 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79931 0 168.88396
|
||||
10 1288.6116 -1989.6644 -1912.8422 -19456.353 -2734.6769 -15.607221 0.2017796 0 54.629557 3.125229 -77.7067 0 14.933901 -5.8108541 843.92073 -180.43321 0 107.75935
|
||||
20 538.95819 -1942.7037 -1910.5731 -10725.639 -2803.7394 7.9078269 0.07792668 0 81.610053 0.22951941 -57.557107 0 30.331207 -10.178049 878.99009 -159.68914 0 89.313379
|
||||
30 463.09535 -1933.5765 -1905.9686 -33255.546 -2749.859 -8.0154745 0.02762893 0 81.627395 0.11972413 -50.262293 0 20.820303 -9.6327015 851.88715 -149.49499 0 79.205727
|
||||
40 885.49171 -1958.9125 -1906.1229 -4814.6856 -2795.644 9.150669 0.13747498 0 70.947982 0.24360485 -57.862663 0 19.076496 -11.141218 873.73893 -159.99393 0 92.434096
|
||||
50 861.16578 -1954.4599 -1903.1205 -1896.7713 -2784.845 3.8270515 0.15793266 0 79.851823 3.3492142 -78.06613 0 32.629016 -7.956541 872.81838 -190.98567 0 114.75995
|
||||
60 1167.7852 -1971.8429 -1902.224 -3482.7305 -2705.863 -17.12171 0.22749077 0 44.507654 7.8560745 -74.788955 0 16.256483 -4.6046431 835.8304 -188.33691 0 114.19413
|
||||
70 1439.9966 -1989.3024 -1903.4553 23845.651 -2890.7895 31.958845 0.26671721 0 85.758695 3.1803544 -71.002903 0 24.357134 -10.31131 905.86775 -175.38471 0 106.79648
|
||||
80 502.39438 -1930.7544 -1900.8035 -20356.316 -2703.8115 -18.662467 0.11286011 0 99.804201 2.0329024 -76.171317 0 19.237028 -6.2786907 826.47451 -166.03125 0 92.539398
|
||||
90 749.08499 -1946.9838 -1902.3262 17798.51 -2863.7576 42.068717 0.2433807 0 96.181613 0.96184887 -69.955448 0 24.615302 -11.582765 903.68818 -190.13843 0 120.69141
|
||||
100 1109.6968 -1968.5874 -1902.4315 -4490.1018 -2755.8965 -7.1231014 0.21757699 0 61.806018 7.0827673 -75.645345 0 20.114997 -6.2371964 863.5635 -198.56976 0 122.09961
|
||||
Loop time of 0.362895 on 1 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 23.809 ns/day, 1.008 hours/ns, 275.562 timesteps/s
|
||||
100.0% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.34367 | 0.34367 | 0.34367 | 0.0 | 94.70
|
||||
Neigh | 0.0078354 | 0.0078354 | 0.0078354 | 0.0 | 2.16
|
||||
Comm | 0.00043559 | 0.00043559 | 0.00043559 | 0.0 | 0.12
|
||||
Output | 0.00019908 | 0.00019908 | 0.00019908 | 0.0 | 0.05
|
||||
Modify | 0.010645 | 0.010645 | 0.010645 | 0.0 | 2.93
|
||||
Other | | 0.0001094 | | | 0.03
|
||||
|
||||
Nlocal: 21 ave 21 max 21 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 546 ave 546 max 546 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 1096 ave 1096 max 1096 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 1096
|
||||
Ave neighs/atom = 52.1905
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:00
|
||||
@ -1,104 +0,0 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
# ReaxFF potential for RDX system
|
||||
# this run is equivalent to reax/in.reax.rdx
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
pair_style reax/c control.reax_c.rdx
|
||||
pair_coeff * * ffield.reax C H O N
|
||||
Reading potential file ffield.reax with DATE: 2010-02-19
|
||||
|
||||
compute reax all pair reax/c
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all atom 10 dump.reaxc.rdx
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 3 3 3
|
||||
Memory usage per processor = 12.531 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1884.3081 -1884.3081 27186.18 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79953 0 168.88418
|
||||
10 1288.6115 -1989.6644 -1912.8422 -19456.354 -2734.6769 -15.60722 0.2017796 0 54.629558 3.1252288 -77.7067 0 14.933901 -5.8108542 843.92073 -180.43321 0 107.75934
|
||||
20 538.95831 -1942.7037 -1910.5731 -10725.671 -2803.7395 7.9078306 0.077926651 0 81.610051 0.22951926 -57.557099 0 30.331204 -10.178049 878.99014 -159.69268 0 89.316921
|
||||
30 463.09502 -1933.5765 -1905.9685 -33255.512 -2749.8591 -8.015455 0.027628766 0 81.6274 0.11972393 -50.262275 0 20.820315 -9.6327041 851.88722 -149.49498 0 79.205714
|
||||
40 885.49378 -1958.9125 -1906.1228 -4814.644 -2795.644 9.1506485 0.13747497 0 70.948 0.24360511 -57.862677 0 19.076502 -11.141216 873.73898 -159.99393 0 92.43409
|
||||
50 861.16297 -1954.4602 -1903.1209 -1896.8002 -2784.8451 3.8270162 0.157933 0 79.851673 3.3492148 -78.066132 0 32.628944 -7.9565368 872.81852 -190.98572 0 114.76001
|
||||
60 1167.7835 -1971.8433 -1902.2245 -3482.8296 -2705.8635 -17.121613 0.2274909 0 44.507674 7.85602 -74.788998 0 16.256483 -4.6046575 835.83058 -188.33691 0 114.19414
|
||||
70 1439.9939 -1989.3026 -1903.4556 23846.042 -2890.7893 31.958672 0.26671708 0 85.758381 3.1804035 -71.002944 0 24.357195 -10.311284 905.8679 -175.38487 0 106.79661
|
||||
80 502.39535 -1930.7548 -1900.8039 -20356.194 -2703.8126 -18.662209 0.11286005 0 99.803849 2.0329206 -76.171278 0 19.23716 -6.2787147 826.47505 -166.03123 0 92.539386
|
||||
90 749.07874 -1946.9841 -1902.3269 17798.394 -2863.7576 42.068612 0.24338059 0 96.181423 0.96185061 -69.95542 0 24.615344 -11.582758 903.68812 -190.13826 0 120.69124
|
||||
100 1109.6904 -1968.5879 -1902.4323 -4490.0667 -2755.8991 -7.1224194 0.21757691 0 61.805857 7.0827218 -75.645383 0 20.115437 -6.23727 863.56487 -198.56975 0 122.09963
|
||||
Loop time of 0.293673 on 4 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 29.420 ns/day, 0.816 hours/ns, 340.514 timesteps/s
|
||||
99.1% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.24143 | 0.24223 | 0.24409 | 0.2 | 82.48
|
||||
Neigh | 0.003767 | 0.0049117 | 0.0061524 | 1.2 | 1.67
|
||||
Comm | 0.0030656 | 0.0048578 | 0.0057402 | 1.5 | 1.65
|
||||
Output | 0.00033545 | 0.00036347 | 0.00038052 | 0.1 | 0.12
|
||||
Modify | 0.039885 | 0.041207 | 0.042435 | 0.4 | 14.03
|
||||
Other | | 0.0001001 | | | 0.03
|
||||
|
||||
Nlocal: 5.25 ave 15 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
Nghost: 355.5 ave 432 max 282 min
|
||||
Histogram: 1 0 0 0 1 1 0 0 0 1
|
||||
Neighs: 298.75 ave 822 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 1195
|
||||
Ave neighs/atom = 56.9048
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:00
|
||||
107
examples/reax/log.8March18.reax.rdx.g++.1
Normal file
107
examples/reax/log.8March18.reax.rdx.g++.1
Normal file
@ -0,0 +1,107 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for RDX system
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
# reax args: hbcut hbnewflag tripflag precision
|
||||
|
||||
pair_style reax 6.0 1 1 1.0e-6
|
||||
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
|
||||
pair_coeff * * ffield.reax 1 2 3 4
|
||||
|
||||
compute reax all pair reax
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25, bins = 3 3 3
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair reax, perpetual
|
||||
attributes: half, newton off
|
||||
pair build: half/bin/newtoff
|
||||
stencil: half/bin/3d/newtoff
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.278 | 3.278 | 3.278 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1885.1269 -1885.1269 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79973 0 168.8842
|
||||
10 1281.7558 -1989.1322 -1912.7188 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.02023 3.1070523 -77.710916 0 14.963568 -5.8082203 843.41939 -180.17724 0 107.5115
|
||||
20 516.83079 -1941.677 -1910.8655 -12525.412 -2801.8626 7.410797 0.073134186 0 81.986983 0.2281551 -57.494871 0 30.656735 -10.102557 877.78695 -158.93385 0 88.574159
|
||||
30 467.26411 -1940.978 -1913.1215 -35957.489 -2755.021 -6.9179958 0.049322453 0 78.853173 0.13604393 -51.653635 0 19.862871 -9.7098575 853.79334 -151.232 0 80.86177
|
||||
40 647.45528 -1951.1994 -1912.6006 -5883.713 -2798.3556 17.334814 0.15102862 0 63.235117 0.18070924 -54.598957 0 17.325007 -12.052278 883.0167 -164.21335 0 96.777424
|
||||
50 716.38088 -1949.4735 -1906.7656 5473.1969 -2800.9309 9.2056861 0.15413274 0 85.371466 3.2986127 -78.253597 0 34.861774 -8.553123 882.01431 -193.85254 0 117.21068
|
||||
60 1175.2705 -1975.961 -1905.8958 -1939.4966 -2726.5816 -11.651996 0.24296786 0 48.320654 7.1799691 -75.363638 0 16.520127 -4.8869441 844.75401 -194.23297 0 119.73841
|
||||
70 1156.701 -1975.3497 -1906.3916 24628.304 -2880.5225 25.652501 0.26894311 0 83.724852 7.1049152 -68.70096 0 24.750735 -8.6338267 911.20079 -183.40562 0 113.21047
|
||||
80 840.23677 -1955.4769 -1905.3851 -17731.334 -2755.7299 -8.0167723 0.1386797 0 86.147417 2.2387319 -76.945843 0 23.595869 -7.260968 853.63487 -167.88288 0 94.603961
|
||||
90 365.79122 -1926.4061 -1904.599 898.38479 -2842.1832 47.368107 0.23109002 0 92.288071 0.38031213 -61.361485 0 18.476336 -12.25546 900.24233 -186.48046 0 116.88827
|
||||
100 801.32158 -1953.418 -1905.6462 -2417.6887 -2802.7247 4.6676477 0.18046575 0 76.729987 5.4177322 -77.102566 0 24.997175 -7.7554074 898.67337 -196.89114 0 120.38946
|
||||
Loop time of 0.463306 on 1 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 18.649 ns/day, 1.287 hours/ns, 215.840 timesteps/s
|
||||
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.46143 | 0.46143 | 0.46143 | 0.0 | 99.60
|
||||
Neigh | 0.00087953 | 0.00087953 | 0.00087953 | 0.0 | 0.19
|
||||
Comm | 0.00042653 | 0.00042653 | 0.00042653 | 0.0 | 0.09
|
||||
Output | 0.00034237 | 0.00034237 | 0.00034237 | 0.0 | 0.07
|
||||
Modify | 0.00010109 | 0.00010109 | 0.00010109 | 0.0 | 0.02
|
||||
Other | | 0.000124 | | | 0.03
|
||||
|
||||
Nlocal: 21 ave 21 max 21 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 546 ave 546 max 546 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 1106 ave 1106 max 1106 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 1106
|
||||
Ave neighs/atom = 52.6667
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
Total wall time: 0:00:00
|
||||
107
examples/reax/log.8March18.reax.rdx.g++.4
Normal file
107
examples/reax/log.8March18.reax.rdx.g++.4
Normal file
@ -0,0 +1,107 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for RDX system
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
# reax args: hbcut hbnewflag tripflag precision
|
||||
|
||||
pair_style reax 6.0 1 1 1.0e-6
|
||||
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
|
||||
pair_coeff * * ffield.reax 1 2 3 4
|
||||
|
||||
compute reax all pair reax
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all custom 10 dump.reax.rdx id type q xs ys zs
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25, bins = 3 3 3
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair reax, perpetual
|
||||
attributes: half, newton off
|
||||
pair build: half/bin/newtoff
|
||||
stencil: half/bin/3d/newtoff
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 3.262 | 3.36 | 3.647 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1885.1268 -1885.1268 27233.074 -2958.4712 79.527715 0.31082031 0 97.771125 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79972 0 168.88428
|
||||
10 1281.7558 -1989.1322 -1912.7187 -19609.913 -2733.8828 -15.775275 0.20055725 0 55.020231 3.1070523 -77.710916 0 14.963568 -5.8082203 843.41939 -180.17724 0 107.51152
|
||||
20 516.83079 -1941.677 -1910.8655 -12525.412 -2801.8626 7.410797 0.073134187 0 81.986983 0.2281551 -57.494871 0 30.656735 -10.102557 877.78695 -158.93385 0 88.574168
|
||||
30 467.26411 -1940.978 -1913.1215 -35957.489 -2755.021 -6.9179959 0.049322449 0 78.853173 0.13604392 -51.653635 0 19.862871 -9.7098575 853.79334 -151.232 0 80.861765
|
||||
40 647.45479 -1951.1995 -1912.6007 -5883.7199 -2798.3556 17.334805 0.15102868 0 63.235116 0.18070946 -54.59897 0 17.32501 -12.052277 883.0166 -164.21339 0 96.777473
|
||||
50 716.37927 -1949.466 -1906.7582 5473.2486 -2800.9309 9.2056758 0.15413278 0 85.37143 3.2986099 -78.253596 0 34.861773 -8.5531243 882.01424 -193.85223 0 117.21791
|
||||
60 1175.2698 -1975.9612 -1905.896 -1939.5206 -2726.5818 -11.651942 0.24296793 0 48.320679 7.1799538 -75.36365 0 16.520134 -4.8869515 844.75405 -194.23289 0 119.7383
|
||||
70 1156.6963 -1975.3494 -1906.3915 24628.423 -2880.5221 25.65242 0.26894312 0 83.724787 7.1049615 -68.700925 0 24.750729 -8.6338123 911.2006 -183.40591 0 113.21091
|
||||
80 840.238 -1955.4788 -1905.387 -17731.371 -2755.7301 -8.0167357 0.13868007 0 86.147246 2.2387405 -76.945868 0 23.595868 -7.2609697 853.6349 -167.88312 0 94.602512
|
||||
90 365.78645 -1926.4072 -1904.6004 898.36945 -2842.1831 47.368307 0.23108998 0 92.288039 0.38031101 -61.361464 0 18.476388 -12.255481 900.24216 -186.48066 0 116.88716
|
||||
100 801.31322 -1953.4165 -1905.6452 -2417.2041 -2802.7247 4.6678077 0.18046498 0 76.730367 5.4176812 -77.102592 0 24.9973 -7.7554425 898.6732 -196.89097 0 120.39043
|
||||
Loop time of 0.404551 on 4 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 21.357 ns/day, 1.124 hours/ns, 247.188 timesteps/s
|
||||
97.4% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.2191 | 0.28038 | 0.39839 | 13.2 | 69.31
|
||||
Neigh | 5.8651e-05 | 0.00025928 | 0.00062203 | 0.0 | 0.06
|
||||
Comm | 0.0046599 | 0.12307 | 0.1845 | 19.9 | 30.42
|
||||
Output | 0.00055337 | 0.00062728 | 0.00071192 | 0.0 | 0.16
|
||||
Modify | 5.3167e-05 | 7.844e-05 | 0.00010109 | 0.0 | 0.02
|
||||
Other | | 0.0001363 | | | 0.03
|
||||
|
||||
Nlocal: 5.25 ave 15 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
Nghost: 355.5 ave 432 max 282 min
|
||||
Histogram: 1 0 0 0 1 1 0 0 0 1
|
||||
Neighs: 301.25 ave 827 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 1205
|
||||
Ave neighs/atom = 57.381
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
Total wall time: 0:00:00
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for TATB system
|
||||
|
||||
units real
|
||||
@ -12,7 +13,7 @@ read_data data.tatb
|
||||
|
||||
# reax args: hbcut hbnewflag tripflag precision
|
||||
pair_style reax 6.0 1 1 1.0e-6
|
||||
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
|
||||
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
|
||||
pair_coeff * * ffield.reax 1 2 3 4
|
||||
|
||||
compute reax all pair reax
|
||||
@ -54,34 +55,39 @@ fix 2 all reax/bonds 25 bonds.reax.tatb
|
||||
|
||||
run 25
|
||||
Neighbor list info ...
|
||||
1 neighbor list requests
|
||||
update every 5 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 5 4 3
|
||||
Memory usage per processor = 6.61277 Mbytes
|
||||
binsize = 6.25, bins = 5 4 3
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair reax, perpetual
|
||||
attributes: half, newton off
|
||||
pair build: half/bin/newtoff
|
||||
stencil: half/bin/3d/newtoff
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 7.764 | 7.764 | 7.764 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -44767.08 -44767.08 7294.6353 -61120.591 486.4378 4.7236377 0 1568.024 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6378 0 6391.0231
|
||||
5 0.63682807 -44767.737 -44767.01 8391.5966 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.2727 0 6388.8127
|
||||
10 2.4306957 -44769.41 -44766.635 11717.369 -61113.142 487.89093 4.7227063 0 1567.2936 20.705084 -274.37509 -1560.8546 252.87219 -655.43578 18850.19 -8731.0713 0 6381.7946
|
||||
15 5.0590478 -44772.63 -44766.854 17125.033 -61103.34 489.28007 4.7214008 0 1566.4744 20.590604 -268.28963 -1566.5961 252.97781 -654.93836 18835.335 -8719.3112 0 6370.4665
|
||||
20 8.0678579 -44775.923 -44766.713 24620.824 -61088.791 490.42346 4.7193467 0 1565.5541 20.415031 -260.38512 -1574.1001 253.39805 -654.26837 18815.312 -8703.3104 0 6355.1097
|
||||
25 10.975539 -44777.231 -44764.701 34381.278 -61068.889 490.53149 4.7164093 0 1566.5715 20.169755 -251.2311 -1582.8552 253.88696 -653.46042 18790.855 -8683.8362 0 6336.3099
|
||||
Loop time of 7.48375 on 1 procs for 25 steps with 384 atoms
|
||||
5 0.63682806 -44767.737 -44767.01 8391.5964 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.2728 0 6388.8127
|
||||
10 2.4306958 -44769.409 -44766.634 11717.376 -61113.142 487.89093 4.7227063 0 1567.2936 20.705084 -274.37509 -1560.8546 252.87219 -655.43578 18850.19 -8731.0693 0 6381.7942
|
||||
15 5.0590493 -44772.631 -44766.855 17125.067 -61103.34 489.28007 4.7214008 0 1566.4744 20.590604 -268.28962 -1566.5961 252.97781 -654.93836 18835.335 -8719.3013 0 6370.4551
|
||||
20 8.067859 -44775.936 -44766.725 24620.627 -61088.791 490.42346 4.7193467 0 1565.5541 20.415031 -260.38512 -1574.1001 253.39805 -654.26837 18815.312 -8703.3748 0 6355.1614
|
||||
25 10.975538 -44777.233 -44764.702 34381.173 -61068.889 490.53149 4.7164093 0 1566.5715 20.169755 -251.23109 -1582.8552 253.88696 -653.46042 18790.855 -8683.8691 0 6336.3409
|
||||
Loop time of 7.80129 on 1 procs for 25 steps with 384 atoms
|
||||
|
||||
Performance: 0.018 ns/day, 1330.444 hours/ns, 3.341 timesteps/s
|
||||
99.9% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 0.017 ns/day, 1386.896 hours/ns, 3.205 timesteps/s
|
||||
99.5% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 7.4284 | 7.4284 | 7.4284 | 0.0 | 99.26
|
||||
Neigh | 0.051549 | 0.051549 | 0.051549 | 0.0 | 0.69
|
||||
Comm | 0.0021887 | 0.0021887 | 0.0021887 | 0.0 | 0.03
|
||||
Output | 0.00025821 | 0.00025821 | 0.00025821 | 0.0 | 0.00
|
||||
Modify | 0.00099206 | 0.00099206 | 0.00099206 | 0.0 | 0.01
|
||||
Other | | 0.0003154 | | | 0.00
|
||||
Pair | 7.7384 | 7.7384 | 7.7384 | 0.0 | 99.19
|
||||
Neigh | 0.058615 | 0.058615 | 0.058615 | 0.0 | 0.75
|
||||
Comm | 0.0022428 | 0.0022428 | 0.0022428 | 0.0 | 0.03
|
||||
Output | 0.00033212 | 0.00033212 | 0.00033212 | 0.0 | 0.00
|
||||
Modify | 0.0013618 | 0.0013618 | 0.0013618 | 0.0 | 0.02
|
||||
Other | | 0.0003309 | | | 0.00
|
||||
|
||||
Nlocal: 384 ave 384 max 384 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -94,4 +100,4 @@ Total # of neighbors = 286828
|
||||
Ave neighs/atom = 746.948
|
||||
Neighbor list builds = 5
|
||||
Dangerous builds not checked
|
||||
Total wall time: 0:00:07
|
||||
Total wall time: 0:00:08
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for TATB system
|
||||
|
||||
units real
|
||||
@ -12,7 +13,7 @@ read_data data.tatb
|
||||
|
||||
# reax args: hbcut hbnewflag tripflag precision
|
||||
pair_style reax 6.0 1 1 1.0e-6
|
||||
WARNING: The pair_style reax command will be deprecated soon - users should switch to pair_style reax/c (../pair_reax.cpp:49)
|
||||
WARNING: The pair_style reax command is unsupported. Please switch to pair_style reax/c instead (../pair_reax.cpp:49)
|
||||
pair_coeff * * ffield.reax 1 2 3 4
|
||||
|
||||
compute reax all pair reax
|
||||
@ -54,34 +55,39 @@ fix 2 all reax/bonds 25 bonds.reax.tatb
|
||||
|
||||
run 25
|
||||
Neighbor list info ...
|
||||
1 neighbor list requests
|
||||
update every 5 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 5 4 3
|
||||
Memory usage per processor = 4.03843 Mbytes
|
||||
binsize = 6.25, bins = 5 4 3
|
||||
1 neighbor lists, perpetual/occasional/extra = 1 0 0
|
||||
(1) pair reax, perpetual
|
||||
attributes: half, newton off
|
||||
pair build: half/bin/newtoff
|
||||
stencil: half/bin/3d/newtoff
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 4.402 | 4.402 | 4.402 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -44767.08 -44767.08 7294.6353 -61120.591 486.4378 4.7236377 0 1568.024 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6378 0 6391.0231
|
||||
5 0.63682726 -44767.816 -44767.089 8391.165 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.3995 0 6388.86
|
||||
10 2.4306905 -44769.408 -44766.633 11717.247 -61113.142 487.89094 4.7227063 0 1567.2936 20.705084 -274.3751 -1560.8546 252.87219 -655.43578 18850.19 -8731.0965 0 6381.8216
|
||||
15 5.0590422 -44772.626 -44766.85 17124.943 -61103.34 489.2801 4.7214008 0 1566.4744 20.590604 -268.28963 -1566.5961 252.97781 -654.93836 18835.335 -8719.3383 0 6370.4973
|
||||
20 8.0678512 -44775.934 -44766.723 24620.531 -61088.791 490.42349 4.7193467 0 1565.5541 20.415031 -260.38513 -1574.1001 253.39804 -654.26837 18815.312 -8703.4033 0 6355.1921
|
||||
25 10.97553 -44777.231 -44764.701 34381.242 -61068.889 490.53154 4.7164093 0 1566.5715 20.169755 -251.23111 -1582.8552 253.88696 -653.46042 18790.855 -8683.8451 0 6336.3185
|
||||
Loop time of 3.27945 on 4 procs for 25 steps with 384 atoms
|
||||
5 0.63682727 -44767.816 -44767.089 8391.1708 -61118.763 486.82916 4.723415 0 1567.835 20.768662 -278.20804 -1557.6962 252.64683 -655.74117 18859.328 -8738.3973 0 6388.8581
|
||||
10 2.4306941 -44769.405 -44766.63 11717.306 -61113.142 487.89094 4.7227063 0 1567.2936 20.705084 -274.3751 -1560.8546 252.87219 -655.43578 18850.19 -8731.08 0 6381.8083
|
||||
15 5.0590444 -44772.6 -44766.824 17125.207 -61103.34 489.28008 4.7214008 0 1566.4744 20.590604 -268.28963 -1566.5961 252.97781 -654.93836 18835.335 -8719.2653 0 6370.4505
|
||||
20 8.0678523 -44775.983 -44766.772 24620.114 -61088.791 490.42348 4.7193467 0 1565.5541 20.415031 -260.38513 -1574.1001 253.39804 -654.26837 18815.312 -8703.5228 0 6355.2629
|
||||
25 10.975532 -44777.234 -44764.704 34381.065 -61068.889 490.53151 4.7164093 0 1566.5715 20.169755 -251.23111 -1582.8552 253.88696 -653.46042 18790.855 -8683.898 0 6336.3682
|
||||
Loop time of 3.74388 on 4 procs for 25 steps with 384 atoms
|
||||
|
||||
Performance: 0.041 ns/day, 583.013 hours/ns, 7.623 timesteps/s
|
||||
99.8% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 0.036 ns/day, 665.579 hours/ns, 6.678 timesteps/s
|
||||
98.7% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 3.0329 | 3.1456 | 3.2612 | 5.2 | 95.92
|
||||
Neigh | 0.011087 | 0.011261 | 0.011608 | 0.2 | 0.34
|
||||
Comm | 0.0057111 | 0.12121 | 0.23398 | 26.2 | 3.70
|
||||
Output | 0.00039172 | 0.0005855 | 0.00080633 | 0.6 | 0.02
|
||||
Modify | 0.00035787 | 0.00059456 | 0.00082469 | 0.7 | 0.02
|
||||
Other | | 0.0002265 | | | 0.01
|
||||
Pair | 3.478 | 3.6025 | 3.7215 | 4.8 | 96.22
|
||||
Neigh | 0.012731 | 0.01299 | 0.013174 | 0.2 | 0.35
|
||||
Comm | 0.0073411 | 0.12653 | 0.25119 | 25.4 | 3.38
|
||||
Output | 0.00050354 | 0.00081849 | 0.0011628 | 0.0 | 0.02
|
||||
Modify | 0.00049281 | 0.00082356 | 0.001157 | 0.0 | 0.02
|
||||
Other | | 0.0002663 | | | 0.01
|
||||
|
||||
Nlocal: 96 ave 96 max 96 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
115
examples/reax/log.8March18.reaxc.rdx.g++.1
Normal file
115
examples/reax/log.8March18.reaxc.rdx.g++.1
Normal file
@ -0,0 +1,115 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for RDX system
|
||||
# this run is equivalent to reax/in.reax.rdx
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
pair_style reax/c control.reax_c.rdx
|
||||
pair_coeff * * ffield.reax C H O N
|
||||
Reading potential file ffield.reax with DATE: 2010-02-19
|
||||
|
||||
compute reax all pair reax/c
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all atom 10 dump.reaxc.rdx
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25, bins = 3 3 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 15.28 | 15.28 | 15.28 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1884.3081 -1884.3081 27186.181 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79937 0 168.88402
|
||||
10 1288.6114 -1989.6644 -1912.8422 -19456.35 -2734.6769 -15.607219 0.20177961 0 54.629556 3.1252294 -77.7067 0 14.933901 -5.8108541 843.92074 -180.43322 0 107.75935
|
||||
20 538.95849 -1942.7037 -1910.5731 -10725.658 -2803.7395 7.9078331 0.077926702 0 81.610043 0.22951937 -57.557104 0 30.331203 -10.178049 878.99015 -159.69092 0 89.315159
|
||||
30 463.09542 -1933.5765 -1905.9685 -33255.507 -2749.8591 -8.0154628 0.027628767 0 81.627403 0.11972403 -50.262284 0 20.82032 -9.6327022 851.88722 -149.495 0 79.205731
|
||||
40 885.49449 -1958.9126 -1906.1228 -4814.7123 -2795.644 9.1506221 0.1374749 0 70.948046 0.24360579 -57.8627 0 19.076515 -11.141211 873.73892 -159.9939 0 92.434059
|
||||
50 861.1646 -1954.4599 -1903.1206 -1896.7387 -2784.8446 3.8269113 0.1579328 0 79.851775 3.3492107 -78.066127 0 32.628975 -7.9565255 872.81826 -190.98565 0 114.75994
|
||||
60 1167.785 -1971.8432 -1902.2243 -3482.6975 -2705.8638 -17.121582 0.22749067 0 44.507705 7.856069 -74.788959 0 16.256519 -4.6046602 835.8308 -188.33691 0 114.19414
|
||||
70 1439.9947 -1989.3024 -1903.4554 23845.067 -2890.7896 31.958874 0.26671735 0 85.758608 3.1803486 -71.002907 0 24.357106 -10.311315 905.86799 -175.38482 0 106.79659
|
||||
80 502.40024 -1930.7547 -1900.8035 -20356.557 -2703.8096 -18.663105 0.11286226 0 99.803799 2.0329394 -76.171387 0 19.236609 -6.2786041 826.47358 -166.03157 0 92.539694
|
||||
90 749.09267 -1946.9834 -1902.3254 17798.812 -2863.7586 42.068927 0.24338042 0 96.18195 0.96181754 -69.955528 0 24.61541 -11.58277 903.68895 -190.13838 0 120.69139
|
||||
100 1109.7046 -1968.5875 -1902.4311 -4490.6736 -2755.8953 -7.1235173 0.21757663 0 61.806405 7.0825933 -75.645487 0 20.114745 -6.2371664 863.56285 -198.56939 0 122.09923
|
||||
Loop time of 0.395195 on 1 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 21.863 ns/day, 1.098 hours/ns, 253.039 timesteps/s
|
||||
99.3% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.3722 | 0.3722 | 0.3722 | 0.0 | 94.18
|
||||
Neigh | 0.0098455 | 0.0098455 | 0.0098455 | 0.0 | 2.49
|
||||
Comm | 0.00047445 | 0.00047445 | 0.00047445 | 0.0 | 0.12
|
||||
Output | 0.00034022 | 0.00034022 | 0.00034022 | 0.0 | 0.09
|
||||
Modify | 0.012187 | 0.012187 | 0.012187 | 0.0 | 3.08
|
||||
Other | | 0.0001521 | | | 0.04
|
||||
|
||||
Nlocal: 21 ave 21 max 21 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 546 ave 546 max 546 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 1096 ave 1096 max 1096 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 1096
|
||||
Ave neighs/atom = 52.1905
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:00
|
||||
115
examples/reax/log.8March18.reaxc.rdx.g++.4
Normal file
115
examples/reax/log.8March18.reaxc.rdx.g++.4
Normal file
@ -0,0 +1,115 @@
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for RDX system
|
||||
# this run is equivalent to reax/in.reax.rdx
|
||||
|
||||
units real
|
||||
|
||||
atom_style charge
|
||||
read_data data.rdx
|
||||
orthogonal box = (35 35 35) to (48 48 48)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
reading atoms ...
|
||||
21 atoms
|
||||
|
||||
pair_style reax/c control.reax_c.rdx
|
||||
pair_coeff * * ffield.reax C H O N
|
||||
Reading potential file ffield.reax with DATE: 2010-02-19
|
||||
|
||||
compute reax all pair reax/c
|
||||
|
||||
variable eb equal c_reax[1]
|
||||
variable ea equal c_reax[2]
|
||||
variable elp equal c_reax[3]
|
||||
variable emol equal c_reax[4]
|
||||
variable ev equal c_reax[5]
|
||||
variable epen equal c_reax[6]
|
||||
variable ecoa equal c_reax[7]
|
||||
variable ehb equal c_reax[8]
|
||||
variable et equal c_reax[9]
|
||||
variable eco equal c_reax[10]
|
||||
variable ew equal c_reax[11]
|
||||
variable ep equal c_reax[12]
|
||||
variable efi equal c_reax[13]
|
||||
variable eqeq equal c_reax[14]
|
||||
|
||||
neighbor 2.5 bin
|
||||
neigh_modify every 10 delay 0 check no
|
||||
|
||||
fix 1 all nve
|
||||
fix 2 all qeq/reax 1 0.0 10.0 1.0e-6 reax/c
|
||||
|
||||
thermo 10
|
||||
thermo_style custom step temp epair etotal press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
|
||||
timestep 1.0
|
||||
|
||||
#dump 1 all atom 10 dump.reaxc.rdx
|
||||
|
||||
#dump 2 all image 25 image.*.jpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 2 pad 3
|
||||
|
||||
#dump 3 all movie 25 movie.mpg type type # axes yes 0.8 0.02 view 60 -30
|
||||
#dump_modify 3 pad 3
|
||||
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 10 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25, bins = 3 3 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 10.37 | 11.76 | 13.34 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -1884.3081 -1884.3081 27186.178 -2958.4712 79.527715 0.31082031 0 98.589783 25.846176 -0.18034154 0 16.709078 -9.1620736 938.43732 -244.79988 0 168.88453
|
||||
10 1288.6115 -1989.6644 -1912.8422 -19456.354 -2734.6769 -15.60722 0.2017796 0 54.629558 3.1252286 -77.7067 0 14.933902 -5.8108544 843.92073 -180.43321 0 107.75934
|
||||
20 538.95818 -1942.7037 -1910.5731 -10725.623 -2803.7394 7.9078307 0.077926702 0 81.61005 0.22951942 -57.557107 0 30.331206 -10.178049 878.9901 -159.68951 0 89.313749
|
||||
30 463.09514 -1933.5765 -1905.9685 -33255.525 -2749.859 -8.0154737 0.027628797 0 81.627408 0.11972402 -50.262283 0 20.82031 -9.6327021 851.88715 -149.49499 0 79.205724
|
||||
40 885.49412 -1958.9125 -1906.1227 -4814.6606 -2795.6439 9.150622 0.13747487 0 70.948029 0.24360517 -57.862679 0 19.076509 -11.141214 873.7389 -159.99392 0 92.434078
|
||||
50 861.16393 -1954.46 -1903.1207 -1896.7323 -2784.8449 3.8270197 0.1579328 0 79.851743 3.3492115 -78.066132 0 32.628992 -7.9565379 872.81841 -190.98568 0 114.75996
|
||||
60 1167.7846 -1971.8432 -1902.2243 -3482.8111 -2705.8633 -17.121657 0.2274907 0 44.507681 7.8560366 -74.788989 0 16.256493 -4.6046537 835.8305 -188.33687 0 114.1941
|
||||
70 1439.9942 -1989.3023 -1903.4554 23845.444 -2890.7894 31.958784 0.26671721 0 85.758586 3.1803655 -71.002918 0 24.357158 -10.311304 905.86792 -175.38481 0 106.79657
|
||||
80 502.3975 -1930.7546 -1900.8036 -20356.439 -2703.8105 -18.662812 0.11286123 0 99.80391 2.0329293 -76.171334 0 19.236803 -6.2786439 826.47397 -166.03141 0 92.539551
|
||||
90 749.09048 -1946.9837 -1902.3258 17798.718 -2863.7582 42.068719 0.24338057 0 96.181773 0.96183581 -69.955529 0 24.615414 -11.582758 903.68862 -190.1384 0 120.69139
|
||||
100 1109.6999 -1968.5875 -1902.4314 -4490.3728 -2755.8964 -7.1231468 0.21757685 0 61.806149 7.0826648 -75.645428 0 20.115002 -6.2371958 863.56343 -198.56957 0 122.09942
|
||||
Loop time of 0.329552 on 4 procs for 100 steps with 21 atoms
|
||||
|
||||
Performance: 26.217 ns/day, 0.915 hours/ns, 303.443 timesteps/s
|
||||
96.9% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.26372 | 0.26499 | 0.26754 | 0.3 | 80.41
|
||||
Neigh | 0.0045478 | 0.0062494 | 0.0076699 | 1.5 | 1.90
|
||||
Comm | 0.0041637 | 0.0064691 | 0.0080271 | 1.8 | 1.96
|
||||
Output | 0.00054169 | 0.00056636 | 0.00060368 | 0.0 | 0.17
|
||||
Modify | 0.049433 | 0.051134 | 0.05311 | 0.6 | 15.52
|
||||
Other | | 0.000141 | | | 0.04
|
||||
|
||||
Nlocal: 5.25 ave 15 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
Nghost: 355.5 ave 432 max 282 min
|
||||
Histogram: 1 0 0 0 1 1 0 0 0 1
|
||||
Neighs: 298.75 ave 822 max 0 min
|
||||
Histogram: 1 0 2 0 0 0 0 0 0 1
|
||||
|
||||
Total # of neighbors = 1195
|
||||
Ave neighs/atom = 56.9048
|
||||
Neighbor list builds = 10
|
||||
Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:00
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for TATB system
|
||||
# this run is equivalent to reax/in.reax.tatb,
|
||||
|
||||
@ -56,34 +57,44 @@ fix 3 all reax/c/species 1 5 5 species.tatb
|
||||
|
||||
run 25
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 5 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 5 4 3
|
||||
Memory usage per processor = 155.82 Mbytes
|
||||
binsize = 6.25, bins = 5 4 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 176.7 | 176.7 | 176.7 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -44760.998 -44760.998 7827.7879 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6394 0 6391.0274
|
||||
5 0.61603942 -44761.698 -44760.994 8934.6281 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.193 0 6388.6691
|
||||
10 2.3525551 -44763.227 -44760.541 12288.607 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50358 -1560.7569 252.85309 -655.44063 18850.391 -8730.9688 0 6381.7066
|
||||
15 4.9013326 -44766.36 -44760.764 17717.015 -61103.434 489.14721 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.237 0 6370.4033
|
||||
20 7.829471 -44769.686 -44760.747 25205.558 -61089.006 490.21313 4.719302 0 1571.7022 20.420943 -260.85565 -1573.7378 253.3539 -654.31623 18816.07 -8703.5091 0 6355.2604
|
||||
25 10.697926 -44772.904 -44760.691 34232.793 -61069.308 490.25886 4.7163736 0 1570.7397 20.181346 -251.91377 -1582.3261 253.82253 -653.53184 18791.975 -8684.3608 0 6336.8416
|
||||
Loop time of 4.34725 on 1 procs for 25 steps with 384 atoms
|
||||
0 0 -44760.998 -44760.998 7827.7874 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6395 0 6391.0275
|
||||
5 0.61603968 -44761.698 -44760.994 8934.6347 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.1911 0 6388.6671
|
||||
10 2.3525551 -44763.227 -44760.541 12288.583 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50357 -1560.7569 252.85309 -655.44063 18850.391 -8730.9768 0 6381.7146
|
||||
15 4.9013279 -44766.36 -44760.764 17717.01 -61103.434 489.14722 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.2375 0 6370.4038
|
||||
20 7.8294645 -44769.686 -44760.747 25205.624 -61089.006 490.21314 4.719302 0 1571.7022 20.420943 -260.85564 -1573.7378 253.3539 -654.31623 18816.07 -8703.4889 0 6355.2402
|
||||
25 10.697904 -44772.904 -44760.691 34232.965 -61069.308 490.25888 4.7163736 0 1570.7397 20.181346 -251.91377 -1582.3261 253.82253 -653.53184 18791.975 -8684.3125 0 6336.7934
|
||||
Loop time of 4.72562 on 1 procs for 25 steps with 384 atoms
|
||||
|
||||
Performance: 0.031 ns/day, 772.845 hours/ns, 5.751 timesteps/s
|
||||
99.8% CPU use with 1 MPI tasks x no OpenMP threads
|
||||
Performance: 0.029 ns/day, 840.110 hours/ns, 5.290 timesteps/s
|
||||
99.4% CPU use with 1 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 3.5264 | 3.5264 | 3.5264 | 0.0 | 81.12
|
||||
Neigh | 0.40335 | 0.40335 | 0.40335 | 0.0 | 9.28
|
||||
Comm | 0.0021031 | 0.0021031 | 0.0021031 | 0.0 | 0.05
|
||||
Output | 0.00019765 | 0.00019765 | 0.00019765 | 0.0 | 0.00
|
||||
Modify | 0.41479 | 0.41479 | 0.41479 | 0.0 | 9.54
|
||||
Other | | 0.0004084 | | | 0.01
|
||||
Pair | 3.775 | 3.775 | 3.775 | 0.0 | 79.88
|
||||
Neigh | 0.47047 | 0.47047 | 0.47047 | 0.0 | 9.96
|
||||
Comm | 0.0025151 | 0.0025151 | 0.0025151 | 0.0 | 0.05
|
||||
Output | 0.0003159 | 0.0003159 | 0.0003159 | 0.0 | 0.01
|
||||
Modify | 0.47676 | 0.47676 | 0.47676 | 0.0 | 10.09
|
||||
Other | | 0.0005293 | | | 0.01
|
||||
|
||||
Nlocal: 384 ave 384 max 384 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
@ -99,4 +110,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:04
|
||||
Total wall time: 0:00:05
|
||||
@ -1,4 +1,5 @@
|
||||
LAMMPS (5 Oct 2016)
|
||||
LAMMPS (8 Mar 2018)
|
||||
using 1 OpenMP thread(s) per MPI task
|
||||
# ReaxFF potential for TATB system
|
||||
# this run is equivalent to reax/in.reax.tatb,
|
||||
|
||||
@ -56,34 +57,44 @@ fix 3 all reax/c/species 1 5 5 species.tatb
|
||||
|
||||
run 25
|
||||
Neighbor list info ...
|
||||
2 neighbor list requests
|
||||
update every 5 steps, delay 0 steps, check no
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 12.5
|
||||
ghost atom cutoff = 12.5
|
||||
binsize = 6.25 -> bins = 5 4 3
|
||||
Memory usage per processor = 105.386 Mbytes
|
||||
binsize = 6.25, bins = 5 4 3
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair reax/c, perpetual
|
||||
attributes: half, newton off, ghost
|
||||
pair build: half/bin/newtoff/ghost
|
||||
stencil: half/ghost/bin/3d/newtoff
|
||||
bin: standard
|
||||
(2) fix qeq/reax, perpetual, copy from (1)
|
||||
attributes: half, newton off, ghost
|
||||
pair build: copy
|
||||
stencil: none
|
||||
bin: none
|
||||
Per MPI rank memory allocation (min/avg/max) = 118 | 118 | 118 Mbytes
|
||||
Step Temp E_pair TotEng Press v_eb v_ea v_elp v_emol v_ev v_epen v_ecoa v_ehb v_et v_eco v_ew v_ep v_efi v_eqeq
|
||||
0 0 -44760.998 -44760.998 7827.7867 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6397 0 6391.0277
|
||||
5 0.61603967 -44761.698 -44760.994 8934.6339 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.1905 0 6388.6665
|
||||
10 2.3525545 -44763.227 -44760.541 12288.586 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50357 -1560.7569 252.85309 -655.44063 18850.391 -8730.9762 0 6381.714
|
||||
15 4.9013281 -44766.36 -44760.764 17716.982 -61103.434 489.14722 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.2476 0 6370.4138
|
||||
20 7.8294637 -44769.686 -44760.747 25205.512 -61089.006 490.21314 4.719302 0 1571.7022 20.420943 -260.85565 -1573.7378 253.3539 -654.31623 18816.07 -8703.518 0 6355.2692
|
||||
25 10.697905 -44772.904 -44760.691 34232.815 -61069.308 490.25887 4.7163736 0 1570.7397 20.181346 -251.91377 -1582.3261 253.82253 -653.53184 18791.975 -8684.3481 0 6336.829
|
||||
Loop time of 2.60733 on 4 procs for 25 steps with 384 atoms
|
||||
0 0 -44760.998 -44760.998 7827.7866 -61120.591 486.4378 4.7236377 0 1574.1033 20.788929 -279.51642 -1556.4696 252.57147 -655.84699 18862.412 -8740.6398 0 6391.0277
|
||||
5 0.61603968 -44761.698 -44760.994 8934.6335 -61118.769 486.81263 4.7234094 0 1573.9241 20.768834 -278.24084 -1557.6713 252.64377 -655.74435 18859.379 -8738.1906 0 6388.6666
|
||||
10 2.3525544 -44763.227 -44760.541 12288.587 -61113.174 487.82738 4.7226863 0 1573.411 20.705939 -274.50357 -1560.7569 252.85309 -655.44063 18850.391 -8730.9764 0 6381.7141
|
||||
15 4.9013311 -44766.36 -44760.764 17716.955 -61103.434 489.14721 4.7213644 0 1572.6349 20.593139 -268.56847 -1566.3829 252.95174 -654.96611 18835.777 -8719.2558 0 6370.4221
|
||||
20 7.8294715 -44769.686 -44760.747 25205.613 -61089.006 490.21314 4.7193021 0 1571.7022 20.420943 -260.85564 -1573.7378 253.3539 -654.31623 18816.07 -8703.4906 0 6355.2419
|
||||
25 10.697924 -44772.904 -44760.691 34232.794 -61069.308 490.25886 4.7163736 0 1570.7397 20.181347 -251.91376 -1582.3261 253.82253 -653.53183 18791.975 -8684.3641 0 6336.8449
|
||||
Loop time of 2.84068 on 4 procs for 25 steps with 384 atoms
|
||||
|
||||
Performance: 0.052 ns/day, 463.526 hours/ns, 9.588 timesteps/s
|
||||
99.9% CPU use with 4 MPI tasks x no OpenMP threads
|
||||
Performance: 0.048 ns/day, 505.009 hours/ns, 8.801 timesteps/s
|
||||
98.4% CPU use with 4 MPI tasks x 1 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 2.1835 | 2.1843 | 2.1854 | 0.0 | 83.77
|
||||
Neigh | 0.22091 | 0.22364 | 0.22821 | 0.6 | 8.58
|
||||
Comm | 0.005677 | 0.0069622 | 0.0078082 | 1.0 | 0.27
|
||||
Output | 0.00036621 | 0.0028675 | 0.0037034 | 2.7 | 0.11
|
||||
Modify | 0.18736 | 0.18921 | 0.19102 | 0.4 | 7.26
|
||||
Other | | 0.0003636 | | | 0.01
|
||||
Pair | 2.3253 | 2.328 | 2.3305 | 0.2 | 81.95
|
||||
Neigh | 0.2589 | 0.26458 | 0.26897 | 0.7 | 9.31
|
||||
Comm | 0.0094428 | 0.012062 | 0.014872 | 2.3 | 0.42
|
||||
Output | 0.00043392 | 0.0042209 | 0.0054941 | 3.4 | 0.15
|
||||
Modify | 0.22563 | 0.23134 | 0.23579 | 0.8 | 8.14
|
||||
Other | | 0.0005122 | | | 0.02
|
||||
|
||||
Nlocal: 96 ave 96 max 96 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
@ -99,4 +110,4 @@ Dangerous builds not checked
|
||||
|
||||
Please see the log.cite file for references relevant to this simulation
|
||||
|
||||
Total wall time: 0:00:02
|
||||
Total wall time: 0:00:03
|
||||
@ -1,5 +1,49 @@
|
||||
# Change Log
|
||||
|
||||
## [2.6.00](https://github.com/kokkos/kokkos/tree/2.6.00) (2018-03-07)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.5.00...2.6.00)
|
||||
|
||||
**Part of the Kokkos C++ Performance Portability Programming EcoSystem 2.6**
|
||||
|
||||
**Implemented enhancements:**
|
||||
|
||||
- Support NVIDIA Volta microarchitecture [\#1466](https://github.com/kokkos/kokkos/issues/1466)
|
||||
- Kokkos - Define empty functions when profiling disabled [\#1424](https://github.com/kokkos/kokkos/issues/1424)
|
||||
- Don't use \_\_constant\_\_ cache for lock arrays, enable once per run update instead of once per call [\#1385](https://github.com/kokkos/kokkos/issues/1385)
|
||||
- task dag enhancement. [\#1354](https://github.com/kokkos/kokkos/issues/1354)
|
||||
- Cuda task team collectives and stack size [\#1353](https://github.com/kokkos/kokkos/issues/1353)
|
||||
- Replace View operator acceptance of more than rank integers with 'access' function [\#1333](https://github.com/kokkos/kokkos/issues/1333)
|
||||
- Interoperability: Do not shut down backend execution space runtimes upon calling finalize. [\#1305](https://github.com/kokkos/kokkos/issues/1305)
|
||||
- shmem\_size for LayoutStride [\#1291](https://github.com/kokkos/kokkos/issues/1291)
|
||||
- Kokkos::resize performs poorly on 1D Views [\#1270](https://github.com/kokkos/kokkos/issues/1270)
|
||||
- stride\(\) is inconsistent with dimension\(\), extent\(\), etc. [\#1214](https://github.com/kokkos/kokkos/issues/1214)
|
||||
- Kokkos::sort defaults to std::sort on host [\#1208](https://github.com/kokkos/kokkos/issues/1208)
|
||||
- DynamicView with host size grow [\#1206](https://github.com/kokkos/kokkos/issues/1206)
|
||||
- Unmanaged View with Anonymous Memory Space [\#1175](https://github.com/kokkos/kokkos/issues/1175)
|
||||
- Sort subset of Kokkos::DynamicView [\#1160](https://github.com/kokkos/kokkos/issues/1160)
|
||||
- MDRange policy doesn't support lambda reductions [\#1054](https://github.com/kokkos/kokkos/issues/1054)
|
||||
- Add ability to set hook on Kokkos::finalize [\#714](https://github.com/kokkos/kokkos/issues/714)
|
||||
- Atomics with Serial Backend - Default should be Disable? [\#549](https://github.com/kokkos/kokkos/issues/549)
|
||||
- KOKKOS\_ENABLE\_DEPRECATED\_CODE [\#1359](https://github.com/kokkos/kokkos/issues/1359)
|
||||
|
||||
**Fixed bugs:**
|
||||
|
||||
- cuda\_internal\_maximum\_warp\_count returns 8, but I believe it should return 16 for P100 [\#1269](https://github.com/kokkos/kokkos/issues/1269)
|
||||
- Cuda: level 1 scratch memory bug \(reported by Stan Moore\) [\#1434](https://github.com/kokkos/kokkos/issues/1434)
|
||||
- MDRangePolicy Reduction requires value\_type typedef in Functor [\#1379](https://github.com/kokkos/kokkos/issues/1379)
|
||||
- Kokkos DeepCopy between empty views fails [\#1369](https://github.com/kokkos/kokkos/issues/1369)
|
||||
- Several issues with new CMake build infrastructure \(reported by Eric Phipps\) [\#1365](https://github.com/kokkos/kokkos/issues/1365)
|
||||
- deep\_copy between rank-1 host/device views of differing layouts without UVM no longer works \(reported by Eric Phipps\) [\#1363](https://github.com/kokkos/kokkos/issues/1363)
|
||||
- Profiling can't be disabled in CMake, and a parallel\_for is missing for tasks \(reported by Kyungjoo Kim\) [\#1349](https://github.com/kokkos/kokkos/issues/1349)
|
||||
- get\_work\_partition int overflow \(reported by berryj5\) [\#1327](https://github.com/kokkos/kokkos/issues/1327)
|
||||
- Kokkos::deep\_copy must fence even if the two views are the same [\#1303](https://github.com/kokkos/kokkos/issues/1303)
|
||||
- CudaUVMSpace::allocate/deallocate must fence [\#1302](https://github.com/kokkos/kokkos/issues/1302)
|
||||
- ViewResize on CUDA fails in Debug because of too many resources requested [\#1299](https://github.com/kokkos/kokkos/issues/1299)
|
||||
- Cuda 9 and intrepid2 calls from Panzer. [\#1183](https://github.com/kokkos/kokkos/issues/1183)
|
||||
- Slowdown due to tracking\_enabled\(\) in 2.04.00 \(found by Albany app\) [\#1016](https://github.com/kokkos/kokkos/issues/1016)
|
||||
- Bounds checking fails with zero-span Views \(reported by Stan Moore\) [\#1411](https://github.com/kokkos/kokkos/issues/1411)
|
||||
|
||||
|
||||
## [2.5.00](https://github.com/kokkos/kokkos/tree/2.5.00) (2017-12-15)
|
||||
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.04.11...2.5.00)
|
||||
|
||||
|
||||
@ -7,7 +7,7 @@ ELSE()
|
||||
ENDIF()
|
||||
|
||||
IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
cmake_minimum_required(VERSION 3.1 FATAL_ERROR)
|
||||
cmake_minimum_required(VERSION 3.3 FATAL_ERROR)
|
||||
|
||||
# Define Project Name if this is a standalone build
|
||||
IF(NOT DEFINED ${PROJECT_NAME})
|
||||
@ -37,9 +37,19 @@ IF(NOT KOKKOS_HAS_TRILINOS)
|
||||
COMMAND ${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings
|
||||
WORKING_DIRECTORY "${Kokkos_BINARY_DIR}"
|
||||
OUTPUT_FILE ${Kokkos_BINARY_DIR}/core_src_make.out
|
||||
RESULT_VARIABLE res
|
||||
RESULT_VARIABLE GEN_SETTINGS_RESULT
|
||||
)
|
||||
if (GEN_SETTINGS_RESULT)
|
||||
message(FATAL_ERROR "Kokkos settings generation failed:\n"
|
||||
"${KOKKOS_SETTINGS} make -f ${KOKKOS_SRC_PATH}/cmake/Makefile.generate_cmake_settings CXX=${CMAKE_CXX_COMPILER} generate_build_settings")
|
||||
endif()
|
||||
include(${Kokkos_BINARY_DIR}/kokkos_generated_settings.cmake)
|
||||
string(REPLACE " " ";" KOKKOS_TPL_INCLUDE_DIRS "${KOKKOS_GMAKE_TPL_INCLUDE_DIRS}")
|
||||
string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_DIRS "${KOKKOS_GMAKE_TPL_LIBRARY_DIRS}")
|
||||
string(REPLACE " " ";" KOKKOS_TPL_LIBRARY_NAMES "${KOKKOS_GMAKE_TPL_LIBRARY_NAMES}")
|
||||
list(REMOVE_ITEM KOKKOS_TPL_INCLUDE_DIRS "")
|
||||
list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_DIRS "")
|
||||
list(REMOVE_ITEM KOKKOS_TPL_LIBRARY_NAMES "")
|
||||
set_kokkos_srcs(KOKKOS_SRC ${KOKKOS_SRC})
|
||||
|
||||
#------------ NOW BUILD ------------------------------------------------------
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -19,7 +19,7 @@ snapshot Kokkos from github.com/kokkos to Trilinos.
|
||||
|
||||
3) Snapshot the current commit in the Kokkos clone into the Trilinos clone.
|
||||
This overwrites ${TRILINOS}/packages/kokkos with the content of ${KOKKOS}:
|
||||
${KOKKOS}/config/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
|
||||
${KOKKOS}/scripts/snapshot.py --verbose ${KOKKOS} ${TRILINOS}/packages
|
||||
|
||||
4) Verify the snapshot commit happened as expected
|
||||
cd ${TRILINOS}/packages/kokkos
|
||||
|
||||
@ -36,7 +36,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -9,8 +9,8 @@ KOKKOS_DEVICES ?= "OpenMP"
|
||||
#KOKKOS_DEVICES ?= "Pthreads"
|
||||
# Options:
|
||||
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX
|
||||
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,Volta70,Volta72
|
||||
# ARM: ARMv80,ARMv81,ARMv8-ThunderX,ARMv8-TX2
|
||||
# IBM: BGQ,Power7,Power8,Power9
|
||||
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
|
||||
# AMD-CPUS: AMDAVX,Ryzen,Epyc
|
||||
@ -21,7 +21,7 @@ KOKKOS_DEBUG ?= "no"
|
||||
KOKKOS_USE_TPLS ?= ""
|
||||
# Options: c++11,c++1z
|
||||
KOKKOS_CXX_STANDARD ?= "c++11"
|
||||
# Options: aggressive_vectorization,disable_profiling
|
||||
# Options: aggressive_vectorization,disable_profiling,disable_deprecated_code
|
||||
KOKKOS_OPTIONS ?= ""
|
||||
|
||||
# Default settings specific options.
|
||||
@ -48,6 +48,7 @@ KOKKOS_INTERNAL_USE_MEMKIND := $(call kokkos_has_string,$(KOKKOS_USE_TPLS),exper
|
||||
KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS := $(call kokkos_has_string,$(KOKKOS_OPTIONS),compiler_warnings)
|
||||
KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION := $(call kokkos_has_string,$(KOKKOS_OPTIONS),aggressive_vectorization)
|
||||
KOKKOS_INTERNAL_DISABLE_PROFILING := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_profiling)
|
||||
KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_deprecated_code)
|
||||
KOKKOS_INTERNAL_DISABLE_DUALVIEW_MODIFY_CHECK := $(call kokkos_has_string,$(KOKKOS_OPTIONS),disable_dualview_modify_check)
|
||||
KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT := $(call kokkos_has_string,$(KOKKOS_OPTIONS),enable_profile_load_print)
|
||||
KOKKOS_INTERNAL_CUDA_USE_LDG := $(call kokkos_has_string,$(KOKKOS_CUDA_OPTIONS),use_ldg)
|
||||
@ -93,7 +94,7 @@ KOKKOS_INTERNAL_COMPILER_INTEL := $(call kokkos_has_string,$(KOKKOS_CXX_VE
|
||||
KOKKOS_INTERNAL_COMPILER_PGI := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),PGI)
|
||||
KOKKOS_INTERNAL_COMPILER_XL := $(strip $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_CRAY := $(strip $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_NVCC := $(strip $(shell export OMPI_CXX=$(OMPI_CXX); export MPICH_CXX=$(MPICH_CXX); $(CXX) --version 2>&1 | grep nvcc | wc -l))
|
||||
KOKKOS_INTERNAL_COMPILER_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),clang)
|
||||
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),apple-darwin)
|
||||
KOKKOS_INTERNAL_COMPILER_HCC := $(call kokkos_has_string,$(KOKKOS_CXX_VERSION),HCC)
|
||||
@ -229,12 +230,16 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(call kokkos_has_string,$(KOKKOS_ARCH),Ma
|
||||
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(call kokkos_has_string,$(KOKKOS_ARCH),Maxwell53)
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal61)
|
||||
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(call kokkos_has_string,$(KOKKOS_ARCH),Pascal60)
|
||||
KOKKOS_INTERNAL_USE_ARCH_VOLTA70 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta70)
|
||||
KOKKOS_INTERNAL_USE_ARCH_VOLTA72 := $(call kokkos_has_string,$(KOKKOS_ARCH),Volta72)
|
||||
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(shell expr $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
|
||||
@ -249,6 +254,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA70) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_VOLTA72) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
|
||||
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53))
|
||||
@ -267,7 +274,8 @@ endif
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv80)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv81)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-ThunderX)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX) | bc))
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2 := $(call kokkos_has_string,$(KOKKOS_ARCH),ARMv8-TX2)
|
||||
KOKKOS_INTERNAL_USE_ARCH_ARM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2) | bc))
|
||||
|
||||
# IBM based.
|
||||
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(call kokkos_has_string,$(KOKKOS_ARCH),BGQ)
|
||||
@ -316,6 +324,9 @@ endif
|
||||
# Generating the list of Flags.
|
||||
|
||||
KOKKOS_CPPFLAGS = -I./ -I$(KOKKOS_PATH)/core/src -I$(KOKKOS_PATH)/containers/src -I$(KOKKOS_PATH)/algorithms/src
|
||||
KOKKOS_TPL_INCLUDE_DIRS =
|
||||
KOKKOS_TPL_LIBRARY_DIRS =
|
||||
KOKKOS_TPL_LIBRARY_NAMES =
|
||||
|
||||
KOKKOS_CXXFLAGS =
|
||||
ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
|
||||
@ -323,7 +334,9 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_COMPILER_WARNINGS), 1)
|
||||
endif
|
||||
|
||||
KOKKOS_LIBS = -ldl
|
||||
KOKKOS_TPL_LIBRARY_NAMES += dl
|
||||
KOKKOS_LDFLAGS = -L$(shell pwd)
|
||||
KOKKOS_LINK_FLAGS =
|
||||
KOKKOS_SRC =
|
||||
KOKKOS_HEADERS =
|
||||
|
||||
@ -437,21 +450,32 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT), 1)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_HWLOC), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
ifneq ($(HWLOC_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(HWLOC_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(HWLOC_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(HWLOC_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(HWLOC_PATH)/lib
|
||||
endif
|
||||
KOKKOS_LIBS += -lhwloc
|
||||
KOKKOS_TPL_LIBRARY_NAMES += hwloc
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HWLOC")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_LIBRT), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_USE_LIBRT")
|
||||
KOKKOS_LIBS += -lrt
|
||||
KOKKOS_TPL_LIBRARY_NAMES += rt
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
ifneq ($(MEMKIND_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(MEMKIND_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(MEMKIND_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(MEMKIND_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(MEMKIND_PATH)/lib
|
||||
endif
|
||||
KOKKOS_LIBS += -lmemkind -lnuma
|
||||
KOKKOS_TPL_LIBRARY_NAMES += memkind numa
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_HAVE_HBWSPACE")
|
||||
endif
|
||||
|
||||
@ -459,6 +483,10 @@ ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_PROFILING")
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_DEPRECATED_CODE), 0)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ENABLE_DEPRECATED_CODE")
|
||||
endif
|
||||
|
||||
tmp := $(call kokkos_append_header,"/* Optimization Settings */")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_OPT_RANGE_AGGRESSIVE_VECTORIZATION), 1)
|
||||
@ -560,6 +588,24 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX2), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV81")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_ARMV8_THUNDERX2")
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
|
||||
KOKKOS_CXXFLAGS +=
|
||||
KOKKOS_LDFLAGS +=
|
||||
else
|
||||
KOKKOS_CXXFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99
|
||||
KOKKOS_LDFLAGS += -mtune=thunderx2t99 -mcpu=thunderx2t99
|
||||
endif
|
||||
endif
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_SSE42), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_SSE42")
|
||||
|
||||
@ -754,10 +800,11 @@ endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=-arch
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
|
||||
KOKKOS_CXXFLAGS += -x cuda
|
||||
else ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG=--cuda-gpu-arch
|
||||
KOKKOS_CXXFLAGS += -x cuda
|
||||
else
|
||||
$(error Makefile.kokkos: CUDA is enabled but the compiler is neither NVCC nor Clang)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
|
||||
@ -805,6 +852,16 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_PASCAL61")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_61
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA70), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA70")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_70
|
||||
endif
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VOLTA72), 1)
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA")
|
||||
tmp := $(call kokkos_append_header,"\#define KOKKOS_ARCH_VOLTA72")
|
||||
KOKKOS_INTERNAL_CUDA_ARCH_FLAG := $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)=sm_72
|
||||
endif
|
||||
|
||||
ifneq ($(KOKKOS_INTERNAL_USE_ARCH_NVIDIA), 0)
|
||||
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CUDA_ARCH_FLAG)
|
||||
@ -850,6 +907,7 @@ ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
|
||||
|
||||
KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
|
||||
KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
|
||||
KOKKOS_TPL_LIBRARY_NAMES += hc_am m
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
|
||||
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
|
||||
@ -880,13 +938,17 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
|
||||
KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
||||
KOKKOS_LIBS += -lcudart -lcuda
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
|
||||
ifneq ($(CUDA_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(CUDA_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(CUDA_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(CUDA_PATH)/lib64
|
||||
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
|
||||
KOKKOS_CXXFLAGS += --cuda-path=$(CUDA_PATH)
|
||||
endif
|
||||
endif
|
||||
KOKKOS_LIBS += -lcudart -lcuda
|
||||
KOKKOS_TPL_LIBRARY_NAMES += cudart cuda
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
|
||||
@ -911,20 +973,27 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
|
||||
endif
|
||||
|
||||
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
KOKKOS_LINK_FLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Threads/*.hpp)
|
||||
KOKKOS_LIBS += -lpthread
|
||||
KOKKOS_TPL_LIBRARY_NAMES += pthread
|
||||
endif
|
||||
|
||||
ifeq ($(KOKKOS_INTERNAL_USE_QTHREADS), 1)
|
||||
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.cpp)
|
||||
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Qthreads/*.hpp)
|
||||
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
|
||||
ifneq ($(QTHREADS_PATH),)
|
||||
KOKKOS_CPPFLAGS += -I$(QTHREADS_PATH)/include
|
||||
KOKKOS_LDFLAGS += -L$(QTHREADS_PATH)/lib
|
||||
KOKKOS_TPL_INCLUDE_DIRS += $(QTHREADS_PATH)/include
|
||||
KOKKOS_TPL_LIBRARY_DIRS += $(QTHREADS_PATH)/lib64
|
||||
endif
|
||||
KOKKOS_LIBS += -lqthread
|
||||
KOKKOS_TPL_LIBRARY_NAMES += qthread
|
||||
endif
|
||||
|
||||
# Explicitly set the GCC Toolchain for Clang.
|
||||
@ -940,11 +1009,6 @@ ifneq ($(KOKKOS_INTERNAL_USE_MEMKIND), 1)
|
||||
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp,$(KOKKOS_SRC))
|
||||
endif
|
||||
|
||||
# Don't include Kokkos_Profiling_Interface.cpp if not using profiling to avoid a link warning.
|
||||
ifeq ($(KOKKOS_INTERNAL_DISABLE_PROFILING), 1)
|
||||
KOKKOS_SRC := $(filter-out $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp,$(KOKKOS_SRC))
|
||||
endif
|
||||
|
||||
# Don't include Kokkos_Serial.cpp or Kokkos_Serial_Task.cpp if not using Serial
|
||||
# device to avoid a link warning.
|
||||
ifneq ($(KOKKOS_INTERNAL_USE_SERIAL), 1)
|
||||
|
||||
@ -1,87 +1,101 @@
|
||||
Kokkos implements a programming model in C++ for writing performance portable
|
||||
Kokkos Core implements a programming model in C++ for writing performance portable
|
||||
applications targeting all major HPC platforms. For that purpose it provides
|
||||
abstractions for both parallel execution of code and data management.
|
||||
Kokkos is designed to target complex node architectures with N-level memory
|
||||
hierarchies and multiple types of execution resources. It currently can use
|
||||
OpenMP, Pthreads and CUDA as backend programming models.
|
||||
|
||||
Kokkos is licensed under standard 3-clause BSD terms of use. For specifics
|
||||
see the LICENSE file contained in the repository or distribution.
|
||||
Kokkos Core is part of the Kokkos C++ Performance Portability Programming EcoSystem,
|
||||
which also provides math kernels (https://github.com/kokkos/kokkos-kernels), as well as
|
||||
profiling and debugging tools (https://github.com/kokkos/kokkos-tools).
|
||||
|
||||
The core developers of Kokkos are Carter Edwards and Christian Trott
|
||||
at the Computer Science Research Institute of the Sandia National
|
||||
Laboratories.
|
||||
# Learning about Kokkos
|
||||
|
||||
The KokkosP interface and associated tools are developed by the Application
|
||||
Performance Team and Kokkos core developers at Sandia National Laboratories.
|
||||
A programming guide can be found on the Wiki; the API reference is under development.
|
||||
|
||||
To learn more about Kokkos consider watching one of our presentations:
|
||||
GTC 2015:
|
||||
http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
|
||||
http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
|
||||
For questions find us on Slack: https://kokkosteam.slack.com or open a github issue.
|
||||
|
||||
A programming guide can be found under doc/Kokkos_PG.pdf. This is an initial version
|
||||
and feedback is greatly appreciated.
|
||||
For non-public questions send an email to
|
||||
crtrott(at)sandia.gov
|
||||
|
||||
A separate repository with extensive tutorial material can be found under
|
||||
https://github.com/kokkos/kokkos-tutorials.
|
||||
|
||||
If you have a patch to contribute please feel free to issue a pull request against
|
||||
the develop branch. For major contributions it is better to contact us first
|
||||
for guidance.
|
||||
Furthermore, the 'example/tutorial' directory provides step by step tutorial
|
||||
examples which explain many of the features of Kokkos. They work with
|
||||
simple Makefiles. To build with g++ and OpenMP simply type 'make'
|
||||
in the 'example/tutorial' directory. This will build all examples in the
|
||||
subfolders. To change the build options refer to the Programming Guide
|
||||
in the compilation section.
|
||||
|
||||
For questions please send an email to
|
||||
kokkos-users@software.sandia.gov
|
||||
To learn more about Kokkos consider watching one of our presentations:
|
||||
* GTC 2015:
|
||||
- http://on-demand.gputechconf.com/gtc/2015/video/S5166.html
|
||||
- http://on-demand.gputechconf.com/gtc/2015/presentation/S5166-H-Carter-Edwards.pdf
|
||||
|
||||
For non-public questions send an email to
|
||||
hcedwar(at)sandia.gov and crtrott(at)sandia.gov
|
||||
|
||||
============================================================================
|
||||
====Requirements============================================================
|
||||
============================================================================
|
||||
# Contributing to Kokkos
|
||||
|
||||
Primary tested compilers on X86 are:
|
||||
GCC 4.8.4
|
||||
GCC 4.9.3
|
||||
GCC 5.1.0
|
||||
GCC 5.3.0
|
||||
GCC 6.1.0
|
||||
Intel 15.0.2
|
||||
Intel 16.0.1
|
||||
Intel 17.1.043
|
||||
Intel 17.4.196
|
||||
Intel 18.0.128
|
||||
Clang 3.5.2
|
||||
Clang 3.6.1
|
||||
Clang 3.7.1
|
||||
Clang 3.8.1
|
||||
Clang 3.9.0
|
||||
Clang 4.0.0
|
||||
Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44)
|
||||
PGI 17.10
|
||||
NVCC 7.0 for CUDA (with gcc 4.8.4)
|
||||
NVCC 7.5 for CUDA (with gcc 4.8.4)
|
||||
NVCC 8.0.44 for CUDA (with gcc 5.3.0)
|
||||
We are open and try to encourage contributions from external developers.
|
||||
To do so please first open an issue describing the contribution and then issue
|
||||
a pull request against the develop branch. For larger features it may be good
|
||||
to get guidance from the core development team first through the github issue.
|
||||
|
||||
Primary tested compilers on Power 8 are:
|
||||
GCC 5.4.0 (OpenMP,Serial)
|
||||
IBM XL 13.1.5 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
|
||||
NVCC 8.0.44 for CUDA (with gcc 5.4.0)
|
||||
NVCC 9.0.103 for CUDA (with gcc 6.3.0)
|
||||
Note that Kokkos Core is licensed under standard 3-clause BSD terms of use.
|
||||
This means that contributing to Kokkos allows anyone else to use your contributions
|
||||
not just for public purposes but also for closed source commercial projects.
|
||||
For specifics see the LICENSE file contained in the repository or distribution.
|
||||
|
||||
Primary tested compilers on Intel KNL are:
|
||||
GCC 6.2.0
|
||||
Intel 16.4.258 (with gcc 4.7.2)
|
||||
Intel 17.2.174 (with gcc 4.9.3)
|
||||
Intel 18.0.128 (with gcc 4.9.3)
|
||||
# Requirements
|
||||
|
||||
Other compilers working:
|
||||
X86:
|
||||
Cygwin 2.1.0 64bit with gcc 4.9.3
|
||||
### Primary tested compilers on X86 are:
|
||||
* GCC 4.8.4
|
||||
* GCC 4.9.3
|
||||
* GCC 5.1.0
|
||||
* GCC 5.3.0
|
||||
* GCC 6.1.0
|
||||
* Intel 15.0.2
|
||||
* Intel 16.0.1
|
||||
* Intel 17.1.043
|
||||
* Intel 17.4.196
|
||||
* Intel 18.0.128
|
||||
* Clang 3.6.1
|
||||
* Clang 3.7.1
|
||||
* Clang 3.8.1
|
||||
* Clang 3.9.0
|
||||
* Clang 4.0.0
|
||||
* Clang 4.0.0 for CUDA (CUDA Toolkit 8.0.44)
|
||||
* Clang 6.0.0 for CUDA (CUDA Toolkit 9.1)
|
||||
* PGI 17.10
|
||||
* NVCC 7.0 for CUDA (with gcc 4.8.4)
|
||||
* NVCC 7.5 for CUDA (with gcc 4.8.4)
|
||||
* NVCC 8.0.44 for CUDA (with gcc 5.3.0)
|
||||
* NVCC 9.1 for CUDA (with gcc 6.1.0)
|
||||
|
||||
Known non-working combinations:
|
||||
Power8:
|
||||
Pthreads backend
|
||||
### Primary tested compilers on Power 8 are:
|
||||
* GCC 5.4.0 (OpenMP,Serial)
|
||||
* IBM XL 13.1.6 (OpenMP, Serial)
|
||||
* NVCC 8.0.44 for CUDA (with gcc 5.4.0)
|
||||
* NVCC 9.0.103 for CUDA (with gcc 6.3.0 and XL 13.1.6)
|
||||
|
||||
### Primary tested compilers on Intel KNL are:
|
||||
* GCC 6.2.0
|
||||
* Intel 16.4.258 (with gcc 4.7.2)
|
||||
* Intel 17.2.174 (with gcc 4.9.3)
|
||||
* Intel 18.0.128 (with gcc 4.9.3)
|
||||
|
||||
### Primary tested compilers on ARM are:
|
||||
* GCC 6.1.0
|
||||
|
||||
### Other compilers working:
|
||||
* X86:
|
||||
- Cygwin 2.1.0 64bit with gcc 4.9.3
|
||||
|
||||
### Known non-working combinations:
|
||||
* Power8:
|
||||
- Pthreads backend
|
||||
* ARM:
|
||||
- Pthreads backend
|
||||
|
||||
|
||||
Primary tested compilers are passing in release mode
|
||||
@ -97,20 +111,7 @@ NVCC: -Wall -Wshadow -pedantic -Werror -Wsign-compare -Wtype-limits -Wuninitiali
|
||||
Other compilers are tested occasionally, in particular when pushing from develop to
|
||||
master branch, without -Werror and only for a select set of backends.
|
||||
|
||||
============================================================================
|
||||
====Getting started=========================================================
|
||||
============================================================================
|
||||
|
||||
In the 'example/tutorial' directory you will find step by step tutorial
|
||||
examples which explain many of the features of Kokkos. They work with
|
||||
simple Makefiles. To build with g++ and OpenMP simply type 'make'
|
||||
in the 'example/tutorial' directory. This will build all examples in the
|
||||
subfolders. To change the build options refer to the Programming Guide
|
||||
in the compilation section.
|
||||
|
||||
============================================================================
|
||||
====Running Unit Tests======================================================
|
||||
============================================================================
|
||||
# Running Unit Tests
|
||||
|
||||
To run the unit tests create a build directory and run the following commands
|
||||
|
||||
@ -121,30 +122,35 @@ make test
|
||||
Run KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
|
||||
changing the device type for which to build.
|
||||
|
||||
============================================================================
|
||||
====Install the library=====================================================
|
||||
============================================================================
|
||||
# Installing the library
|
||||
|
||||
To install Kokkos as a library create a build directory and run the following
|
||||
|
||||
KOKKOS_PATH/generate_makefile.bash --prefix=INSTALL_PATH
|
||||
make lib
|
||||
make kokkoslib
|
||||
make install
|
||||
|
||||
KOKKOS_PATH/generate_makefile.bash --help for more detailed options such as
|
||||
changing the device type for which to build.
|
||||
|
||||
============================================================================
|
||||
====CMakeFiles==============================================================
|
||||
============================================================================
|
||||
Note that in many cases it is preferable to build Kokkos inline with an
|
||||
application. The main reason is that you may otherwise need many different
|
||||
configurations of Kokkos installed depending on the required compile time
|
||||
features an application needs. For example there is only one default
|
||||
execution space, which means you need different installations to have OpenMP
|
||||
or Pthreads as the default space. Also for the CUDA backend there are certain
|
||||
choices, such as allowing relocatable device code, which must be made at
|
||||
installation time. Building Kokkos inline uses largely the same process
|
||||
as compiling an application against an installed Kokkos library. See for
|
||||
example benchmarks/bytes_and_flops/Makefile which can be used with an installed
|
||||
library and for an inline build.
|
||||
|
||||
The CMake files contained in this repository require Tribits and are used
|
||||
for integration with Trilinos. They do not currently support a standalone
|
||||
CMake build.
|
||||
### CMake
|
||||
|
||||
===========================================================================
|
||||
====Kokkos and CUDA UVM====================================================
|
||||
===========================================================================
|
||||
Kokkos supports being built as part of a CMake application. An example can
|
||||
be found in example/cmake_build.
|
||||
|
||||
# Kokkos and CUDA UVM
|
||||
|
||||
Kokkos does support UVM as a specific memory space called CudaUVMSpace.
|
||||
Allocations made with that space are accessible from host and device.
|
||||
@ -154,25 +160,16 @@ In either case UVM comes with a number of restrictions:
|
||||
running. This will lead to segfaults. To avoid that you either need to
|
||||
call Kokkos::Cuda::fence() (or just Kokkos::fence()), after kernels, or
|
||||
you can set the environment variable CUDA_LAUNCH_BLOCKING=1.
|
||||
Furthermore in multi socket multi GPU machines, UVM defaults to using
|
||||
zero copy allocations for technical reasons related to using multiple
|
||||
Furthermore in multi socket multi GPU machines without NVLINK, UVM defaults
|
||||
to using zero copy allocations for technical reasons related to using multiple
|
||||
GPUs from the same process. If an executable doesn't do that (e.g. each
|
||||
MPI rank of an application uses a single GPU [can be the same GPU for
|
||||
multiple MPI ranks]) you can set CUDA_MANAGED_FORCE_DEVICE_ALLOC=1.
|
||||
This will enforce proper UVM allocations, but can lead to errors if
|
||||
more than a single GPU is used by a single process.
|
||||
|
||||
===========================================================================
|
||||
====Contributing===========================================================
|
||||
===========================================================================
|
||||
|
||||
Contributions to Kokkos are welcome. To contribute, please open an issue
|
||||
where a feature request or bug can be discussed. Then issue a pull request
|
||||
with your contribution. Pull requests must be issued against the develop branch.
|
||||
|
||||
===========================================================================
|
||||
====Citing Kokkos==========================================================
|
||||
===========================================================================
|
||||
# Citing Kokkos
|
||||
|
||||
If you publish work which mentions Kokkos, please cite the following paper:
|
||||
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -1530,7 +1530,7 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,1,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0()))
|
||||
if(idx<static_cast<IndexType>(a.extent(0)))
|
||||
a(idx) = Rand::draw(gen,range);
|
||||
}
|
||||
rand_pool.free_state(gen);
|
||||
@ -1555,8 +1555,8 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,2,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
a(idx,k) = Rand::draw(gen,range);
|
||||
}
|
||||
}
|
||||
@ -1583,9 +1583,9 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,3,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
a(idx,k,l) = Rand::draw(gen,range);
|
||||
}
|
||||
}
|
||||
@ -1611,10 +1611,10 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,4, IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
a(idx,k,l,m) = Rand::draw(gen,range);
|
||||
}
|
||||
}
|
||||
@ -1640,11 +1640,11 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,5,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
|
||||
a(idx,k,l,m,n) = Rand::draw(gen,range);
|
||||
}
|
||||
}
|
||||
@ -1670,12 +1670,12 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,6,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
|
||||
a(idx,k,l,m,n,o) = Rand::draw(gen,range);
|
||||
}
|
||||
}
|
||||
@ -1701,13 +1701,13 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,7,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
|
||||
a(idx,k,l,m,n,o,p) = Rand::draw(gen,range);
|
||||
}
|
||||
}
|
||||
@ -1733,14 +1733,14 @@ struct fill_random_functor_range<ViewType,RandomPool,loops,8,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
|
||||
for(IndexType q=0;q<static_cast<IndexType>(a.dimension_7());q++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
|
||||
for(IndexType q=0;q<static_cast<IndexType>(a.extent(7));q++)
|
||||
a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,range);
|
||||
}
|
||||
}
|
||||
@ -1765,7 +1765,7 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,1,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0()))
|
||||
if(idx<static_cast<IndexType>(a.extent(0)))
|
||||
a(idx) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
rand_pool.free_state(gen);
|
||||
@ -1790,8 +1790,8 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,2,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
a(idx,k) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
}
|
||||
@ -1818,9 +1818,9 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,3,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
a(idx,k,l) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
}
|
||||
@ -1846,10 +1846,10 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,4,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
a(idx,k,l,m) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
}
|
||||
@ -1875,11 +1875,11 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,5,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())){
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_1());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_2());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_3());n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_4());o++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))){
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(1));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(2));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(3));n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.extent(4));o++)
|
||||
a(idx,l,m,n,o) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
}
|
||||
@ -1905,12 +1905,12 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,6,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
|
||||
a(idx,k,l,m,n,o) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
}
|
||||
@ -1937,13 +1937,13 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,7,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
|
||||
a(idx,k,l,m,n,o,p) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
}
|
||||
@ -1969,14 +1969,14 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,8,IndexType>{
|
||||
typename RandomPool::generator_type gen = rand_pool.get_state();
|
||||
for(IndexType j=0;j<loops;j++) {
|
||||
const IndexType idx = i*loops+j;
|
||||
if(idx<static_cast<IndexType>(a.dimension_0())) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.dimension_1());k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.dimension_2());l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.dimension_3());m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.dimension_4());n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.dimension_5());o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.dimension_6());p++)
|
||||
for(IndexType q=0;q<static_cast<IndexType>(a.dimension_7());q++)
|
||||
if(idx<static_cast<IndexType>(a.extent(0))) {
|
||||
for(IndexType k=0;k<static_cast<IndexType>(a.extent(1));k++)
|
||||
for(IndexType l=0;l<static_cast<IndexType>(a.extent(2));l++)
|
||||
for(IndexType m=0;m<static_cast<IndexType>(a.extent(3));m++)
|
||||
for(IndexType n=0;n<static_cast<IndexType>(a.extent(4));n++)
|
||||
for(IndexType o=0;o<static_cast<IndexType>(a.extent(5));o++)
|
||||
for(IndexType p=0;p<static_cast<IndexType>(a.extent(6));p++)
|
||||
for(IndexType q=0;q<static_cast<IndexType>(a.extent(7));q++)
|
||||
a(idx,k,l,m,n,o,p,q) = Rand::draw(gen,begin,end);
|
||||
}
|
||||
}
|
||||
@ -1988,14 +1988,14 @@ struct fill_random_functor_begin_end<ViewType,RandomPool,loops,8,IndexType>{
|
||||
|
||||
template<class ViewType, class RandomPool, class IndexType = int64_t>
|
||||
void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type range) {
|
||||
int64_t LDA = a.dimension_0();
|
||||
int64_t LDA = a.extent(0);
|
||||
if(LDA>0)
|
||||
parallel_for((LDA+127)/128,Impl::fill_random_functor_range<ViewType,RandomPool,128,ViewType::Rank,IndexType>(a,g,range));
|
||||
}
|
||||
|
||||
template<class ViewType, class RandomPool, class IndexType = int64_t>
|
||||
void fill_random(ViewType a, RandomPool g, typename ViewType::const_value_type begin,typename ViewType::const_value_type end ) {
|
||||
int64_t LDA = a.dimension_0();
|
||||
int64_t LDA = a.extent(0);
|
||||
if(LDA>0)
|
||||
parallel_for((LDA+127)/128,Impl::fill_random_functor_begin_end<ViewType,RandomPool,128,ViewType::Rank,IndexType>(a,g,begin,end));
|
||||
}
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -120,7 +120,6 @@ public:
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
// printf("copy: dst(%i) src(%i)\n",i+dst_offset,i);
|
||||
copy_op::copy(dst_values,i+dst_offset,src_values,i);
|
||||
}
|
||||
};
|
||||
@ -151,20 +150,22 @@ public:
|
||||
DstViewType dst_values ;
|
||||
perm_view_type sort_order ;
|
||||
src_view_type src_values ;
|
||||
int src_offset ;
|
||||
|
||||
copy_permute_functor( DstViewType const & dst_values_
|
||||
, PermuteViewType const & sort_order_
|
||||
, SrcViewType const & src_values_
|
||||
, int const & src_offset_
|
||||
)
|
||||
: dst_values( dst_values_ )
|
||||
, sort_order( sort_order_ )
|
||||
, src_values( src_values_ )
|
||||
, src_offset( src_offset_ )
|
||||
{}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const int& i) const {
|
||||
// printf("copy_permute: dst(%i) src(%i)\n",i,sort_order(i));
|
||||
copy_op::copy(dst_values,i,src_values,sort_order(i));
|
||||
copy_op::copy(dst_values,i,src_values,src_offset+sort_order(i));
|
||||
}
|
||||
};
|
||||
|
||||
@ -259,19 +260,21 @@ public:
|
||||
// Create the permutation vector, the bin_offset array and the bin_count array. Can be called again if keys changed
|
||||
void create_permute_vector() {
|
||||
const size_t len = range_end - range_begin ;
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_count_tag> (0,len),*this);
|
||||
Kokkos::parallel_scan(Kokkos::RangePolicy<execution_space,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
|
||||
Kokkos::parallel_for ("Kokkos::Sort::BinCount",Kokkos::RangePolicy<execution_space,bin_count_tag> (0,len),*this);
|
||||
Kokkos::parallel_scan("Kokkos::Sort::BinOffset",Kokkos::RangePolicy<execution_space,bin_offset_tag> (0,bin_op.max_bins()) ,*this);
|
||||
|
||||
Kokkos::deep_copy(bin_count_atomic,0);
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_binning_tag> (0,len),*this);
|
||||
Kokkos::parallel_for ("Kokkos::Sort::BinBinning",Kokkos::RangePolicy<execution_space,bin_binning_tag> (0,len),*this);
|
||||
|
||||
if(sort_within_bins)
|
||||
Kokkos::parallel_for (Kokkos::RangePolicy<execution_space,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
|
||||
Kokkos::parallel_for ("Kokkos::Sort::BinSort",Kokkos::RangePolicy<execution_space,bin_sort_bins_tag>(0,bin_op.max_bins()) ,*this);
|
||||
}
|
||||
|
||||
// Sort a view with respect ot the first dimension using the permutation array
|
||||
// Sort a subset of a view with respect to the first dimension using the permutation array
|
||||
template<class ValuesViewType>
|
||||
void sort( ValuesViewType const & values)
|
||||
void sort( ValuesViewType const & values
|
||||
, int values_range_begin
|
||||
, int values_range_end) const
|
||||
{
|
||||
typedef
|
||||
Kokkos::View< typename ValuesViewType::data_type,
|
||||
@ -280,6 +283,10 @@ public:
|
||||
scratch_view_type ;
|
||||
|
||||
const size_t len = range_end - range_begin ;
|
||||
const size_t values_len = values_range_end - values_range_begin ;
|
||||
if (len != values_len) {
|
||||
Kokkos::abort("BinSort::sort: values range length != permutation vector length");
|
||||
}
|
||||
|
||||
scratch_view_type
|
||||
sorted_values("Scratch",
|
||||
@ -297,19 +304,25 @@ public:
|
||||
, offset_type /* PermuteViewType */
|
||||
, ValuesViewType /* SrcViewType */
|
||||
>
|
||||
functor( sorted_values , sort_order , values );
|
||||
functor( sorted_values , sort_order , values, values_range_begin - range_begin );
|
||||
|
||||
parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor);
|
||||
parallel_for("Kokkos::Sort::CopyPermute", Kokkos::RangePolicy<execution_space>(0,len),functor);
|
||||
}
|
||||
|
||||
{
|
||||
copy_functor< ValuesViewType , scratch_view_type >
|
||||
functor( values , range_begin , sorted_values );
|
||||
|
||||
parallel_for( Kokkos::RangePolicy<execution_space>(0,len),functor);
|
||||
parallel_for("Kokkos::Sort::Copy", Kokkos::RangePolicy<execution_space>(0,len),functor);
|
||||
}
|
||||
}
|
||||
|
||||
template<class ValuesViewType>
|
||||
void sort( ValuesViewType const & values ) const
|
||||
{
|
||||
this->sort( values, 0, /*values.extent(0)*/ range_end - range_begin );
|
||||
}
|
||||
|
||||
// Get the permutation vector
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
offset_type get_permute_vector() const { return sort_order;}
|
||||
@ -327,7 +340,7 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator() (const bin_count_tag& tag, const int& i) const {
|
||||
const int j = range_begin + i ;
|
||||
bin_count_atomic(bin_op.bin(keys,j))++;
|
||||
bin_count_atomic(bin_op.bin(keys, j))++;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -512,7 +525,7 @@ void sort( ViewType const & view , bool const always_use_kokkos_sort = false)
|
||||
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.extent(0)),
|
||||
parallel_reduce("Kokkos::Sort::FindExtent",Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.extent(0)),
|
||||
Impl::min_max_functor<ViewType>(view),reducer);
|
||||
if(result.min_val == result.max_val) return;
|
||||
BinSort<ViewType, CompType> bin_sort(view,CompType(view.extent(0)/2,result.min_val,result.max_val),true);
|
||||
@ -532,7 +545,7 @@ void sort( ViewType view
|
||||
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
|
||||
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
|
||||
|
||||
parallel_reduce( range_policy( begin , end )
|
||||
parallel_reduce("Kokkos::Sort::FindExtent", range_policy( begin , end )
|
||||
, Impl::min_max_functor<ViewType>(view),reducer );
|
||||
|
||||
if(result.min_val == result.max_val) return;
|
||||
@ -541,8 +554,9 @@ void sort( ViewType view
|
||||
bin_sort(view,begin,end,CompType((end-begin)/2,result.min_val,result.max_val),true);
|
||||
|
||||
bin_sort.create_permute_vector();
|
||||
bin_sort.sort(view);
|
||||
bin_sort.sort(view,begin,end);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -61,14 +61,9 @@ class cuda : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
std::cout << std::setprecision(5) << std::scientific;
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Cuda::initialize( Kokkos::Cuda::SelectDevice(0) );
|
||||
}
|
||||
static void TearDownTestCase()
|
||||
{
|
||||
Kokkos::Cuda::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -60,25 +60,10 @@ protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
std::cout << std::setprecision(5) << std::scientific;
|
||||
|
||||
int threads_count = 0;
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp atomic
|
||||
++threads_count;
|
||||
}
|
||||
|
||||
if (threads_count > 3) {
|
||||
threads_count /= 2;
|
||||
}
|
||||
|
||||
Kokkos::OpenMP::initialize( threads_count );
|
||||
Kokkos::OpenMP::print_configuration( std::cout );
|
||||
}
|
||||
|
||||
static void TearDownTestCase()
|
||||
{
|
||||
Kokkos::OpenMP::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -62,13 +62,9 @@ protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
std::cout << std::setprecision(5) << std::scientific;
|
||||
Kokkos::HostSpace::execution_space::initialize();
|
||||
Kokkos::Experimental::ROCm::initialize( Kokkos::Experimental::ROCm::SelectDevice(0) );
|
||||
}
|
||||
static void TearDownTestCase()
|
||||
{
|
||||
Kokkos::Experimental::ROCm::finalize();
|
||||
Kokkos::HostSpace::execution_space::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -62,13 +62,10 @@ class serial : public ::testing::Test {
|
||||
protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
std::cout << std::setprecision (5) << std::scientific;
|
||||
Kokkos::Serial::initialize ();
|
||||
}
|
||||
|
||||
static void TearDownTestCase ()
|
||||
{
|
||||
Kokkos::Serial::finalize ();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -171,10 +171,10 @@ void test_3D_sort(unsigned int n) {
|
||||
double sum_after = 0.0;
|
||||
unsigned int sort_fails = 0;
|
||||
|
||||
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
|
||||
Kokkos::parallel_reduce(keys.extent(0),sum3D<ExecutionSpace, KeyType>(keys),sum_before);
|
||||
|
||||
int bin_1d = 1;
|
||||
while( bin_1d*bin_1d*bin_1d*4< (int) keys.dimension_0() ) bin_1d*=2;
|
||||
while( bin_1d*bin_1d*bin_1d*4< (int) keys.extent(0) ) bin_1d*=2;
|
||||
int bin_max[3] = {bin_1d,bin_1d,bin_1d};
|
||||
typename KeyViewType::value_type min[3] = {0,0,0};
|
||||
typename KeyViewType::value_type max[3] = {100,100,100};
|
||||
@ -186,8 +186,8 @@ void test_3D_sort(unsigned int n) {
|
||||
Sorter.create_permute_vector();
|
||||
Sorter.template sort< KeyViewType >(keys);
|
||||
|
||||
Kokkos::parallel_reduce(keys.dimension_0(),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
|
||||
Kokkos::parallel_reduce(keys.dimension_0()-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
|
||||
Kokkos::parallel_reduce(keys.extent(0),sum3D<ExecutionSpace, KeyType>(keys),sum_after);
|
||||
Kokkos::parallel_reduce(keys.extent(0)-1,bin3d_is_sorted_struct<ExecutionSpace, KeyType>(keys,bin_1d,min[0],max[0]),sort_fails);
|
||||
|
||||
double ratio = sum_before/sum_after;
|
||||
double epsilon = 1e-10;
|
||||
@ -205,24 +205,13 @@ void test_3D_sort(unsigned int n) {
|
||||
template<class ExecutionSpace, typename KeyType>
|
||||
void test_dynamic_view_sort(unsigned int n )
|
||||
{
|
||||
typedef typename ExecutionSpace::memory_space memory_space ;
|
||||
typedef Kokkos::Experimental::DynamicView<KeyType*,ExecutionSpace> KeyDynamicViewType;
|
||||
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
|
||||
|
||||
const size_t upper_bound = 2 * n ;
|
||||
const size_t min_chunk_size = 1024;
|
||||
|
||||
const size_t total_alloc_size = n * sizeof(KeyType) * 1.2 ;
|
||||
const size_t superblock_size = std::min(total_alloc_size, size_t(1000000));
|
||||
|
||||
typename KeyDynamicViewType::memory_pool
|
||||
pool( memory_space()
|
||||
, n * sizeof(KeyType) * 1.2
|
||||
, 500 /* min block size in bytes */
|
||||
, 30000 /* max block size in bytes */
|
||||
, superblock_size
|
||||
);
|
||||
|
||||
KeyDynamicViewType keys("Keys",pool,upper_bound);
|
||||
KeyDynamicViewType keys("Keys", min_chunk_size, upper_bound);
|
||||
|
||||
keys.resize_serial(n);
|
||||
|
||||
@ -230,13 +219,15 @@ void test_dynamic_view_sort(unsigned int n )
|
||||
|
||||
// Test sorting array with all numbers equal
|
||||
Kokkos::deep_copy(keys_view,KeyType(1));
|
||||
Kokkos::Experimental::deep_copy(keys,keys_view);
|
||||
Kokkos::deep_copy(keys,keys_view);
|
||||
Kokkos::sort(keys, 0 /* begin */ , n /* end */ );
|
||||
|
||||
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
|
||||
Kokkos::fill_random(keys_view,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
|
||||
|
||||
Kokkos::Experimental::deep_copy(keys,keys_view);
|
||||
ExecutionSpace::fence();
|
||||
Kokkos::deep_copy(keys,keys_view);
|
||||
//ExecutionSpace::fence();
|
||||
|
||||
double sum_before = 0.0;
|
||||
double sum_after = 0.0;
|
||||
@ -246,7 +237,9 @@ void test_dynamic_view_sort(unsigned int n )
|
||||
|
||||
Kokkos::sort(keys, 0 /* begin */ , n /* end */ );
|
||||
|
||||
Kokkos::Experimental::deep_copy( keys_view , keys );
|
||||
ExecutionSpace::fence(); // Need this fence to prevent BusError with Cuda
|
||||
Kokkos::deep_copy( keys_view , keys );
|
||||
//ExecutionSpace::fence();
|
||||
|
||||
Kokkos::parallel_reduce(n,sum<ExecutionSpace, KeyType>(keys_view),sum_after);
|
||||
Kokkos::parallel_reduce(n-1,is_sorted_struct<ExecutionSpace, KeyType>(keys_view),sort_fails);
|
||||
@ -269,6 +262,74 @@ void test_dynamic_view_sort(unsigned int n )
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<class ExecutionSpace>
|
||||
void test_issue_1160()
|
||||
{
|
||||
Kokkos::View<int*, ExecutionSpace> element_("element", 10);
|
||||
Kokkos::View<double*, ExecutionSpace> x_("x", 10);
|
||||
Kokkos::View<double*, ExecutionSpace> v_("y", 10);
|
||||
|
||||
auto h_element = Kokkos::create_mirror_view(element_);
|
||||
auto h_x = Kokkos::create_mirror_view(x_);
|
||||
auto h_v = Kokkos::create_mirror_view(v_);
|
||||
|
||||
h_element(0) = 9;
|
||||
h_element(1) = 8;
|
||||
h_element(2) = 7;
|
||||
h_element(3) = 6;
|
||||
h_element(4) = 5;
|
||||
h_element(5) = 4;
|
||||
h_element(6) = 3;
|
||||
h_element(7) = 2;
|
||||
h_element(8) = 1;
|
||||
h_element(9) = 0;
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
h_v.access(i, 0) = h_x.access(i, 0) = double(h_element(i));
|
||||
}
|
||||
Kokkos::deep_copy(element_, h_element);
|
||||
Kokkos::deep_copy(x_, h_x);
|
||||
Kokkos::deep_copy(v_, h_v);
|
||||
|
||||
typedef decltype(element_) KeyViewType;
|
||||
typedef Kokkos::BinOp1D< KeyViewType > BinOp;
|
||||
|
||||
int begin = 3;
|
||||
int end = 8;
|
||||
auto max = h_element(begin);
|
||||
auto min = h_element(end - 1);
|
||||
BinOp binner(end - begin, min, max);
|
||||
|
||||
Kokkos::BinSort<KeyViewType , BinOp > Sorter(element_,begin,end,binner,false);
|
||||
Sorter.create_permute_vector();
|
||||
Sorter.sort(element_,begin,end);
|
||||
|
||||
Sorter.sort(x_,begin,end);
|
||||
Sorter.sort(v_,begin,end);
|
||||
|
||||
Kokkos::deep_copy(h_element, element_);
|
||||
Kokkos::deep_copy(h_x, x_);
|
||||
Kokkos::deep_copy(h_v, v_);
|
||||
|
||||
ASSERT_EQ(h_element(0), 9);
|
||||
ASSERT_EQ(h_element(1), 8);
|
||||
ASSERT_EQ(h_element(2), 7);
|
||||
ASSERT_EQ(h_element(3), 2);
|
||||
ASSERT_EQ(h_element(4), 3);
|
||||
ASSERT_EQ(h_element(5), 4);
|
||||
ASSERT_EQ(h_element(6), 5);
|
||||
ASSERT_EQ(h_element(7), 6);
|
||||
ASSERT_EQ(h_element(8), 1);
|
||||
ASSERT_EQ(h_element(9), 0);
|
||||
|
||||
for (int i = 0; i < 10; ++i) {
|
||||
ASSERT_EQ(h_element(i), int(h_x.access(i, 0)));
|
||||
ASSERT_EQ(h_element(i), int(h_v.access(i, 0)));
|
||||
}
|
||||
}
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
template<class ExecutionSpace, typename KeyType>
|
||||
void test_sort(unsigned int N)
|
||||
{
|
||||
@ -278,6 +339,7 @@ void test_sort(unsigned int N)
|
||||
test_3D_sort<ExecutionSpace,KeyType>(N);
|
||||
test_dynamic_view_sort<ExecutionSpace,KeyType>(N*N);
|
||||
#endif
|
||||
test_issue_1160<ExecutionSpace>();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -63,25 +63,10 @@ protected:
|
||||
static void SetUpTestCase()
|
||||
{
|
||||
std::cout << std::setprecision(5) << std::scientific;
|
||||
|
||||
unsigned num_threads = 4;
|
||||
|
||||
if (Kokkos::hwloc::available()) {
|
||||
num_threads = Kokkos::hwloc::get_available_numa_count()
|
||||
* Kokkos::hwloc::get_available_cores_per_numa()
|
||||
// * Kokkos::hwloc::get_available_threads_per_core()
|
||||
;
|
||||
|
||||
}
|
||||
|
||||
std::cout << "Threads: " << num_threads << std::endl;
|
||||
|
||||
Kokkos::Threads::initialize( num_threads );
|
||||
}
|
||||
|
||||
static void TearDownTestCase()
|
||||
{
|
||||
Kokkos::Threads::finalize();
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
@ -35,16 +35,20 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
#include <gtest/gtest.h>
|
||||
#include <Kokkos_Core.hpp>
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
Kokkos::initialize(argc,argv);
|
||||
::testing::InitGoogleTest(&argc,argv);
|
||||
return RUN_ALL_TESTS();
|
||||
int result = RUN_ALL_TESTS();
|
||||
Kokkos::finalize();
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
@ -10,7 +10,7 @@ default: build
|
||||
|
||||
|
||||
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
|
||||
CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
|
||||
CXX = ${KOKKOS_PATH}/bin/nvcc_wrapper
|
||||
EXE = ${EXE_NAME}.cuda
|
||||
KOKKOS_CUDA_OPTIONS = "enable_lambda"
|
||||
else
|
||||
|
||||
@ -3,7 +3,7 @@
|
||||
# BytesAndFlops
|
||||
cd build/bytes_and_flops
|
||||
|
||||
USE_CUDA=`grep "_CUDA 1" KokkosCore_config.h | wc -l`
|
||||
USE_CUDA=`grep "_CUDA" KokkosCore_config.h | wc -l`
|
||||
|
||||
if [[ ${USE_CUDA} > 0 ]]; then
|
||||
BAF_EXE=bytes_and_flops.cuda
|
||||
@ -41,4 +41,4 @@ cd ../..
|
||||
echo "MiniFE: ${FE_PERF_1} ${FE_PERF_2}"
|
||||
|
||||
PERF_RESULT=`echo "${BAF_PERF_1} ${BAF_PERF_2} ${MD_PERF_1} ${MD_PERF_2} ${FE_PERF_1} ${FE_PERF_2}" | awk '{print ($1+$2+$3+$4+$5+$6)/6}'`
|
||||
echo "Total Result: " ${PERF_RESULT}
|
||||
echo "Total Result: " ${PERF_RESULT}
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -2,7 +2,7 @@
|
||||
# FindHWLOC
|
||||
# ----------
|
||||
#
|
||||
# Try to find HWLOC.
|
||||
# Try to find HWLOC, based on KOKKOS_HWLOC_DIR
|
||||
#
|
||||
# The following variables are defined:
|
||||
#
|
||||
@ -10,8 +10,8 @@
|
||||
# HWLOC_INCLUDE_DIR - HWLOC include directory
|
||||
# HWLOC_LIBRARIES - Libraries needed to use HWLOC
|
||||
|
||||
find_path(HWLOC_INCLUDE_DIR hwloc.h)
|
||||
find_library(HWLOC_LIBRARIES hwloc)
|
||||
find_path(HWLOC_INCLUDE_DIR hwloc.h PATHS "${KOKKOS_HWLOC_DIR}/include")
|
||||
find_library(HWLOC_LIBRARIES hwloc PATHS "${KOKKOS_HWLOC_DIR}/lib")
|
||||
|
||||
include(FindPackageHandleStandardArgs)
|
||||
find_package_handle_standard_args(HWLOC DEFAULT_MSG
|
||||
|
||||
@ -1,7 +1,3 @@
|
||||
# kokkos_generated_settings.cmake includes the kokkos library itself in KOKKOS_LIBS
|
||||
# which we do not want to use for the cmake builds so clean this up
|
||||
string(REGEX REPLACE "-lkokkos" "" KOKKOS_LIBS ${KOKKOS_LIBS})
|
||||
|
||||
############################ Detect if submodule ###############################
|
||||
#
|
||||
# With thanks to StackOverflow:
|
||||
@ -73,6 +69,19 @@ IF(KOKKOS_SEPARATE_LIBS)
|
||||
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
|
||||
)
|
||||
|
||||
target_include_directories(
|
||||
kokkoscore
|
||||
PUBLIC
|
||||
${KOKKOS_TPL_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES)
|
||||
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
|
||||
target_link_libraries(kokkoscore PUBLIC ${LIB_${lib}})
|
||||
endforeach()
|
||||
|
||||
target_link_libraries(kokkoscore PUBLIC "${KOKKOS_LINK_FLAGS}")
|
||||
|
||||
# Install the kokkoscore library
|
||||
INSTALL (TARGETS kokkoscore
|
||||
EXPORT KokkosTargets
|
||||
@ -81,12 +90,6 @@ IF(KOKKOS_SEPARATE_LIBS)
|
||||
RUNTIME DESTINATION ${CMAKE_INSTALL_PREFIX}/bin
|
||||
)
|
||||
|
||||
TARGET_LINK_LIBRARIES(
|
||||
kokkoscore
|
||||
${KOKKOS_LD_FLAGS}
|
||||
${KOKKOS_EXTRA_LIBS_LIST}
|
||||
)
|
||||
|
||||
# kokkoscontainers
|
||||
if (DEFINED KOKKOS_CONTAINERS_SRCS)
|
||||
ADD_LIBRARY(
|
||||
@ -144,12 +147,19 @@ ELSE()
|
||||
PUBLIC $<$<COMPILE_LANGUAGE:CXX>:${KOKKOS_CXX_FLAGS}>
|
||||
)
|
||||
|
||||
TARGET_LINK_LIBRARIES(
|
||||
target_include_directories(
|
||||
kokkos
|
||||
${KOKKOS_LD_FLAGS}
|
||||
${KOKKOS_EXTRA_LIBS_LIST}
|
||||
PUBLIC
|
||||
${KOKKOS_TPL_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
foreach(lib IN LISTS KOKKOS_TPL_LIBRARY_NAMES)
|
||||
find_library(LIB_${lib} ${lib} PATHS ${KOKKOS_TPL_LIBRARY_DIRS})
|
||||
target_link_libraries(kokkos PUBLIC ${LIB_${lib}})
|
||||
endforeach()
|
||||
|
||||
target_link_libraries(kokkos PUBLIC "${KOKKOS_LINK_FLAGS}")
|
||||
|
||||
# Install the kokkos library
|
||||
INSTALL (TARGETS kokkos
|
||||
EXPORT KokkosTargets
|
||||
|
||||
@ -25,11 +25,12 @@ list(APPEND KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST
|
||||
Cuda_LDG_Intrinsic
|
||||
Debug
|
||||
Debug_DualView_Modify_Check
|
||||
Debug_Bounds_Checkt
|
||||
Debug_Bounds_Check
|
||||
Compiler_Warnings
|
||||
Profiling
|
||||
Profiling_Load_Print
|
||||
Aggressive_Vectorization
|
||||
Deprecated_Code
|
||||
)
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
@ -263,7 +264,8 @@ set(KOKKOS_ENABLE_PROFILING ${KOKKOS_INTERNAL_ENABLE_PROFILING_DEFAULT} CACHE BO
|
||||
set_kokkos_default_default(PROFILING_LOAD_PRINT OFF)
|
||||
set(KOKKOS_ENABLE_PROFILING_LOAD_PRINT ${KOKKOS_INTERNAL_ENABLE_PROFILING_LOAD_PRINT_DEFAULT} CACHE BOOL "Enable profile load print.")
|
||||
|
||||
|
||||
set_kokkos_default_default(DEPRECATED_CODE ON)
|
||||
set(KOKKOS_ENABLE_DEPRECATED_CODE ${KOKKOS_INTERNAL_ENABLE_DEPRECATED_CODE_DEFAULT} CACHE BOOL "Enable deprecated code.")
|
||||
|
||||
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
@ -14,6 +14,13 @@
|
||||
#-------------------------------------------------------------------------------
|
||||
|
||||
# Ensure that KOKKOS_ARCH is in the ARCH_LIST
|
||||
if (KOKKOS_ARCH MATCHES ",")
|
||||
message("-- Detected a comma in: KOKKOS_ARCH=${KOKKOS_ARCH}")
|
||||
message("-- Although we prefer KOKKOS_ARCH to be semicolon-delimited, we do allow")
|
||||
message("-- comma-delimited values for compatibility with scripts (see github.com/trilinos/Trilinos/issues/2330)")
|
||||
string(REPLACE "," ";" KOKKOS_ARCH "${KOKKOS_ARCH}")
|
||||
message("-- Commas were changed to semicolons, now KOKKOS_ARCH=${KOKKOS_ARCH}")
|
||||
endif()
|
||||
foreach(arch ${KOKKOS_ARCH})
|
||||
list(FIND KOKKOS_ARCH_LIST ${arch} indx)
|
||||
if (indx EQUAL -1)
|
||||
@ -23,14 +30,13 @@ foreach(arch ${KOKKOS_ARCH})
|
||||
endforeach()
|
||||
|
||||
# KOKKOS_SETTINGS uses KOKKOS_ARCH
|
||||
string(REPLACE ";" "," KOKKOS_ARCH "${KOKKOS_ARCH}")
|
||||
set(KOKKOS_ARCH ${KOKKOS_ARCH})
|
||||
string(REPLACE ";" "," KOKKOS_GMAKE_ARCH "${KOKKOS_ARCH}")
|
||||
|
||||
# From Makefile.kokkos: Options: yes,no
|
||||
if(${KOKKOS_ENABLE_DEBUG})
|
||||
set(KOKKOS_DEBUG yes)
|
||||
set(KOKKOS_GMAKE_DEBUG yes)
|
||||
else()
|
||||
set(KOKKOS_DEBUG no)
|
||||
set(KOKKOS_GMAKE_DEBUG no)
|
||||
endif()
|
||||
|
||||
#------------------------------- KOKKOS_DEVICES --------------------------------
|
||||
@ -43,10 +49,10 @@ foreach(devopt ${KOKKOS_DEVICES_LIST})
|
||||
endif ()
|
||||
endforeach()
|
||||
# List needs to be comma-delmitted
|
||||
string(REPLACE ";" "," KOKKOS_DEVICES "${KOKKOS_DEVICESl}")
|
||||
string(REPLACE ";" "," KOKKOS_GMAKE_DEVICES "${KOKKOS_DEVICESl}")
|
||||
|
||||
#------------------------------- KOKKOS_OPTIONS --------------------------------
|
||||
# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling
|
||||
# From Makefile.kokkos: Options: aggressive_vectorization,disable_profiling,disable_deprecated_code
|
||||
#compiler_warnings, aggressive_vectorization, disable_profiling, disable_dualview_modify_check, enable_profile_load_print
|
||||
|
||||
set(KOKKOS_OPTIONSl)
|
||||
@ -57,7 +63,10 @@ if(${KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION})
|
||||
list(APPEND KOKKOS_OPTIONSl aggressive_vectorization)
|
||||
endif()
|
||||
if(NOT ${KOKKOS_ENABLE_PROFILING})
|
||||
list(APPEND KOKKOS_OPTIONSl disable_vectorization)
|
||||
list(APPEND KOKKOS_OPTIONSl disable_profiling)
|
||||
endif()
|
||||
if(NOT ${KOKKOS_ENABLE_DEPRECATED_CODE})
|
||||
list(APPEND KOKKOS_OPTIONSl disable_deprecated_code)
|
||||
endif()
|
||||
if(NOT ${KOKKOS_ENABLE_DEBUG_DUALVIEW_MODIFY_CHECK})
|
||||
list(APPEND KOKKOS_OPTIONSl disable_dualview_modify_check)
|
||||
@ -66,7 +75,7 @@ if(${KOKKOS_ENABLE_PROFILING_LOAD_PRINT})
|
||||
list(APPEND KOKKOS_OPTIONSl enable_profile_load_print)
|
||||
endif()
|
||||
# List needs to be comma-delimitted
|
||||
string(REPLACE ";" "," KOKKOS_OPTIONS "${KOKKOS_OPTIONSl}")
|
||||
string(REPLACE ";" "," KOKKOS_GMAKE_OPTIONS "${KOKKOS_OPTIONSl}")
|
||||
|
||||
|
||||
#------------------------------- KOKKOS_USE_TPLS -------------------------------
|
||||
@ -78,19 +87,19 @@ foreach(tplopt ${KOKKOS_USE_TPLS_LIST})
|
||||
endif ()
|
||||
endforeach()
|
||||
# List needs to be comma-delimitted
|
||||
string(REPLACE ";" "," KOKKOS_USE_TPLS "${KOKKOS_USE_TPLSl}")
|
||||
string(REPLACE ";" "," KOKKOS_GMAKE_USE_TPLS "${KOKKOS_USE_TPLSl}")
|
||||
|
||||
|
||||
#------------------------------- KOKKOS_CUDA_OPTIONS ---------------------------
|
||||
# Construct the Makefile options
|
||||
set(KOKKOS_CUDA_OPTIONS)
|
||||
set(KOKKOS_CUDA_OPTIONSl)
|
||||
foreach(cudaopt ${KOKKOS_CUDA_OPTIONS_LIST})
|
||||
if (${KOKKOS_ENABLE_CUDA_${cudaopt}})
|
||||
list(APPEND KOKKOS_CUDA_OPTIONSl ${KOKKOS_INTERNAL_${cudaopt}})
|
||||
endif ()
|
||||
endforeach()
|
||||
# List needs to be comma-delmitted
|
||||
string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
|
||||
string(REPLACE ";" "," KOKKOS_GMAKE_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
|
||||
|
||||
#------------------------------- PATH VARIABLES --------------------------------
|
||||
# Want makefile to use same executables specified which means modifying
|
||||
@ -100,10 +109,10 @@ string(REPLACE ";" "," KOKKOS_CUDA_OPTIONS "${KOKKOS_CUDA_OPTIONSl}")
|
||||
|
||||
set(KOKKOS_INTERNAL_PATHS)
|
||||
set(addpathl)
|
||||
foreach(kvar "CUDA;QTHREADS;${KOKKOS_USE_TPLS_LIST}")
|
||||
foreach(kvar IN LISTS KOKKOS_USE_TPLS_LIST ITEMS CUDA QTHREADS)
|
||||
if(${KOKKOS_ENABLE_${kvar}})
|
||||
if(DEFINED KOKKOS_${kvar}_DIR)
|
||||
set(KOKKOS_INTERNAL_PATHS "${KOKKOS_INTERNAL_PATHS} ${kvar}_PATH=${KOKKOS_${kvar}_DIR}")
|
||||
set(KOKKOS_INTERNAL_PATHS ${KOKKOS_INTERNAL_PATHS} "${kvar}_PATH=${KOKKOS_${kvar}_DIR}")
|
||||
if(IS_DIRECTORY ${KOKKOS_${kvar}_DIR}/bin)
|
||||
list(APPEND addpathl ${KOKKOS_${kvar}_DIR}/bin)
|
||||
endif()
|
||||
@ -124,10 +133,9 @@ set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_INSTALL_PATH=${CMAKE_INSTALL_PREFI
|
||||
|
||||
# Form of KOKKOS_foo=$KOKKOS_foo
|
||||
foreach(kvar ARCH;DEVICES;DEBUG;OPTIONS;CUDA_OPTIONS;USE_TPLS)
|
||||
set(KOKKOS_VAR KOKKOS_${kvar})
|
||||
if(DEFINED KOKKOS_${kvar})
|
||||
if (NOT "${${KOKKOS_VAR}}" STREQUAL "")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_VAR}=${${KOKKOS_VAR}})
|
||||
if(DEFINED KOKKOS_GMAKE_${kvar})
|
||||
if (NOT "${KOKKOS_GMAKE_${kvar}}" STREQUAL "")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} KOKKOS_${kvar}=${KOKKOS_GMAKE_${kvar}})
|
||||
endif()
|
||||
endif()
|
||||
endforeach()
|
||||
@ -147,7 +155,7 @@ if (NOT "${KOKKOS_INTERNAL_PATHS}" STREQUAL "")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} ${KOKKOS_INTERNAL_PATHS})
|
||||
endif()
|
||||
if (NOT "${KOKKOS_INTERNAL_ADDTOPATH}" STREQUAL "")
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} PATH=${KOKKOS_INTERNAL_ADDTOPATH}:\${PATH})
|
||||
set(KOKKOS_SETTINGS ${KOKKOS_SETTINGS} "PATH=\"${KOKKOS_INTERNAL_ADDTOPATH}:$ENV{PATH}\"")
|
||||
endif()
|
||||
|
||||
# Final form that gets passed to make
|
||||
@ -185,7 +193,7 @@ if(KOKKOS_CMAKE_VERBOSE)
|
||||
|
||||
message(STATUS "")
|
||||
message(STATUS "Architectures:")
|
||||
message(STATUS " ${KOKKOS_ARCH}")
|
||||
message(STATUS " ${KOKKOS_GMAKE_ARCH}")
|
||||
|
||||
message(STATUS "")
|
||||
message(STATUS "Enabled options")
|
||||
@ -194,43 +202,14 @@ if(KOKKOS_CMAKE_VERBOSE)
|
||||
message(STATUS " KOKKOS_SEPARATE_LIBS")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_HWLOC)
|
||||
message(STATUS " KOKKOS_ENABLE_HWLOC")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_MEMKIND)
|
||||
message(STATUS " KOKKOS_ENABLE_MEMKIND")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_DEBUG)
|
||||
message(STATUS " KOKKOS_ENABLE_DEBUG")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_PROFILING)
|
||||
message(STATUS " KOKKOS_ENABLE_PROFILING")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION)
|
||||
message(STATUS " KOKKOS_ENABLE_AGGRESSIVE_VECTORIZATION")
|
||||
endif()
|
||||
foreach(opt IN LISTS KOKKOS_INTERNAL_ENABLE_OPTIONS_LIST)
|
||||
string(TOUPPER ${opt} OPT)
|
||||
if (KOKKOS_ENABLE_${OPT})
|
||||
message(STATUS " KOKKOS_ENABLE_${OPT}")
|
||||
endif()
|
||||
endforeach()
|
||||
|
||||
if(KOKKOS_ENABLE_CUDA)
|
||||
if(KOKKOS_ENABLE_CUDA_LDG_INTRINSIC)
|
||||
message(STATUS " KOKKOS_ENABLE_CUDA_LDG_INTRINSIC")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_CUDA_UVM)
|
||||
message(STATUS " KOKKOS_ENABLE_CUDA_UVM")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE)
|
||||
message(STATUS " KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_ENABLE_CUDA_LAMBDA)
|
||||
message(STATUS " KOKKOS_ENABLE_CUDA_LAMBDA")
|
||||
endif()
|
||||
|
||||
if(KOKKOS_CUDA_DIR)
|
||||
message(STATUS " KOKKOS_CUDA_DIR: ${KOKKOS_CUDA_DIR}")
|
||||
endif()
|
||||
|
||||
@ -3,7 +3,7 @@ INCLUDE(CTest)
|
||||
|
||||
cmake_policy(SET CMP0054 NEW)
|
||||
|
||||
MESSAGE(WARNING "The project name is: ${PROJECT_NAME}")
|
||||
MESSAGE(STATUS "The project name is: ${PROJECT_NAME}")
|
||||
|
||||
IF(NOT DEFINED ${PROJECT_NAME}_ENABLE_OpenMP)
|
||||
SET(${PROJECT_NAME}_ENABLE_OpenMP OFF)
|
||||
@ -84,9 +84,6 @@ ENDFUNCTION()
|
||||
|
||||
|
||||
MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
|
||||
message(STATUS "ProjectName: " ${PROJECT_NAME})
|
||||
message(STATUS "Tests: " ${${PROJECT_NAME}_ENABLE_TESTS})
|
||||
|
||||
IF(${${PROJECT_NAME}_ENABLE_TESTS})
|
||||
FOREACH(TEST_DIR ${ARGN})
|
||||
ADD_SUBDIRECTORY(${TEST_DIR})
|
||||
@ -95,13 +92,11 @@ MACRO(TRIBITS_ADD_TEST_DIRECTORIES)
|
||||
ENDMACRO()
|
||||
|
||||
MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
|
||||
|
||||
IF(${PACKAGE_NAME}_ENABLE_EXAMPLES OR ${PARENT_PACKAGE_NAME}_ENABLE_EXAMPLES)
|
||||
FOREACH(EXAMPLE_DIR ${ARGN})
|
||||
ADD_SUBDIRECTORY(${EXAMPLE_DIR})
|
||||
ENDFOREACH()
|
||||
ENDIF()
|
||||
|
||||
ENDMACRO()
|
||||
|
||||
|
||||
|
||||
@ -1,190 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
# Additional command-line arguments given to this script will be
|
||||
# passed directly to CMake.
|
||||
#
|
||||
|
||||
#
|
||||
# Force CMake to re-evaluate build options.
|
||||
#
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Incrementally construct cmake configure options:
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Location of Trilinos source tree:
|
||||
|
||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
||||
|
||||
# Location for installation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/host/`date +%F`"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# General build options.
|
||||
# Use a variable so options can be propagated to CUDA compiler.
|
||||
|
||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
||||
CMAKE_BUILD_TYPE=RELEASE
|
||||
# CMAKE_BUILD_TYPE=DEBUG
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Build for CUDA architecture:
|
||||
|
||||
CUDA_ARCH=""
|
||||
# CUDA_ARCH="20"
|
||||
# CUDA_ARCH="30"
|
||||
# CUDA_ARCH="35"
|
||||
|
||||
# Build with Intel compiler
|
||||
|
||||
INTEL=ON
|
||||
|
||||
# Build for MIC architecture:
|
||||
|
||||
# INTEL_XEON_PHI=ON
|
||||
|
||||
# Build with HWLOC at location:
|
||||
|
||||
HWLOC_BASE_DIR="/home/projects/libraries/host/hwloc/1.6.2"
|
||||
|
||||
# Location for MPI to use in examples:
|
||||
|
||||
MPI_BASE_DIR=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# MPI configuation only used for examples:
|
||||
#
|
||||
# Must have the MPI_BASE_DIR so that the
|
||||
# include path can be passed to the Cuda compiler
|
||||
|
||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread configuation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP configuation:
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure packages for kokkos-only:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality cmake configuration:
|
||||
|
||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda cmake configuration:
|
||||
|
||||
if [ -n "${CUDA_ARCH}" ] ;
|
||||
then
|
||||
|
||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
||||
|
||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
||||
|
||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
||||
fi
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# Cross-compile for Intel Xeon Phi:
|
||||
|
||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
||||
|
||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
|
||||
# Tell cmake the answers to compile-and-execute tests
|
||||
# to prevent cmake from executing a cross-compiled program.
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,186 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
# Additional command-line arguments given to this script will be
|
||||
# passed directly to CMake.
|
||||
#
|
||||
|
||||
#
|
||||
# Force CMake to re-evaluate build options.
|
||||
#
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Incrementally construct cmake configure options:
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Location of Trilinos source tree:
|
||||
|
||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
||||
|
||||
# Location for installation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/mic/`date +%F`"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# General build options.
|
||||
# Use a variable so options can be propagated to CUDA compiler.
|
||||
|
||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
||||
CMAKE_BUILD_TYPE=RELEASE
|
||||
# CMAKE_BUILD_TYPE=DEBUG
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Build for CUDA architecture:
|
||||
|
||||
CUDA_ARCH=""
|
||||
# CUDA_ARCH="20"
|
||||
# CUDA_ARCH="30"
|
||||
# CUDA_ARCH="35"
|
||||
|
||||
# Build for MIC architecture:
|
||||
|
||||
INTEL_XEON_PHI=ON
|
||||
|
||||
# Build with HWLOC at location:
|
||||
|
||||
HWLOC_BASE_DIR="/home/projects/libraries/mic/hwloc/1.6.2"
|
||||
|
||||
# Location for MPI to use in examples:
|
||||
|
||||
MPI_BASE_DIR=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# MPI configuration only used for examples:
|
||||
#
|
||||
# Must have the MPI_BASE_DIR so that the
|
||||
# include path can be passed to the Cuda compiler
|
||||
|
||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread configuration:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP configuration:
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure packages for kokkos-only:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality cmake configuration:
|
||||
|
||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda cmake configuration:
|
||||
|
||||
if [ -n "${CUDA_ARCH}" ] ;
|
||||
then
|
||||
|
||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
||||
|
||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
||||
|
||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
||||
fi
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# Cross-compile for Intel Xeon Phi:
|
||||
|
||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
||||
|
||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
|
||||
# Tell cmake the answers to compile-and-execute tests
|
||||
# to prevent cmake from executing a cross-compiled program.
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
||||
|
||||
# Forward any extra command-line arguments to CMake, as the script header promises.
cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,293 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# General build options.
|
||||
# Use a variable so options can be propagated to CUDA compiler.
|
||||
|
||||
CMAKE_BUILD_TYPE=RELEASE
|
||||
# CMAKE_BUILD_TYPE=DEBUG
|
||||
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
USE_CUDA_ARCH=
|
||||
USE_THREAD=
|
||||
USE_OPENMP=
|
||||
USE_INTEL=
|
||||
USE_XEON_PHI=
|
||||
HWLOC_BASE_DIR=
|
||||
MPI_BASE_DIR=
|
||||
BLAS_LIB_DIR=
|
||||
LAPACK_LIB_DIR=
|
||||
|
||||
if [ 1 ] ; then
|
||||
# Platform 'kokkos-dev' with Cuda, OpenMP, hwloc, mpi, gnu
|
||||
USE_CUDA_ARCH="35"
|
||||
USE_OPENMP=ON
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
|
||||
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
|
||||
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
|
||||
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
|
||||
|
||||
elif [ ] ; then
|
||||
# Platform 'kokkos-dev' with Cuda, Threads, hwloc, mpi, gnu
|
||||
USE_CUDA_ARCH="35"
|
||||
USE_THREAD=ON
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
|
||||
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
|
||||
BLAS_LIB_DIR="/home/projects/blas/host/gnu/lib"
|
||||
LAPACK_LIB_DIR="/home/projects/lapack/host/gnu/lib"
|
||||
|
||||
elif [ ] ; then
|
||||
# Platform 'kokkos-dev' with Xeon Phi and hwloc
|
||||
USE_OPENMP=ON
|
||||
USE_INTEL=ON
|
||||
USE_XEON_PHI=ON
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/mic/intel/13.SP1.1.106"
|
||||
|
||||
elif [ ] ; then
|
||||
# Platform 'kokkos-nvidia' with Cuda, OpenMP, hwloc, mpi, gnu
|
||||
USE_CUDA_ARCH="20"
|
||||
USE_OPENMP=ON
|
||||
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
|
||||
MPI_BASE_DIR="/home/sems/common/openmpi/current"
|
||||
|
||||
elif [ ] ; then
|
||||
# Platform 'kokkos-nvidia' with Cuda, Threads, hwloc, mpi, gnu
|
||||
USE_CUDA_ARCH="20"
|
||||
USE_THREAD=ON
|
||||
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
|
||||
MPI_BASE_DIR="/home/sems/common/openmpi/current"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Incrementally construct cmake configure command line options:
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CXX_FLAGS=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ 1 ] ; then
|
||||
|
||||
# Configure for Tpetra/Kokkos:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${BLAS_LIB_DIR}"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_DIRS:FILEPATH=${LAPACK_LIB_DIR}"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Tpetra:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Kokkos:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraClassic:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Tpetra_ENABLE_Kokkos_Refactor:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D KokkosClassic_DefaultNode:STRING=Kokkos::Compat::KokkosOpenMPWrapperNode"
|
||||
|
||||
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS}-DKOKKOS_FAST_COMPILE"
|
||||
|
||||
if [ -n "${USE_CUDA_ARCH}" ] ; then
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Cuda:BOOL=ON"
|
||||
|
||||
fi
|
||||
|
||||
fi
|
||||
|
||||
if [ 1 ] ; then
|
||||
|
||||
# Configure for Stokhos:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Sacado:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Stokhos:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Stokhos_ENABLE_Belos:BOOL=ON"
|
||||
|
||||
fi
|
||||
|
||||
if [ 1 ] ; then
|
||||
|
||||
# Configure for TrilinosCouplings:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TrilinosCouplings:BOOL=ON"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=ON"
|
||||
|
||||
# POSIX test(1) compares strings with a single '='; '==' fails under strict /bin/sh (e.g. dash).
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Location for installation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# MPI configuration only used for examples:
|
||||
#
|
||||
# Must have the MPI_BASE_DIR so that the
|
||||
# include path can be passed to the Cuda compiler
|
||||
|
||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Kokkos use pthread configuration:
|
||||
|
||||
if [ "${USE_THREAD}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Kokkos use OpenMP configuration:
|
||||
|
||||
if [ "${USE_OPENMP}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality configuration:
|
||||
|
||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda cmake configuration:
|
||||
|
||||
if [ -n "${USE_CUDA_ARCH}" ] ;
|
||||
then
|
||||
|
||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
||||
|
||||
CUDA_NVCC_FLAGS="-DKOKKOS_HAVE_CUDA_ARCH=${USE_CUDA_ARCH}0;-gencode;arch=compute_${USE_CUDA_ARCH},code=sm_${USE_CUDA_ARCH}"
|
||||
|
||||
if [ "${USE_OPENMP}" = "ON" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
||||
fi
|
||||
|
||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
||||
fi
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ "${USE_INTEL}" = "ON" -o "${USE_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
fi
|
||||
|
||||
# Cross-compile for Intel Xeon Phi:
|
||||
|
||||
if [ "${USE_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
|
||||
CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -mmic"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
||||
|
||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
|
||||
# Tell cmake the answers to compile-and-execute tests
|
||||
# to prevent cmake from executing a cross-compiled program.
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ -n "${CMAKE_CXX_FLAGS}" ] ; then
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING='${CMAKE_CXX_FLAGS}'"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo "cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}"
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,88 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
# Additional command-line arguments given to this script will be
|
||||
# passed directly to CMake.
|
||||
#
|
||||
|
||||
# to build:
|
||||
# build on bgq-b[1-12]
|
||||
# module load sierra-devel
|
||||
# run this configure file
|
||||
# make
|
||||
|
||||
# to run:
|
||||
# ssh bgq-login
|
||||
# cd /scratch/username/...
|
||||
# export OMP_PROC_BIND and XLSMPOPTS environment variables
|
||||
# run with srun
|
||||
|
||||
# Note: hwloc does not work to get or set cpubindings on bgq.
|
||||
# Use the openmp backend and the openmp environment variables.
|
||||
#
|
||||
# Only the mpi wrappers seem to be setup for cross-compile,
|
||||
# so it is important that this configure enables MPI and uses mpigcc wrappers.
|
||||
|
||||
|
||||
|
||||
#
|
||||
# Force CMake to re-evaluate build options.
|
||||
#
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Incrementally construct cmake configure options:
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Location of Trilinos source tree:
|
||||
|
||||
CMAKE_PROJECT_DIR="../Trilinos"
|
||||
|
||||
# Location for installation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=../TrilinosInstall/`date +%F`"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# General build options.
|
||||
# Use a variable so options can be propagated to CUDA compiler.
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=mpigcc-4.7.2"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=mpig++-4.7.2"
|
||||
|
||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
||||
CMAKE_BUILD_TYPE=RELEASE
|
||||
# CMAKE_BUILD_TYPE=DEBUG
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure packages for kokkos-only:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
||||
|
||||
# Forward any extra command-line arguments to CMake, as the script header promises.
cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,216 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
# Additional command-line arguments given to this script will be
|
||||
# passed directly to CMake.
|
||||
#
|
||||
|
||||
#
|
||||
# Force CMake to re-evaluate build options.
|
||||
#
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Incrementally construct cmake configure options:
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Location of Trilinos source tree:
|
||||
|
||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
||||
|
||||
# Location for installation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${HOME}/TrilinosInstall/`date +%F`"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# General build options.
|
||||
# Use a variable so options can be propagated to CUDA compiler.
|
||||
|
||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
||||
CMAKE_BUILD_TYPE=RELEASE
|
||||
#CMAKE_BUILD_TYPE=DEBUG
|
||||
#CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Build for CUDA architecture:
|
||||
|
||||
#CUDA_ARCH=""
|
||||
#CUDA_ARCH="20"
|
||||
#CUDA_ARCH="30"
|
||||
CUDA_ARCH="35"
|
||||
|
||||
# Build with OpenMP
|
||||
|
||||
OPENMP=ON
|
||||
PTHREADS=ON
|
||||
|
||||
# Build host code with Intel compiler:
|
||||
|
||||
INTEL=OFF
|
||||
|
||||
# Build for MIC architecture:
|
||||
|
||||
INTEL_XEON_PHI=OFF
|
||||
|
||||
# Build with HWLOC at location:
|
||||
|
||||
#HWLOC_BASE_DIR=""
|
||||
#HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.4.7"
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
||||
|
||||
# Location for MPI to use in examples:
|
||||
|
||||
#MPI_BASE_DIR=""
|
||||
#MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.4.7"
|
||||
MPI_BASE_DIR="/home/projects/mvapich/2.0.0b/gnu/4.7.3"
|
||||
#MPI_BASE_DIR="/home/projects/openmpi/1.7.3/llvm/2013-12-02/"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# MPI configuration only used for examples:
|
||||
#
|
||||
# Must have the MPI_BASE_DIR so that the
|
||||
# include path can be passed to the Cuda compiler
|
||||
|
||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread configuration:
|
||||
|
||||
if [ "${PTHREADS}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP configuration:
|
||||
|
||||
if [ "${OPENMP}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure packages for kokkos-only:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality cmake configuration:
|
||||
|
||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda cmake configuration:
|
||||
|
||||
if [ -n "${CUDA_ARCH}" ] ;
|
||||
then
|
||||
|
||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
||||
|
||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
||||
|
||||
if [ "${OPENMP}" = "ON" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
||||
fi
|
||||
|
||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
||||
fi
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# Cross-compile for Intel Xeon Phi:
|
||||
|
||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES='mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m'"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
||||
|
||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
|
||||
# Tell cmake the answers to compile-and-execute tests
|
||||
# to prevent cmake from executing a cross-compiled program.
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
||||
|
||||
# Forward any extra command-line arguments to CMake, as the script header promises.
cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,204 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
# Additional command-line arguments given to this script will be
|
||||
# passed directly to CMake.
|
||||
#
|
||||
|
||||
#
|
||||
# Force CMake to re-evaluate build options.
|
||||
#
|
||||
# CMake's generated file is named "Makefile"; the original "MakeFile*" pattern is kept as well.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* MakeFile*
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Incrementally construct cmake configure options:
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Location of Trilinos source tree:
|
||||
|
||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
||||
|
||||
# Location for installation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/sems/common/kokkos/`date +%F`"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# General build options.
|
||||
# Use a variable so options can be propagated to CUDA compiler.
|
||||
|
||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
||||
CMAKE_BUILD_TYPE=RELEASE
|
||||
# CMAKE_BUILD_TYPE=DEBUG
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Build for CUDA architecture:
|
||||
|
||||
# CUDA_ARCH=""
|
||||
CUDA_ARCH="20"
|
||||
# CUDA_ARCH="30"
|
||||
# CUDA_ARCH="35"
|
||||
|
||||
# Build with OpenMP
|
||||
|
||||
OPENMP=ON
|
||||
|
||||
# Build host code with Intel compiler:
|
||||
|
||||
# INTEL=ON
|
||||
|
||||
# Build for MIC architecture:
|
||||
|
||||
# INTEL_XEON_PHI=ON
|
||||
|
||||
# Build with HWLOC at location:
|
||||
|
||||
HWLOC_BASE_DIR="/home/sems/common/hwloc/current"
|
||||
|
||||
# Location for MPI to use in examples:
|
||||
|
||||
MPI_BASE_DIR="/home/sems/common/openmpi/current"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# MPI configuration only used for examples:
|
||||
#
|
||||
# Must have the MPI_BASE_DIR so that the
|
||||
# include path can be passed to the Cuda compiler
|
||||
|
||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread configuration:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP configuration:
|
||||
|
||||
if [ "${OPENMP}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure packages for kokkos-only:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality cmake configuration:
|
||||
|
||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda cmake configuration:
|
||||
|
||||
if [ -n "${CUDA_ARCH}" ] ;
|
||||
then
|
||||
|
||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
||||
|
||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
||||
|
||||
if [ "${OPENMP}" = "ON" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
||||
fi
|
||||
|
||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
||||
fi
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# Cross-compile for Intel Xeon Phi:
|
||||
|
||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
||||
# No inner quotes: cmake is invoked with an unquoted ${CMAKE_CONFIGURE}, so quote
# characters stored in this string would be passed to CMake literally.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES=mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
||||
|
||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
|
||||
# Tell cmake the answers to compile-and-execute tests
|
||||
# to prevent cmake from executing a cross-compiled program.
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
||||
|
||||
# Forward any extra command-line arguments of this script to CMake, as the
# header comment of this script promises.
cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,190 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
# Additional command-line arguments given to this script will be
|
||||
# passed directly to CMake.
|
||||
#
|
||||
|
||||
#
|
||||
# Force CMake to re-evaluate build options.
|
||||
#
|
||||
# CMake's generated file is named "Makefile"; the original "MakeFile*" pattern is kept as well.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* MakeFile*
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Incrementally construct cmake configure options:
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Location of Trilinos source tree:
|
||||
|
||||
CMAKE_PROJECT_DIR="${HOME}/Trilinos"
|
||||
|
||||
# Location for installation:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=/home/projects/kokkos/`date +%F`"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# General build options.
|
||||
# Use a variable so options can be propagated to CUDA compiler.
|
||||
|
||||
CMAKE_VERBOSE_MAKEFILE=OFF
|
||||
CMAKE_BUILD_TYPE=RELEASE
|
||||
# CMAKE_BUILD_TYPE=DEBUG
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Build for CUDA architecture:
|
||||
|
||||
# CUDA_ARCH=""
|
||||
# CUDA_ARCH="20"
|
||||
# CUDA_ARCH="30"
|
||||
CUDA_ARCH="35"
|
||||
|
||||
# Build host code with Intel compiler:
|
||||
|
||||
INTEL=ON
|
||||
|
||||
# Build for MIC architecture:
|
||||
|
||||
# INTEL_XEON_PHI=ON
|
||||
|
||||
# Build with HWLOC at location:
|
||||
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.6.2"
|
||||
|
||||
# Location for MPI to use in examples:
|
||||
|
||||
MPI_BASE_DIR=""
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# MPI configuration only used for examples:
|
||||
#
|
||||
# Must have the MPI_BASE_DIR so that the
|
||||
# include path can be passed to the Cuda compiler
|
||||
|
||||
if [ -n "${MPI_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D MPI_BASE_DIR:PATH=${MPI_BASE_DIR}"
|
||||
else
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_MPI:BOOL=OFF"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread configuration:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP configuration:
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure packages for kokkos-only:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality cmake configuration:
|
||||
|
||||
if [ -n "${HWLOC_BASE_DIR}" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda cmake configuration:
|
||||
|
||||
if [ -n "${CUDA_ARCH}" ] ;
|
||||
then
|
||||
|
||||
# Options to CUDA_NVCC_FLAGS must be semi-colon delimited,
|
||||
# this is different than the standard CMAKE_CXX_FLAGS syntax.
|
||||
|
||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_${CUDA_ARCH},code=sm_${CUDA_ARCH}"
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi"
|
||||
|
||||
if [ "${CMAKE_BUILD_TYPE}" = "DEBUG" ] ;
|
||||
then
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-g"
|
||||
else
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3"
|
||||
fi
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_VERBOSE_BUILD:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS}"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if [ "${INTEL}" = "ON" -o "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
# Cross-compile for Intel Xeon Phi:
|
||||
|
||||
if [ "${INTEL_XEON_PHI}" = "ON" ] ;
|
||||
then
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_SYSTEM_NAME=Linux"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_FLAGS:STRING=-mmic"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_Fortran_COMPILER:FILEPATH=ifort"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_DIRS:FILEPATH=${MKLROOT}/lib/mic"
|
||||
# No inner quotes: cmake is invoked with an unquoted ${CMAKE_CONFIGURE}, so quote
# characters stored in this string would be passed to CMake literally.
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BLAS_LIBRARY_NAMES=mkl_intel_lp64;mkl_sequential;mkl_core;pthread;m"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D BUILD_SHARED_LIBS:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D DART_TESTING_TIMEOUT:STRING=600"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_LIBRARY_NAMES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_LAPACK_LIBRARIES=''"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_BinUtils=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_Pthread_LIBRARIES=pthread"
|
||||
|
||||
# Cannot cross-compile fortran compatibility checks on the MIC:
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
|
||||
# Tell cmake the answers to compile-and-execute tests
|
||||
# to prevent cmake from executing a cross-compiled program.
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_GCC_ABI_DEMANGLE_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HAVE_TEUCHOS_BLASFLOAT_EXITCODE=0"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D LAPACK_SLAPY2_WORKS_EXITCODE=0"
|
||||
|
||||
fi
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_VERBOSE_MAKEFILE:BOOL=${CMAKE_VERBOSE_MAKEFILE}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
echo "cmake ${CMAKE_CONFIGURE} ${CMAKE_PROJECT_DIR}"
|
||||
|
||||
# Forward any extra command-line arguments of this script to CMake, as the
# header comment of this script promises.
cmake ${CMAKE_CONFIGURE} "$@" ${CMAKE_PROJECT_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,140 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# This script uses CUDA, OpenMP, and MPI.
|
||||
#
|
||||
# Before invoking this script, set the OMPI_CXX environment variable
|
||||
# to point to nvcc_wrapper, wherever it happens to live. (If you use
|
||||
# an MPI implementation other than OpenMPI, set the corresponding
|
||||
# environment variable instead.)
|
||||
#
|
||||
|
||||
rm -f CMakeCache.txt;
|
||||
rm -rf CMakeFiles
|
||||
EXTRA_ARGS=$@
|
||||
MPI_PATH="/opt/mpi/openmpi/1.8.2/nvcc-gcc/4.8.3-6.5"
|
||||
CUDA_PATH="/opt/nvidia/cuda/6.5.14"
|
||||
|
||||
#
|
||||
# As long as there are any .cu files in Trilinos, we'll need to set
|
||||
# CUDA_NVCC_FLAGS. If Trilinos gets rid of all of its .cu files and
|
||||
# lets nvcc_wrapper handle them as .cpp files, then we won't need to
|
||||
# set CUDA_NVCC_FLAGS. As it is, given that we need to set
|
||||
# CUDA_NVCC_FLAGS, we must make sure that they are the same flags as
|
||||
# nvcc_wrapper passes to nvcc.
|
||||
#
|
||||
CUDA_NVCC_FLAGS="-gencode;arch=compute_35,code=sm_35;-I${MPI_PATH}/include"
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-Xcompiler;-Wall,-ansi,-fopenmp"
|
||||
CUDA_NVCC_FLAGS="${CUDA_NVCC_FLAGS};-O3;-DKOKKOS_USE_CUDA_UVM"
|
||||
|
||||
cmake \
|
||||
-D CMAKE_INSTALL_PREFIX:PATH="$PWD/../install/" \
|
||||
-D CMAKE_BUILD_TYPE:STRING=DEBUG \
|
||||
-D CMAKE_CXX_FLAGS:STRING="-g -Wall" \
|
||||
-D CMAKE_C_FLAGS:STRING="-g -Wall" \
|
||||
-D CMAKE_FORTRAN_FLAGS:STRING="" \
|
||||
-D CMAKE_SHARED_LIBRARY_LINK_CXX_FLAGS="" \
|
||||
-D Trilinos_ENABLE_Triutils=OFF \
|
||||
-D Trilinos_ENABLE_INSTALL_CMAKE_CONFIG_FILES:BOOL=OFF \
|
||||
-D Trilinos_ENABLE_DEBUG:BOOL=OFF \
|
||||
-D Trilinos_ENABLE_CHECKED_STL:BOOL=OFF \
|
||||
-D Trilinos_ENABLE_EXPLICIT_INSTANTIATION:BOOL=OFF \
|
||||
-D Trilinos_WARNINGS_AS_ERRORS_FLAGS:STRING="" \
|
||||
-D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF \
|
||||
-D Trilinos_ENABLE_ALL_OPTIONAL_PACKAGES:BOOL=OFF \
|
||||
-D BUILD_SHARED_LIBS:BOOL=OFF \
|
||||
-D DART_TESTING_TIMEOUT:STRING=600 \
|
||||
-D CMAKE_VERBOSE_MAKEFILE:BOOL=OFF \
|
||||
\
|
||||
\
|
||||
-D CMAKE_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
|
||||
-D CMAKE_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
|
||||
-D MPI_CXX_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicxx" \
|
||||
-D MPI_C_COMPILER:FILEPATH="${MPI_PATH}/bin/mpicc" \
|
||||
-D CMAKE_Fortran_COMPILER:FILEPATH="${MPI_PATH}/bin/mpif77" \
|
||||
-D MPI_EXEC:FILEPATH="${MPI_PATH}/bin/mpirun" \
|
||||
-D MPI_EXEC_POST_NUMPROCS_FLAGS:STRING="-bind-to;socket;--map-by;socket;env;CUDA_MANAGED_FORCE_DEVICE_ALLOC=1;CUDA_LAUNCH_BLOCKING=1;OMP_NUM_THREADS=2" \
|
||||
\
|
||||
\
|
||||
-D Trilinos_ENABLE_CXX11:BOOL=OFF \
|
||||
-D TPL_ENABLE_MPI:BOOL=ON \
|
||||
-D Trilinos_ENABLE_OpenMP:BOOL=ON \
|
||||
-D Trilinos_ENABLE_ThreadPool:BOOL=ON \
|
||||
\
|
||||
\
|
||||
-D TPL_ENABLE_CUDA:BOOL=ON \
|
||||
-D CUDA_TOOLKIT_ROOT_DIR:FILEPATH="${CUDA_PATH}" \
|
||||
-D CUDA_PROPAGATE_HOST_FLAGS:BOOL=OFF \
|
||||
-D TPL_ENABLE_Thrust:BOOL=OFF \
|
||||
-D Thrust_INCLUDE_DIRS:FILEPATH="${CUDA_PATH}/include" \
|
||||
-D TPL_ENABLE_CUSPARSE:BOOL=OFF \
|
||||
-D TPL_ENABLE_Cusp:BOOL=OFF \
|
||||
-D Cusp_INCLUDE_DIRS="/home/crtrott/Software/cusp" \
|
||||
-D CUDA_VERBOSE_BUILD:BOOL=OFF \
|
||||
-D CUDA_NVCC_FLAGS:STRING=${CUDA_NVCC_FLAGS} \
|
||||
\
|
||||
\
|
||||
-D TPL_ENABLE_HWLOC=OFF \
|
||||
-D HWLOC_INCLUDE_DIRS="/usr/local/software/hwloc/current/include" \
|
||||
-D HWLOC_LIBRARY_DIRS="/usr/local/software/hwloc/current/lib" \
|
||||
-D TPL_ENABLE_BinUtils=OFF \
|
||||
-D TPL_ENABLE_BLAS:STRING=ON \
|
||||
-D TPL_ENABLE_LAPACK:STRING=ON \
|
||||
-D TPL_ENABLE_MKL:STRING=OFF \
|
||||
-D TPL_ENABLE_HWLOC:STRING=OFF \
|
||||
-D TPL_ENABLE_GTEST:STRING=ON \
|
||||
-D TPL_ENABLE_SuperLU=ON \
|
||||
-D TPL_ENABLE_BLAS=ON \
|
||||
-D TPL_ENABLE_LAPACK=ON \
|
||||
-D TPL_SuperLU_LIBRARIES="/home/crtrott/Software/SuperLU_4.3/lib/libsuperlu_4.3.a" \
|
||||
-D TPL_SuperLU_INCLUDE_DIRS="/home/crtrott/Software/SuperLU_4.3/SRC" \
|
||||
\
|
||||
\
|
||||
-D Trilinos_Enable_Kokkos:BOOL=ON \
|
||||
-D Trilinos_ENABLE_KokkosCore:BOOL=ON \
|
||||
-D Trilinos_ENABLE_TeuchosKokkosCompat:BOOL=ON \
|
||||
-D Trilinos_ENABLE_KokkosContainers:BOOL=ON \
|
||||
-D Trilinos_ENABLE_TpetraKernels:BOOL=ON \
|
||||
-D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON \
|
||||
-D Trilinos_ENABLE_TeuchosKokkosComm:BOOL=ON \
|
||||
-D Trilinos_ENABLE_KokkosExample:BOOL=ON \
|
||||
-D Kokkos_ENABLE_EXAMPLES:BOOL=ON \
|
||||
-D Kokkos_ENABLE_TESTS:BOOL=OFF \
|
||||
-D KokkosClassic_DefaultNode:STRING="Kokkos::Compat::KokkosCudaWrapperNode" \
|
||||
-D TpetraClassic_ENABLE_OpenMPNode=OFF \
|
||||
-D TpetraClassic_ENABLE_TPINode=OFF \
|
||||
-D TpetraClassic_ENABLE_MKL=OFF \
|
||||
-D Kokkos_ENABLE_Cuda_UVM=ON \
|
||||
\
|
||||
\
|
||||
-D Trilinos_ENABLE_Teuchos:BOOL=ON \
|
||||
-D Teuchos_ENABLE_COMPLEX:BOOL=OFF \
|
||||
\
|
||||
\
|
||||
-D Trilinos_ENABLE_Tpetra:BOOL=ON \
|
||||
-D Tpetra_ENABLE_KokkosCore=ON \
|
||||
-D Tpetra_ENABLE_Kokkos_DistObject=OFF \
|
||||
-D Tpetra_ENABLE_Kokkos_Refactor=ON \
|
||||
-D Tpetra_ENABLE_TESTS=ON \
|
||||
-D Tpetra_ENABLE_EXAMPLES=ON \
|
||||
-D Tpetra_ENABLE_MPI_CUDA_RDMA:BOOL=ON \
|
||||
\
|
||||
\
|
||||
-D Trilinos_ENABLE_Belos=OFF \
|
||||
-D Trilinos_ENABLE_Amesos=OFF \
|
||||
-D Trilinos_ENABLE_Amesos2=OFF \
|
||||
-D Trilinos_ENABLE_Ifpack=OFF \
|
||||
-D Trilinos_ENABLE_Ifpack2=OFF \
|
||||
-D Trilinos_ENABLE_Epetra=OFF \
|
||||
-D Trilinos_ENABLE_EpetraExt=OFF \
|
||||
-D Trilinos_ENABLE_Zoltan=OFF \
|
||||
-D Trilinos_ENABLE_Zoltan2=OFF \
|
||||
-D Trilinos_ENABLE_MueLu=OFF \
|
||||
-D Belos_ENABLE_TESTS=ON \
|
||||
-D Belos_ENABLE_EXAMPLES=ON \
|
||||
-D MueLu_ENABLE_TESTS=ON \
|
||||
-D MueLu_ENABLE_EXAMPLES=ON \
|
||||
-D Ifpack2_ENABLE_TESTS=ON \
|
||||
-D Ifpack2_ENABLE_EXAMPLES=ON \
|
||||
"$@" \
|
||||
${HOME}/Trilinos
|
||||
|
||||
@ -1,148 +0,0 @@
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
The following steps are for workstations/servers with the SEMS environment installed.
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
Summary:
|
||||
|
||||
- Step 1: Rigorous testing of Kokkos' develop branch for each backend (Serial, OpenMP, Threads, Cuda) with all supported compilers.
|
||||
|
||||
- Step 2: Snapshot Kokkos' develop branch into current Trilinos develop branch.
|
||||
|
||||
- Step 3: Build and test Trilinos with combinations of compilers, types, backends.
|
||||
|
||||
- Step 4: Promote Kokkos develop branch to master if the snapshot does not cause any new tests to fail; else track/fix causes of new failures.
|
||||
|
||||
- Step 5: Snapshot Kokkos tagged master branch into Trilinos and push Trilinos.
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
Step 1:
|
||||
1.1. Update kokkos develop branch (NOT a fork)
|
||||
|
||||
(From kokkos directory):
|
||||
git fetch --all
|
||||
git checkout develop
|
||||
git reset --hard origin/develop
|
||||
|
||||
1.2. Create a testing directory - here the directory is created within the kokkos directory
|
||||
|
||||
mkdir testing
|
||||
cd testing
|
||||
|
||||
1.3. Run the test_all_sandia script; various compiler and build-list options can be specified
|
||||
|
||||
../config/test_all_sandia
|
||||
|
||||
1.4 Clean repository of untracked files
|
||||
|
||||
cd ../
|
||||
git clean -df
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
Step 2:
|
||||
2.1 Update Trilinos develop branch
|
||||
|
||||
(From Trilinos directory):
|
||||
git checkout develop
|
||||
git fetch --all
|
||||
git reset --hard origin/develop
|
||||
git clean -df
|
||||
|
||||
2.2 Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files
|
||||
|
||||
module load python/2.7.9
|
||||
python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
Step 3:
|
||||
3.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard are provided in kokkos/config/trilinos-integration
|
||||
|
||||
It is usually a good idea to run those scripts via nohup.
|
||||
You can run all four at the same time, use separate directories for each.
|
||||
|
||||
3.2. Compare the failed test output between the pristine and the updated runs; investigate and fix problems if new tests fail after the Kokkos snapshot
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
Step 4: Once all Trilinos tests pass, promote the Kokkos develop branch to master on GitHub
|
||||
4.1. Generate Changelog (You need a github API token)
|
||||
|
||||
Close all Open issues with "InDevelop" tag on github
|
||||
|
||||
(Not from kokkos directory)
|
||||
github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
|
||||
|
||||
(Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md)
|
||||
(Make desired changes to CHANGELOG.md to enhance clarity)
|
||||
(Commit and push the CHANGELOG to develop)
|
||||
|
||||
4.2 Merge develop into Master
|
||||
|
||||
- DO NOT fast-forward the merge!!!!
|
||||
|
||||
(From kokkos directory):
|
||||
git checkout master
|
||||
git fetch --all
|
||||
# Ensure we are on the current origin/master
|
||||
git reset --hard origin/master
|
||||
git merge --no-ff origin/develop
|
||||
|
||||
4.3. Update the tag in kokkos/config/master_history.txt
|
||||
Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
|
||||
Tag format: #.#.##
|
||||
|
||||
# Prepend master_history.txt with
|
||||
|
||||
# tag: #.#.##
|
||||
# date: mm/dd/yyyy
|
||||
# master: sha1
|
||||
# develop: sha1
|
||||
# -----------------------
|
||||
|
||||
git commit --amend -a
|
||||
|
||||
git tag -a #.#.##
|
||||
tag: #.#.##
|
||||
date: mm/dd/yyyy
|
||||
master: sha1
|
||||
develop: sha1
|
||||
|
||||
4.4. Do NOT push yet
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
|
||||
Step 5:
|
||||
5.1. Make sure Trilinos is up-to-date - chances are other changes have been committed since the integration testing process began. If a substantial change has occurred that may be affected by the snapshot, the testing procedure may need to be repeated.
|
||||
|
||||
(From Trilinos directory):
|
||||
git checkout develop
|
||||
git fetch --all
|
||||
git reset --hard origin/develop
|
||||
git clean -df
|
||||
|
||||
5.2. Snapshot Kokkos master branch into Trilinos
|
||||
|
||||
(From kokkos directory):
|
||||
git fetch --all
|
||||
git checkout tags/#.#.##
|
||||
git clean -df
|
||||
|
||||
python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
|
||||
|
||||
5.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3)
|
||||
|
||||
The modules are listed in kokkos/config/trilinos-integration/checkin-test
|
||||
Run checkin-test, forward dependencies and optional dependencies must be enabled
|
||||
If push failed because someone else clearly broke something, push manually.
|
||||
If push failed for unclear reasons, investigate, fix, and potentially start over from step 2 after resetting your local kokkos/master branch
|
||||
|
||||
Step 6: Push Kokkos to master
|
||||
|
||||
git push --follow-tags origin master
|
||||
|
||||
// -------------------------------------------------------------------------------- //
|
||||
@ -1,110 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# Cuda, OpenMP, Threads, Qthreads, hwloc
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
|
||||
#
|
||||
# The 'nvcc-wrapper' module should load a script that matches
|
||||
# kokkos/bin/nvcc_wrapper
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality configuration:
|
||||
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Qthreads
|
||||
|
||||
QTHREADS_BASE_DIR="/home/projects/qthreads/2014-07-08/host/gnu/4.7.3"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_QTHREADS:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_INCLUDE_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D QTHREADS_LIBRARY_DIRS:FILEPATH=${QTHREADS_BASE_DIR}/lib"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
# CMake's generated file is named "Makefile"; the original "MakeFile*" pattern is kept as well.
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake Makefile* MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
@ -1,104 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# Cuda, OpenMP, hwloc
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
|
||||
#
|
||||
# The 'nvcc-wrapper' module should load a script that matches
|
||||
# kokkos/bin/nvcc_wrapper
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality configuration:
|
||||
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread explicitly OFF so tribits doesn't automatically turn it on
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,88 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# Cuda
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 gcc/4.8.3 cuda/6.5.14 nvcc-wrapper/gnu
|
||||
#
|
||||
# The 'nvcc-wrapper' module should load a script that matches
|
||||
# kokkos/bin/nvcc_wrapper
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Cuda using GNU, use the nvcc_wrapper to build CUDA source
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
|
||||
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,84 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# C++11, OpenMP
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread explicitly OFF so tribits doesn't automatically activate
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,78 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# <none>
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Kokkos Pthread explicitly OFF, TPL Pthread ON for gtest
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,89 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# Intel, OpenMP, Cuda
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 cuda/7.0.4 intel/2015.0.090 nvcc-wrapper/intel
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=nvcc_wrapper"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUDA:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_CUSPARSE:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread explicitly OFF
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,84 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# Intel, OpenMP
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 intel/13.SP1.1.106
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=icc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=icpc"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread explicitly OFF
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,77 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# OpenMP
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# OpenMP
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_OpenMP:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_OpenMP:BOOL=ON"
|
||||
|
||||
# Pthread explicitly OFF, otherwise tribits will automatically turn it on
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=OFF"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,87 +0,0 @@
|
||||
#!/bin/sh
|
||||
#
|
||||
# Copy this script, put it outside the Trilinos source directory, and
|
||||
# build there.
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Building on 'kokkos-dev.sandia.gov' with enabled capabilities:
|
||||
#
|
||||
# Threads, hwloc
|
||||
#
|
||||
# module loaded on 'kokkos-dev.sandia.gov' for this build
|
||||
#
|
||||
# module load cmake/2.8.11.2 gcc/4.8.3
|
||||
#
|
||||
#-----------------------------------------------------------------------------
|
||||
# Source and installation directories:
|
||||
|
||||
TRILINOS_SOURCE_DIR=${HOME}/Trilinos
|
||||
TRILINOS_INSTALL_DIR=${HOME}/TrilinosInstall/`date +%F`
|
||||
|
||||
CMAKE_CONFIGURE=""
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_INSTALL_PREFIX=${TRILINOS_INSTALL_DIR}"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Debug/optimized
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=DEBUG"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_BOUNDS_CHECK:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_BUILD_TYPE:STRING=RELEASE"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_FLAGS:STRING=-Wall"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_C_COMPILER=gcc"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D CMAKE_CXX_COMPILER=g++"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Configure for Kokkos subpackages and tests:
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_Fortran:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_ALL_PACKAGES:BOOL=OFF"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_EXAMPLES:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TESTS:BOOL=ON"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosCore:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosContainers:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosAlgorithms:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_TpetraKernels:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_KokkosExample:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Hardware locality configuration:
|
||||
|
||||
HWLOC_BASE_DIR="/home/projects/hwloc/1.7.1/host/gnu/4.7.3"
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_HWLOC:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_INCLUDE_DIRS:FILEPATH=${HWLOC_BASE_DIR}/include"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D HWLOC_LIBRARY_DIRS:FILEPATH=${HWLOC_BASE_DIR}/lib"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# Pthread
|
||||
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D TPL_ENABLE_Pthread:BOOL=ON"
|
||||
CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_Pthread:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
# C++11
|
||||
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Trilinos_ENABLE_CXX11:BOOL=ON"
|
||||
# CMAKE_CONFIGURE="${CMAKE_CONFIGURE} -D Kokkos_ENABLE_CXX11:BOOL=ON"
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
#
|
||||
# Remove CMake output files to force reconfigure from scratch.
|
||||
#
|
||||
|
||||
rm -rf CMake* Trilinos* packages Dart* Testing cmake_install.cmake MakeFile*
|
||||
|
||||
#
|
||||
|
||||
echo cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
cmake ${CMAKE_CONFIGURE} ${TRILINOS_SOURCE_DIR}
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
@ -1,340 +0,0 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# This shell script (nvcc_wrapper) wraps both the host compiler and
|
||||
# NVCC, if you are building legacy C or C++ code with CUDA enabled.
|
||||
# The script remedies some differences between the interface of NVCC
|
||||
# and that of the host compiler, in particular for linking.
|
||||
# It also means that a legacy code doesn't need separate .cu files;
|
||||
# it can just use .cpp files.
|
||||
#
|
||||
# Default settings: change those according to your machine. For
|
||||
# example, you may have two different wrappers with either icpc
|
||||
# or g++ as their back-end compiler. The defaults can be overwritten
|
||||
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
|
||||
|
||||
default_arch="sm_35"
|
||||
#default_arch="sm_50"
|
||||
|
||||
#
|
||||
# The default C++ compiler.
|
||||
#
|
||||
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
|
||||
#host_compiler="icpc"
|
||||
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
|
||||
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
|
||||
|
||||
#
|
||||
# Internal variables
|
||||
#
|
||||
|
||||
# C++ files
|
||||
cpp_files=""
|
||||
|
||||
# Host compiler arguments
|
||||
xcompiler_args=""
|
||||
|
||||
# Cuda (NVCC) only arguments
|
||||
cuda_args=""
|
||||
|
||||
# Arguments for both NVCC and Host compiler
|
||||
shared_args=""
|
||||
|
||||
# Argument -c
|
||||
compile_arg=""
|
||||
|
||||
# Argument -o <obj>
|
||||
output_arg=""
|
||||
|
||||
# Linker arguments
|
||||
xlinker_args=""
|
||||
|
||||
# Object files passable to NVCC
|
||||
object_files=""
|
||||
|
||||
# Link objects for the host linker only
|
||||
object_files_xlinker=""
|
||||
|
||||
# Shared libraries with version numbers are not handled correctly by NVCC
|
||||
shared_versioned_libraries_host=""
|
||||
shared_versioned_libraries=""
|
||||
|
||||
# Does the User set the architecture
|
||||
arch_set=0
|
||||
|
||||
# Does the user overwrite the host compiler
|
||||
ccbin_set=0
|
||||
|
||||
#Error code of compilation
|
||||
error_code=0
|
||||
|
||||
# Do a dry run without actually compiling
|
||||
dry_run=0
|
||||
|
||||
# Skip NVCC compilation and use host compiler directly
|
||||
host_only=0
|
||||
host_only_args=""
|
||||
|
||||
# Enable workaround for CUDA 6.5 for pragma ident
|
||||
replace_pragma_ident=0
|
||||
|
||||
# Mark first host compiler argument
|
||||
first_xcompiler_arg=1
|
||||
|
||||
temp_dir=${TMPDIR:-/tmp}
|
||||
|
||||
# Check if we have an optimization argument already
|
||||
optimization_applied=0
|
||||
|
||||
# Check if we have -std=c++X or --std=c++X already
|
||||
stdcxx_applied=0
|
||||
|
||||
# Run nvcc a second time to generate dependencies if needed
|
||||
depfile_separate=0
|
||||
depfile_output_arg=""
|
||||
depfile_target_arg=""
|
||||
|
||||
#echo "Arguments: $# $@"
|
||||
|
||||
while [ $# -gt 0 ]
|
||||
do
|
||||
case $1 in
|
||||
#show the executed command
|
||||
--show|--nvcc-wrapper-show)
|
||||
dry_run=1
|
||||
;;
|
||||
#run host compilation only
|
||||
--host-only)
|
||||
host_only=1
|
||||
;;
|
||||
#replace '#pragma ident' with '#ident'; this is needed to compile OpenMPI due to a configure script bug and the non-standardized behaviour of pragma with macros
|
||||
--replace-pragma-ident)
|
||||
replace_pragma_ident=1
|
||||
;;
|
||||
#handle source files to be compiled as cuda files
|
||||
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
|
||||
cpp_files="$cpp_files $1"
|
||||
;;
|
||||
# Ensure we only have one optimization flag because NVCC doesn't allow multiple
|
||||
-O*)
|
||||
if [ $optimization_applied -eq 1 ]; then
|
||||
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
|
||||
else
|
||||
shared_args="$shared_args $1"
|
||||
optimization_applied=1
|
||||
fi
|
||||
;;
|
||||
#Handle shared args (valid for both nvcc and the host compiler)
|
||||
-D*|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
|
||||
shared_args="$shared_args $1"
|
||||
;;
|
||||
#Handle compilation argument
|
||||
-c)
|
||||
compile_arg="$1"
|
||||
;;
|
||||
#Handle output argument
|
||||
-o)
|
||||
output_arg="$output_arg $1 $2"
|
||||
shift
|
||||
;;
|
||||
# Handle depfile arguments. We map them to a separate call to nvcc.
|
||||
-MD|-MMD)
|
||||
depfile_separate=1
|
||||
host_only_args="$host_only_args $1"
|
||||
;;
|
||||
-MF)
|
||||
depfile_output_arg="-o $2"
|
||||
host_only_args="$host_only_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
-MT)
|
||||
depfile_target_arg="$1 $2"
|
||||
host_only_args="$host_only_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
#Handle known nvcc args
|
||||
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle more known nvcc args
|
||||
--expt-extended-lambda|--expt-relaxed-constexpr)
|
||||
cuda_args="$cuda_args $1"
|
||||
;;
|
||||
#Handle known nvcc args that have an argument
|
||||
-rdc|-maxrregcount|--default-stream)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
shift
|
||||
;;
|
||||
#Handle c++11
|
||||
--std=c++11|-std=c++11|--std=c++14|-std=c++14|--std=c++1z|-std=c++1z)
|
||||
if [ $stdcxx_applied -eq 1 ]; then
|
||||
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-std=c++1* or --std=c++1*), only the first is used because nvcc can only accept a single std setting"
|
||||
else
|
||||
shared_args="$shared_args $1"
|
||||
stdcxx_applied=1
|
||||
fi
|
||||
;;
|
||||
|
||||
#strip off -std=c++98 because nvcc warns about it and Tribits will place both -std=c++11 and -std=c++98
|
||||
-std=c++98|--std=c++98)
|
||||
;;
|
||||
#strip off pedantic because it produces endless warnings about #LINE added by the preprocessor
|
||||
-pedantic|-Wpedantic|-ansi)
|
||||
;;
|
||||
#strip off -Woverloaded-virtual to avoid "cc1: warning: command line option ‘-Woverloaded-virtual’ is valid for C++/ObjC++ but not for C"
|
||||
-Woverloaded-virtual)
|
||||
;;
|
||||
#strip -Xcompiler because we add it
|
||||
-Xcompiler)
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args="$2"
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,$2"
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
#strip off "-x cu" because we add that
|
||||
-x)
|
||||
if [[ $2 != "cu" ]]; then
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args="-x,$2"
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,-x,$2"
|
||||
fi
|
||||
fi
|
||||
shift
|
||||
;;
|
||||
#Handle -ccbin (if it's not set we can set it to a default value)
|
||||
-ccbin)
|
||||
cuda_args="$cuda_args $1 $2"
|
||||
ccbin_set=1
|
||||
host_compiler=$2
|
||||
shift
|
||||
;;
|
||||
#Handle -arch argument (if it's not set, use a default)
|
||||
-arch*)
|
||||
cuda_args="$cuda_args $1"
|
||||
arch_set=1
|
||||
;;
|
||||
#Handle -Xcudafe argument
|
||||
-Xcudafe)
|
||||
cuda_args="$cuda_args -Xcudafe $2"
|
||||
shift
|
||||
;;
|
||||
#Handle args that should be sent to the linker
|
||||
-Wl*)
|
||||
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
|
||||
host_linker_args="$host_linker_args ${1:4:${#1}}"
|
||||
;;
|
||||
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
|
||||
*.a|*.so|*.o|*.obj)
|
||||
object_files="$object_files $1"
|
||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
|
||||
@*|*.dylib)
|
||||
object_files="$object_files -Xlinker $1"
|
||||
object_files_xlinker="$object_files_xlinker -Xlinker $1"
|
||||
;;
|
||||
#Handle shared libraries with *.so.* names which nvcc can't do.
|
||||
*.so.*)
|
||||
shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
|
||||
shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
|
||||
;;
|
||||
#All other args are sent to the host compiler
|
||||
*)
|
||||
if [ $first_xcompiler_arg -eq 1 ]; then
|
||||
xcompiler_args=$1
|
||||
first_xcompiler_arg=0
|
||||
else
|
||||
xcompiler_args="$xcompiler_args,$1"
|
||||
fi
|
||||
;;
|
||||
esac
|
||||
|
||||
shift
|
||||
done
|
||||
|
||||
#Add default host compiler if necessary
|
||||
if [ $ccbin_set -ne 1 ]; then
|
||||
cuda_args="$cuda_args -ccbin $host_compiler"
|
||||
fi
|
||||
|
||||
#Add architecture command
|
||||
if [ $arch_set -ne 1 ]; then
|
||||
cuda_args="$cuda_args -arch=$default_arch"
|
||||
fi
|
||||
|
||||
#Compose compilation command
|
||||
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
|
||||
if [ $first_xcompiler_arg -eq 0 ]; then
|
||||
nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
|
||||
fi
|
||||
|
||||
#Compose host only command
|
||||
host_command="$host_compiler $shared_args $host_only_args $compile_arg $output_arg $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
|
||||
|
||||
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
|
||||
if [ $replace_pragma_ident -eq 1 ]; then
|
||||
cpp_files2=""
|
||||
for file in $cpp_files
|
||||
do
|
||||
var=`grep pragma ${file} | grep ident | grep "#"`
|
||||
if [ "${#var}" -gt 0 ]
|
||||
then
|
||||
sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file
|
||||
cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file"
|
||||
else
|
||||
cpp_files2="$cpp_files2 $file"
|
||||
fi
|
||||
done
|
||||
cpp_files=$cpp_files2
|
||||
#echo $cpp_files
|
||||
fi
|
||||
|
||||
if [ "$cpp_files" ]; then
|
||||
nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
|
||||
else
|
||||
nvcc_command="$nvcc_command $object_files"
|
||||
fi
|
||||
|
||||
if [ "$cpp_files" ]; then
|
||||
host_command="$host_command $object_files $cpp_files"
|
||||
else
|
||||
host_command="$host_command $object_files"
|
||||
fi
|
||||
|
||||
if [ $depfile_separate -eq 1 ]; then
|
||||
# run nvcc a second time to generate dependencies (without compiling)
|
||||
nvcc_depfile_command="$nvcc_command -M $depfile_target_arg $depfile_output_arg"
|
||||
else
|
||||
nvcc_depfile_command=""
|
||||
fi
|
||||
|
||||
nvcc_command="$nvcc_command $compile_arg $output_arg"
|
||||
|
||||
#Print command for dryrun
|
||||
if [ $dry_run -eq 1 ]; then
|
||||
if [ $host_only -eq 1 ]; then
|
||||
echo $host_command
|
||||
elif [ -n "$nvcc_depfile_command" ]; then
|
||||
echo $nvcc_command "&&" $nvcc_depfile_command
|
||||
else
|
||||
echo $nvcc_command
|
||||
fi
|
||||
exit 0
|
||||
fi
|
||||
|
||||
#Run compilation command
|
||||
if [ $host_only -eq 1 ]; then
|
||||
$host_command
|
||||
elif [ -n "$nvcc_depfile_command" ]; then
|
||||
$nvcc_command && $nvcc_depfile_command
|
||||
else
|
||||
$nvcc_command
|
||||
fi
|
||||
error_code=$?
|
||||
|
||||
#Report error code
|
||||
exit $error_code
|
||||
@ -14,25 +14,52 @@ PROCESSOR=`uname -p`
|
||||
|
||||
if [[ "$HOSTNAME" =~ (white|ride).* ]]; then
|
||||
MACHINE=white
|
||||
elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ .*bowman.* ]]; then
|
||||
MACHINE=bowman
|
||||
elif [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ n.* ]]; then # Warning: very generic name
|
||||
if [[ "$PROCESSOR" = "aarch64" ]]; then
|
||||
MACHINE=sullivan
|
||||
module load git
|
||||
fi
|
||||
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
MACHINE=shepard
|
||||
elif [[ "$HOSTNAME" =~ apollo ]]; then
|
||||
module load git
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ apollo ]]; then
|
||||
MACHINE=apollo
|
||||
elif [[ "$HOSTNAME" =~ sullivan ]]; then
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [[ "$HOSTNAME" =~ sullivan ]]; then
|
||||
MACHINE=sullivan
|
||||
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
|
||||
MACHINE=sems
|
||||
else
|
||||
module load git
|
||||
fi
|
||||
|
||||
if [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
MACHINE=sems
|
||||
module load sems-git
|
||||
fi
|
||||
fi
|
||||
|
||||
if [[ "$MACHINE" = "" ]]; then
|
||||
echo "Unrecognized machine" >&2
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "Running on machine: $MACHINE"
|
||||
|
||||
GCC_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
|
||||
IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
ARM_GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
@ -45,7 +72,8 @@ GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits
|
||||
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
CLANG_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
INTEL_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
#CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
CUDA_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Wsign-compare,-Wtype-limits,-Wuninitialized"
|
||||
PGI_WARNING_FLAGS=""
|
||||
|
||||
# Default. Machine specific can override.
|
||||
@ -142,6 +170,18 @@ else
|
||||
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
|
||||
fi
|
||||
|
||||
UNCOMMITTED=`cd ${KOKKOS_PATH}; git status --porcelain 2>/dev/null`
|
||||
if ! [ -z "$UNCOMMITTED" ]; then
|
||||
echo "WARNING!! THE FOLLOWING CHANGES ARE UNCOMMITTED!! :"
|
||||
echo "$UNCOMMITTED"
|
||||
echo ""
|
||||
fi
|
||||
|
||||
GITSTATUS=`cd ${KOKKOS_PATH}; git log -n 1 --format=oneline`
|
||||
echo "Repository Status: " ${GITSTATUS}
|
||||
echo ""
|
||||
echo ""
|
||||
|
||||
#
|
||||
# Machine specific config.
|
||||
#
|
||||
@ -149,7 +189,7 @@ fi
|
||||
if [ "$MACHINE" = "sems" ]; then
|
||||
source /projects/sems/modulefiles/utils/sems-modules-init.sh
|
||||
|
||||
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
|
||||
BASE_MODULE_LIST="sems-env,kokkos-env,kokkos-hwloc/1.10.1/base,sems-<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
|
||||
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
|
||||
|
||||
@ -178,9 +218,9 @@ if [ "$MACHINE" = "sems" ]; then
|
||||
"clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
fi
|
||||
elif [ "$MACHINE" = "white" ]; then
|
||||
@ -191,14 +231,14 @@ elif [ "$MACHINE" = "white" ]; then
|
||||
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
|
||||
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
|
||||
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
|
||||
CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6-BETA"
|
||||
CUDA_MODULE_LIST2="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/6.3.0,ibm/xl/13.1.6"
|
||||
|
||||
# Don't do pthread on white.
|
||||
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
|
||||
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
|
||||
"ibm/13.1.6 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/9.0.103 $CUDA_MODULE_LIST2 $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
@ -281,7 +321,7 @@ elif [ "$MACHINE" = "apollo" ]; then
|
||||
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
|
||||
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
|
||||
|
||||
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44"
|
||||
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/9.0.69"
|
||||
NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
|
||||
|
||||
BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
|
||||
@ -294,13 +334,13 @@ elif [ "$MACHINE" = "apollo" ]; then
|
||||
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
|
||||
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/4.0.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
|
||||
"cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"clang/6.0 $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
|
||||
"cuda/9.1 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
else
|
||||
# Format: (compiler module-list build-list exe-name warning-flag)
|
||||
COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"clang/4.0.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
|
||||
COMPILERS=("cuda/9.1 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"clang/6.0 $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
|
||||
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
|
||||
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
"gcc/4.9.3 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
|
||||
@ -311,13 +351,11 @@ elif [ "$MACHINE" = "apollo" ]; then
|
||||
"intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
|
||||
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
|
||||
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/bin/nvcc_wrapper $CUDA_WARNING_FLAGS"
|
||||
)
|
||||
fi
|
||||
|
||||
if [ -z "$ARCH_FLAG" ]; then
|
||||
ARCH_FLAG="--arch=SNB,Kepler35"
|
||||
ARCH_FLAG="--arch=SNB,Volta70"
|
||||
fi
|
||||
|
||||
NUM_JOBS_TO_RUN_IN_PARALLEL=2
|
||||
@ -700,17 +738,19 @@ wait_summarize_and_exit() {
|
||||
echo $passed_test $(cat $PASSED_DIR/$passed_test)
|
||||
done
|
||||
|
||||
echo "#######################################################"
|
||||
echo "FAILED TESTS"
|
||||
echo "#######################################################"
|
||||
|
||||
local failed_test
|
||||
local -i rv=0
|
||||
for failed_test in $(\ls -1 $FAILED_DIR | sort)
|
||||
do
|
||||
echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
|
||||
rv=$rv+1
|
||||
done
|
||||
if [ "$(ls -A $FAILED_DIR)" ]; then
|
||||
echo "#######################################################"
|
||||
echo "FAILED TESTS"
|
||||
echo "#######################################################"
|
||||
|
||||
local failed_test
|
||||
for failed_test in $(\ls -1 $FAILED_DIR | sort)
|
||||
do
|
||||
echo $failed_test "("$(cat $FAILED_DIR/$failed_test)" failed)"
|
||||
rv=$rv+1
|
||||
done
|
||||
fi
|
||||
|
||||
exit $rv
|
||||
}
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -64,8 +64,8 @@ struct InitViewFunctor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
|
||||
for (unsigned j = 0; j < _inview.extent(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.extent(2); ++k) {
|
||||
_inview(i,j,k) = i/2 -j*j + k/3;
|
||||
}
|
||||
}
|
||||
@ -84,8 +84,8 @@ struct InitViewFunctor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
|
||||
for (unsigned j = 0; j < _inview.extent(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.extent(2); ++k) {
|
||||
_outview(i) += _inview(i,j,k) ;
|
||||
}
|
||||
}
|
||||
@ -104,8 +104,8 @@ struct InitStrideViewFunctor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
|
||||
for (unsigned j = 0; j < _inview.extent(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.extent(2); ++k) {
|
||||
_inview(i,j,k) = i/2 -j*j + k/3;
|
||||
}
|
||||
}
|
||||
@ -123,8 +123,8 @@ struct InitViewRank7Functor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
|
||||
for (unsigned j = 0; j < _inview.extent(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.extent(2); ++k) {
|
||||
_inview(i,j,k,0,0,0,0) = i/2 -j*j + k/3;
|
||||
}
|
||||
}
|
||||
@ -143,8 +143,8 @@ struct InitDynRankViewFunctor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
|
||||
for (unsigned j = 0; j < _inview.extent(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.extent(2); ++k) {
|
||||
_inview(i,j,k) = i/2 -j*j + k/3;
|
||||
}
|
||||
}
|
||||
@ -163,8 +163,8 @@ struct InitDynRankViewFunctor {
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
for (unsigned j = 0; j < _inview.dimension(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.dimension(2); ++k) {
|
||||
for (unsigned j = 0; j < _inview.extent(1); ++j) {
|
||||
for (unsigned k = 0; k < _inview.extent(2); ++k) {
|
||||
_outview(i) += _inview(i,j,k) ;
|
||||
}
|
||||
}
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -76,7 +76,7 @@ struct generate_ids
|
||||
generate_ids( local_id_view & ids)
|
||||
: local_2_global(ids)
|
||||
{
|
||||
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
|
||||
Kokkos::parallel_for(local_2_global.extent(0), *this);
|
||||
}
|
||||
|
||||
|
||||
@ -116,7 +116,7 @@ struct fill_map
|
||||
fill_map( global_id_view gIds, local_id_view lIds)
|
||||
: global_2_local(gIds) , local_2_global(lIds)
|
||||
{
|
||||
Kokkos::parallel_for(local_2_global.dimension_0(), *this);
|
||||
Kokkos::parallel_for(local_2_global.extent(0), *this);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -143,7 +143,7 @@ struct find_test
|
||||
find_test( global_id_view gIds, local_id_view lIds, value_type & num_errors)
|
||||
: global_2_local(gIds) , local_2_global(lIds)
|
||||
{
|
||||
Kokkos::parallel_reduce(local_2_global.dimension_0(), *this, num_errors);
|
||||
Kokkos::parallel_reduce(local_2_global.extent(0), *this, num_errors);
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -34,7 +34,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -147,7 +147,7 @@ public:
|
||||
if (m_last_block_mask) {
|
||||
//clear the unused bits in the last block
|
||||
typedef Kokkos::Impl::DeepCopy< typename execution_space::memory_space, Kokkos::HostSpace > raw_deep_copy;
|
||||
raw_deep_copy( m_blocks.ptr_on_device() + (m_blocks.dimension_0() -1u), &m_last_block_mask, sizeof(unsigned));
|
||||
raw_deep_copy( m_blocks.data() + (m_blocks.extent(0) -1u), &m_last_block_mask, sizeof(unsigned));
|
||||
}
|
||||
}
|
||||
|
||||
@ -212,7 +212,7 @@ public:
|
||||
KOKKOS_FORCEINLINE_FUNCTION
|
||||
unsigned max_hint() const
|
||||
{
|
||||
return m_blocks.dimension_0();
|
||||
return m_blocks.extent(0);
|
||||
}
|
||||
|
||||
/// find a bit set to 1 near the hint
|
||||
@ -221,10 +221,10 @@ public:
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Kokkos::pair<bool, unsigned> find_any_set_near( unsigned hint , unsigned scan_direction = BIT_SCAN_FORWARD_MOVE_HINT_FORWARD ) const
|
||||
{
|
||||
const unsigned block_idx = (hint >> block_shift) < m_blocks.dimension_0() ? (hint >> block_shift) : 0;
|
||||
const unsigned block_idx = (hint >> block_shift) < m_blocks.extent(0) ? (hint >> block_shift) : 0;
|
||||
const unsigned offset = hint & block_mask;
|
||||
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
||||
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1)) ? block : block & m_last_block_mask ;
|
||||
block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1)) ? block : block & m_last_block_mask ;
|
||||
|
||||
return find_any_helper(block_idx, offset, block, scan_direction);
|
||||
}
|
||||
@ -238,7 +238,7 @@ public:
|
||||
const unsigned block_idx = hint >> block_shift;
|
||||
const unsigned offset = hint & block_mask;
|
||||
unsigned block = volatile_load(&m_blocks[ block_idx ]);
|
||||
block = !m_last_block_mask || (block_idx < (m_blocks.dimension_0()-1) ) ? ~block : ~block & m_last_block_mask ;
|
||||
block = !m_last_block_mask || (block_idx < (m_blocks.extent(0)-1) ) ? ~block : ~block & m_last_block_mask ;
|
||||
|
||||
return find_any_helper(block_idx, offset, block, scan_direction);
|
||||
}
|
||||
@ -281,8 +281,8 @@ private:
|
||||
unsigned update_hint( long long block_idx, unsigned offset, unsigned scan_direction ) const
|
||||
{
|
||||
block_idx += scan_direction & MOVE_HINT_BACKWARD ? -1 : 1;
|
||||
block_idx = block_idx >= 0 ? block_idx : m_blocks.dimension_0() - 1;
|
||||
block_idx = block_idx < static_cast<long long>(m_blocks.dimension_0()) ? block_idx : 0;
|
||||
block_idx = block_idx >= 0 ? block_idx : m_blocks.extent(0) - 1;
|
||||
block_idx = block_idx < static_cast<long long>(m_blocks.extent(0)) ? block_idx : 0;
|
||||
|
||||
return static_cast<unsigned>(block_idx)*block_size + offset;
|
||||
}
|
||||
@ -407,7 +407,7 @@ void deep_copy( Bitset<DstDevice> & dst, Bitset<SrcDevice> const& src)
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0));
|
||||
}
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
@ -418,7 +418,7 @@ void deep_copy( Bitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0));
|
||||
}
|
||||
|
||||
template <typename DstDevice, typename SrcDevice>
|
||||
@ -429,7 +429,7 @@ void deep_copy( ConstBitset<DstDevice> & dst, ConstBitset<SrcDevice> const& src)
|
||||
}
|
||||
|
||||
typedef Kokkos::Impl::DeepCopy< typename DstDevice::memory_space, typename SrcDevice::memory_space > raw_deep_copy;
|
||||
raw_deep_copy(dst.m_blocks.ptr_on_device(), src.m_blocks.ptr_on_device(), sizeof(unsigned)*src.m_blocks.dimension_0());
|
||||
raw_deep_copy(dst.m_blocks.data(), src.m_blocks.data(), sizeof(unsigned)*src.m_blocks.extent(0));
|
||||
}
|
||||
|
||||
} // namespace Kokkos
|
||||
|
||||
@ -35,7 +35,7 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
@ -262,14 +262,14 @@ public:
|
||||
modified_host (View<unsigned int,LayoutLeft,typename t_host::execution_space> ("DualView::modified_host"))
|
||||
{
|
||||
if ( int(d_view.rank) != int(h_view.rank) ||
|
||||
d_view.dimension_0() != h_view.dimension_0() ||
|
||||
d_view.dimension_1() != h_view.dimension_1() ||
|
||||
d_view.dimension_2() != h_view.dimension_2() ||
|
||||
d_view.dimension_3() != h_view.dimension_3() ||
|
||||
d_view.dimension_4() != h_view.dimension_4() ||
|
||||
d_view.dimension_5() != h_view.dimension_5() ||
|
||||
d_view.dimension_6() != h_view.dimension_6() ||
|
||||
d_view.dimension_7() != h_view.dimension_7() ||
|
||||
d_view.extent(0) != h_view.extent(0) ||
|
||||
d_view.extent(1) != h_view.extent(1) ||
|
||||
d_view.extent(2) != h_view.extent(2) ||
|
||||
d_view.extent(3) != h_view.extent(3) ||
|
||||
d_view.extent(4) != h_view.extent(4) ||
|
||||
d_view.extent(5) != h_view.extent(5) ||
|
||||
d_view.extent(6) != h_view.extent(6) ||
|
||||
d_view.extent(7) != h_view.extent(7) ||
|
||||
d_view.stride_0() != h_view.stride_0() ||
|
||||
d_view.stride_1() != h_view.stride_1() ||
|
||||
d_view.stride_2() != h_view.stride_2() ||
|
||||
@ -503,6 +503,18 @@ public:
|
||||
/* Realloc on Device */
|
||||
|
||||
::Kokkos::realloc(d_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||
|
||||
const bool sizeMismatch = ( h_view.extent(0) != n0 ) ||
|
||||
( h_view.extent(1) != n1 ) ||
|
||||
( h_view.extent(2) != n2 ) ||
|
||||
( h_view.extent(3) != n3 ) ||
|
||||
( h_view.extent(4) != n4 ) ||
|
||||
( h_view.extent(5) != n5 ) ||
|
||||
( h_view.extent(6) != n6 ) ||
|
||||
( h_view.extent(7) != n7 );
|
||||
if ( sizeMismatch )
|
||||
::Kokkos::resize(h_view,n0,n1,n2,n3,n4,n5,n6,n7);
|
||||
|
||||
t_host temp_view = create_mirror_view( d_view );
|
||||
|
||||
/* Remap on Host */
|
||||
@ -510,6 +522,8 @@ public:
|
||||
|
||||
h_view = temp_view;
|
||||
|
||||
d_view = create_mirror_view( typename t_dev::execution_space(), h_view );
|
||||
|
||||
/* Mark Host copy as modified */
|
||||
modified_host() = modified_host()+1;
|
||||
}
|
||||
@ -530,22 +544,34 @@ public:
|
||||
d_view.stride(stride_);
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
typename std::enable_if< std::is_integral<iType>::value , size_t >::type
|
||||
extent( const iType & r ) const
|
||||
{ return d_view.extent(r); }
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION constexpr
|
||||
typename std::enable_if< std::is_integral<iType>::value , int >::type
|
||||
extent_int( const iType & r ) const
|
||||
{ return static_cast<int>(d_view.extent(r)); }
|
||||
|
||||
/* \brief return size of dimension 0 */
|
||||
size_t dimension_0() const {return d_view.dimension_0();}
|
||||
size_t dimension_0() const {return d_view.extent(0);}
|
||||
/* \brief return size of dimension 1 */
|
||||
size_t dimension_1() const {return d_view.dimension_1();}
|
||||
size_t dimension_1() const {return d_view.extent(1);}
|
||||
/* \brief return size of dimension 2 */
|
||||
size_t dimension_2() const {return d_view.dimension_2();}
|
||||
size_t dimension_2() const {return d_view.extent(2);}
|
||||
/* \brief return size of dimension 3 */
|
||||
size_t dimension_3() const {return d_view.dimension_3();}
|
||||
size_t dimension_3() const {return d_view.extent(3);}
|
||||
/* \brief return size of dimension 4 */
|
||||
size_t dimension_4() const {return d_view.dimension_4();}
|
||||
size_t dimension_4() const {return d_view.extent(4);}
|
||||
/* \brief return size of dimension 5 */
|
||||
size_t dimension_5() const {return d_view.dimension_5();}
|
||||
size_t dimension_5() const {return d_view.extent(5);}
|
||||
/* \brief return size of dimension 6 */
|
||||
size_t dimension_6() const {return d_view.dimension_6();}
|
||||
size_t dimension_6() const {return d_view.extent(6);}
|
||||
/* \brief return size of dimension 7 */
|
||||
size_t dimension_7() const {return d_view.dimension_7();}
|
||||
size_t dimension_7() const {return d_view.extent(7);}
|
||||
|
||||
//@}
|
||||
};
|
||||
|
||||
@ -35,16 +35,16 @@
|
||||
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
//
|
||||
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
|
||||
// Questions? Contact Christian R. Trott (crtrott@sandia.gov)
|
||||
//
|
||||
// ************************************************************************
|
||||
//@HEADER
|
||||
*/
|
||||
|
||||
/// \file Kokkos_DynRankView.hpp
|
||||
/// \brief Declaration and definition of Kokkos::Experimental::DynRankView.
|
||||
/// \brief Declaration and definition of Kokkos::DynRankView.
|
||||
///
|
||||
/// This header file declares and defines Kokkos::Experimental::DynRankView and its
|
||||
/// This header file declares and defines Kokkos::DynRankView and its
|
||||
/// related nonmember functions.
|
||||
|
||||
#ifndef KOKKOS_DYNRANKVIEW_HPP
|
||||
@ -55,7 +55,6 @@
|
||||
#include <type_traits>
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
template< typename DataType , class ... Properties >
|
||||
class DynRankView; //forward declare
|
||||
@ -156,7 +155,7 @@ struct DynRankDimTraits {
|
||||
// Extra overload to match that for specialize types
|
||||
template <typename Traits, typename ... P>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static typename std::enable_if< (std::is_same<typename Traits::array_layout , Kokkos::LayoutRight>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutLeft>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutStride>::value) , typename Traits::array_layout >::type createLayout( const ViewCtorProp<P...>& prop, const typename Traits::array_layout& layout )
|
||||
static typename std::enable_if< (std::is_same<typename Traits::array_layout , Kokkos::LayoutRight>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutLeft>::value || std::is_same<typename Traits::array_layout , Kokkos::LayoutStride>::value) , typename Traits::array_layout >::type createLayout( const Kokkos::Impl::ViewCtorProp<P...>& prop, const typename Traits::array_layout& layout )
|
||||
{
|
||||
return createLayout( layout );
|
||||
}
|
||||
@ -318,7 +317,6 @@ void dyn_rank_view_verify_operator_bounds
|
||||
struct ViewToDynRankViewTag {};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
|
||||
namespace Impl {
|
||||
|
||||
@ -348,7 +346,7 @@ class ViewMapping< DstTraits , SrcTraits ,
|
||||
)
|
||||
)
|
||||
)
|
||||
) , Kokkos::Experimental::Impl::ViewToDynRankViewTag >::type >
|
||||
) , Kokkos::Impl::ViewToDynRankViewTag >::type >
|
||||
{
|
||||
private:
|
||||
|
||||
@ -375,7 +373,7 @@ public:
|
||||
|
||||
template < typename DT , typename ... DP , typename ST , typename ... SP >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static void assign( Kokkos::Experimental::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src )
|
||||
static void assign( Kokkos::DynRankView< DT , DP...> & dst , const Kokkos::View< ST , SP... > & src )
|
||||
{
|
||||
static_assert( is_assignable_value_type
|
||||
, "View assignment must have same value type or const = non-const" );
|
||||
@ -395,8 +393,6 @@ public:
|
||||
|
||||
} //end Impl
|
||||
|
||||
namespace Experimental {
|
||||
|
||||
/* \class DynRankView
|
||||
* \brief Container that creates a Kokkos view with rank determined at runtime.
|
||||
* Essentially this is a rank 7 view
|
||||
@ -415,7 +411,7 @@ namespace Experimental {
|
||||
template< class > struct is_dyn_rank_view : public std::false_type {};
|
||||
|
||||
template< class D, class ... P >
|
||||
struct is_dyn_rank_view< Kokkos::Experimental::DynRankView<D,P...> > : public std::true_type {};
|
||||
struct is_dyn_rank_view< Kokkos::DynRankView<D,P...> > : public std::true_type {};
|
||||
|
||||
|
||||
template< typename DataType , class ... Properties >
|
||||
@ -425,7 +421,7 @@ class DynRankView : public ViewTraits< DataType , Properties ... >
|
||||
|
||||
private:
|
||||
template < class , class ... > friend class DynRankView ;
|
||||
template < class , class ... > friend class Impl::ViewMapping ;
|
||||
template < class , class ... > friend class Kokkos::Impl::ViewMapping ;
|
||||
|
||||
public:
|
||||
typedef ViewTraits< DataType , Properties ... > drvtraits ;
|
||||
@ -437,7 +433,7 @@ public:
|
||||
|
||||
private:
|
||||
typedef Kokkos::Impl::ViewMapping< traits , void > map_type ;
|
||||
typedef Kokkos::Experimental::Impl::SharedAllocationTracker track_type ;
|
||||
typedef Kokkos::Impl::SharedAllocationTracker track_type ;
|
||||
|
||||
track_type m_track ;
|
||||
map_type m_map ;
|
||||
@ -601,7 +597,7 @@ private:
|
||||
// rank of the calling operator - included as first argument in ARG
|
||||
#define KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( ARG ) \
|
||||
DynRankView::template verify_space< Kokkos::Impl::ActiveExecutionMemorySpace >::check(); \
|
||||
Kokkos::Experimental::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ;
|
||||
Kokkos::Impl::dyn_rank_view_verify_operator_bounds< typename traits::memory_space > ARG ;
|
||||
|
||||
#else
|
||||
|
||||
@ -778,6 +774,140 @@ public:
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
}
|
||||
|
||||
// Rank 0
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
reference_type access() const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (0 , this->rank(), m_track, m_map) )
|
||||
return implementation_map().reference();
|
||||
//return m_map.reference(0,0,0,0,0,0,0);
|
||||
}
|
||||
|
||||
// Rank 1
|
||||
// Rank 1 parenthesis
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
|
||||
access(const iType & i0 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) )
|
||||
return m_map.reference(i0);
|
||||
}
|
||||
|
||||
template< typename iType >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< !(std::is_same<typename traits::specialize , void>::value && std::is_integral<iType>::value), reference_type>::type
|
||||
access(const iType & i0 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (1 , this->rank(), m_track, m_map, i0) )
|
||||
return m_map.reference(i0,0,0,0,0,0,0);
|
||||
}
|
||||
|
||||
// Rank 2
|
||||
template< typename iType0 , typename iType1 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) )
|
||||
return m_map.reference(i0,i1);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (2 , this->rank(), m_track, m_map, i0, i1) )
|
||||
return m_map.reference(i0,i1,0,0,0,0,0);
|
||||
}
|
||||
|
||||
// Rank 3
|
||||
template< typename iType0 , typename iType1 , typename iType2 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) )
|
||||
return m_map.reference(i0,i1,i2);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (3 , this->rank(), m_track, m_map, i0, i1, i2) )
|
||||
return m_map.reference(i0,i1,i2,0,0,0,0);
|
||||
}
|
||||
|
||||
// Rank 4
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) )
|
||||
return m_map.reference(i0,i1,i2,i3);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (4 , this->rank(), m_track, m_map, i0, i1, i2, i3) )
|
||||
return m_map.reference(i0,i1,i2,i3,0,0,0);
|
||||
}
|
||||
|
||||
// Rank 5
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) )
|
||||
return m_map.reference(i0,i1,i2,i3,i4);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (5 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4) )
|
||||
return m_map.reference(i0,i1,i2,i3,i4,0,0);
|
||||
}
|
||||
|
||||
// Rank 6
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_same<typename traits::specialize , void>::value && std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) )
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5);
|
||||
}
|
||||
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< !(std::is_same<typename drvtraits::specialize , void>::value && std::is_integral<iType0>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (6 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5) )
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,0);
|
||||
}
|
||||
|
||||
// Rank 7
|
||||
template< typename iType0 , typename iType1 , typename iType2 , typename iType3, typename iType4 , typename iType5 , typename iType6 >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
typename std::enable_if< (std::is_integral<iType0>::value && std::is_integral<iType1>::value && std::is_integral<iType2>::value && std::is_integral<iType3>::value && std::is_integral<iType4>::value && std::is_integral<iType5>::value && std::is_integral<iType6>::value), reference_type>::type
|
||||
access(const iType0 & i0 , const iType1 & i1 , const iType2 & i2 , const iType3 & i3 , const iType4 & i4 , const iType5 & i5 , const iType6 & i6 ) const
|
||||
{
|
||||
KOKKOS_IMPL_VIEW_OPERATOR_VERIFY( (7 , this->rank(), m_track, m_map, i0, i1, i2, i3, i4, i5, i6) )
|
||||
return m_map.reference(i0,i1,i2,i3,i4,i5,i6);
|
||||
}
|
||||
|
||||
#undef KOKKOS_IMPL_VIEW_OPERATOR_VERIFY
|
||||
|
||||
//----------------------------------------
|
||||
@ -830,7 +960,6 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
// Experimental
|
||||
// Copy/Assign View to DynRankView
|
||||
template< class RT , class ... RP >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -840,7 +969,7 @@ public:
|
||||
, m_rank( rhs.Rank )
|
||||
{
|
||||
typedef typename View<RT,RP...>::traits SrcTraits ;
|
||||
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ;
|
||||
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ;
|
||||
static_assert( Mapping::is_assignable , "Incompatible DynRankView copy construction" );
|
||||
Mapping::assign( *this , rhs );
|
||||
}
|
||||
@ -850,7 +979,7 @@ public:
|
||||
DynRankView & operator = ( const View<RT,RP...> & rhs )
|
||||
{
|
||||
typedef typename View<RT,RP...>::traits SrcTraits ;
|
||||
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Experimental::Impl::ViewToDynRankViewTag > Mapping ;
|
||||
typedef Kokkos::Impl::ViewMapping< traits , SrcTraits , Kokkos::Impl::ViewToDynRankViewTag > Mapping ;
|
||||
static_assert( Mapping::is_assignable , "Incompatible View to DynRankView copy assignment" );
|
||||
Mapping::assign( *this , rhs );
|
||||
return *this ;
|
||||
@ -872,8 +1001,8 @@ public:
|
||||
// unused arg_layout dimensions must be set to ~size_t(0) so that rank deduction can properly take place
|
||||
template< class ... P >
|
||||
explicit inline
|
||||
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer
|
||||
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer
|
||||
, typename traits::array_layout
|
||||
>::type const & arg_layout
|
||||
)
|
||||
@ -882,11 +1011,11 @@ public:
|
||||
, m_rank( Impl::DynRankDimTraits<typename traits::specialize>::template computeRank< typename traits::array_layout, P...>(arg_prop, arg_layout) )
|
||||
{
|
||||
// Append layout and spaces if not input
|
||||
typedef Impl::ViewCtorProp< P ... > alloc_prop_input ;
|
||||
typedef Kokkos::Impl::ViewCtorProp< P ... > alloc_prop_input ;
|
||||
|
||||
// use 'std::integral_constant<unsigned,I>' for non-types
|
||||
// to avoid duplicate class error.
|
||||
typedef Impl::ViewCtorProp
|
||||
typedef Kokkos::Impl::ViewCtorProp
|
||||
< P ...
|
||||
, typename std::conditional
|
||||
< alloc_prop_input::has_label
|
||||
@ -931,7 +1060,7 @@ public:
|
||||
#endif
|
||||
//------------------------------------------------------------
|
||||
|
||||
Kokkos::Experimental::Impl::SharedAllocationRecord<> *
|
||||
Kokkos::Impl::SharedAllocationRecord<> *
|
||||
record = m_map.allocate_shared( prop , Impl::DynRankDimTraits<typename traits::specialize>::template createLayout<traits, P...>(arg_prop, arg_layout) );
|
||||
|
||||
//------------------------------------------------------------
|
||||
@ -950,8 +1079,8 @@ public:
|
||||
// Wrappers
|
||||
template< class ... P >
|
||||
explicit KOKKOS_INLINE_FUNCTION
|
||||
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer
|
||||
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer
|
||||
, typename traits::array_layout
|
||||
>::type const & arg_layout
|
||||
)
|
||||
@ -972,8 +1101,8 @@ public:
|
||||
// Simple dimension-only layout
|
||||
template< class ... P >
|
||||
explicit inline
|
||||
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< ! Impl::ViewCtorProp< P... >::has_pointer
|
||||
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< ! Kokkos::Impl::ViewCtorProp< P... >::has_pointer
|
||||
, size_t
|
||||
>::type const arg_N0 = ~size_t(0)
|
||||
, const size_t arg_N1 = ~size_t(0)
|
||||
@ -992,8 +1121,8 @@ public:
|
||||
|
||||
template< class ... P >
|
||||
explicit KOKKOS_INLINE_FUNCTION
|
||||
DynRankView( const Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< Impl::ViewCtorProp< P... >::has_pointer
|
||||
DynRankView( const Kokkos::Impl::ViewCtorProp< P ... > & arg_prop
|
||||
, typename std::enable_if< Kokkos::Impl::ViewCtorProp< P... >::has_pointer
|
||||
, size_t
|
||||
>::type const arg_N0 = ~size_t(0)
|
||||
, const size_t arg_N1 = ~size_t(0)
|
||||
@ -1015,10 +1144,10 @@ public:
|
||||
explicit inline
|
||||
DynRankView( const Label & arg_label
|
||||
, typename std::enable_if<
|
||||
Kokkos::Experimental::Impl::is_view_label<Label>::value ,
|
||||
Kokkos::Impl::is_view_label<Label>::value ,
|
||||
typename traits::array_layout >::type const & arg_layout
|
||||
)
|
||||
: DynRankView( Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout )
|
||||
: DynRankView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label ) , arg_layout )
|
||||
{}
|
||||
|
||||
// Allocate label and layout, must disambiguate from subview constructor
|
||||
@ -1026,7 +1155,7 @@ public:
|
||||
explicit inline
|
||||
DynRankView( const Label & arg_label
|
||||
, typename std::enable_if<
|
||||
Kokkos::Experimental::Impl::is_view_label<Label>::value ,
|
||||
Kokkos::Impl::is_view_label<Label>::value ,
|
||||
const size_t >::type arg_N0 = ~size_t(0)
|
||||
, const size_t arg_N1 = ~size_t(0)
|
||||
, const size_t arg_N2 = ~size_t(0)
|
||||
@ -1036,7 +1165,7 @@ public:
|
||||
, const size_t arg_N6 = ~size_t(0)
|
||||
, const size_t arg_N7 = ~size_t(0)
|
||||
)
|
||||
: DynRankView( Impl::ViewCtorProp< std::string >( arg_label )
|
||||
: DynRankView( Kokkos::Impl::ViewCtorProp< std::string >( arg_label )
|
||||
, typename traits::array_layout
|
||||
( arg_N0 , arg_N1 , arg_N2 , arg_N3 , arg_N4 , arg_N5 , arg_N6 , arg_N7 )
|
||||
)
|
||||
@ -1048,7 +1177,8 @@ public:
|
||||
DynRankView( const ViewAllocateWithoutInitializing & arg_prop
|
||||
, const typename traits::array_layout & arg_layout
|
||||
)
|
||||
: DynRankView( Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing )
|
||||
: DynRankView( Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing )
|
||||
|
||||
, Impl::DynRankDimTraits<typename traits::specialize>::createLayout(arg_layout)
|
||||
)
|
||||
{}
|
||||
@ -1064,7 +1194,7 @@ public:
|
||||
, const size_t arg_N6 = ~size_t(0)
|
||||
, const size_t arg_N7 = ~size_t(0)
|
||||
)
|
||||
: DynRankView(Impl::ViewCtorProp< std::string , Kokkos::Experimental::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::Experimental::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
|
||||
: DynRankView(Kokkos::Impl::ViewCtorProp< std::string , Kokkos::Impl::WithoutInitializing_t >( arg_prop.label , Kokkos::WithoutInitializing ), arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
|
||||
{}
|
||||
|
||||
//----------------------------------------
|
||||
@ -1097,14 +1227,14 @@ public:
|
||||
, const size_t arg_N6 = ~size_t(0)
|
||||
, const size_t arg_N7 = ~size_t(0)
|
||||
)
|
||||
: DynRankView( Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
|
||||
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7 )
|
||||
{}
|
||||
|
||||
explicit KOKKOS_INLINE_FUNCTION
|
||||
DynRankView( pointer_type arg_ptr
|
||||
, typename traits::array_layout & arg_layout
|
||||
)
|
||||
: DynRankView( Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_layout )
|
||||
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(arg_ptr) , arg_layout )
|
||||
{}
|
||||
|
||||
|
||||
@ -1140,7 +1270,7 @@ public:
|
||||
explicit KOKKOS_INLINE_FUNCTION
|
||||
DynRankView( const typename traits::execution_space::scratch_memory_space & arg_space
|
||||
, const typename traits::array_layout & arg_layout )
|
||||
: DynRankView( Impl::ViewCtorProp<pointer_type>(
|
||||
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(
|
||||
reinterpret_cast<pointer_type>(
|
||||
arg_space.get_shmem( map_type::memory_span(
|
||||
Impl::DynRankDimTraits<typename traits::specialize>::createLayout( arg_layout ) //is this correct?
|
||||
@ -1159,7 +1289,7 @@ public:
|
||||
, const size_t arg_N6 = ~size_t(0)
|
||||
, const size_t arg_N7 = ~size_t(0) )
|
||||
|
||||
: DynRankView( Impl::ViewCtorProp<pointer_type>(
|
||||
: DynRankView( Kokkos::Impl::ViewCtorProp<pointer_type>(
|
||||
reinterpret_cast<pointer_type>(
|
||||
arg_space.get_shmem(
|
||||
map_type::memory_span(
|
||||
@ -1190,7 +1320,6 @@ namespace Impl {
|
||||
struct DynRankSubviewTag {};
|
||||
|
||||
} // namespace Impl
|
||||
} // namespace Experimental
|
||||
|
||||
namespace Impl {
|
||||
|
||||
@ -1207,7 +1336,7 @@ struct ViewMapping
|
||||
std::is_same< typename SrcTraits::array_layout
|
||||
, Kokkos::LayoutStride >::value
|
||||
)
|
||||
), Kokkos::Experimental::Impl::DynRankSubviewTag >::type
|
||||
), Kokkos::Impl::DynRankSubviewTag >::type
|
||||
, SrcTraits
|
||||
, Args ... >
|
||||
{
|
||||
@ -1279,11 +1408,11 @@ public:
|
||||
};
|
||||
|
||||
|
||||
typedef Kokkos::Experimental::DynRankView< value_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > ret_type;
|
||||
typedef Kokkos::DynRankView< value_type , array_layout , typename SrcTraits::device_type , typename SrcTraits::memory_traits > ret_type;
|
||||
|
||||
template < typename T , class ... P >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static ret_type subview( const unsigned src_rank , Kokkos::Experimental::DynRankView< T , P...> const & src
|
||||
static ret_type subview( const unsigned src_rank , Kokkos::DynRankView< T , P...> const & src
|
||||
, Args ... args )
|
||||
{
|
||||
|
||||
@ -1351,20 +1480,19 @@ public:
|
||||
|
||||
} // end Impl
|
||||
|
||||
namespace Experimental {
|
||||
|
||||
template< class V , class ... Args >
|
||||
using Subdynrankview = typename Kokkos::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , V , Args... >::ret_type ;
|
||||
using Subdynrankview = typename Kokkos::Impl::ViewMapping< Kokkos::Impl::DynRankSubviewTag , V , Args... >::ret_type ;
|
||||
|
||||
template< class D , class ... P , class ...Args >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Subdynrankview< ViewTraits<D******* , P...> , Args... >
|
||||
subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args...args)
|
||||
subdynrankview( const Kokkos::DynRankView< D , P... > &src , Args...args)
|
||||
{
|
||||
if ( src.rank() > sizeof...(Args) ) //allow sizeof...(Args) >= src.rank(), ignore the remaining args
|
||||
{ Kokkos::abort("subdynrankview: num of args must be >= rank of the source DynRankView"); }
|
||||
|
||||
typedef Kokkos::Impl::ViewMapping< Kokkos::Experimental::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
|
||||
typedef Kokkos::Impl::ViewMapping< Kokkos::Impl::DynRankSubviewTag , Kokkos::ViewTraits< D*******, P... > , Args... > metafcn ;
|
||||
|
||||
return metafcn::subview( src.rank() , src , args... );
|
||||
}
|
||||
@ -1373,16 +1501,14 @@ subdynrankview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args.
|
||||
template< class D , class ... P , class ...Args >
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
Subdynrankview< ViewTraits<D******* , P...> , Args... >
|
||||
subview( const Kokkos::Experimental::DynRankView< D , P... > &src , Args...args)
|
||||
subview( const Kokkos::DynRankView< D , P... > &src , Args...args)
|
||||
{
|
||||
return subdynrankview( src , args... );
|
||||
}
|
||||
|
||||
} // namespace Experimental
|
||||
} // namespace Kokkos
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
// overload == and !=
|
||||
template< class LT , class ... LP , class RT , class ... RP >
|
||||
@ -1422,13 +1548,11 @@ bool operator != ( const DynRankView<LT,LP...> & lhs ,
|
||||
return ! ( operator==(lhs,rhs) );
|
||||
}
|
||||
|
||||
} //end Experimental
|
||||
} //end Kokkos
|
||||
|
||||
//----------------------------------------------------------------------------
|
||||
//----------------------------------------------------------------------------
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
namespace Impl {
|
||||
|
||||
template< class OutputView , typename Enable = void >
|
||||
@ -1455,7 +1579,7 @@ struct DynRankViewFill {
|
||||
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) {
|
||||
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) {
|
||||
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) {
|
||||
output(i0,i1,i2,i3,i4,i5,i6) = input ;
|
||||
output.access(i0,i1,i2,i3,i4,i5,i6) = input ;
|
||||
}}}}}}
|
||||
}
|
||||
|
||||
@ -1498,14 +1622,14 @@ struct DynRankViewRemap {
|
||||
|
||||
DynRankViewRemap( const OutputView & arg_out , const InputView & arg_in )
|
||||
: output( arg_out ), input( arg_in )
|
||||
, n0( std::min( (size_t)arg_out.dimension_0() , (size_t)arg_in.dimension_0() ) )
|
||||
, n1( std::min( (size_t)arg_out.dimension_1() , (size_t)arg_in.dimension_1() ) )
|
||||
, n2( std::min( (size_t)arg_out.dimension_2() , (size_t)arg_in.dimension_2() ) )
|
||||
, n3( std::min( (size_t)arg_out.dimension_3() , (size_t)arg_in.dimension_3() ) )
|
||||
, n4( std::min( (size_t)arg_out.dimension_4() , (size_t)arg_in.dimension_4() ) )
|
||||
, n5( std::min( (size_t)arg_out.dimension_5() , (size_t)arg_in.dimension_5() ) )
|
||||
, n6( std::min( (size_t)arg_out.dimension_6() , (size_t)arg_in.dimension_6() ) )
|
||||
, n7( std::min( (size_t)arg_out.dimension_7() , (size_t)arg_in.dimension_7() ) )
|
||||
, n0( std::min( (size_t)arg_out.extent(0) , (size_t)arg_in.extent(0) ) )
|
||||
, n1( std::min( (size_t)arg_out.extent(1) , (size_t)arg_in.extent(1) ) )
|
||||
, n2( std::min( (size_t)arg_out.extent(2) , (size_t)arg_in.extent(2) ) )
|
||||
, n3( std::min( (size_t)arg_out.extent(3) , (size_t)arg_in.extent(3) ) )
|
||||
, n4( std::min( (size_t)arg_out.extent(4) , (size_t)arg_in.extent(4) ) )
|
||||
, n5( std::min( (size_t)arg_out.extent(5) , (size_t)arg_in.extent(5) ) )
|
||||
, n6( std::min( (size_t)arg_out.extent(6) , (size_t)arg_in.extent(6) ) )
|
||||
, n7( std::min( (size_t)arg_out.extent(7) , (size_t)arg_in.extent(7) ) )
|
||||
{
|
||||
typedef Kokkos::RangePolicy< ExecSpace > Policy ;
|
||||
const Kokkos::Impl::ParallelFor< DynRankViewRemap , Policy > closure( *this , Policy( 0 , n0 ) );
|
||||
@ -1521,18 +1645,16 @@ struct DynRankViewRemap {
|
||||
for ( size_t i4 = 0 ; i4 < n4 ; ++i4 ) {
|
||||
for ( size_t i5 = 0 ; i5 < n5 ; ++i5 ) {
|
||||
for ( size_t i6 = 0 ; i6 < n6 ; ++i6 ) {
|
||||
output(i0,i1,i2,i3,i4,i5,i6) = input(i0,i1,i2,i3,i4,i5,i6);
|
||||
output.access(i0,i1,i2,i3,i4,i5,i6) = input.access(i0,i1,i2,i3,i4,i5,i6);
|
||||
}}}}}}
|
||||
}
|
||||
};
|
||||
|
||||
} /* namespace Impl */
|
||||
} /* namespace Experimental */
|
||||
} /* namespace Kokkos */
|
||||
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
/** \brief Deep copy a value from Host memory into a view. */
|
||||
template< class DT , class ... DP >
|
||||
@ -1549,7 +1671,7 @@ void deep_copy
|
||||
typename ViewTraits<DT,DP...>::value_type >::value
|
||||
, "deep_copy requires non-const type" );
|
||||
|
||||
Kokkos::Experimental::Impl::DynRankViewFill< DynRankView<DT,DP...> >( dst , value );
|
||||
Kokkos::Impl::DynRankViewFill< DynRankView<DT,DP...> >( dst , value );
|
||||
}
|
||||
|
||||
/** \brief Deep copy into a value in Host memory from a view. */
|
||||
@ -1585,7 +1707,7 @@ void deep_copy
|
||||
std::is_same< typename DstType::traits::specialize , void >::value &&
|
||||
std::is_same< typename SrcType::traits::specialize , void >::value
|
||||
&&
|
||||
( Kokkos::Experimental::is_dyn_rank_view<DstType>::value || Kokkos::Experimental::is_dyn_rank_view<SrcType>::value)
|
||||
( Kokkos::is_dyn_rank_view<DstType>::value || Kokkos::is_dyn_rank_view<SrcType>::value)
|
||||
)>::type * = 0 )
|
||||
{
|
||||
static_assert(
|
||||
@ -1641,14 +1763,15 @@ void deep_copy
|
||||
dst.span_is_contiguous() &&
|
||||
src.span_is_contiguous() &&
|
||||
dst.span() == src.span() &&
|
||||
dst.dimension_0() == src.dimension_0() &&
|
||||
dst.dimension_1() == src.dimension_1() &&
|
||||
dst.dimension_2() == src.dimension_2() &&
|
||||
dst.dimension_3() == src.dimension_3() &&
|
||||
dst.dimension_4() == src.dimension_4() &&
|
||||
dst.dimension_5() == src.dimension_5() &&
|
||||
dst.dimension_6() == src.dimension_6() &&
|
||||
dst.dimension_7() == src.dimension_7() ) {
|
||||
dst.extent(0) == src.extent(0) &&
|
||||
|
||||
dst.extent(1) == src.extent(1) &&
|
||||
dst.extent(2) == src.extent(2) &&
|
||||
dst.extent(3) == src.extent(3) &&
|
||||
dst.extent(4) == src.extent(4) &&
|
||||
dst.extent(5) == src.extent(5) &&
|
||||
dst.extent(6) == src.extent(6) &&
|
||||
dst.extent(7) == src.extent(7) ) {
|
||||
|
||||
const size_t nbytes = sizeof(typename dst_type::value_type) * dst.span();
|
||||
|
||||
@ -1673,14 +1796,14 @@ void deep_copy
|
||||
dst.span_is_contiguous() &&
|
||||
src.span_is_contiguous() &&
|
||||
dst.span() == src.span() &&
|
||||
dst.dimension_0() == src.dimension_0() &&
|
||||
dst.dimension_1() == src.dimension_1() &&
|
||||
dst.dimension_2() == src.dimension_2() &&
|
||||
dst.dimension_3() == src.dimension_3() &&
|
||||
dst.dimension_4() == src.dimension_4() &&
|
||||
dst.dimension_5() == src.dimension_5() &&
|
||||
dst.dimension_6() == src.dimension_6() &&
|
||||
dst.dimension_7() == src.dimension_7() &&
|
||||
dst.extent(0) == src.extent(0) &&
|
||||
dst.extent(1) == src.extent(1) &&
|
||||
dst.extent(2) == src.extent(2) &&
|
||||
dst.extent(3) == src.extent(3) &&
|
||||
dst.extent(4) == src.extent(4) &&
|
||||
dst.extent(5) == src.extent(5) &&
|
||||
dst.extent(6) == src.extent(6) &&
|
||||
dst.extent(7) == src.extent(7) &&
|
||||
dst.stride_0() == src.stride_0() &&
|
||||
dst.stride_1() == src.stride_1() &&
|
||||
dst.stride_2() == src.stride_2() &&
|
||||
@ -1697,11 +1820,11 @@ void deep_copy
|
||||
}
|
||||
else if ( DstExecCanAccessSrc ) {
|
||||
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
|
||||
Kokkos::Experimental::Impl::DynRankViewRemap< dst_type , src_type >( dst , src );
|
||||
Kokkos::Impl::DynRankViewRemap< dst_type , src_type >( dst , src );
|
||||
}
|
||||
else if ( SrcExecCanAccessDst ) {
|
||||
// Copying data between views in accessible memory spaces and either non-contiguous or incompatible shape.
|
||||
Kokkos::Experimental::Impl::DynRankViewRemap< dst_type , src_type , src_execution_space >( dst , src );
|
||||
Kokkos::Impl::DynRankViewRemap< dst_type , src_type , src_execution_space >( dst , src );
|
||||
}
|
||||
else {
|
||||
Kokkos::Impl::throw_runtime_exception("deep_copy given views that would require a temporary allocation");
|
||||
@ -1709,7 +1832,6 @@ void deep_copy
|
||||
}
|
||||
}
|
||||
|
||||
} //end Experimental
|
||||
} //end Kokkos
|
||||
|
||||
|
||||
@ -1717,8 +1839,6 @@ void deep_copy
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
|
||||
namespace Impl {
|
||||
|
||||
|
||||
@ -1726,7 +1846,7 @@ namespace Impl {
|
||||
template<class Space, class T, class ... P>
|
||||
struct MirrorDRViewType {
|
||||
// The incoming view_type
|
||||
typedef typename Kokkos::Experimental::DynRankView<T,P...> src_view_type;
|
||||
typedef typename Kokkos::DynRankView<T,P...> src_view_type;
|
||||
// The memory space for the mirror view
|
||||
typedef typename Space::memory_space memory_space;
|
||||
// Check whether it is the same memory space
|
||||
@ -1736,7 +1856,7 @@ struct MirrorDRViewType {
|
||||
// The data type (we probably want it non-const since otherwise we can't even deep_copy to it).
|
||||
typedef typename src_view_type::non_const_data_type data_type;
|
||||
// The destination view type if it is not the same memory space
|
||||
typedef Kokkos::Experimental::DynRankView<data_type,array_layout,Space> dest_view_type;
|
||||
typedef Kokkos::DynRankView<data_type,array_layout,Space> dest_view_type;
|
||||
// If it is the same memory_space return the existing view_type
|
||||
// This will also keep the unmanaged trait if necessary
|
||||
typedef typename std::conditional<is_same_memspace,src_view_type,dest_view_type>::type view_type;
|
||||
@ -1745,7 +1865,7 @@ struct MirrorDRViewType {
|
||||
template<class Space, class T, class ... P>
|
||||
struct MirrorDRVType {
|
||||
// The incoming view_type
|
||||
typedef typename Kokkos::Experimental::DynRankView<T,P...> src_view_type;
|
||||
typedef typename Kokkos::DynRankView<T,P...> src_view_type;
|
||||
// The memory space for the mirror view
|
||||
typedef typename Space::memory_space memory_space;
|
||||
// Check whether it is the same memory space
|
||||
@ -1755,12 +1875,11 @@ struct MirrorDRVType {
|
||||
// The data type (we probably want it non-const since otherwise we can't even deep_copy to it).
|
||||
typedef typename src_view_type::non_const_data_type data_type;
|
||||
// The destination view type if it is not the same memory space
|
||||
typedef Kokkos::Experimental::DynRankView<data_type,array_layout,Space> view_type;
|
||||
typedef Kokkos::DynRankView<data_type,array_layout,Space> view_type;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
|
||||
template< class T , class ... P >
|
||||
inline
|
||||
typename DynRankView<T,P...>::HostMirror
|
||||
@ -1799,7 +1918,7 @@ create_mirror( const DynRankView<T,P...> & src
|
||||
|
||||
// Create a mirror in a new space (specialization for different space)
|
||||
template<class Space, class T, class ... P>
|
||||
typename Impl::MirrorDRVType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src) {
|
||||
typename Impl::MirrorDRVType<Space,T,P ...>::view_type create_mirror(const Space& , const Kokkos::DynRankView<T,P...> & src) {
|
||||
return typename Impl::MirrorDRVType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) );
|
||||
}
|
||||
|
||||
@ -1836,13 +1955,13 @@ create_mirror_view( const DynRankView<T,P...> & src
|
||||
)>::type * = 0
|
||||
)
|
||||
{
|
||||
return Kokkos::Experimental::create_mirror( src );
|
||||
return Kokkos::create_mirror( src );
|
||||
}
|
||||
|
||||
// Create a mirror view in a new space (specialization for same space)
|
||||
template<class Space, class T, class ... P>
|
||||
typename Impl::MirrorDRViewType<Space,T,P ...>::view_type
|
||||
create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src
|
||||
create_mirror_view(const Space& , const Kokkos::DynRankView<T,P...> & src
|
||||
, typename std::enable_if<Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
|
||||
return src;
|
||||
}
|
||||
@ -1850,12 +1969,11 @@ create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...
|
||||
// Create a mirror view in a new space (specialization for different space)
|
||||
template<class Space, class T, class ... P>
|
||||
typename Impl::MirrorDRViewType<Space,T,P ...>::view_type
|
||||
create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...> & src
|
||||
create_mirror_view(const Space& , const Kokkos::DynRankView<T,P...> & src
|
||||
, typename std::enable_if<!Impl::MirrorDRViewType<Space,T,P ...>::is_same_memspace>::type* = 0 ) {
|
||||
return typename Impl::MirrorDRViewType<Space,T,P ...>::view_type(src.label(), Impl::reconstructLayout(src.layout(), src.rank()) );
|
||||
}
|
||||
|
||||
} //end Experimental
|
||||
} //end Kokkos
|
||||
|
||||
|
||||
@ -1863,7 +1981,6 @@ create_mirror_view(const Space& , const Kokkos::Experimental::DynRankView<T,P...
|
||||
//----------------------------------------------------------------------------
|
||||
|
||||
namespace Kokkos {
|
||||
namespace Experimental {
|
||||
/** \brief Resize a view with copying old data to new data at the corresponding indices. */
|
||||
template< class T , class ... P >
|
||||
inline
|
||||
@ -1877,13 +1994,13 @@ void resize( DynRankView<T,P...> & v ,
|
||||
const size_t n6 = ~size_t(0) ,
|
||||
const size_t n7 = ~size_t(0) )
|
||||
{
|
||||
typedef DynRankView<T,P...> drview_type ;
|
||||
typedef DynRankView<T,P...> drview_type ;
|
||||
|
||||
static_assert( Kokkos::ViewTraits<T,P...>::is_managed , "Can only resize managed views" );
|
||||
|
||||
drview_type v_resized( v.label(), n0, n1, n2, n3, n4, n5, n6 );
|
||||
|
||||
Kokkos::Experimental::Impl::DynRankViewRemap< drview_type , drview_type >( v_resized, v );
|
||||
Kokkos::Impl::DynRankViewRemap< drview_type , drview_type >( v_resized, v );
|
||||
|
||||
v = v_resized ;
|
||||
}
|
||||
@ -1911,25 +2028,7 @@ void realloc( DynRankView<T,P...> & v ,
|
||||
v = drview_type( label, n0, n1, n2, n3, n4, n5, n6 );
|
||||
}
|
||||
|
||||
} //end Experimental
|
||||
|
||||
} //end Kokkos
|
||||
|
||||
using Kokkos::Experimental::is_dyn_rank_view ;
|
||||
|
||||
namespace Kokkos {
|
||||
|
||||
template< typename D , class ... P >
|
||||
using DynRankView = Kokkos::Experimental::DynRankView< D , P... > ;
|
||||
|
||||
using Kokkos::Experimental::deep_copy ;
|
||||
using Kokkos::Experimental::create_mirror ;
|
||||
using Kokkos::Experimental::create_mirror_view ;
|
||||
using Kokkos::Experimental::subdynrankview ;
|
||||
using Kokkos::Experimental::subview ;
|
||||
using Kokkos::Experimental::resize ;
|
||||
using Kokkos::Experimental::realloc ;
|
||||
|
||||
} //end Kokkos
|
||||
#endif
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user