Compare commits

...

108 Commits

Author SHA1 Message Date
f8506fee23 sync GHub with SVN 2017-01-26 14:06:43 -07:00
18e5584311 Merge pull request #354 from stanmoore1/kokkos_bugfixes
Kokkos bugfixes
2017-01-26 13:51:47 -07:00
851f80464f Merge pull request #361 from akohlmey/user-omp-fix-per-atom-data
fix USER-OMP bug on per-atom data with hybrid styles
2017-01-26 13:50:13 -07:00
5971d4c994 Merge pull request #358 from ibaned/warnings
warning fixes (Kokkos+CUDA)
2017-01-26 13:49:56 -07:00
868d95f0a5 Merge pull request #352 from akohlmey/fix-skip-with-ghost-issue
Fix skip with ghost issue
2017-01-26 13:47:12 -07:00
a5ff35435a Merge pull request #351 from timattox/USER-DPD_pair_exp6_bugfix
USER-DPD: Possible uninitialized variable in pair_exp6_rx.cpp bugfix.
2017-01-26 13:45:45 -07:00
8b7bd9d88e fix bug where per atom data for USER-OMP was reducing the wrong arrays with hybrid styles 2017-01-26 14:59:10 -05:00
672bbbe494 add more missing KOKKOS_INLINE_FUNCTION attributes 2017-01-25 16:03:11 -07:00
03c9c46533 add missing KOKKOS_INLINE_FUNCTION attributes 2017-01-25 15:49:05 -07:00
e992bfe510 remove unused variable 2017-01-25 15:40:52 -07:00
053ee54a27 remove unused variable 2017-01-25 15:38:41 -07:00
1074c6734b add missing return keywords 2017-01-25 15:37:27 -07:00
60b48c9d66 add missing KOKKOS_INLINE_FUNCTION attributes
this structure gets put inside a DualView,
so these members need to be able to execute
on the GPU
2017-01-25 15:36:24 -07:00
3d40b51708 remove unused variable 2017-01-25 15:24:52 -07:00
effbe18c46 fix domain boundary indexing
the compiler pointed out that
boundary[2][2] doesn't exist.
If I understand this correctly,
those checks should be against
boundary[*][0].
2017-01-25 15:24:01 -07:00
6328beb7d7 fix double-return warning
this #ifdef adds a return statement
for little endian machines, but leaves
the old one, which the compiler comlains
is unreachable. this commit combines
the conditionals so we can use #else
2017-01-25 15:22:42 -07:00
26c8d3d98f Fixing GPU memory issue in fix_langevin_kokkos 2017-01-25 12:53:55 -07:00
73177d650d Fixing GPU memory issue in domain_kokkos 2017-01-25 11:18:03 -07:00
b5cb74bd33 skip list build is compatible with NP_GHOST 2017-01-23 19:21:48 -05:00
31976d1dee skip list definition was missing NP_HALFFULL flag 2017-01-23 19:20:05 -05:00
c8260af37c Possible uninitialized variable in USER-DPD/pair_exp6_rx.cpp bugfix.
Added explicit initialization (to zero) for several variables inside the
inner j-loop to avoid using them uninitialized or from prior iterations
within rmOldij_12 == 0.
2017-01-23 13:34:51 -05:00
caea8973a3 add neighbor list kind output to screen 2017-01-20 13:24:09 -07:00
aa0ad9b483 Merge pull request #349 from akohlmey/collected-small-fixes
collected fixes and improvements
2017-01-20 13:19:43 -07:00
5d0e4e1ba9 Merge pull request #346 from stanmoore1/kokkos_fixes
Kokkos fixes
2017-01-20 13:15:16 -07:00
f8d3c4c740 Merge pull request #345 from timattox/USER-DPD_another_zero_compute
USER-DPD another zero compute optimization
2017-01-20 13:14:59 -07:00
e6996121d1 remove dead code 2017-01-20 14:30:46 -05:00
fbfb1df5eb fix typo causing wrong neighbor list copy selections 2017-01-19 20:47:10 -05:00
9a299875da simplified neighbor list copying to avoid possible same-timestep re-build issues 2017-01-19 17:01:15 -07:00
fc94f1bd18 Fixing GPU memory issues in Kokkos 2017-01-19 12:14:25 -07:00
5ce8e2fbae Fixing GPU memory issue in modify_kokkos, need to cherry pick back to Master 2017-01-19 12:13:48 -07:00
f6cd98636b USER-DPD: Also apply "check if a0 is zero" optimization to pair_dpd_fdt
This relates to commit 4eb08a5822 that was applied to pair_dpd_fdt_energy
2017-01-18 16:17:11 -05:00
05cafb716f USER-DPD: cleanup initialization of splitFDT_flag in pair_dpd_fdt.cpp 2017-01-18 15:51:50 -05:00
3af4b3c28c Merge pull request #337 from ohenrich/user-cgdna
Added source code and documentation for USER-CGDNA
2017-01-18 11:31:35 -07:00
7fc0970587 Merge pull request #344 from timattox/USER-DPD_zero_compute
USER-DPD: Skip a0*stuff computations, if a0 was set to zero in pair_coeff
2017-01-18 11:31:14 -07:00
93262b52b4 Merge pull request #343 from timattox/USER-DPD_bugfix_molecule
USER-DPD: bugfix for a segfault when using MOLECULE and DPD together.
2017-01-18 11:30:58 -07:00
4eb08a5822 USER-DPD: Skip a0*stuff computations, if a0 was set to zero in pair_coeff.
This saves around 10% of the runtime for many of our tests using SSA.
2017-01-17 15:55:39 -05:00
01609f55e2 USER-DPD: bugfix for a segfault when using MOLECULE and DPD together. 2017-01-17 12:47:59 -05:00
d2fc88a626 patch 17Jan17 2017-01-17 10:14:53 -07:00
c52a26382f Merge pull request #339 from akohlmey/fixes-for-srp-example
Fixes for srp example
2017-01-17 09:36:28 -07:00
ad4d299975 Merge pull request #335 from stanmoore1/neighbor_fixes
Neighbor fixes
2017-01-17 09:33:25 -07:00
83408b195f Merge pull request #342 from epfl-cosmo/ipi-multiinit-bug
Bugfix in the fix_ipi initialization - prevents multiple open_socket calls
2017-01-17 09:14:03 -07:00
cd7bdf9251 Merge pull request #341 from stanmoore1/qeq_kk_neighlist
Make fix_qeq_reax_kokkos request its own neighbor list
2017-01-17 09:13:47 -07:00
8c5b108900 Merge pull request #340 from stanmoore1/fix_rx_neighborlist
Make fix_rx request its own neighbor list
2017-01-17 09:13:27 -07:00
c19d2011bb Merge pull request #334 from sstrong99/flow_gauss_changeRef
Updated the reference for the flow/gauss method
2017-01-17 09:12:22 -07:00
973bef4d45 Merge pull request #332 from akohlmey/coord-atom-orientorder-atom-enhancements
Coord atom orientorder atom enhancements
2017-01-17 09:11:45 -07:00
1b9e50c8cb Merge pull request #331 from timattox/USER-DPD_fix_example_typo
USER-DPD: fix a typo in the DPD-H example input; update reference output.
2017-01-17 09:08:14 -07:00
252e07e083 Merge pull request #330 from akohlmey/collected-small-bugfixes
Collected small bugfixes
2017-01-17 09:08:00 -07:00
74a661ae26 Merge pull request #328 from akohlmey/print-last-command-on-error
print the last input line, when error->all() is called
2017-01-17 09:05:19 -07:00
d8bc590aaf Merge pull request #327 from stanmoore1/kokkos_lib_update
Updating Kokkos lib
2017-01-17 09:04:12 -07:00
c9bea60710 Merge pull request #326 from Pakketeretet2/github-tutorial-update
Updated images of succesful merge.
2017-01-17 09:03:46 -07:00
5cd856c97f fix spring doc page update 2017-01-17 09:02:56 -07:00
2f13365cf5 avoid spurious error message, when no storage fix is active/used 2017-01-16 17:08:00 -05:00
0a2b78acb8 rather than adjusting the communication cutoff, we just print out the minimum value needed and error out
i suspect, this communication cutoff adjustment was included into the code before it was possible to separately set it via comm_modify. stopping with an error message printing the needed/current value is cleaner, in keeping with other modules in LAMMPS and much less problematic.
2017-01-16 15:47:02 -05:00
3f46b6d782 fix bugs from incorrect code synchronization 2017-01-16 11:15:54 -05:00
5abd6e5122 reordering operations in Pair::init_style() to avoid segfaults w/o a kspace style 2017-01-16 11:08:48 -05:00
f3a82f454e Included a flag to prevent multiple open_socket calls if run is included multiple times in the LAMMPS input 2017-01-16 08:42:23 +01:00
473a3ebeef fix for bug with compute rdf with pair reax/c. we must not copy a neighbor list, if newton settings are not compatible
an alternate route to address this issue would be to allow an "ANY" setting for neighbor list requests and then query the neighbor list for newton setting instead of the force class.
2017-01-15 12:05:19 -05:00
b220850377 Removing neighbor list hack in fix_qeq_reax_kokkos 2017-01-14 16:16:02 -07:00
fa00e0593f Make fix_rx request its own neighbor list 2017-01-14 15:39:37 -07:00
4a09399dc6 during setup, checking timestep doesn't seem to be sufficient. comparing bins and stencil point, too.
in addition, relevant pointers were not properly initialized to NULL
2017-01-14 17:13:22 -05:00
5821fe8dd5 correct out-of-bounds accesses 2017-01-14 17:06:23 -05:00
8360e70f4e update USER-CGDNA examples to follow LAMMPS style 2017-01-13 18:56:45 -05:00
b988b29413 remove dead code 2017-01-13 18:43:35 -05:00
5d48bfdcab USER-CGDNA whitespace cleanup: expand tabs and remove trailing whitespace 2017-01-13 18:40:34 -05:00
fe8caa8a56 apply some LAMMPS formatting style conventions for include files 2017-01-13 18:33:32 -05:00
afaacc6173 add USER-CGDNA package with dependencies into the build system 2017-01-13 18:32:32 -05:00
98ceb6feb1 add missing html files to lammps.book 2017-01-13 18:11:23 -05:00
374abea0f0 some minor documentation integration tweaks for USER-CGDNA package 2017-01-13 18:09:45 -05:00
61cff85435 avoid not only division by zero, but also computing variance for short runs with insufficient resolution 2017-01-13 14:35:35 -05:00
aa0b327f7e Merge branch 'bugfix_dividebyzero' of https://github.com/timattox/lammps_USER-DPD into collected-small-bugfixes 2017-01-13 14:26:10 -05:00
04fe071968 Merge pull request #6 from ibaned/cuda-lj-ctor-warning
fix a CUDA constructor warning
2017-01-13 12:13:43 -07:00
78498715b4 Protect from divide by zero in mpi_timings() when printing results.
e.g. If neighbor list(s) are never rebuilt, the Neigh time will be zero.
2017-01-13 13:32:15 -05:00
96259ea2d2 Added source code and documentation for USER-CGDNA 2017-01-13 13:36:54 +00:00
b2f67fea30 Merge branch 'collected-small-bugfixes' of github.com:akohlmey/lammps into collected-small-bugfixes 2017-01-13 08:12:10 -05:00
c59bcf31d1 change $MKLROOT to $(MKLROOT) as reported by @WeiLiPenguin
This closes #336
2017-01-13 08:10:51 -05:00
2540fc281c Merge branch 'flow_gauss_changeRef' of github.com:sstrong99/lammps into pull-334 2017-01-12 23:54:52 -05:00
e8e03dd440 Updated the reference for the flow/gauss method, the new reference is much more comprehensive 2017-01-12 23:44:33 -05:00
daf766d4f8 Fixing Kokkos neighbor bug 2017-01-12 16:22:38 -07:00
630783c8e8 Fixing neighbor bug 2017-01-12 16:22:24 -07:00
c94030d966 put pair_lj_coul in kokkos_type.h
also rename pair_lj_coul_gromacs
so it doesn't conflict with the
one now in kokkos_type.h
2017-01-12 13:37:53 -07:00
1229f6f60b Updated the reference for the flow/gauss method, the new reference is much more comprehensive 2017-01-12 10:15:18 -07:00
0b081b0086 whitespace cleanup 2017-01-11 21:05:32 -05:00
8e1cf6643c apply bugfix to fix wall/gran by eric_lyster@agilent.com on lammps-users 2017-01-11 20:59:40 -05:00
6950a99162 Revert "remove obsolete warning about fix rigid image flag restrictions"
This reverts commit 51e52b477a.
2017-01-11 19:49:58 -05:00
9f4e5e0661 fix a CUDA constructor warning
The class params_lj_coul was copy-pasted
into many different pair styles, and only
one of them had the proper KOKKOS_INLINE_FUNCTION
annotations for CUDA.
created a header file for this class that
most of the pair styles now include.
One pair style did add extra members,
so it keeps a local copy of the class.
2017-01-11 09:11:35 -07:00
34cb4027df make formatting comment consistent 2017-01-11 07:46:07 -05:00
1d0e600ab7 formatting improvements and small corrections for timer settings and output discussions 2017-01-10 23:47:14 -05:00
7162cafdf5 Squelching output from Makefile 2017-01-10 14:46:30 -07:00
ee9e7cfbd5 Fixing Kokkos CUDA Makefile issue 2017-01-10 13:22:36 -07:00
7839c335da Fixing compile error with Kokkos CUDA Makefiles 2017-01-10 13:05:00 -07:00
622d926849 adapt example inputs for TAD and PRD to the change in compute coord/atom 2017-01-10 13:41:35 -05:00
92d15d4a89 replace string compare with enums, fix memory leak, formatting cleanup 2017-01-10 12:52:37 -05:00
95706ac846 import contributed code for computes coord/atom and orientorder/atom 2017-01-10 12:29:22 -05:00
d06688bb91 USER-DPD: fix a typo in the DPD-H example input; update reference output. 2017-01-10 12:11:20 -05:00
d014e00e53 ignore some newly added styles from packages. 2017-01-09 17:51:38 -05:00
0db2a07993 another workaround for duplicate labels (which sphinx does not like) 2017-01-09 17:51:19 -05:00
33412c76ed correct some formatting issues with USER-NC-DUMP 2017-01-09 17:50:49 -05:00
e5ac49d1de Merge branch 'master' into collected-small-bugfixes 2017-01-09 17:13:46 -05:00
1a81da0f73 print the last input line, when error->all() is called
this should help tracking down input file errors for many
common cases without having to repeat the run with -echo screen
and avoid having to explain how to use that feature all the time
2017-01-09 17:03:06 -05:00
c31f1e9f22 add fix mscg command, example, lib 2017-01-09 13:36:40 -07:00
ebd25cc078 Updating docs for Kokkos package 2017-01-09 12:40:33 -07:00
9250a55923 Adding enable_lambda to KOKKOS_CUDA_OPTIONS 2017-01-09 12:24:30 -07:00
a9f0b7d523 Updating Kokkos lib 2017-01-09 10:39:46 -07:00
20f8a8c219 Merge branch 'master' into github-tutorial-update 2017-01-09 14:38:09 +01:00
09af780aa8 remove misleading comments 2017-01-06 21:31:39 -05:00
51e52b477a remove obsolete warning about fix rigid image flag restrictions 2017-01-06 21:30:33 -05:00
20a4e365b7 reduce warning when processing manual with sphinx 2017-01-06 21:30:13 -05:00
ccd09e3967 Updated images of succesful merge. 2017-01-06 19:04:26 +01:00
566 changed files with 58045 additions and 28520 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 3.8 KiB

View File

@ -0,0 +1,10 @@
\documentclass[12pt]{article}
\pagestyle{empty}
\begin{document}
$$
E = - \frac{\epsilon}{2} \ln \left[ 1 - \left(\frac{r-r0}{\Delta}\right)^2\right]
$$
\end{document}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 57 KiB

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="6 Jan 2017 version">
<META NAME="docnumber" CONTENT="26 Jan 2017 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
<H1></H1>
LAMMPS Documentation :c,h3
6 Jan 2017 version :c,h4
26 Jan 2017 version :c,h4
Version info: :h4

Binary file not shown.

View File

@ -583,6 +583,7 @@ USER-INTEL, k = KOKKOS, o = USER-OMP, t = OPT.
"lineforce"_fix_lineforce.html,
"momentum (k)"_fix_momentum.html,
"move"_fix_move.html,
"mscg"_fix_mscg.html,
"msst"_fix_msst.html,
"neb"_fix_neb.html,
"nph (ko)"_fix_nh.html,
@ -701,6 +702,8 @@ package"_Section_start.html#start_3.
"meso"_fix_meso.html,
"manifoldforce"_fix_manifoldforce.html,
"meso/stationary"_fix_meso_stationary.html,
"nve/dot"_fix_nve_dot.html,
"nve/dotc/langevin"_fix_nve_dotc_langevin.html,
"nve/manifold/rattle"_fix_nve_manifold_rattle.html,
"nvk"_fix_nvk.html,
"nvt/manifold/rattle"_fix_nvt_manifold_rattle.html,
@ -918,7 +921,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"dpd (go)"_pair_dpd.html,
"dpd/tstat (go)"_pair_dpd.html,
"dsmc"_pair_dsmc.html,
"eam (gkot)"_pair_eam.html,
"eam (gkiot)"_pair_eam.html,
"eam/alloy (gkot)"_pair_eam.html,
"eam/fs (gkot)"_pair_eam.html,
"eim (o)"_pair_eim.html,
@ -1034,6 +1037,11 @@ package"_Section_start.html#start_3.
"morse/soft"_pair_morse.html,
"multi/lucy"_pair_multi_lucy.html,
"multi/lucy/rx"_pair_multi_lucy_rx.html,
"oxdna/coaxstk"_pair_oxdna.html,
"oxdna/excv"_pair_oxdna.html,
"oxdna/hbond"_pair_oxdna.html,
"oxdna/stk"_pair_oxdna.html,
"oxdna/xstk"_pair_oxdna.html,
"quip"_pair_quip.html,
"reax/c (k)"_pair_reax_c.html,
"smd/hertz"_pair_smd_hertz.html,
@ -1082,7 +1090,8 @@ if "LAMMPS is built with the appropriate
package"_Section_start.html#start_3.
"harmonic/shift (o)"_bond_harmonic_shift.html,
"harmonic/shift/cut (o)"_bond_harmonic_shift_cut.html :tb(c=4,ea=c)
"harmonic/shift/cut (o)"_bond_harmonic_shift_cut.html,
"oxdna/fene"_bond_oxdna_fene.html :tb(c=4,ea=c)
:line

View File

@ -55,12 +55,13 @@ LAMMPS errors are detected at setup time; others like a bond
stretching too far may not occur until the middle of a run.
LAMMPS tries to flag errors and print informative error messages so
you can fix the problem. Of course, LAMMPS cannot figure out your
physics or numerical mistakes, like choosing too big a timestep,
specifying erroneous force field coefficients, or putting 2 atoms on
top of each other! If you run into errors that LAMMPS doesn't catch
that you think it should flag, please send an email to the
"developers"_http://lammps.sandia.gov/authors.html.
you can fix the problem. For most errors it will also print the last
input script command that it was processing. Of course, LAMMPS cannot
figure out your physics or numerical mistakes, like choosing too big a
timestep, specifying erroneous force field coefficients, or putting 2
atoms on top of each other! If you run into errors that LAMMPS
doesn't catch that you think it should flag, please send an email to
the "developers"_http://lammps.sandia.gov/authors.html.
If you get an error message about an invalid command in your input
script, you can determine what command is causing the problem by

View File

@ -84,7 +84,7 @@ Package, Description, Author(s), Doc page, Example, Library
"PERI"_#PERI, Peridynamics models, Mike Parks (Sandia), "pair_style peri"_pair_peri.html, peri, -
"POEMS"_#POEMS, coupled rigid body motion, Rudra Mukherjee (JPL), "fix poems"_fix_poems.html, rigid, lib/poems
"PYTHON"_#PYTHON, embed Python code in an input script, -, "python"_python.html, python, lib/python
"REAX"_#REAX, ReaxFF potential, Aidan Thompson (Sandia), "pair_style reax"_pair_reax.html, reax, lib/reax
"REAX"_#REAX, ReaxFF potential, Aidan Thompson (Sandia), "pair_style reax"_pair_reax.html, reax, lib/reax
"REPLICA"_#REPLICA, multi-replica methods, -, "Section 6.6.5"_Section_howto.html#howto_5, tad, -
"RIGID"_#RIGID, rigid bodies, -, "fix rigid"_fix_rigid.html, rigid, -
"SHOCK"_#SHOCK, shock loading methods, -, "fix msst"_fix_msst.html, -, -
@ -1140,6 +1140,7 @@ Package, Description, Author(s), Doc page, Example, Pic/movie, Library
"USER-ATC"_#USER-ATC, atom-to-continuum coupling, Jones & Templeton & Zimmerman (1), "fix atc"_fix_atc.html, USER/atc, "atc"_atc, lib/atc
"USER-AWPMD"_#USER-AWPMD, wave-packet MD, Ilya Valuev (JIHT), "pair_style awpmd/cut"_pair_awpmd.html, USER/awpmd, -, lib/awpmd
"USER-CG-CMM"_#USER-CG-CMM, coarse-graining model, Axel Kohlmeyer (Temple U), "pair_style lj/sdk"_pair_sdk.html, USER/cg-cmm, "cg"_cg, -
"USER-CGDNA"_#USER-CGDNA, coarse-grained DNA force fields, Oliver Henrich (U Edinburgh), src/USER-CGDNA/README, USER/cgdna, -, -
"USER-COLVARS"_#USER-COLVARS, collective variables, Fiorin & Henin & Kohlmeyer (2), "fix colvars"_fix_colvars.html, USER/colvars, "colvars"_colvars, lib/colvars
"USER-DIFFRACTION"_#USER-DIFFRACTION, virutal x-ray and electron diffraction, Shawn Coleman (ARL),"compute xrd"_compute_xrd.html, USER/diffraction, -, -
"USER-DPD"_#USER-DPD, reactive dissipative particle dynamics (DPD), Larentzos & Mattox & Brennan (5), src/USER-DPD/README, USER/dpd, -, -
@ -1153,7 +1154,7 @@ Package, Description, Author(s), Doc page, Example, Pic/movie, Library
"USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER-MISC/README, -, -, -
"USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surface, Stefan Paquay (Eindhoven U of Technology), "fix manifoldforce"_fix_manifoldforce.html, USER/manifold, "manifold"_manifold, -
"USER-MOLFILE"_#USER-MOLFILE, "VMD"_VMD molfile plug-ins, Axel Kohlmeyer (Temple U), "dump molfile"_dump_molfile.html, -, -, VMD-MOLFILE
"USER-NC-DUMP"_#USER-NC-DUMP, dump output via NetCDF, Lars Pastewka (Karlsruhe Institute of Technology, KIT), "dump nc, dump nc/mpiio"_dump_nc.html, -, -, lib/netcdf
"USER-NC-DUMP"_#USER-NC-DUMP, dump output via NetCDF, Lars Pastewka (Karlsruhe Institute of Technology, KIT), "dump nc / dump nc/mpiio"_dump_nc.html, -, -, lib/netcdf
"USER-OMP"_#USER-OMP, OpenMP threaded styles, Axel Kohlmeyer (Temple U), "Section 5.3.4"_accelerate_omp.html, -, -, -
"USER-PHONON"_#USER-PHONON, phonon dynamical matrix, Ling-Ti Kong (Shanghai Jiao Tong U), "fix phonon"_fix_phonon.html, USER/phonon, -, -
"USER-QMMM"_#USER-QMMM, QM/MM coupling, Axel Kohlmeyer (Temple U), "fix qmmm"_fix_qmmm.html, USER/qmmm, -, lib/qmmm
@ -1284,6 +1285,31 @@ him directly if you have questions.
:line
USER-CGDNA package :link(USER-CGDNA),h5
Contents: The CGDNA package implements coarse-grained force fields for
single- and double-stranded DNA. This is at the moment mainly the
oxDNA model, developed by Doye, Louis and Ouldridge at the University
of Oxford. The package also contains Langevin-type rigid-body
integrators with improved stability.
See these doc pages to get started:
"bond_style oxdna_fene"_bond_oxdna_fene.html
"pair_style oxdna_excv"_pair_oxdna_excv.html
"fix nve/dotc/langevin"_fix_nve_dotc_langevin.html :ul
Supporting info: /src/USER-CGDNA/README, "bond_style
oxdna_fene"_bond_oxdna_fene.html, "pair_style
oxdna_excv"_pair_oxdna_excv.html, "fix
nve/dotc/langevin"_fix_nve_dotc_langevin.html
Author: Oliver Henrich at the University of Edinburgh, UK (o.henrich
at epcc.ed.ac.uk or ohenrich at ph.ed.ac.uk). Contact him directly if
you have any questions.
:line
USER-COLVARS package :link(USER-COLVARS),h5
Contents: COLVARS stands for collective variables which can be used to
@ -1610,11 +1636,12 @@ and a "dump nc/mpiio"_dump_nc.html command to output LAMMPS snapshots
in this format. See src/USER-NC-DUMP/README for more details.
NetCDF files can be directly visualized with the following tools:
Ovito (http://www.ovito.org/). Ovito supports the AMBER convention
and all of the above extensions. :ulb,l
and all of the above extensions. :ulb,l
VMD (http://www.ks.uiuc.edu/Research/vmd/) :l
AtomEye (http://www.libatoms.org/). The libAtoms version of AtomEye contains
a NetCDF reader that is not present in the standard distribution of AtomEye :l,ule
a NetCDF reader that is not present in the standard distribution of AtomEye :l,ule
The person who created these files is Lars Pastewka at
Karlsruhe Institute of Technology (lars.pastewka at kit.edu).

View File

@ -1727,7 +1727,7 @@ thermodynamic state and a total run time for the simulation. It then
appends statistics about the CPU time and storage requirements for the
simulation. An example set of statistics is shown here:
Loop time of 2.81192 on 4 procs for 300 steps with 2004 atoms
Loop time of 2.81192 on 4 procs for 300 steps with 2004 atoms :pre
Performance: 18.436 ns/day 1.302 hours/ns 106.689 timesteps/s
97.0% CPU use with 4 MPI tasks x no OpenMP threads :pre
@ -1757,14 +1757,14 @@ Ave special neighs/atom = 2.34032
Neighbor list builds = 26
Dangerous builds = 0 :pre
The first section provides a global loop timing summary. The loop time
The first section provides a global loop timing summary. The {loop time}
is the total wall time for the section. The {Performance} line is
provided for convenience to help predicting the number of loop
continuations required and for comparing performance with other
similar MD codes. The CPU use line provides the CPU utilzation per
continuations required and for comparing performance with other,
similar MD codes. The {CPU use} line provides the CPU utilzation per
MPI task; it should be close to 100% times the number of OpenMP
threads (or 1). Lower numbers correspond to delays due to file I/O or
insufficient thread utilization.
threads (or 1 of no OpenMP). Lower numbers correspond to delays due
to file I/O or insufficient thread utilization.
The MPI task section gives the breakdown of the CPU run time (in
seconds) into major categories:
@ -1791,7 +1791,7 @@ is present that also prints the CPU utilization in percent. In
addition, when using {timer full} and the "package omp"_package.html
command are active, a similar timing summary of time spent in threaded
regions to monitor thread utilization and load balance is provided. A
new entry is the {Reduce} section, which lists the time spend in
new entry is the {Reduce} section, which lists the time spent in
reducing the per-thread data elements to the storage for non-threaded
computation. These thread timings are taking from the first MPI rank
only and and thus, as the breakdown for MPI tasks can change from MPI

View File

@ -29,7 +29,7 @@ Bond Styles: fene, harmonic :l
Dihedral Styles: charmm, harmonic, opls :l
Fixes: nve, npt, nvt, nvt/sllod :l
Improper Styles: cvff, harmonic :l
Pair Styles: buck/coul/cut, buck/coul/long, buck, gayberne,
Pair Styles: buck/coul/cut, buck/coul/long, buck, eam, gayberne,
charmm/coul/long, lj/cut, lj/cut/coul/long, sw, tersoff :l
K-Space Styles: pppm :l
:ule

View File

@ -110,14 +110,14 @@ mpirun -np 96 -ppn 12 lmp_g++ -k on t 20 -sf kk -in in.lj # ditto on 8 Phis :p
[Required hardware/software:]
Kokkos support within LAMMPS must be built with a C++11 compatible
compiler. If using gcc, version 4.8.1 or later is required.
compiler. If using gcc, version 4.7.2 or later is required.
To build with Kokkos support for CPUs, your compiler must support the
OpenMP interface. You should have one or more multi-core CPUs so that
multiple threads can be launched by each MPI task running on a CPU.
To build with Kokkos support for NVIDIA GPUs, NVIDIA Cuda software
version 6.5 or later must be installed on your system. See the
version 7.5 or later must be installed on your system. See the
discussion for the "GPU"_accelerate_gpu.html package for details of
how to check and do this.

View File

@ -0,0 +1,70 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
bond_style oxdna_fene command :h3
[Syntax:]
bond_style oxdna_fene :pre
[Examples:]
bond_style oxdna_fene
bond_coeff * 2.0 0.25 0.7525 :pre
[Description:]
The {oxdna_fene} bond style uses the potential
:c,image(Eqs/bond_oxdna_fene.jpg)
to define a modified finite extensible nonlinear elastic (FENE) potential
"(Ouldridge)"_#oxdna_fene to model the connectivity of the phosphate backbone
in the oxDNA force field for coarse-grained modelling of DNA.
The following coefficients must be defined for the bond type via the
"bond_coeff"_bond_coeff.html command as given in the above example, or in
the data file or restart files read by the "read_data"_read_data.html
or "read_restart"_read_restart.html commands:
epsilon (energy)
Delta (distance)
r0 (distance) :ul
NOTE: This bond style has to be used together with the corresponding oxDNA pair styles
for excluded volume interaction {oxdna_excv}, stacking {oxdna_stk}, cross-stacking {oxdna_xstk}
and coaxial stacking interaction {oxdna_coaxstk} as well as hydrogen-bonding interaction {oxdna_hbond} (see also documentation of
"pair_style oxdna_excv"_pair_oxdna_excv.html). The coefficients
in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model.
Example input and data files can be found in /examples/USER/cgdna/examples/duplex1/ and /duplex2/.
A simple python setup tool which creates single straight or helical DNA strands,
DNA duplexes or arrays of DNA duplexes can be found in /examples/USER/cgdna/util/.
A technical report with more information on the model, the structure of the input file,
the setup tool and the performance of the LAMMPS-implementation of oxDNA
can be found "here"_PDF/USER-CGDNA-overview.pdf.
:line
[Restrictions:]
This bond style can only be used if LAMMPS was built with the
USER-CGDNA package and the MOLECULE and ASPHERE package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
"pair_style oxdna_excv"_pair_oxdna_excv.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "bond_coeff"_bond_coeff.html
[Default:] none
:line
:link(oxdna_fene)
[(Ouldridge)] T.E. Ouldridge, A.A. Louis, J.P.K. Doye, J. Chem. Phys. 134, 085101 (2011).

View File

@ -15,6 +15,7 @@ Bond Styles :h1
bond_morse
bond_none
bond_nonlinear
bond_oxdna_fene
bond_quartic
bond_table
bond_zero

View File

@ -91,6 +91,7 @@ Commands :h1
suffix
tad
temper
temper_grem
thermo
thermo_modify
thermo_style

View File

@ -10,34 +10,43 @@ compute coord/atom command :h3
[Syntax:]
compute ID group-ID coord/atom cutoff type1 type2 ... :pre
compute ID group-ID coord/atom cstyle args ... :pre
ID, group-ID are documented in "compute"_compute.html command
coord/atom = style name of this compute command
cutoff = distance within which to count coordination neighbors (distance units)
typeN = atom type for Nth coordination count (see asterisk form below) :ul
ID, group-ID are documented in "compute"_compute.html command :ulb,l
coord/atom = style name of this compute command :l
cstyle = {cutoff} or {orientorder} :l
{cutoff} args = cutoff typeN
cutoff = distance within which to count coordination neighbors (distance units)
typeN = atom type for Nth coordination count (see asterisk form below)
{orientorder} args = orientorderID threshold
orientorderID = ID of an orientorder/atom compute
threshold = minimum value of the product of two "connected" atoms :pre
:ule
[Examples:]
compute 1 all coord/atom 2.0
compute 1 all coord/atom 6.0 1 2
compute 1 all coord/atom 6.0 2*4 5*8 * :pre
compute 1 all coord/atom cutoff 2.0
compute 1 all coord/atom cutoff 6.0 1 2
compute 1 all coord/atom cutoff 6.0 2*4 5*8 *
compute 1 all coord/atom orientorder 2 0.5 :pre
[Description:]
Define a computation that calculates one or more coordination numbers
for each atom in a group.
This compute performs calculations between neighboring atoms to
determine a coordination value. The specific calculation and the
meaning of the resulting value depend on the {cstyle} keyword used.
A coordination number is defined as the number of neighbor atoms with
specified atom type(s) that are within the specified cutoff distance
from the central atom. Atoms not in the group are included in a
coordination number of atoms in the group.
The {cutoff} cstyle calculates one or more traditional coordination
numbers for each atom. A coordination number is defined as the number
of neighbor atoms with specified atom type(s) that are within the
specified cutoff distance from the central atom. Atoms not in the
specified group are included in the coordination number tally.
The {typeN} keywords allow you to specify which atom types contribute
to each coordination number. One coordination number is computed for
each of the {typeN} keywords listed. If no {typeN} keywords are
listed, a single coordination number is calculated, which includes
atoms of all types (same as the "*" format, see below).
The {typeN} keywords allow specification of which atom types
contribute to each coordination number. One coordination number is
computed for each of the {typeN} keywords listed. If no {typeN}
keywords are listed, a single coordination number is calculated, which
includes atoms of all types (same as the "*" format, see below).
The {typeN} keywords can be specified in one of two ways. An explicit
numeric value can be used, as in the 2nd example above. Or a
@ -49,8 +58,27 @@ from 1 to N. A leading asterisk means all types from 1 to n
(inclusive). A middle asterisk means all types from m to n
(inclusive).
The value of all coordination numbers will be 0.0 for atoms not in the
specified compute group.
The {orientorder} cstyle calculates the number of "connected" neighbor
atoms J around each central atom I. For this {cstyle}, connected is
defined by the orientational order parameter calculated by the
"compute orientorder/atom"_compute_orientorder_atom.html command.
This {cstyle} thus allows one to apply the ten Wolde's criterion to
identify crystal-like atoms in a system, as discussed in "ten
Wolde"_#tenWolde.
The ID of the previously specified "compute
orientorder/atom"_compute_orientorder/atom command is specified as
{orientorderID}. The compute must invoke its {components} option to
calculate components of the {Ybar_lm} vector for each atoms, as
described in its documenation. Note that orientorder/atom compute
defines its own criteria for identifying neighboring atoms. If the
scalar product ({Ybar_lm(i)},{Ybar_lm(j)}), calculated by the
orientorder/atom compute is larger than the specified {threshold},
then I and J are connected, and the coordination value of I is
incremented by one.
For all {cstyle} settings, all coordination values will be 0.0 for
atoms not in the specified compute group.
The neighbor list needed to compute this quantity is constructed each
time the calculation is performed (i.e. each time a snapshot of atoms
@ -72,11 +100,16 @@ the neighbor list.
[Output info:]
If single {type1} keyword is specified (or if none are specified),
this compute calculates a per-atom vector. If multiple {typeN}
keywords are specified, this compute calculates a per-atom array, with
N columns. These values can be accessed by any command that uses
per-atom values from a compute as input. See "Section
For {cstyle} cutoff, this compute can calculate a per-atom vector or
array. If single {type1} keyword is specified (or if none are
specified), this compute calculates a per-atom vector. If multiple
{typeN} keywords are specified, this compute calculates a per-atom
array, with N columns.
For {cstyle} orientorder, this compute calculates a per-atom vector.
These values can be accessed by any command that uses per-atom values
from a compute as input. See "Section
6.15"_Section_howto.html#howto_15 for an overview of LAMMPS output
options.
@ -88,5 +121,12 @@ explained above.
[Related commands:]
"compute cluster/atom"_compute_cluster_atom.html
"compute orientorder/atom"_compute_orientorder_atom.html
[Default:] none
:line
:link(tenWolde)
[(tenWolde)] P. R. ten Wolde, M. J. Ruiz-Montero, D. Frenkel,
J. Chem. Phys. 104, 9932 (1996).

View File

@ -15,17 +15,19 @@ compute ID group-ID orientorder/atom keyword values ... :pre
ID, group-ID are documented in "compute"_compute.html command :ulb,l
orientorder/atom = style name of this compute command :l
one or more keyword/value pairs may be appended :l
keyword = {cutoff} or {nnn} or {degrees}
keyword = {cutoff} or {nnn} or {degrees} or {components}
{cutoff} value = distance cutoff
{nnn} value = number of nearest neighbors
{degrees} values = nlvalues, l1, l2,... :pre
{degrees} values = nlvalues, l1, l2,...
{components} value = ldegree :pre
:ule
[Examples:]
compute 1 all orientorder/atom
compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5 :pre
compute 1 all orientorder/atom degrees 5 4 6 8 10 12 nnn NULL cutoff 1.5
compute 1 all orientorder/atom degrees 4 6 components 6 nnn NULL cutoff 3.0 :pre
[Description:]
@ -62,14 +64,21 @@ specified distance cutoff are used.
The optional keyword {degrees} defines the list of order parameters to
be computed. The first argument {nlvalues} is the number of order
parameters. This is followed by that number of integers giving the
degree of each order parameter. Because {Q}2 and all odd-degree
order parameters are zero for atoms in cubic crystals
(see "Steinhardt"_#Steinhardt), the default order parameters
are {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12. For the
FCC crystal with {nnn}=12, {Q}4 = sqrt(7/3)/8 = 0.19094....
The numerical values of all order parameters up to {Q}12
for a range of commonly encountered high-symmetry structures are given
in Table I of "Mickel et al."_#Mickel.
degree of each order parameter. Because {Q}2 and all odd-degree order
parameters are zero for atoms in cubic crystals (see
"Steinhardt"_#Steinhardt), the default order parameters are {Q}4,
{Q}6, {Q}8, {Q}10, and {Q}12. For the FCC crystal with {nnn}=12, {Q}4
= sqrt(7/3)/8 = 0.19094.... The numerical values of all order
parameters up to {Q}12 for a range of commonly encountered
high-symmetry structures are given in Table I of "Mickel et
al."_#Mickel.
The optional keyword {components} will output the components of the
normalized complex vector {Ybar_lm} of degree {ldegree}, which must be
explicitly included in the keyword {degrees}. This option can be used
in conjunction with "compute coord_atom"_compute_coord_atom.html to
calculate the ten Wolde's criterion to identify crystal-like
particles, as discussed in "ten Wolde"_#tenWolde.
The value of {Ql} is set to zero for atoms not in the
specified compute group, as well as for atoms that have less than
@ -95,8 +104,16 @@ the neighbor list.
[Output info:]
This compute calculates a per-atom array with {nlvalues} columns, giving the
{Ql} values for each atom, which are real numbers on the range 0 <= {Ql} <= 1.
This compute calculates a per-atom array with {nlvalues} columns,
giving the {Ql} values for each atom, which are real numbers on the
range 0 <= {Ql} <= 1.
If the keyword {components} is set, then the real and imaginary parts
of each component of (normalized) {Ybar_lm} will be added to the
output array in the following order: Re({Ybar_-m}) Im({Ybar_-m})
Re({Ybar_-m+1}) Im({Ybar_-m+1}) ... Re({Ybar_m}) Im({Ybar_m}). This
way, the per-atom array will have a total of {nlvalues}+2*(2{l}+1)
columns.
These values can be accessed by any command that uses
per-atom values from a compute as input. See "Section
@ -107,15 +124,25 @@ options.
[Related commands:]
"compute coord/atom"_compute_coord_atom.html, "compute centro/atom"_compute_centro_atom.html, "compute hexorder/atom"_compute_hexorder_atom.html
"compute coord/atom"_compute_coord_atom.html, "compute
centro/atom"_compute_centro_atom.html, "compute
hexorder/atom"_compute_hexorder_atom.html
[Default:]
The option defaults are {cutoff} = pair style cutoff, {nnn} = 12, {degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12.
The option defaults are {cutoff} = pair style cutoff, {nnn} = 12,
{degrees} = 5 4 6 8 10 12 i.e. {Q}4, {Q}6, {Q}8, {Q}10, and {Q}12.
:line
:link(Steinhardt)
[(Steinhardt)] P. Steinhardt, D. Nelson, and M. Ronchetti, Phys. Rev. B 28, 784 (1983).
[(Steinhardt)] P. Steinhardt, D. Nelson, and M. Ronchetti,
Phys. Rev. B 28, 784 (1983).
:link(Mickel)
[(Mickel)] W. Mickel, S. C. Kapfer, G. E. Schroeder-Turkand, K. Mecke, J. Chem. Phys. 138, 044501 (2013).
[(Mickel)] W. Mickel, S. C. Kapfer, G. E. Schroeder-Turkand, K. Mecke,
J. Chem. Phys. 138, 044501 (2013).
:link(tenWolde)
[(tenWolde)] P. R. ten Wolde, M. J. Ruiz-Montero, D. Frenkel,
J. Chem. Phys. 104, 9932 (1996).

View File

@ -35,6 +35,7 @@ Computes :h1
compute_erotate_sphere_atom
compute_event_displace
compute_fep
compute_global_atom
compute_group_group
compute_gyration
compute_gyration_chunk

View File

@ -151,7 +151,7 @@ The option default for the {energy} keyword is energy = no.
:line
:link(Strong)
[(Strong)] Strong and Eaves, J. Phys. Chem. Lett. 7, 1907 (2016).
[(Strong)] Strong and Eaves, J. Phys. Chem. B 121, 189 (2017).
:link(Evans)
[(Evans)] Evans and Morriss, Phys. Rev. Lett. 56, 2172 (1986).

View File

@ -29,7 +29,7 @@ fix fxgREM all grem 502 -0.15 -80000 fxnvt :pre
[Description:]
This fix implements the molecular dynamics version of the generalized
replica exchange method (gREM) originally developed by "(Kim)"_#Kim,
replica exchange method (gREM) originally developed by "(Kim)"_#Kim2010,
which uses non-Boltzmann ensembles to sample over first order phase
transitions. The is done by defining replicas with an enthalpy
dependent effective temperature
@ -103,7 +103,7 @@ npt"_fix_nh.html, "thermo_modify"_thermo_modify.html
:line
:link(Kim)
:link(Kim2010)
[(Kim)] Kim, Keyes, Straub, J Chem. Phys, 132, 224107 (2010).
:link(Malolepsza)

130
doc/src/fix_mscg.txt Normal file
View File

@ -0,0 +1,130 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
fix mscg command :h3
[Syntax:]
fix ID group-ID mscg N keyword args ... :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
mscg = style name of this fix command :l
N = envoke this fix every this many timesteps :l
zero or more keyword/value pairs may be appended :l
keyword = {range} or {name} or {max} :l
{range} arg = {on} or {off}
{on} = range finding functionality is performed
{off} = force matching functionality is performed
{name} args = name1 ... nameN
name1,...,nameN = string names for each atom type (1-Ntype)
{max} args = maxb maxa maxd
maxb,maxa,maxd = maximum bonds/angles/dihedrals per atom :pre
:ule
[Examples:]
fix 1 all mscg 1
fix 1 all mscg 1 range name A B
fix 1 all mscg 1 max 4 8 20 :pre
[Description:]
This fix applies the Multi-Scale Coarse-Graining (MSCG) method to
snapshots from a dump file to generate potentials for coarse-grained
simulations from all-atom simulations, using a force-matching
technique ("Izvekov"_#Izvekov, "Noid"_#Noid).
It makes use of the MS-CG library, written and maintained by Greg
Voth's group at the University of Chicago, which is freely available
on their "MS-CG GitHub
site"_https://github.com/uchicago-voth/MSCG-release. See instructions
on obtaining and installing the MS-CG library in the src/MSCG/README
file, which must be done before you build LAMMPS with this fix command
and use the command in a LAMMPS input script.
An example script using this fix is provided the examples/mscg
directory.
The general workflow for using LAMMPS in conjunction with the MS-CG
library to create a coarse-grained model and run coarse-grained
simulations is as follows:
Perform all-atom simulations on the system to be coarse grained.
Generate a trajectory mapped to the coarse-grained model.
Create input files for the MS-CG library.
Run the range finder functionality of the MS-CG library.
Run the force matching functionality of the MS-CG library.
Check the results of the force matching.
Run coarse-grained simulations using the new coarse-grained potentials. :ol
This fix can perform the range finding and force matching steps 4 and
5 of the above workflow when used in conjunction with the
"rerun"_rerun.html command. It does not perform steps 1-3 and 6-7.
Step 2 can be performed using a Python script (what is the name?)
provided with the MS-CG library which defines the coarse-grained model
and converts a standard LAMMPS dump file for an all-atom simulation
(step 1) into a LAMMPS dump file which has the positions of and forces
on the coarse-grained beads.
In step 3, an input file named "control.in" is needed by the MS-CG
library which sets parameters for the range finding and force matching
functionalities. See the examples/mscg/control.in file as an example.
And see the documentation provided with the MS-CG library for more
info on this file.
When this fix is used to perform steps 4 and 5, the MS-CG library also
produces additional output files. The range finder functionality
(step 4) outputs files defining pair and bonded interaction ranges.
The force matching functionality (step 5) outputs tabulated force
files for every interaction in the system. Other diagnostic files can
also be output depending on the paramters in the MS-CG library input
script. Again, see the documentation provided with the MS-CG library
for more info.
:line
The {range} keyword specifies which MS-CG library functionality should
be invoked. If {on}, the step 4 range finder functionality is invoked.
{off}, the step 5 force matching functionality is invoked.
If the {name} keyword is used, string names are defined to associate
with the integer atom types in LAMMPS. {Ntype} names must be
provided, one for each atom type (1-Ntype).
The {max} keyword specifies the maximum number of bonds, angles, and
dihedrals a bead can have in the coarse-grained model.
[Restrictions:]
This fix is part of the MSCG package. It is only enabled if LAMMPS was
built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
The MS-CG library uses C++11, which may not be supported by older
compilers. The MS-CG library also has some additional numeric library
dependencies, which are describd in its documentation.
Currently, the MS-CG library is not setup to run in parallel with MPI,
so this fix can only be used in a serial LAMMPS build and run
on a single processor.
[Related commands:] none
[Default:]
The default keyword settings are range off, max 4 12 36.
:line
:link(Izvekov)
[(Izvekov)] Izvekov, Voth, J Chem Phys 123, 134105 (2005).
:link(Noid)
[(Noid)] Noid, Chu, Ayton, Krishna, Izvekov, Voth, Das, Andersen, J
Chem Phys 128, 134105 (2008).

61
doc/src/fix_nve_dot.txt Normal file
View File

@ -0,0 +1,61 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
fix nve/dot command :h3
[Syntax:]
fix ID group-ID nve/dot :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
nve/dot = style name of this fix command :l
:ule
[Examples:]
fix 1 all nve/dot :pre
[Description:]
Apply a rigid-body integrator as described in "(Davidchack)"_#Davidchack
to a group of atoms, but without Langevin dynamics.
This command performs Molecular dynamics (MD)
via a velocity-Verlet algorithm and an evolution operator that rotates
the quaternion degrees of freedom, similar to the scheme outlined in "(Miller)"_#Miller.
This command is the equivalent of the "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html
without damping and noise and can be used to determine the stability range
in a NVE ensemble prior to using the Langevin-type DOTC-integrator
(see also "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html).
The command is equivalent to the "fix nve"_fix_nve.html.
The particles are always considered to have a finite size.
An example input file can be found in /examples/USER/cgdna/examples/duplex1/.
A technical report with more information on this integrator can be found
"here"_PDF/USER-CGDNA-overview.pdf.
:line
[Restrictions:]
These pair styles can only be used if LAMMPS was built with the
USER-CGDNA package and the MOLECULE and ASPHERE package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
"fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "fix nve"_fix_nve.html
[Default:] none
:line
:link(Davidchack)
[(Davidchack)] R.L Davidchack, T.E. Ouldridge, and M.V. Tretyakov. J. Chem. Phys. 142, 144114 (2015).
:link(Miller)
[(Miller)] T. F. Miller III, M. Eleftheriou, P. Pattnaik, A. Ndirango, G. J. Martyna, J. Chem. Phys., 116, 8649-8659 (2002).

View File

@ -0,0 +1,134 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
fix nve/dotc/langevin command :h3
[Syntax:]
fix ID group-ID nve/dotc/langevin Tstart Tstop damp seed keyword value :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
nve/dotc/langevin = style name of this fix command :l
Tstart,Tstop = desired temperature at start/end of run (temperature units) :l
damp = damping parameter (time units) :l
seed = random number seed to use for white noise (positive integer) :l
keyword = {angmom} :l
{angmom} value = factor
factor = do thermostat rotational degrees of freedom via the angular momentum and apply numeric scale factor as discussed below :pre
:ule
[Examples:]
fix 1 all nve/dotc/langevin 1.0 1.0 0.03 457145 angmom 10 :pre
[Description:]
Apply a rigid-body Langevin-type integrator of the kind "Langevin C"
as described in "(Davidchack)"_#Davidchack
to a group of atoms, which models an interaction with an implicit background
solvent. This command performs Brownian dynamics (BD)
via a technique that splits the integration into a deterministic Hamiltonian
part and the Ornstein-Uhlenbeck process for noise and damping.
The quaternion degrees of freedom are updated though an evolution
operator which performs a rotation in quaternion space, preserves
the quaternion norm and is akin to "(Miller)"_#Miller.
In terms of syntax this command has been closely modelled on the
"fix langevin"_fix_langevin.html and its {angmom} option. But it combines
the "fix nve"_fix_nve.html and the "fix langevin"_fix_langevin.html in
one single command. The main feature is improved stability
over the standard integrator, permitting slightly larger timestep sizes.
NOTE: Unlike the "fix langevin"_fix_langevin.html this command performs
also time integration of the translational and quaternion degrees of freedom.
The total force on each atom will have the form:
F = Fc + Ff + Fr
Ff = - (m / damp) v
Fr is proportional to sqrt(Kb T m / (dt damp)) :pre
Fc is the conservative force computed via the usual inter-particle
interactions ("pair_style"_pair_style.html,
"bond_style"_bond_style.html, etc).
The Ff and Fr terms are implicitly taken into account by this fix
on a per-particle basis.
Ff is a frictional drag or viscous damping term proportional to the
particle's velocity. The proportionality constant for each atom is
computed as m/damp, where m is the mass of the particle and damp is
the damping factor specified by the user.
Fr is a force due to solvent atoms at a temperature T randomly bumping
into the particle. As derived from the fluctuation/dissipation
theorem, its magnitude as shown above is proportional to sqrt(Kb T m /
dt damp), where Kb is the Boltzmann constant, T is the desired
temperature, m is the mass of the particle, dt is the timestep size,
and damp is the damping factor. Random numbers are used to randomize
the direction and magnitude of this force as described in
"(Dunweg)"_#Dunweg, where a uniform random number is used (instead of
a Gaussian random number) for speed.
:line
{Tstart} and {Tstop} have to be constant values, i.e. they cannot
be variables.
The {damp} parameter is specified in time units and determines how
rapidly the temperature is relaxed. For example, a value of 0.03
means to relax the temperature in a timespan of (roughly) 0.03 time
units tau (see the "units"_units.html command).
The damp factor can be thought of as inversely related to the
viscosity of the solvent, i.e. a small relaxation time implies a
hi-viscosity solvent and vice versa. See the discussion about gamma
and viscosity in the documentation for the "fix
viscous"_fix_viscous.html command for more details.
The random # {seed} must be a positive integer. A Marsaglia random
number generator is used. Each processor uses the input seed to
generate its own unique seed and its own stream of random numbers.
Thus the dynamics of the system will not be identical on two runs on
different numbers of processors.
The keyword/value option has to be used in the following way:
This fix has to be used together with the {angmom} keyword. The
particles are always considered to have a finite size.
The keyword {angmom} enables thermostatting of the rotational degrees of
freedom in addition to the usual translational degrees of freedom.
The scale factor after the {angmom} keyword gives the ratio of the rotational to
the translational friction coefficient.
An example input file can be found in /examples/USER/cgdna/examples/duplex2/.
A technical report with more information on this integrator can be found
"here"_PDF/USER-CGDNA-overview.pdf.
:line
[Restrictions:]
These pair styles can only be used if LAMMPS was built with the
USER-CGDNA package and the MOLECULE and ASPHERE package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
"fix nve"_fix_nve.html, "fix langevin"_fix_langevin.html, "fix nve/dot"_fix_nve_dot.html,
[Default:] none
:line
:link(Davidchack)
[(Davidchack)] R.L Davidchack, T.E. Ouldridge, M.V. Tretyakov. J. Chem. Phys. 142, 144114 (2015).
:link(Miller)
[(Miller)] T. F. Miller III, M. Eleftheriou, P. Pattnaik, A. Ndirango, G. J. Martyna, J. Chem. Phys., 116, 8649-8659 (2002).
:link(Dunweg)
[(Dunweg)] B. Dunweg, W. Paul, Int. J. Mod. Phys. C, 2, 817-27 (1991).

View File

@ -89,11 +89,7 @@ NOTE: The center of mass of a group of atoms is calculated in
group can straddle a periodic boundary. See the "dump"_dump.html doc
page for a discussion of unwrapped coordinates. It also means that a
spring connecting two groups or a group and the tether point can cross
a periodic boundary and its length be calculated correctly. One
exception is for rigid bodies, which should not be used with the fix
spring command, if the rigid body will cross a periodic boundary.
This is because image flags for rigid bodies are used in a different
way, as explained on the "fix rigid"_fix_rigid.html doc page.
a periodic boundary and its length be calculated correctly.
[Restart, fix_modify, output, run start/stop, minimize info:]

View File

@ -68,6 +68,7 @@ Fixes :h1
fix_meso_stationary
fix_momentum
fix_move
fix_mscg
fix_msst
fix_neb
fix_nh
@ -83,6 +84,8 @@ Fixes :h1
fix_nve_asphere
fix_nve_asphere_noforce
fix_nve_body
fix_nve_dot
fix_nve_dotc_langevin
fix_nve_eff
fix_nve_limit
fix_nve_line

View File

@ -229,11 +229,16 @@ dramatically in z. For example, for a triclinic system with all three
tilt factors set to the maximum limit, the PPPM grid should be
increased roughly by a factor of 1.5 in the y direction and 2.0 in the
z direction as compared to the same system using a cubic orthogonal
simulation cell. One way to ensure the accuracy requirement is being
met is to run a short simulation at the maximum expected tilt or
length, note the required grid size, and then use the
simulation cell. One way to handle this issue if you have a long
simulation where the box size changes dramatically, is to break it
into shorter simulations (multiple "run"_run.html commands). This
works because the grid size is re-computed at the beginning of each
run. Another way to ensure the descired accuracy requirement is met
is to run a short simulation at the maximum expected tilt or length,
note the required grid size, and then use the
"kspace_modify"_kspace_modify.html {mesh} command to manually set the
PPPM grid size to this value.
PPPM grid size to this value for the long run. The simulation then
will be "too accurate" for some portion of the run.
RMS force errors in real space for {ewald} and {pppm} are estimated
using equation 18 of "(Kolafa)"_#Kolafa, which is also referenced as
@ -285,6 +290,8 @@ LAMMPS"_Section_start.html#start_3 section for more info.
See "Section 5"_Section_accelerate.html of the manual for
more instructions on how to use the accelerated styles effectively.
:line
[Restrictions:]
Note that the long-range electrostatic solvers in LAMMPS assume conducting

View File

@ -23,6 +23,7 @@ Section_history.html
tutorial_drude.html
tutorial_github.html
tutorial_pylammps.html
body.html
manifolds.html
@ -113,6 +114,7 @@ special_bonds.html
suffix.html
tad.html
temper.html
temper_grem.html
thermo.html
thermo_modify.html
thermo_style.html
@ -207,6 +209,8 @@ fix_nve.html
fix_nve_asphere.html
fix_nve_asphere_noforce.html
fix_nve_body.html
fix_nve_dot.html
fix_nve_dotc_langevin.html
fix_nve_eff.html
fix_nve_limit.html
fix_nve_line.html
@ -214,7 +218,6 @@ fix_nve_manifold_rattle.html
fix_nve_noforce.html
fix_nve_sphere.html
fix_nve_tri.html
fix_nvk.html
fix_nvt_asphere.html
fix_nvt_body.html
fix_nvt_manifold_rattle.html
@ -455,6 +458,7 @@ pair_multi_lucy_rx.html
pair_nb3b_harmonic.html
pair_nm.html
pair_none.html
pair_oxdna_excv.html
pair_peri.html
pair_polymorphic.html
pair_quip.html
@ -493,6 +497,7 @@ pair_zero.html
bond_class2.html
bond_fene.html
bond_fene_expand.html
bond_oxdna_fene.html
bond_harmonic.html
bond_harmonic_shift.html
bond_harmonic_shift_cut.html

View File

@ -8,6 +8,7 @@
pair_style eam command :h3
pair_style eam/gpu command :h3
pair_style eam/intel command :h3
pair_style eam/kk command :h3
pair_style eam/omp command :h3
pair_style eam/opt command :h3

View File

@ -0,0 +1,80 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
pair_style oxdna_excv command :h3
pair_style oxdna_stk command :h3
pair_style oxdna_hbond command :h3
pair_style oxdna_xstk command :h3
pair_style oxdna_coaxstk command :h3
[Syntax:]
pair_style style :pre
style = {hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk} :ul
[Examples:]
pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
pair_coeff * * oxdna_excv 2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
pair_coeff * * oxdna_stk 1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65
pair_coeff * * oxdna_hbond 0.0 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 1 4 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 2 3 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff * * oxdna_xstk 47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68
pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65 :pre
[Description:]
The {oxdna} pair styles compute the pairwise-additive parts of the oxDNA force field
for coarse-grained modelling of DNA. The effective interaction between the nucleotides consists of potentials for the
excluded volume interaction {oxdna_excv}, the stacking {oxdna_stk}, cross-stacking {oxdna_xstk}
and coaxial stacking interaction {oxdna_coaxstk} as well
as the hydrogen-bonding interaction {oxdna_hbond} between complementary pairs of nucleotides on
opposite strands.
The exact functional form of the pair styles is rather complex, which manifests itself in the 144 coefficients
in the above example. The individual potentials consist of products of modulation factors,
which themselves are constructed from a number of more basic potentials
(Morse, Lennard-Jones, harmonic angle and distance) as well as quadratic smoothing and modulation terms.
We refer to "(Ouldridge-DPhil)"_#Ouldridge-DPhil and "(Ouldridge)"_#Ouldridge
for a detailed description of the oxDNA force field.
NOTE: These pair styles have to be used together with the related oxDNA bond style
{oxdna_fene} for the connectivity of the phosphate backbone (see also documentation of
"bond_style oxdna_fene"_bond_oxdna_fene.html). The coefficients
in the above example have to be kept fixed and cannot be changed without reparametrizing the entire model.
Example input and data files can be found in /examples/USER/cgdna/examples/duplex1/ and /duplex2/.
A simple python setup tool which creates single straight or helical DNA strands,
DNA duplexes or arrays of DNA duplexes can be found in /examples/USER/cgdna/util/.
A technical report with more information on the model, the structure of the input file,
the setup tool and the performance of the LAMMPS-implementation of oxDNA
can be found "here"_PDF/USER-CGDNA-overview.pdf.
:line
[Restrictions:]
These pair styles can only be used if LAMMPS was built with the
USER-CGDNA package and the MOLECULE and ASPHERE package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info on packages.
[Related commands:]
"bond_style oxdna_fene"_bond_oxdna_fene.html, "fix nve/dotc/langevin"_fix_nve_dotc_langevin.html, "pair_coeff"_pair_coeff.html
[Default:] none
:line
:link(Ouldridge-DPhil)
[(Ouldrigde-DPhil)] T.E. Ouldridge, Coarse-grained modelling of DNA and DNA self-assembly, DPhil. University of Oxford (2011).
:link(Ouldridge)
[(Ouldridge)] T.E. Ouldridge, A.A. Louis, J.P.K. Doye, J. Chem. Phys. 134, 085101 (2011).

View File

@ -65,6 +65,7 @@ Pair Styles :h1
pair_nb3b_harmonic
pair_nm
pair_none
pair_oxdna_excv
pair_peri
pair_polymorphic
pair_quip

View File

@ -15,11 +15,12 @@ read_dump file Nstep field1 field2 ... keyword values ... :pre
file = name of dump file to read :ulb,l
Nstep = snapshot timestep to read from file :l
one or more fields may be appended :l
field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz}
field = {x} or {y} or {z} or {vx} or {vy} or {vz} or {q} or {ix} or {iy} or {iz} or {fx} or {fy} or {fz}
{x},{y},{z} = atom coordinates
{vx},{vy},{vz} = velocity components
{q} = charge
{ix},{iy},{iz} = image flags in each dimension :pre
{ix},{iy},{iz} = image flags in each dimension
{fx},{fy},{fz} = force components :pre
zero or more keyword/value pairs may be appended :l
keyword = {box} or {replace} or {purge} or {trim} or {add} or {label} or {scaled} or {wrapped} or {format} :l
{box} value = {yes} or {no} = replace simulation box with dump box

View File

@ -32,7 +32,7 @@ Run a parallel tempering or replica exchange simulation in LAMMPS
partition mode using multiple generalized replicas (ensembles) of a
system defined by "fix grem"_fix_grem.html, which stands for the
generalized replica exchange method (gREM) originally developed by
"(Kim)"_#Kim. It uses non-Boltzmann ensembles to sample over first
"(Kim)"_#KimStraub. It uses non-Boltzmann ensembles to sample over first
order phase transitions. The is done by defining replicas with an
enthalpy dependent effective temperature
@ -105,5 +105,5 @@ This command must be used with "fix grem"_fix_grem.html.
[Default:] none
:link(Kim)
:link(KimStraub)
[(Kim)] Kim, Keyes, Straub, J Chem Phys, 132, 224107 (2010).

View File

@ -33,14 +33,14 @@ timer loop :pre
Select the level of detail at which LAMMPS performs its CPU timings.
Multiple keywords can be specified with the {timer} command. For
keywords that are mutually exclusive, the last one specified takes
effect.
precedence.
During a simulation run LAMMPS collects information about how much
time is spent in different sections of the code and thus can provide
information for determining performance and load imbalance problems.
This can be done at different levels of detail and accuracy. For more
information about the timing output, see this "discussion of screen
output"_Section_start.html#start_8.
output in Section 2.8"_Section_start.html#start_8.
The {off} setting will turn all time measurements off. The {loop}
setting will only measure the total time for a run and not collect any
@ -52,20 +52,22 @@ procsessors. The {full} setting adds information about CPU
utilization and thread utilization, when multi-threading is enabled.
With the {sync} setting, all MPI tasks are synchronized at each timer
call which meaures load imbalance more accuractly, though it can also
slow down the simulation. Using the {nosync} setting (which is the
default) turns off this synchronization.
call which measures load imbalance for each section more accuractly,
though it can also slow down the simulation by prohibiting overlapping
independent computations on different MPI ranks Using the {nosync}
setting (which is the default) turns this synchronization off.
With the {timeout} keyword a walltime limit can be imposed that
With the {timeout} keyword a walltime limit can be imposed, that
affects the "run"_run.html and "minimize"_minimize.html commands.
This can be convenient when runs have to confirm to time limits,
e.g. when running under a batch system and you want to maximize
the utilization of the batch time slot, especially when the time
per timestep varies and is thus difficult to predict how many
steps a simulation can perform, or for difficult to converge
minimizations. The timeout {elapse} value should be somewhat smaller
than the time requested from the batch system, as there is usually
some overhead to launch jobs, and it may be advisable to write
This can be convenient when calculations have to comply with execution
time limits, e.g. when running under a batch system when you want to
maximize the utilization of the batch time slot, especially for runs
where the time per timestep varies much and thus it becomes difficult
to predict how many steps a simulation can perform for a given walltime
limit. This also applies for difficult to converge minimizations.
The timeout {elapse} value should be somewhat smaller than the maximum
wall time requested from the batch system, as there is usually
some overhead to launch jobs, and it is advisable to write
out a restart after terminating a run due to a timeout.
The timeout timer starts when the command is issued. When the time

View File

@ -336,12 +336,15 @@ commit and push again:
$ git commit -m "Merged Axel's suggestions and updated text"
$ git push git@github.com:Pakketeretet2/lammps :pre
This merge also shows up on the lammps Github page:
:c,image(JPG/tutorial_reverse_pull_request7.png)
:line
[After a merge]
When everything is fine, the feature branch is merged into the master branch.
When everything is fine, the feature branch is merged into the master branch:
:c,image(JPG/tutorial_merged.png)

View File

@ -82,6 +82,7 @@ meam: MEAM test for SiC and shear (same as shear examples)
melt: rapid melt of 3d LJ system
micelle: self-assembly of small lipid-like molecules into 2d bilayers
min: energy minimization of 2d LJ melt
mscg: parameterize a multi-scale coarse-graining (MSCG) model
msst: MSST shock dynamics
nb3b: use of nonbonded 3-body harmonic pair style
neb: nudged elastic band (NEB) calculation for barrier finding

View File

@ -0,0 +1,28 @@
This directory contains example data and input files
and utility scripts for the oxDNA coarse-grained model
for DNA.
/examples/duplex1:
Input, data and log files for a DNA duplex (double-stranded DNA)
consisiting of 5 base pairs. The duplex contains two strands with
complementary base pairs. The topology is
A - A - A - A - A
| | | | |
T - T - T - T - T
/examples/duplex2:
Input, data and log files for a nicked DNA duplex (double-stranded DNA)
consisiting of 8 base pairs. The duplex contains strands with
complementary base pairs, but the backbone on one side is not continuous:
two individual strands on one side form a duplex with a longer single
strand on the other side. The topology is
A - A - A - A - A - A - A - A
| | | | | | | |
T - T - T T - T - T - T - T
/util:
This directory contains a simple python setup tool which creates
single straight or helical DNA strands, DNA duplexes or arrays of DNA
duplexes.

View File

@ -0,0 +1,74 @@
# LAMMPS data file
10 atoms
10 ellipsoids
8 bonds
4 atom types
1 bond types
# System size
-20.000000 20.000000 xlo xhi
-20.000000 20.000000 ylo yhi
-20.000000 20.000000 zlo zhi
# Atom masses for each atom type
Masses
1 3.1575
2 3.1575
3 3.1575
4 3.1575
# Atom-ID, type, position, molecule-ID, ellipsoid flag, density
Atoms
1 1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 1 1 1
2 1 1.3274493266864451e-01 -4.2912827978022683e-01 3.7506163469402809e-01 1 1 1
3 1 4.8460810659772807e-01 -7.0834970533509178e-01 7.5012326938805618e-01 1 1 1
4 1 9.3267359196674593e-01 -7.4012419946742802e-01 1.1251849040820843e+00 1 1 1
5 1 1.3204192238113461e+00 -5.1335201721887447e-01 1.5002465387761124e+00 1 1 1
6 4 1.9958077618865377e-01 5.1335201721887447e-01 1.5002465387761124e+00 1 1 1
7 4 5.8732640803325409e-01 7.4012419946742802e-01 1.1251849040820843e+00 1 1 1
8 4 1.0353918934022719e+00 7.0834970533509178e-01 7.5012326938805618e-01 1 1 1
9 4 1.3872550673313555e+00 4.2912827978022683e-01 3.7506163469402809e-01 1 1 1
10 4 1.5200000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 1 1 1
# Atom-ID, translational, rotational velocity
Velocities
1 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
2 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
3 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
4 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
5 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
6 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
7 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
8 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
9 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
10 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
# Atom-ID, shape, quaternion
Ellipsoids
1 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 1.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00 0.0000000000000000e+00
2 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 9.5533648912560598e-01 0.0000000000000000e+00 0.0000000000000000e+00 2.9552020666133955e-01
3 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 8.2533561490967822e-01 0.0000000000000000e+00 0.0000000000000000e+00 5.6464247339503526e-01
4 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 6.2160996827066439e-01 0.0000000000000000e+00 0.0000000000000000e+00 7.8332690962748319e-01
5 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 3.6235775447667351e-01 0.0000000000000000e+00 0.0000000000000000e+00 9.3203908596722607e-01
6 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 9.3203908596722607e-01 -3.6235775447667351e-01 0.0000000000000000e+00
7 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 7.8332690962748319e-01 -6.2160996827066439e-01 0.0000000000000000e+00
8 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 5.6464247339503526e-01 -8.2533561490967822e-01 0.0000000000000000e+00
9 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 2.9552020666133955e-01 -9.5533648912560598e-01 0.0000000000000000e+00
10 1.1739845031423408e+00 1.1739845031423408e+00 1.1739845031423408e+00 0.0000000000000000e+00 0.0000000000000000e+00 -1.0000000000000000e+00 0.0000000000000000e+00
# Bond topology
Bonds
1 1 1 2
2 1 2 3
3 1 3 4
4 1 4 5
5 1 6 7
6 1 7 8
7 1 8 9
8 1 9 10

View File

@ -0,0 +1,75 @@
variable number equal 1
variable ofreq equal 1000
variable efreq equal 1000
units lj
dimension 3
newton off
boundary p p p
atom_style hybrid bond ellipsoid
atom_modify sort 0 1.0
# Pair interactions require lists of neighbours to be calculated
neighbor 1.0 bin
neigh_modify every 1 delay 0 check yes
read_data data.duplex1
set atom * mass 3.1575
group all type 1 4
# oxDNA bond interactions - FENE backbone
bond_style oxdna_fene
bond_coeff * 2.0 0.25 0.7525
# oxDNA pair interactions
pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
pair_coeff * * oxdna_excv 2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
pair_coeff * * oxdna_stk 1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65
pair_coeff * * oxdna_hbond 0.0 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 1 4 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 2 3 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff * * oxdna_xstk 47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68
pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65
# NVE ensemble
#fix 1 all nve/dotc/langevin 0.1 0.1 0.03 457145 angmom 10
fix 1 all nve/dot
timestep 1e-5
#comm_style tiled
#fix 3 all balance 10000 1.1 rcb
#compute mol all chunk/atom molecule
#compute mychunk all vcm/chunk mol
#fix 4 all ave/time 10000 1 10000 c_mychunk[1] c_mychunk[2] c_mychunk[3] file vcm.txt mode vector
dump pos all xyz ${ofreq} traj.${number}.xyz
compute quat all property/atom quatw quati quatj quatk
dump quat all custom ${ofreq} quat.${number}.txt id c_quat[1] c_quat[2] c_quat[3] c_quat[4]
dump_modify quat sort id
dump_modify quat format line "%d %13.6le %13.6le %13.6le %13.6le"
compute erot all erotate/asphere
compute ekin all ke
compute epot all pe
variable erot equal c_erot
variable ekin equal c_ekin
variable epot equal c_epot
variable etot equal c_erot+c_ekin+c_epot
fix 5 all print ${efreq} "$(step) ekin = ${ekin} | erot = ${erot} | epot = ${epot} | etot = ${etot}" screen yes
dump out all custom ${ofreq} out.${number}.txt id x y z vx vy vz fx fy fz tqx tqy tqz
dump_modify out sort id
dump_modify out format line "%d %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le"
run 1000000
#write_restart config.${number}.*

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,97 @@
# LAMMPS data file
16 atoms
16 ellipsoids
13 bonds
4 atom types
1 bond types
# System size
-20.0 20.0 xlo xhi
-20.0 20.0 ylo yhi
-20.0 20.0 zlo zhi
# Atom masses for each atom type
Masses
1 3.1575
2 3.1575
3 3.1575
4 3.1575
# Atom-ID, type, position, molecule-ID, ellipsoid flag, density
Atoms
1 1 0.000000000000000e+00 0.000000000000000e+00 0.000000000000000e+00 1 1 1
2 1 1.327449326686445e-01 -4.291282797802268e-01 3.750616346940281e-01 1 1 1
3 1 4.846081065977281e-01 -7.083497053350921e-01 7.501232693880562e-01 1 1 1
4 1 9.326735919667459e-01 -7.401241994674285e-01 1.125184904082084e+00 1 1 1
5 1 1.320419223811347e+00 -5.133520172188747e-01 1.500246538776112e+00 1 1 1
6 1 1.512394297416339e+00 -1.072512061254991e-01 1.875308173470140e+00 1 1 1
7 1 1.441536396413952e+00 3.363155369040876e-01 2.250369808164169e+00 1 1 1
8 1 1.132598224218932e+00 6.623975870343269e-01 2.625431442858197e+00 1 1 1
9 4 5.873264080332541e-01 7.401241994674285e-01 1.125184904082084e+00 1 1 1
10 4 1.035391893402272e+00 7.083497053350921e-01 7.501232693880562e-01 1 1 1
11 4 1.387255067331356e+00 4.291282797802267e-01 3.750616346940281e-01 1 1 1
12 4 1.520000000000000e+00 1.260981291332700e-33 0.000000000000000e+00 1 1 1
13 4 3.874017757810680e-01 -6.623975870343268e-01 2.625431442858197e+00 1 1 1
14 4 7.846360358604798e-02 -3.363155369040874e-01 2.250369808164169e+00 1 1 1
15 4 7.605702583661333e-03 1.072512061254995e-01 1.875308173470140e+00 1 1 1
16 4 1.995807761886533e-01 5.133520172188748e-01 1.500246538776112e+00 1 1 1
# Atom-ID, translational, rotational velocity
Velocities
1 0.0 0.0 0.0 0.0 0.0 0.0
2 0.0 0.0 0.0 0.0 0.0 0.0
3 0.0 0.0 0.0 0.0 0.0 0.0
4 0.0 0.0 0.0 0.0 0.0 0.0
5 0.0 0.0 0.0 0.0 0.0 0.0
6 0.0 0.0 0.0 0.0 0.0 0.0
7 0.0 0.0 0.0 0.0 0.0 0.0
8 0.0 0.0 0.0 0.0 0.0 0.0
9 0.0 0.0 0.0 0.0 0.0 0.0
10 0.0 0.0 0.0 0.0 0.0 0.0
11 0.0 0.0 0.0 0.0 0.0 0.0
12 0.0 0.0 0.0 0.0 0.0 0.0
13 0.0 0.0 0.0 0.0 0.0 0.0
14 0.0 0.0 0.0 0.0 0.0 0.0
15 0.0 0.0 0.0 0.0 0.0 0.0
16 0.0 0.0 0.0 0.0 0.0 0.0
# Atom-ID, shape, quaternion
Ellipsoids
1 1.1739845031423408 1.1739845031423408 1.1739845031423408 1.000000000000000e+00 0.000000000000000e+00 0.000000000000000e+00 0.000000000000000e+00
2 1.1739845031423408 1.1739845031423408 1.1739845031423408 9.553364891256060e-01 0.000000000000000e+00 0.000000000000000e+00 2.955202066613395e-01
3 1.1739845031423408 1.1739845031423408 1.1739845031423408 8.253356149096783e-01 0.000000000000000e+00 0.000000000000000e+00 5.646424733950354e-01
4 1.1739845031423408 1.1739845031423408 1.1739845031423408 6.216099682706646e-01 0.000000000000000e+00 0.000000000000000e+00 7.833269096274833e-01
5 1.1739845031423408 1.1739845031423408 1.1739845031423408 3.623577544766736e-01 0.000000000000000e+00 0.000000000000000e+00 9.320390859672263e-01
6 1.1739845031423408 1.1739845031423408 1.1739845031423408 7.073720166770291e-02 0.000000000000000e+00 0.000000000000000e+00 9.974949866040544e-01
7 1.1739845031423408 1.1739845031423408 1.1739845031423408 -2.272020946930869e-01 -0.000000000000000e+00 0.000000000000000e+00 9.738476308781953e-01
8 1.1739845031423408 1.1739845031423408 1.1739845031423408 -5.048461045998575e-01 -0.000000000000000e+00 0.000000000000000e+00 8.632093666488738e-01
9 1.1739845031423408 1.1739845031423408 1.1739845031423408 4.796493962806427e-17 7.833269096274833e-01 -6.216099682706646e-01 3.806263289803786e-17
10 1.1739845031423408 1.1739845031423408 1.1739845031423408 5.707093416549944e-17 5.646424733950354e-01 -8.253356149096784e-01 2.218801320830406e-17
11 1.1739845031423408 1.1739845031423408 1.1739845031423408 6.107895212550935e-17 2.955202066613394e-01 -9.553364891256061e-01 4.331404380149668e-18
12 1.1739845031423408 1.1739845031423408 1.1739845031423408 5.963096920061075e-17 0.000000000000000e+00 -1.000000000000000e+00 -1.391211590127312e-17
13 1.1739845031423408 1.1739845031423408 1.1739845031423408 5.285632939302787e-17 8.632093666488739e-01 5.048461045998572e-01 -3.091290830301125e-17
14 1.1739845031423408 1.1739845031423408 1.1739845031423408 4.136019110019290e-17 9.738476308781953e-01 2.272020946930868e-01 -4.515234267244800e-17
15 1.1739845031423408 1.1739845031423408 1.1739845031423408 2.616947011741696e-17 9.974949866040544e-01 -7.073720166770313e-02 -5.535845274597425e-17
16 1.1739845031423408 1.1739845031423408 1.1739845031423408 8.641108308308281e-18 9.320390859672264e-01 -3.623577544766736e-01 -6.061955710708163e-17
# Bond-ID, type, atom pairs
Bonds
1 1 1 2
2 1 2 3
3 1 3 4
4 1 4 5
5 1 5 6
6 1 6 7
7 1 7 8
8 1 13 14
9 1 14 15
10 1 15 16
11 1 9 10
12 1 10 11
13 1 11 12

View File

@ -0,0 +1,75 @@
variable number equal 2
variable ofreq equal 1000
variable efreq equal 1000
units lj
dimension 3
newton off
boundary p p p
atom_style hybrid bond ellipsoid
atom_modify sort 0 1.0
# Pair interactions require lists of neighbours to be calculated
neighbor 1.0 bin
neigh_modify every 1 delay 0 check yes
read_data data.duplex2
set atom * mass 3.1575
group all type 1 4
# oxDNA bond interactions - FENE backbone
bond_style oxdna_fene
bond_coeff * 2.0 0.25 0.7525
# oxDNA pair interactions
pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
pair_coeff * * oxdna_excv 2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
pair_coeff * * oxdna_stk 1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65
pair_coeff * * oxdna_hbond 0.0 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 1 4 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 2 3 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff * * oxdna_xstk 47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68
pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65
# NVE ensemble
fix 1 all nve/dotc/langevin 0.1 0.1 0.03 457145 angmom 10
#fix 1 all nve/dot
timestep 1e-5
#comm_style tiled
#fix 3 all balance 10000 1.1 rcb
#compute mol all chunk/atom molecule
#compute mychunk all vcm/chunk mol
#fix 4 all ave/time 10000 1 10000 c_mychunk[1] c_mychunk[2] c_mychunk[3] file vcm.txt mode vector
dump pos all xyz ${ofreq} traj.${number}.xyz
compute quat all property/atom quatw quati quatj quatk
dump quat all custom ${ofreq} quat.${number}.txt id c_quat[1] c_quat[2] c_quat[3] c_quat[4]
dump_modify quat sort id
dump_modify quat format line "%d %13.6le %13.6le %13.6le %13.6le"
compute erot all erotate/asphere
compute ekin all ke
compute epot all pe
variable erot equal c_erot
variable ekin equal c_ekin
variable epot equal c_epot
variable etot equal c_erot+c_ekin+c_epot
fix 5 all print ${efreq} "$(step) ekin = ${ekin} | erot = ${erot} | epot = ${epot} | etot = ${etot}" screen yes
dump out all custom ${ofreq} out.${number}.txt id x y z vx vy vz fx fy fz tqx tqy tqz
dump_modify out sort id
dump_modify out format line "%d %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le"
run 1000000
#write_restart config.${number}.*

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,388 @@
# Setup tool for oxDNA input in LAMMPS format.
import math,numpy as np,sys,os
# system size
lxmin = -115.0
lxmax = +115.0
lymin = -115.0
lymax = +115.0
lzmin = -115.0
lzmax = +115.0
# rise in z-direction
r0 = 0.7
# definition of single untwisted strand
def single():
strand = inp[1].split(':')
com_start=strand[0].split(',')
posx=float(com_start[0])
posy=float(com_start[1])
posz=float(com_start[2])
risex=0
risey=0
risez=r0
strandstart=len(nucleotide)+1
for letter in strand[2]:
temp=[]
temp.append(nt2num[letter])
temp.append([posx,posy,posz])
vel=[0,0,0,0,0,0]
temp.append(vel)
temp.append(shape)
quat=[1,0,0,0]
temp.append(quat)
posx=posx+risex
posy=posy+risey
posz=posz+risez
if (len(nucleotide)+1 > strandstart):
topology.append([1,len(nucleotide),len(nucleotide)+1])
nucleotide.append(temp)
return
# definition of single twisted strand
def single_helix():
strand = inp[1].split(':')
com_start=strand[0].split(',')
twist=float(strand[1])
posx = float(com_start[0])
posy = float(com_start[1])
posz = float(com_start[2])
risex=0
risey=0
risez=math.sqrt(r0**2-4.0*math.sin(0.5*twist)**2)
dcomh=0.76
axisx=dcomh + posx
axisy=posy
strandstart=len(nucleotide)+1
quat=[1,0,0,0]
qrot0=math.cos(0.5*twist)
qrot1=0
qrot2=0
qrot3=math.sin(0.5*twist)
for letter in strand[2]:
temp=[]
temp.append(nt2num[letter])
temp.append([posx,posy,posz])
vel=[0,0,0,0,0,0]
temp.append(vel)
temp.append(shape)
temp.append(quat)
quat0 = quat[0]*qrot0 - quat[1]*qrot1 - quat[2]*qrot2 - quat[3]*qrot3
quat1 = quat[0]*qrot1 + quat[1]*qrot0 + quat[2]*qrot3 - quat[3]*qrot2
quat2 = quat[0]*qrot2 + quat[2]*qrot0 + quat[3]*qrot1 - quat[1]*qrot3
quat3 = quat[0]*qrot3 + quat[3]*qrot0 + quat[1]*qrot2 + quat[2]*qrot1
quat = [quat0,quat1,quat2,quat3]
posx=axisx - dcomh*(quat[0]**2+quat[1]**2-quat[2]**2-quat[3]**2)
posy=axisy - dcomh*(2*(quat[1]*quat[2]+quat[0]*quat[3]))
posz=posz+risez
if (len(nucleotide)+1 > strandstart):
topology.append([1,len(nucleotide),len(nucleotide)+1])
nucleotide.append(temp)
return
# definition of twisted duplex
def duplex():
strand = inp[1].split(':')
com_start=strand[0].split(',')
twist=float(strand[1])
compstrand=[]
comptopo=[]
posx1 = float(com_start[0])
posy1 = float(com_start[1])
posz1 = float(com_start[2])
risex=0
risey=0
risez=math.sqrt(r0**2-4.0*math.sin(0.5*twist)**2)
dcomh=0.76
axisx=dcomh + posx1
axisy=posy1
posx2 = axisx + dcomh
posy2 = posy1
posz2 = posz1
strandstart=len(nucleotide)+1
quat1=[1,0,0,0]
quat2=[0,0,-1,0]
qrot0=math.cos(0.5*twist)
qrot1=0
qrot2=0
qrot3=math.sin(0.5*twist)
for letter in strand[2]:
temp1=[]
temp2=[]
temp1.append(nt2num[letter])
temp2.append(compnt2num[letter])
temp1.append([posx1,posy1,posz1])
temp2.append([posx2,posy2,posz2])
vel=[0,0,0,0,0,0]
temp1.append(vel)
temp2.append(vel)
temp1.append(shape)
temp2.append(shape)
temp1.append(quat1)
temp2.append(quat2)
quat1_0 = quat1[0]*qrot0 - quat1[1]*qrot1 - quat1[2]*qrot2 - quat1[3]*qrot3
quat1_1 = quat1[0]*qrot1 + quat1[1]*qrot0 + quat1[2]*qrot3 - quat1[3]*qrot2
quat1_2 = quat1[0]*qrot2 + quat1[2]*qrot0 + quat1[3]*qrot1 - quat1[1]*qrot3
quat1_3 = quat1[0]*qrot3 + quat1[3]*qrot0 + quat1[1]*qrot2 + quat1[2]*qrot1
quat1 = [quat1_0,quat1_1,quat1_2,quat1_3]
posx1=axisx - dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
posy1=axisy - dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
posz1=posz1+risez
quat2_0 = quat2[0]*qrot0 - quat2[1]*qrot1 - quat2[2]*qrot2 + quat2[3]*qrot3
quat2_1 = quat2[0]*qrot1 + quat2[1]*qrot0 - quat2[2]*qrot3 - quat2[3]*qrot2
quat2_2 = quat2[0]*qrot2 + quat2[2]*qrot0 + quat2[3]*qrot1 + quat2[1]*qrot3
quat2_3 =-quat2[0]*qrot3 + quat2[3]*qrot0 + quat2[1]*qrot2 + quat2[2]*qrot1
quat2 = [quat2_0,quat2_1,quat2_2,quat2_3]
posx2=axisx + dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
posy2=axisy + dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
posz2=posz1
if (len(nucleotide)+1 > strandstart):
topology.append([1,len(nucleotide),len(nucleotide)+1])
comptopo.append([1,len(nucleotide)+len(strand[2]),len(nucleotide)+len(strand[2])+1])
nucleotide.append(temp1)
compstrand.append(temp2)
for ib in range(len(compstrand)):
nucleotide.append(compstrand[len(compstrand)-1-ib])
for ib in range(len(comptopo)):
topology.append(comptopo[ib])
return
# definition of array of duplexes
def duplex_array():
strand = inp[1].split(':')
number=strand[0].split(',')
posz1_0 = float(strand[1])
twist=float(strand[2])
nx = int(number[0])
ny = int(number[1])
dx = (lxmax-lxmin)/nx
dy = (lymax-lymin)/ny
risex=0
risey=0
risez=math.sqrt(r0**2-4.0*math.sin(0.5*twist)**2)
dcomh=0.76
for ix in range(nx):
axisx=lxmin + dx/2 + ix * dx
for iy in range(ny):
axisy=lymin + dy/2 + iy * dy
compstrand=[]
comptopo=[]
posx1 = axisx - dcomh
posy1 = axisy
posz1 = posz1_0
posx2 = axisx + dcomh
posy2 = posy1
posz2 = posz1
strandstart=len(nucleotide)+1
quat1=[1,0,0,0]
quat2=[0,0,-1,0]
qrot0=math.cos(0.5*twist)
qrot1=0
qrot2=0
qrot3=math.sin(0.5*twist)
for letter in strand[3]:
temp1=[]
temp2=[]
temp1.append(nt2num[letter])
temp2.append(compnt2num[letter])
temp1.append([posx1,posy1,posz1])
temp2.append([posx2,posy2,posz2])
vel=[0,0,0,0,0,0]
temp1.append(vel)
temp2.append(vel)
temp1.append(shape)
temp2.append(shape)
temp1.append(quat1)
temp2.append(quat2)
quat1_0 = quat1[0]*qrot0 - quat1[1]*qrot1 - quat1[2]*qrot2 - quat1[3]*qrot3
quat1_1 = quat1[0]*qrot1 + quat1[1]*qrot0 + quat1[2]*qrot3 - quat1[3]*qrot2
quat1_2 = quat1[0]*qrot2 + quat1[2]*qrot0 + quat1[3]*qrot1 - quat1[1]*qrot3
quat1_3 = quat1[0]*qrot3 + quat1[3]*qrot0 + quat1[1]*qrot2 + quat1[2]*qrot1
quat1 = [quat1_0,quat1_1,quat1_2,quat1_3]
posx1=axisx - dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
posy1=axisy - dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
posz1=posz1+risez
quat2_0 = quat2[0]*qrot0 - quat2[1]*qrot1 - quat2[2]*qrot2 + quat2[3]*qrot3
quat2_1 = quat2[0]*qrot1 + quat2[1]*qrot0 - quat2[2]*qrot3 - quat2[3]*qrot2
quat2_2 = quat2[0]*qrot2 + quat2[2]*qrot0 + quat2[3]*qrot1 + quat2[1]*qrot3
quat2_3 =-quat2[0]*qrot3 + quat2[3]*qrot0 + quat2[1]*qrot2 + quat2[2]*qrot1
quat2 = [quat2_0,quat2_1,quat2_2,quat2_3]
posx2=axisx + dcomh*(quat1[0]**2+quat1[1]**2-quat1[2]**2-quat1[3]**2)
posy2=axisy + dcomh*(2*(quat1[1]*quat1[2]+quat1[0]*quat1[3]))
posz2=posz1
if (len(nucleotide)+1 > strandstart):
topology.append([1,len(nucleotide),len(nucleotide)+1])
comptopo.append([1,len(nucleotide)+len(strand[3]),len(nucleotide)+len(strand[3])+1])
nucleotide.append(temp1)
compstrand.append(temp2)
for ib in range(len(compstrand)):
nucleotide.append(compstrand[len(compstrand)-1-ib])
for ib in range(len(comptopo)):
topology.append(comptopo[ib])
return
# main part
nt2num = {'A':1, 'C':2, 'G':3, 'T':4}
compnt2num = {'T':1, 'G':2, 'C':3, 'A':4}
shape = [1.1739845031423408,1.1739845031423408,1.1739845031423408]
nucleotide=[]
topology=[]
seqfile = open(sys.argv[1],'r')
# process sequence file line by line
for line in seqfile:
inp = line.split()
if inp[0] == 'single':
single()
if inp[0] == 'single_helix':
single_helix()
if inp[0] == 'duplex':
duplex()
if inp[0] == 'duplex_array':
duplex_array()
# output atom data in LAMMPS format
out = open(sys.argv[2],'w')
out.write('# LAMMPS data file\n')
out.write('%d atoms\n' % len(nucleotide))
out.write('%d ellipsoids\n' % len(nucleotide))
out.write('%d bonds\n' % len(topology))
out.write('\n')
out.write('4 atom types\n')
out.write('1 bond types\n')
out.write('\n')
out.write('# System size\n')
out.write('%f %f xlo xhi\n' % (lxmin,lxmax))
out.write('%f %f ylo yhi\n' % (lymin,lymax))
out.write('%f %f zlo zhi\n' % (lzmin,lzmax))
out.write('\n')
out.write('Masses\n')
out.write('\n')
out.write('1 3.1575\n')
out.write('2 3.1575\n')
out.write('3 3.1575\n')
out.write('4 3.1575\n')
out.write('\n')
out.write('# Atom-ID, type, position, molecule-ID, ellipsoid flag, density\n')
out.write('Atoms\n')
out.write('\n')
for ib in range(len(nucleotide)):
out.write("%d %d %22.16le %22.16le %22.16le 1 1 1\n" % (ib+1,nucleotide[ib][0],nucleotide[ib][1][0],nucleotide[ib][1][1],nucleotide[ib][1][2]))
out.write('\n')
out.write('# Atom-ID, translational, rotational velocity\n')
out.write('Velocities\n')
out.write('\n')
for ib in range(len(nucleotide)):
out.write("%d %22.16le %22.16le %22.16le %22.16le %22.16le %22.16le\n" % (ib+1,nucleotide[ib][2][0],nucleotide[ib][2][1],nucleotide[ib][2][2],nucleotide[ib][2][3],nucleotide[ib][2][4],nucleotide[ib][2][5]))
out.write('\n')
out.write('# Atom-ID, shape, quaternion\n')
out.write('Ellipsoids\n')
out.write('\n')
for ib in range(len(nucleotide)):
out.write("%d %22.16le %22.16le %22.16le %22.16le %22.16le %22.16le %22.16le\n" % (ib+1,nucleotide[ib][3][0],nucleotide[ib][3][1],nucleotide[ib][3][2],nucleotide[ib][4][0],nucleotide[ib][4][1],nucleotide[ib][4][2],nucleotide[ib][4][3]))
out.write('\n')
out.write('# Bond topology\n')
out.write('Bonds\n')
out.write('\n')
for ib in range(len(topology)):
out.write("%d %d %d %d\n" % (ib+1,topology[ib][0],topology[ib][1],topology[ib][2]))
out.close()
seqfile.close()
sys.exit(0)

View File

@ -0,0 +1,77 @@
variable number equal 8
variable ofreq equal 1000
variable efreq equal 1000
units lj
dimension 3
newton off
processors 1 1 1
boundary p p p
atom_style hybrid bond ellipsoid
atom_modify sort 0 1.0
# Pair interactions require lists of neighbours to be calculated
neighbor 1.0 bin
neigh_modify every 1 delay 0 check yes
read_data data.duplex2
set atom * mass 3.1575
group all type 1 4
# oxDNA bond interactions - FENE backbone
bond_style oxdna_fene
bond_coeff * 2.0 0.25 0.7525
# oxDNA pair interactions
pair_style hybrid/overlay oxdna_excv oxdna_stk oxdna_hbond oxdna_xstk oxdna_coaxstk
pair_coeff * * oxdna_excv 2.0 0.7 0.675 2.0 0.515 0.5 2.0 0.33 0.32
pair_coeff * * oxdna_stk 1.61048 6.0 0.4 0.9 0.32 0.6 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 0.65 2.0 0.65
pair_coeff * * oxdna_hbond 0.0 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 1 4 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff 2 3 oxdna_hbond 1.077 8.0 0.4 0.75 0.34 0.7 1.5 0 0.7 1.5 0 0.7 1.5 0 0.7 0.46 3.141592653589793 0.7 4.0 1.5707963267948966 0.45 4.0 1.5707963267948966 0.45
pair_coeff * * oxdna_xstk 47.5 0.575 0.675 0.495 0.655 2.25 0.791592653589793 0.58 1.7 1.0 0.68 1.7 1.0 0.68 1.5 0 0.65 1.7 0.875 0.68 1.7 0.875 0.68
pair_coeff * * oxdna_coaxstk 46.0 0.4 0.6 0.22 0.58 2.0 2.541592653589793 0.65 1.3 0 0.8 0.9 0 0.95 0.9 0 0.95 2.0 -0.65 2.0 -0.65
# NVE ensemble
#fix 1 all nve/dotc/langevin 0.1 0.1 0.03 457145 angmom 10
fix 1 all nve/dot
timestep 1e-5
#comm_style tiled
#fix 3 all balance 10000 1.1 rcb
#compute mol all chunk/atom molecule
#compute mychunk all vcm/chunk mol
#fix 4 all ave/time 10000 1 10000 c_mychunk[1] c_mychunk[2] c_mychunk[3] file vcm.txt mode vector
#dump pos all xyz ${ofreq} traj.${number}.xyz
#compute quat all property/atom quatw quati quatj quatk
#dump quat all custom ${ofreq} quat.${number}.txt id c_quat[1] c_quat[2] c_quat[3] c_quat[4]
#dump_modify quat sort id
#dump_modify quat format line "%d %13.6le %13.6le %13.6le %13.6le"
compute erot all erotate/asphere
compute ekin all ke
compute epot all pe
variable erot equal c_erot
variable ekin equal c_ekin
variable epot equal c_epot
variable etot equal c_erot+c_ekin+c_epot
fix 5 all print ${efreq} "$(step) ekin = ${ekin} | erot = ${erot} | epot = ${epot} | etot = ${etot}" screen yes
dump out all custom ${ofreq} out.${number}.txt id x y z vx vy vz fx fy fz tqx tqy tqz
dump_modify out sort id
dump_modify out format line "%d %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le %13.6le"
run 1000000
#write_restart config.${number}.*

View File

@ -0,0 +1,4 @@
single 0,0,0:0.6:AAAAA
single_helix 0,0,0:0.6:AAAAA
duplex 0,0,0:0.6:AAAAA
duplex_array 10,10:-112.0:0.6:AAAAA

View File

@ -18,7 +18,7 @@ neigh_modify every 1 delay 0 check no once no
timestep 0.001
compute dpdU all dpd
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[1]+press*vol
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[2]+press*vol
thermo 1
thermo_style custom step temp press vol pe ke v_totEnergy cella cellb cellc

View File

@ -22,7 +22,7 @@ neigh_modify every 1 delay 0 check no once no
timestep 0.001
compute dpdU all dpd
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[1]+press*vol
variable totEnergy equal pe+ke+c_dpdU[1]+c_dpdU[2]+press*vol
thermo 1
thermo_style custom step temp press vol pe ke v_totEnergy cella cellb cellc
@ -34,129 +34,137 @@ fix 2 all eos/cv 0.0005
run 100
Neighbor list info ...
1 neighbor list requests
update every 1 steps, delay 0 steps, check no
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 12
ghost atom cutoff = 12
binsize = 6 -> bins = 22 22 22
Memory usage per processor = 6.48143 Mbytes
binsize = 6, bins = 22 22 22
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair dpd/fdt/energy, perpetual
pair build: half/bin/newton
stencil: half/bin/3d/newton
bin: standard
(2) fix shardlow, perpetual, ssa
pair build: half/bin/newton/ssa
stencil: half/bin/3d/newton/ssa
bin: ssa
Memory usage per processor = 8.55503 Mbytes
Step Temp Press Volume PotEng KinEng v_totEnergy Cella Cellb Cellc
0 239.4274282976 2817.4421750949 2146689.0000000000 2639.8225470740 313.3218455755 6048176597.3066043854 129.0000000000 129.0000000000 129.0000000000
1 239.4771405316 2817.4798146419 2146689.0000581890 2639.8304543632 313.3869004818 6048257397.9450111389 129.0000000012 129.0000000012 129.0000000012
2 239.5643955010 2817.5423194969 2146689.0002327557 2639.8379071907 313.5010849268 6048391577.0431985855 129.0000000047 129.0000000047 129.0000000047
3 239.6633839196 2817.6123662396 2146689.0005237064 2639.8445238058 313.6306241122 6048541946.5712032318 129.0000000105 129.0000000105 129.0000000105
4 239.5371222027 2817.5355424336 2146689.0009310376 2639.8505035043 313.4653942786 6048377030.7404460907 129.0000000186 129.0000000186 129.0000000186
5 239.6512678169 2817.6153097076 2146689.0014547524 2639.8561498340 313.6147686202 6048548267.9007377625 129.0000000291 129.0000000291 129.0000000291
6 239.5617886781 2817.5624195435 2146689.0020948485 2639.8617493725 313.4976735610 6048434730.8592004776 129.0000000420 129.0000000420 129.0000000420
7 239.5228587856 2817.5420009502 2146689.0028513218 2639.8666590407 313.4467287471 6048390900.5748577118 129.0000000571 129.0000000571 129.0000000571
8 239.6066877934 2817.6008649264 2146689.0037241788 2639.8710757645 313.5564298772 6048517265.7987136841 129.0000000746 129.0000000746 129.0000000746
9 239.5719861485 2817.5823530300 2146689.0047134170 2639.8752557893 313.5110182737 6048477529.2603597641 129.0000000944 129.0000000944 129.0000000944
10 239.5800176776 2817.5915671176 2146689.0058190385 2639.8793778438 313.5215285712 6048497312.1706552505 129.0000001166 129.0000001166 129.0000001166
11 239.6299830954 2817.6281223139 2146689.0070410441 2639.8829762049 313.5869148014 6048575788.3208351135 129.0000001410 129.0000001410 129.0000001410
12 239.6011995911 2817.6132377273 2146689.0083794324 2639.8860704236 313.5492478526 6048543839.4788360596 129.0000001678 129.0000001678 129.0000001678
13 239.6407681166 2817.6427924824 2146689.0098342048 2639.8889816934 313.6010284005 6048607288.5005025864 129.0000001970 129.0000001970 129.0000001970
14 239.6981172055 2817.6844100046 2146689.0114053637 2639.8913405110 313.6760771219 6048696632.8825626373 129.0000002285 129.0000002285 129.0000002285
15 239.8563971968 2817.7922519039 2146689.0130929090 2639.8934358481 313.8832070208 6048928140.8671455383 129.0000002623 129.0000002623 129.0000002623
16 239.8561894618 2817.7971208197 2146689.0148968464 2639.8950496967 313.8829351726 6048938597.9994916916 129.0000002984 129.0000002984 129.0000002984
17 239.8816520361 2817.8185621543 2146689.0168171758 2639.8961257823 313.9162562538 6048984631.3226108551 129.0000003369 129.0000003369 129.0000003369
18 239.9099966096 2817.8417368960 2146689.0188538977 2639.8965743204 313.9533488047 6049034386.0627622604 129.0000003777 129.0000003777 129.0000003777
19 240.0514024347 2817.9389205774 2146689.0210070144 2639.8966103811 314.1383966683 6049243015.4568052292 129.0000004208 129.0000004208 129.0000004208
20 239.8802541140 2817.8327386176 2146689.0232765260 2639.8962085210 313.9144268914 6049015081.9802341461 129.0000004662 129.0000004662 129.0000004662
21 239.8462621903 2817.8160306167 2146689.0256624296 2639.8953174755 313.8699440502 6048979221.7758703232 129.0000005140 129.0000005140 129.0000005140
22 240.0487944678 2817.9533849157 2146689.0281647225 2639.8938590354 314.1349838054 6049274086.0571212769 129.0000005642 129.0000005642 129.0000005642
23 240.0966314441 2817.9897873787 2146689.0307834130 2639.8918104774 314.1975846937 6049352238.2649183273 129.0000006166 129.0000006166 129.0000006166
24 240.1765312516 2818.0463843765 2146689.0335185044 2639.8891292321 314.3021439554 6049473742.2287187576 129.0000006714 129.0000006714 129.0000006714
25 240.1500705973 2818.0336048048 2146689.0363699966 2639.8858785483 314.2675167572 6049446316.4600162506 129.0000007285 129.0000007285 129.0000007285
26 240.2681423500 2818.1151708195 2146689.0393378921 2639.8825176506 314.4220289603 6049621421.8445177078 129.0000007880 129.0000007880 129.0000007880
27 240.4728815247 2818.2527327079 2146689.0424221945 2639.8784158747 314.6899567267 6049916733.3989181519 129.0000008498 129.0000008498 129.0000008498
28 240.4793027032 2818.2613348477 2146689.0456229053 2639.8736089473 314.6983596717 6049935208.5421981812 129.0000009139 129.0000009139 129.0000009139
29 240.5020619198 2818.2805472685 2146689.0489400285 2639.8681043704 314.7281430587 6049976461.0082206726 129.0000009803 129.0000009803 129.0000009803
30 240.5513721776 2818.3167157263 2146689.0523735629 2639.8623484053 314.7926719270 6050054113.1760177612 129.0000010491 129.0000010491 129.0000010491
31 240.7340393104 2818.4391703712 2146689.0559235099 2639.8563442170 315.0317155636 6050316995.4599781036 129.0000011202 129.0000011202 129.0000011202
32 240.8254719483 2818.5014640740 2146689.0595898777 2639.8498122053 315.1513670299 6050450731.1168394089 129.0000011936 129.0000011936 129.0000011936
33 240.9681573541 2818.5965480750 2146689.0633726656 2639.8425779528 315.3380893908 6050654857.7432861328 129.0000012694 129.0000012694 129.0000012694
34 241.0039494187 2818.6217008564 2146689.0672718794 2639.8347174393 315.3849279499 6050708863.9733209610 129.0000013475 129.0000013475 129.0000013475
35 241.0314566197 2818.6411150538 2146689.0712875174 2639.8262983643 315.4209246902 6050750551.5649127960 129.0000014279 129.0000014279 129.0000014279
36 241.0829173424 2818.6763455617 2146689.0754195810 2639.8174397481 315.4882677207 6050826192.2165899277 129.0000015107 129.0000015107 129.0000015107
37 241.2845682012 2818.8087982181 2146689.0796680767 2639.8080129872 315.7521540252 6051110539.1171846390 129.0000015958 129.0000015958 129.0000015958
38 241.3214712920 2818.8336260248 2146689.0840330068 2639.7981963574 315.8004465062 6051163849.0412235260 129.0000016833 129.0000016833 129.0000016833
39 241.3392127125 2818.8456991528 2146689.0885143690 2639.7879618658 315.8236634561 6051189778.9386901855 129.0000017730 129.0000017730 129.0000017730
40 241.5383770555 2818.9753950055 2146689.0931121684 2639.7769824244 316.0842958321 6051468208.8210506439 129.0000018651 129.0000018651 129.0000018651
41 241.5059730674 2818.9543817992 2146689.0978264087 2639.7656512498 316.0418910106 6051423113.2358427048 129.0000019595 129.0000019595 129.0000019595
42 241.3907605672 2818.8793800508 2146689.1026570834 2639.7541331920 315.8911205101 6051262121.2551422119 129.0000020563 129.0000020563 129.0000020563
43 241.5095917610 2818.9559595711 2146689.1076041958 2639.7424355740 316.0466265406 6051426527.7663059235 129.0000021554 129.0000021554 129.0000021554
44 241.6271631762 2819.0312325531 2146689.1126677482 2639.7297705654 316.2004839873 6051588129.8722610474 129.0000022568 129.0000022568 129.0000022568
45 241.5702411838 2818.9923790176 2146689.1178477411 2639.7163554760 316.1259941770 6051504737.9250564575 129.0000023606 129.0000023606 129.0000023606
46 241.7029985068 2819.0771124986 2146689.1231441777 2639.7024246704 316.2997243538 6051686649.4576120377 129.0000024667 129.0000024667 129.0000024667
47 241.7966144965 2819.1357830868 2146689.1285570571 2639.6882106593 316.4222330191 6051812612.3391046524 129.0000025751 129.0000025751 129.0000025751
48 241.8573480255 2819.1726205120 2146689.1340863821 2639.6735287925 316.5017107195 6051891706.4921989441 129.0000026859 129.0000026859 129.0000026859
49 241.9611147338 2819.2374095379 2146689.1397321564 2639.6583357477 316.6375029166 6052030804.4275226593 129.0000027990 129.0000027990 129.0000027990
50 242.1023518806 2819.3259059811 2146689.1454943856 2639.6424863169 316.8223300428 6052220795.1955394745 129.0000029144 129.0000029144 129.0000029144
51 242.1174105473 2819.3319633044 2146689.1513730693 2639.6264141131 316.8420362613 6052233814.9634265900 129.0000030321 129.0000030321 129.0000030321
52 242.2534914901 2819.4164594322 2146689.1573682069 2639.6098392670 317.0201158259 6052415218.9485445023 129.0000031522 129.0000031522 129.0000031522
53 242.3504633236 2819.4754119996 2146689.1634798055 2639.5930076506 317.1470160479 6052541789.1274013519 129.0000032746 129.0000032746 129.0000032746
54 242.2982323323 2819.4368568264 2146689.1697078613 2639.5756353782 317.0786650211 6052459040.6286897659 129.0000033994 129.0000033994 129.0000033994
55 242.3452896272 2819.4623310219 2146689.1760523771 2639.5575918586 317.1402455951 6052513743.7400159836 129.0000035265 129.0000035265 129.0000035265
56 242.4181903333 2819.5048897011 2146689.1825133534 2639.5390347547 317.2356456249 6052605122.2894439697 129.0000036559 129.0000036559 129.0000036559
57 242.5317091656 2819.5739975787 2146689.1890907930 2639.5199828249 317.3841997413 6052753494.0979280472 129.0000037876 129.0000037876 129.0000037876
58 242.5478978740 2819.5796954935 2146689.1957846982 2639.5006137388 317.4053847660 6052765744.6257629395 129.0000039217 129.0000039217 129.0000039217
59 242.6655316466 2819.6519225743 2146689.2025950695 2639.4808234811 317.5593238156 6052920813.0568208694 129.0000040582 129.0000040582 129.0000040582
60 242.8126131177 2819.7431588157 2146689.2095219092 2639.4607996998 317.7517989980 6053116688.6155729294 129.0000041969 129.0000041969 129.0000041969
61 242.7957124913 2819.7275989047 2146689.2165652174 2639.4406312730 317.7296823362 6053083306.1403274536 129.0000043380 129.0000043380 129.0000043380
62 242.9276177041 2819.8088790098 2146689.2237249981 2639.4201279058 317.9022974164 6053257809.6067762375 129.0000044814 129.0000044814 129.0000044814
63 243.0465445938 2819.8814758895 2146689.2310012528 2639.3991657500 318.0579286774 6053413673.1989650726 129.0000046272 129.0000046272 129.0000046272
64 242.9890585501 2819.8387587817 2146689.2383939880 2639.3781767844 317.9827007328 6053321993.5937871933 129.0000047752 129.0000047752 129.0000047752
65 242.9653746583 2819.8180104181 2146689.2459031967 2639.3568184374 317.9517072884 6053277474.4272727966 129.0000049256 129.0000049256 129.0000049256
66 243.0259297024 2819.8514334947 2146689.2535288804 2639.3352568621 318.0309514181 6053349244.9473772049 129.0000050784 129.0000050784 129.0000050784
67 242.9638979697 2819.8046112742 2146689.2612710390 2639.3134547096 317.9497748498 6053248753.9180717468 129.0000052335 129.0000052335 129.0000052335
68 243.0283540775 2819.8395632725 2146689.2691296688 2639.2912303374 318.0341240273 6053323807.2197017670 129.0000053909 129.0000053909 129.0000053909
69 243.2256418664 2819.9609646019 2146689.2771047787 2639.2684509205 318.2923006889 6053584440.8757400513 129.0000055506 129.0000055506 129.0000055506
70 243.2507495334 2819.9706145524 2146689.2851963686 2639.2450126010 318.3251573278 6053605179.1483964920 129.0000057127 129.0000057127 129.0000057127
71 243.4287155518 2820.0794853386 2146689.2934044413 2639.2213699915 318.5580489464 6053838914.2552747726 129.0000058771 129.0000058771 129.0000058771
72 243.5097518574 2820.1249498194 2146689.3017290002 2639.1971212009 318.6640954635 6053936535.9274711609 129.0000060439 129.0000060439 129.0000060439
73 243.5356790969 2820.1337977544 2146689.3101700447 2639.1723394661 318.6980246193 6053955553.5090074539 129.0000062130 129.0000062130 129.0000062130
74 243.5479180498 2820.1331964183 2146689.3187275808 2639.1473868749 318.7140408766 6053954286.7515821457 129.0000063844 129.0000063844 129.0000063844
75 243.7115573025 2820.2314361523 2146689.3274016059 2639.1220411207 318.9281840641 6054165201.5909118652 129.0000065581 129.0000065581 129.0000065581
76 243.7457279618 2820.2454531429 2146689.3361921217 2639.0963868224 318.9729008040 6054195316.5254154205 129.0000067342 129.0000067342 129.0000067342
77 243.8345031069 2820.2948644965 2146689.3450991292 2639.0700900389 319.0890745962 6054301412.5615310669 129.0000069126 129.0000069126 129.0000069126
78 244.0193931195 2820.4067881628 2146689.3541226317 2639.0435094409 319.3310271594 6054541703.5689058304 129.0000070934 129.0000070934 129.0000070934
79 243.9919100078 2820.3799166166 2146689.3632626338 2639.0164249037 319.2950619430 6054484044.4218587875 129.0000072765 129.0000072765 129.0000072765
80 244.0965612207 2820.4387335935 2146689.3725191355 2638.9888176882 319.4320116291 6054610332.4174261093 129.0000074619 129.0000074619 129.0000074619
81 244.1334315951 2820.4535208568 2146689.3818921377 2638.9608330195 319.4802612965 6054642102.5347270966 129.0000076496 129.0000076496 129.0000076496
82 244.3029520408 2820.5543485196 2146689.3913816395 2638.9318525796 319.7021007878 6054858575.1664342880 129.0000078397 129.0000078397 129.0000078397
83 244.3445761189 2820.5713690935 2146689.4009876498 2638.9021684795 319.7565712929 6054895140.1710596085 129.0000080321 129.0000080321 129.0000080321
84 244.2696671559 2820.5125763350 2146689.4107101629 2638.8720941742 319.6585431986 6054768957.6739044189 129.0000082269 129.0000082269 129.0000082269
85 244.5161919319 2820.6629431352 2146689.4205491822 2638.8415194387 319.9811528443 6055091776.5361995697 129.0000084240 129.0000084240 129.0000084240
86 244.5641090282 2820.6838080201 2146689.4305047127 2638.8103612394 320.0438585800 6055136595.0767974854 129.0000086234 129.0000086234 129.0000086234
87 244.5348240638 2820.6541129118 2146689.4405767513 2638.7789728309 320.0055354056 6055072877.2416200638 129.0000088251 129.0000088251 129.0000088251
88 244.6939431427 2820.7468233396 2146689.4507653015 2638.7470269267 320.2137633592 6055271926.6536149979 129.0000090292 129.0000090292 129.0000090292
89 244.8800201091 2820.8567117003 2146689.4610703662 2638.7147520097 320.4572692055 6055507852.1186332703 129.0000092356 129.0000092356 129.0000092356
90 244.8804280382 2820.8451141876 2146689.4714919478 2638.6820441173 320.4578030336 6055482985.2258749008 129.0000094444 129.0000094444 129.0000094444
91 244.9558851986 2820.8815975090 2146689.4820300462 2638.6491836104 320.5565485155 6055561333.3803453445 129.0000096555 129.0000096555 129.0000096555
92 244.9965893140 2820.8949614294 2146689.4926846647 2638.6159817170 320.6098151301 6055590051.6433181763 129.0000098689 129.0000098689 129.0000098689
93 245.1381056687 2820.9732811388 2146689.5034558061 2638.5824451870 320.7950076360 6055758210.2774200439 129.0000100846 129.0000100846 129.0000100846
94 245.2954807041 2821.0619342131 2146689.5143434699 2638.5485198222 321.0009532826 6055948551.7882709503 129.0000103027 129.0000103027 129.0000103027
95 245.3535822199 2821.0860553731 2146689.5253476589 2638.5144817512 321.0769866522 6056000363.5151576996 129.0000105232 129.0000105232 129.0000105232
96 245.5013476026 2821.1682908185 2146689.5364683764 2638.4801107361 321.2703568219 6056176929.0169925690 129.0000107459 129.0000107459 129.0000107459
97 245.4166531417 2821.0989038023 2146689.5477056229 2638.4453663061 321.1595231342 6056028008.1910057068 129.0000109710 129.0000109710 129.0000109710
98 245.4121937790 2821.0817490953 2146689.5590593945 2638.4097762390 321.1536874797 6055991214.3494396210 129.0000111984 129.0000111984 129.0000111984
99 245.4532592994 2821.0946353191 2146689.5705296928 2638.3738037546 321.2074270397 6056018909.4480972290 129.0000114282 129.0000114282 129.0000114282
100 245.7500657390 2821.2735939427 2146689.5821165247 2638.3375549051 321.5958367642 6056403111.1006488800 129.0000116603 129.0000116603 129.0000116603
Loop time of 4.05006 on 1 procs for 100 steps with 10125 atoms
0 239.4274282976 2817.4421750949 2146689.0000000000 2639.8225470740 313.3218455755 6048176597.3066034317 129.0000000000 129.0000000000 129.0000000000
1 239.4771405316 2817.4798146419 2146689.0000581890 2639.8304543632 313.3869004818 6048257397.8720483780 129.0000000012 129.0000000012 129.0000000012
2 239.5643955010 2817.5423194969 2146689.0002327557 2639.8379071907 313.5010849268 6048391576.8485937119 129.0000000047 129.0000000047 129.0000000047
3 239.6633839196 2817.6123662396 2146689.0005237064 2639.8445238058 313.6306241122 6048541946.2404479980 129.0000000105 129.0000000105 129.0000000105
4 239.5371222027 2817.5355424336 2146689.0009310376 2639.8505035043 313.4653942786 6048377030.5689325333 129.0000000186 129.0000000186 129.0000000186
5 239.6512678169 2817.6153097076 2146689.0014547524 2639.8561498340 313.6147686202 6048548267.5742130280 129.0000000291 129.0000000291 129.0000000291
6 239.5617886781 2817.5624195435 2146689.0020948485 2639.8617493725 313.4976735610 6048434730.6441593170 129.0000000420 129.0000000420 129.0000000420
7 239.5228587856 2817.5420009502 2146689.0028513218 2639.8666590407 313.4467287471 6048390900.4058599472 129.0000000571 129.0000000571 129.0000000571
8 239.6066877934 2817.6008649264 2146689.0037241788 2639.8710757645 313.5564298772 6048517265.5155982971 129.0000000746 129.0000000746 129.0000000746
9 239.5719861485 2817.5823530300 2146689.0047134170 2639.8752557893 313.5110182737 6048477529.0184717178 129.0000000944 129.0000000944 129.0000000944
10 239.5800176776 2817.5915671176 2146689.0058190385 2639.8793778438 313.5215285712 6048497311.9141387939 129.0000001166 129.0000001166 129.0000001166
11 239.6299830954 2817.6281223139 2146689.0070410441 2639.8829762049 313.5869148014 6048575787.9953098297 129.0000001410 129.0000001410 129.0000001410
12 239.6011995911 2817.6132377273 2146689.0083794324 2639.8860704236 313.5492478526 6048543839.1878814697 129.0000001678 129.0000001678 129.0000001678
13 239.6407681166 2817.6427924824 2146689.0098342048 2639.8889816934 313.6010284005 6048607288.1548709869 129.0000001970 129.0000001970 129.0000001970
14 239.6981172055 2817.6844100046 2146689.0114053637 2639.8913405110 313.6760771219 6048696632.4595127106 129.0000002285 129.0000002285 129.0000002285
15 239.8563971968 2817.7922519039 2146689.0130929090 2639.8934358481 313.8832070208 6048928140.2348766327 129.0000002623 129.0000002623 129.0000002623
16 239.8561894618 2817.7971208196 2146689.0148968464 2639.8950496967 313.8829351726 6048938597.3658657074 129.0000002984 129.0000002984 129.0000002984
17 239.8816520361 2817.8185621543 2146689.0168171758 2639.8961257823 313.9162562538 6048984630.6545839310 129.0000003369 129.0000003369 129.0000003369
18 239.9099966096 2817.8417368960 2146689.0188538977 2639.8965743204 313.9533488047 6049034385.3571958542 129.0000003777 129.0000003777 129.0000003777
19 240.0514024347 2817.9389205774 2146689.0210070144 2639.8966103811 314.1383966683 6049243014.5661621094 129.0000004208 129.0000004208 129.0000004208
20 239.8802541140 2817.8327386176 2146689.0232765260 2639.8962085210 313.9144268914 6049015081.3139505386 129.0000004662 129.0000004662 129.0000004662
21 239.8462621903 2817.8160306167 2146689.0256624296 2639.8953174755 313.8699440502 6048979221.1549577713 129.0000005140 129.0000005140 129.0000005140
22 240.0487944678 2817.9533849157 2146689.0281647225 2639.8938590354 314.1349838054 6049274085.1726217270 129.0000005642 129.0000005642 129.0000005642
23 240.0966314441 2817.9897873787 2146689.0307834130 2639.8918104774 314.1975846937 6049352237.3198652267 129.0000006166 129.0000006166 129.0000006166
24 240.1765312516 2818.0463843765 2146689.0335185044 2639.8891292321 314.3021439554 6049473741.1817827225 129.0000006714 129.0000006714 129.0000006714
25 240.1500705973 2818.0336048048 2146689.0363699966 2639.8858785483 314.2675167572 6049446315.4509468079 129.0000007285 129.0000007285 129.0000007285
26 240.2681423500 2818.1151708195 2146689.0393378921 2639.8825176506 314.4220289603 6049621420.6842966080 129.0000007880 129.0000007880 129.0000007880
27 240.4728815247 2818.2527327079 2146689.0424221945 2639.8784158747 314.6899567267 6049916731.9748563766 129.0000008498 129.0000008498 129.0000008498
28 240.4793027032 2818.2613348477 2146689.0456229053 2639.8736089473 314.6983596717 6049935207.1145420074 129.0000009139 129.0000009139 129.0000009139
29 240.5020619198 2818.2805472685 2146689.0489400285 2639.8681043704 314.7281430587 6049976459.5562763214 129.0000009803 129.0000009803 129.0000009803
30 240.5513721776 2818.3167157263 2146689.0523735629 2639.8623484053 314.7926719270 6050054111.6652946472 129.0000010491 129.0000010491 129.0000010491
31 240.7340393104 2818.4391703712 2146689.0559235099 2639.8563442170 315.0317155636 6050316993.7162160873 129.0000011202 129.0000011202 129.0000011202
32 240.8254719483 2818.5014640740 2146689.0595898777 2639.8498122053 315.1513670299 6050450729.2599506378 129.0000011936 129.0000011936 129.0000011936
33 240.9681573541 2818.5965480750 2146689.0633726656 2639.8425779528 315.3380893908 6050654855.7068986893 129.0000012694 129.0000012694 129.0000012694
34 241.0039494187 2818.6217008564 2146689.0672718794 2639.8347174393 315.3849279499 6050708861.8979463577 129.0000013475 129.0000013475 129.0000013475
35 241.0314566197 2818.6411150538 2146689.0712875174 2639.8262983643 315.4209246902 6050750549.4619541168 129.0000014279 129.0000014279 129.0000014279
36 241.0829173424 2818.6763455617 2146689.0754195810 2639.8174397481 315.4882677207 6050826190.0551443100 129.0000015107 129.0000015107 129.0000015107
37 241.2845682012 2818.8087982181 2146689.0796680767 2639.8080129872 315.7521540252 6051110536.7012710571 129.0000015958 129.0000015958 129.0000015958
38 241.3214712920 2818.8336260248 2146689.0840330068 2639.7981963574 315.8004465062 6051163846.5868301392 129.0000016833 129.0000016833 129.0000016833
39 241.3392127125 2818.8456991528 2146689.0885143690 2639.7879618658 315.8236634561 6051189776.4712991714 129.0000017730 129.0000017730 129.0000017730
40 241.5383770555 2818.9753950055 2146689.0931121684 2639.7769824244 316.0842958321 6051468206.1039972305 129.0000018651 129.0000018651 129.0000018651
41 241.5059730674 2818.9543817992 2146689.0978264087 2639.7656512498 316.0418910106 6051423110.5725250244 129.0000019595 129.0000019595 129.0000019595
42 241.3907605672 2818.8793800508 2146689.1026570834 2639.7541331920 315.8911205101 6051262118.7541017532 129.0000020563 129.0000020563 129.0000020563
43 241.5095917610 2818.9559595711 2146689.1076041958 2639.7424355740 316.0466265406 6051426525.1214485168 129.0000021554 129.0000021554 129.0000021554
44 241.6271631762 2819.0312325531 2146689.1126677482 2639.7297705654 316.2004839873 6051588127.0861988068 129.0000022568 129.0000022568 129.0000022568
45 241.5702411838 2818.9923790176 2146689.1178477411 2639.7163554760 316.1259941770 6051504735.2269029617 129.0000023606 129.0000023606 129.0000023606
46 241.7029985068 2819.0771124986 2146689.1231441777 2639.7024246704 316.2997243538 6051686646.5996389389 129.0000024667 129.0000024667 129.0000024667
47 241.7966144965 2819.1357830868 2146689.1285570571 2639.6882106593 316.4222330191 6051812609.3728218079 129.0000025751 129.0000025751 129.0000025751
48 241.8573480255 2819.1726205120 2146689.1340863821 2639.6735287925 316.5017107195 6051891703.4611186981 129.0000026859 129.0000026859 129.0000026859
49 241.9611147338 2819.2374095379 2146689.1397321564 2639.6583357477 316.6375029166 6052030801.2758235931 129.0000027990 129.0000027990 129.0000027990
50 242.1023518806 2819.3259059811 2146689.1454943856 2639.6424863169 316.8223300428 6052220791.8748512268 129.0000029144 129.0000029144 129.0000029144
51 242.1174105473 2819.3319633044 2146689.1513730693 2639.6264141131 316.8420362613 6052233811.6391019821 129.0000030321 129.0000030321 129.0000030321
52 242.2534914901 2819.4164594322 2146689.1573682069 2639.6098392671 317.0201158259 6052415215.4627037048 129.0000031522 129.0000031522 129.0000031522
53 242.3504633236 2819.4754119996 2146689.1634798055 2639.5930076506 317.1470160479 6052541785.5314817429 129.0000032746 129.0000032746 129.0000032746
54 242.2982323323 2819.4368568264 2146689.1697078613 2639.5756353782 317.0786650211 6052459037.1184797287 129.0000033994 129.0000033994 129.0000033994
55 242.3452896272 2819.4623310219 2146689.1760523771 2639.5575918586 317.1402455951 6052513740.1862611771 129.0000035265 129.0000035265 129.0000035265
56 242.4181903333 2819.5048897011 2146689.1825133534 2639.5390347547 317.2356456249 6052605118.6588287354 129.0000036559 129.0000036559 129.0000036559
57 242.5317091656 2819.5739975787 2146689.1890907930 2639.5199828249 317.3841997413 6052753490.3378009796 129.0000037876 129.0000037876 129.0000037876
58 242.5478978740 2819.5796954935 2146689.1957846982 2639.5006137388 317.4053847660 6052765740.8638200760 129.0000039217 129.0000039217 129.0000039217
59 242.6655316466 2819.6519225743 2146689.2025950695 2639.4808234811 317.5593238156 6052920809.1607065201 129.0000040582 129.0000040582 129.0000040582
60 242.8126131177 2819.7431588157 2146689.2095219092 2639.4607996998 317.7517989980 6053116684.5470046997 129.0000041969 129.0000041969 129.0000041969
61 242.7957124913 2819.7275989047 2146689.2165652174 2639.4406312730 317.7296823362 6053083302.1140241623 129.0000043380 129.0000043380 129.0000043380
62 242.9276177041 2819.8088790098 2146689.2237249981 2639.4201279058 317.9022974164 6053257805.4283437729 129.0000044814 129.0000044814 129.0000044814
63 243.0465445938 2819.8814758895 2146689.2310012528 2639.3991657500 318.0579286774 6053413668.8858547211 129.0000046272 129.0000046272 129.0000046272
64 242.9890585501 2819.8387587817 2146689.2383939880 2639.3781767844 317.9827007328 6053321989.3768787384 129.0000047752 129.0000047752 129.0000047752
65 242.9653746583 2819.8180104181 2146689.2459031967 2639.3568184374 317.9517072884 6053277470.2627182007 129.0000049256 129.0000049256 129.0000049256
66 243.0259297024 2819.8514334947 2146689.2535288804 2639.3352568621 318.0309514181 6053349240.7251205444 129.0000050784 129.0000050784 129.0000050784
67 242.9638979697 2819.8046112742 2146689.2612710390 2639.3134547096 317.9497748498 6053248749.7987766266 129.0000052335 129.0000052335 129.0000052335
68 243.0283540775 2819.8395632725 2146689.2691296688 2639.2912303374 318.0341240273 6053323803.0382738113 129.0000053909 129.0000053909 129.0000053909
69 243.2256418664 2819.9609646019 2146689.2771047787 2639.2684509205 318.2923006889 6053584436.4588871002 129.0000055506 129.0000055506 129.0000055506
70 243.2507495334 2819.9706145524 2146689.2851963686 2639.2450126010 318.3251573278 6053605174.7221174240 129.0000057127 129.0000057127 129.0000057127
71 243.4287155518 2820.0794853386 2146689.2934044413 2639.2213699915 318.5580489464 6053838909.6197280884 129.0000058771 129.0000058771 129.0000058771
72 243.5097518574 2820.1249498194 2146689.3017290002 2639.1971212009 318.6640954635 6053936531.2101163864 129.0000060439 129.0000060439 129.0000060439
73 243.5356790969 2820.1337977544 2146689.3101700447 2639.1723394661 318.6980246193 6053955548.7824945450 129.0000062130 129.0000062130 129.0000062130
74 243.5479180498 2820.1331964183 2146689.3187275808 2639.1473868749 318.7140408766 6053954282.0339813232 129.0000063844 129.0000063844 129.0000063844
75 243.7115573025 2820.2314361523 2146689.3274016059 2639.1220411207 318.9281840641 6054165196.6845111847 129.0000065581 129.0000065581 129.0000065581
76 243.7457279618 2820.2454531429 2146689.3361921217 2639.0963868224 318.9729008040 6054195311.5999307632 129.0000067342 129.0000067342 129.0000067342
77 243.8345031069 2820.2948644965 2146689.3450991292 2639.0700900389 319.0890745962 6054301407.5461502075 129.0000069126 129.0000069126 129.0000069126
78 244.0193931195 2820.4067881628 2146689.3541226317 2639.0435094409 319.3310271594 6054541698.3381366730 129.0000070934 129.0000070934 129.0000070934
79 243.9919100078 2820.3799166166 2146689.3632626338 2639.0164249037 319.2950619430 6054484039.2541246414 129.0000072765 129.0000072765 129.0000072765
80 244.0965612207 2820.4387335935 2146689.3725191355 2638.9888176882 319.4320116291 6054610327.1403293610 129.0000074619 129.0000074619 129.0000074619
81 244.1334315951 2820.4535208568 2146689.3818921377 2638.9608330195 319.4802612965 6054642097.2373485565 129.0000076496 129.0000076496 129.0000076496
82 244.3029520408 2820.5543485196 2146689.3913816395 2638.9318525796 319.7021007878 6054858569.6761827469 129.0000078397 129.0000078397 129.0000078397
83 244.3445761189 2820.5713690935 2146689.4009876498 2638.9021684795 319.7565712929 6054895134.6560049057 129.0000080321 129.0000080321 129.0000080321
84 244.2696671559 2820.5125763350 2146689.4107101629 2638.8720941742 319.6585431986 6054768952.2869329453 129.0000082269 129.0000082269 129.0000082269
85 244.5161919319 2820.6629431352 2146689.4205491822 2638.8415194387 319.9811528443 6055091770.8571672440 129.0000084240 129.0000084240 129.0000084240
86 244.5641090282 2820.6838080201 2146689.4305047127 2638.8103612394 320.0438585800 6055136589.3662166595 129.0000086234 129.0000086234 129.0000086234
87 244.5348240638 2820.6541129118 2146689.4405767513 2638.7789728309 320.0055354056 6055072871.6007261276 129.0000088251 129.0000088251 129.0000088251
88 244.6939431427 2820.7468233396 2146689.4507653015 2638.7470269267 320.2137633592 6055271920.8364210129 129.0000090292 129.0000090292 129.0000090292
89 244.8800201091 2820.8567117003 2146689.4610703662 2638.7147520097 320.4572692055 6055507846.0901927948 129.0000092356 129.0000092356 129.0000092356
90 244.8804280382 2820.8451141876 2146689.4714919478 2638.6820441173 320.4578030336 6055482979.2295818329 129.0000094444 129.0000094444 129.0000094444
91 244.9558851986 2820.8815975090 2146689.4820300462 2638.6491836104 320.5565485155 6055561327.3181543350 129.0000096555 129.0000096555 129.0000096555
92 244.9965893140 2820.8949614294 2146689.4926846647 2638.6159817170 320.6098151301 6055590045.5610351562 129.0000098689 129.0000098689 129.0000098689
93 245.1381056687 2820.9732811388 2146689.5034558061 2638.5824451870 320.7950076360 6055758204.0434722900 129.0000100846 129.0000100846 129.0000100846
94 245.2954807041 2821.0619342131 2146689.5143434699 2638.5485198222 321.0009532826 6055948545.3822879791 129.0000103027 129.0000103027 129.0000103027
95 245.3535822199 2821.0860553731 2146689.5253476589 2638.5144817512 321.0769866522 6056000357.0671482086 129.0000105232 129.0000105232 129.0000105232
96 245.5013476026 2821.1682908185 2146689.5364683764 2638.4801107361 321.2703568219 6056176922.4099712372 129.0000107459 129.0000107459 129.0000107459
97 245.4166531417 2821.0989038023 2146689.5477056229 2638.4453663061 321.1595231342 6056028001.7295455933 129.0000109710 129.0000109710 129.0000109710
98 245.4121937790 2821.0817490953 2146689.5590593945 2638.4097762390 321.1536874797 6055991207.9293851852 129.0000111984 129.0000111984 129.0000111984
99 245.4532592994 2821.0946353191 2146689.5705296928 2638.3738037546 321.2074270397 6056018903.0102539062 129.0000114282 129.0000114282 129.0000114282
100 245.7500657390 2821.2735939427 2146689.5821165247 2638.3375549051 321.5958367642 6056403104.3106222153 129.0000116603 129.0000116603 129.0000116603
Loop time of 5.22601 on 1 procs for 100 steps with 10125 atoms
Performance: 2.133 ns/day, 11.250 hours/ns, 24.691 timesteps/s
99.8% CPU use with 1 MPI tasks x no OpenMP threads
Performance: 1.653 ns/day, 14.517 hours/ns, 19.135 timesteps/s
99.7% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.46587 | 0.46587 | 0.46587 | 0.0 | 11.50
Neigh | 1.4713 | 1.4713 | 1.4713 | 0.0 | 36.33
Comm | 0.05567 | 0.05567 | 0.05567 | 0.0 | 1.37
Output | 0.011364 | 0.011364 | 0.011364 | 0.0 | 0.28
Modify | 2.0158 | 2.0158 | 2.0158 | 0.0 | 49.77
Other | | 0.03004 | | | 0.74
Pair | 0.44045 | 0.44045 | 0.44045 | 0.0 | 8.43
Neigh | 2.669 | 2.669 | 2.669 | 0.0 | 51.07
Comm | 0.056143 | 0.056143 | 0.056143 | 0.0 | 1.07
Output | 0.012469 | 0.012469 | 0.012469 | 0.0 | 0.24
Modify | 2.0163 | 2.0163 | 2.0163 | 0.0 | 38.58
Other | | 0.03168 | | | 0.61
Nlocal: 10125 ave 10125 max 10125 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -172,4 +180,4 @@ Dangerous builds not checked
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:04
Total wall time: 0:00:05

View File

@ -1,163 +1,163 @@
############################################################################
# Input file for investigating twinning nucleation under uniaxial loading with basal plane vector analysis
# Christopher Barrett, March 2013
# This script requires a Mg pair potential file to be in the same directory.
# fname is the file name. It is necessary for loops to work correctly. (See jump command)
variable fname index in.basal
######################################
# POTENTIAL VARIABLES
# lattice parameters and the minimum energy per atom which should be obtained with the current pair potential and homogeneous lattice
variable lx equal 3.181269601
variable b equal sqrt(3)
variable c equal sqrt(8/3)
variable ly equal ${b}*${lx}
variable lz equal ${c}*${lx}
variable pairlocation index almg.liu
variable pairstyle index eam/alloy/opt
######################################
# EQUILIBRATION/DEFORMATION VARIABLES
# eqpress = 10 bar = 1 MPa
# tstep (the timestep) is set to a default value of 0.001 (1 fs)
# seed randomizes the velocity
# srate is the rate of strain in 1/s
# Ndump is the number of timesteps in between each dump of the atom coordinates
variable tstep equal 0.001
variable seed equal 95812384
variable srate equal 1e9
######################################
# INITIALIZATION
units metal
dimension 3
boundary s s s
atom_style atomic
######################################
# ATOM BUILD
atom_modify map array
# lattice custom scale a1 "coordinates of a1" a2 "coordinates of a2" a3 "coordinates of a3" basis "atom1 coordinates" basis "atom2 coordinates" basis "atom3 coordinates" basis "atom4 coordinates" orient x "crystallagraphic orientation of x axis" orient y "crystallagraphic orientation of y axis" z "crystallagraphic orientation of z axis"
lattice custom 3.181269601 a1 1 0 0 a2 0 1.732050808 0 a3 0 0 1.632993162 basis 0.0 0.0 0.0 basis 0.5 0.5 0 basis 0 0.3333333 0.5 basis 0.5 0.833333 0.5 orient x 0 1 1 orient y 1 0 0 orient z 0 1 -1
variable multiple equal 20
variable mx equal "v_lx*v_multiple"
variable my equal "v_ly*v_multiple"
variable mz equal "v_lz*v_multiple"
# the simulation region should be from 0 to a multiple of the periodic boundary in x, y and z.
region whole block 0 ${mz} 0 ${mx} 0 ${my} units box
create_box 2 whole
create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1
region fixed1 block INF INF INF INF INF 10 units box
region fixed2 block INF INF INF INF 100 INF units box
group lower region fixed1
group upper region fixed2
group boundary union upper lower
group mobile subtract all boundary
variable natoms equal "count(all)"
print "# of atoms are: ${natoms}"
######################################
# INTERATOMIC POTENTIAL
pair_style ${pairstyle}
pair_coeff * * ${pairlocation} Mg Mg
######################################
# COMPUTES REQUIRED
compute csym all centro/atom 12
compute eng all pe/atom
compute eatoms all reduce sum c_eng
compute basal all basal/atom
######################################
# MINIMIZATION
# Primarily adjusts the c/a ratio to value predicted by EAM potential
reset_timestep 0
thermo 1
thermo_style custom step pe c_eatoms
min_style cg
minimize 1e-15 1e-15 1000 2000
variable eminimum equal "c_eatoms / count(all)"
print "%%e(it,1)=${eminimum}"
######################################
# EQUILIBRATION
reset_timestep 0
timestep ${tstep}
# atoms are given a random velocity based on a temperature of 100K.
velocity all create 100 ${seed} mom yes rot no
# temperature and pressure are set to 100 and 0
fix 1 all nve
# Set thermo output
thermo 100
thermo_style custom step lx ly lz press pxx pyy pzz pe temp
# Run for at least 2 picosecond (assuming 1 fs timestep)
run 2000
# Loop to run until pressure is below the variable eqpress (defined at beginning of file)
label loopeq
variable eq loop 100
run 250
variable converge equal press
if "${converge} <= 0" then "variable converge equal -press" else "variable converge equal press"
if "${converge} <= 50" then "jump ${fname} breakeq"
next eq
jump ${fname} loopeq
label breakeq
# Store length for strain rate calculations
variable tmp equal "lx"
variable L0 equal ${tmp}
print "Initial Length, L0: ${L0}"
unfix 1
######################################
# DEFORMATION
reset_timestep 0
timestep ${tstep}
# Impose constant strain rate
variable srate1 equal "v_srate / 1.0e10"
velocity upper set 0.0 NULL 0.0 units box
velocity lower set 0.0 NULL 0.0 units box
fix 2 upper setforce 0.0 NULL 0.0
fix 3 lower setforce 0.0 NULL 0.0
fix 1 all nve
# Output strain and stress info to file
# for units metal, pressure is in [bars] = 100 [kPa] = 1/10000 [GPa]
# p2 is in GPa
variable strain equal "(lx - v_L0)/v_L0"
variable p1 equal "v_strain"
variable p2 equal "-pxz/10000"
variable p3 equal "lx"
variable p4 equal "temp"
variable p5 equal "pe"
variable p6 equal "ke"
fix def1 all print 100 "${p1} ${p2} ${p3} ${p4} ${p5} ${p6}" file output.def1.txt screen no
# Dump coordinates to file (for void size calculations)
dump 1 all custom 1000 output.dump.* id x y z c_basal[1] c_basal[2] c_basal[3]
# Display thermo
thermo_style custom step v_strain pxz lx temp pe ke
restart 50000 output.restart
# run deformation for 100000 timesteps (10% strain assuming 1 fs timestep and 1e9/s strainrate)
variable runtime equal 0
label loop
displace_atoms all ramp x 0.0 ${srate1} z 10 100 units box
run 100
variable runtime equal ${runtime}+100
if "${runtime} < 100000" then "jump ${fname} loop"
######################################
# SIMULATION DONE
print "All done"
############################################################################
# Input file for investigating twinning nucleation under uniaxial loading with basal plane vector analysis
# Christopher Barrett, March 2013
# This script requires a Mg pair potential file to be in the same directory.
# fname is the file name. It is necessary for loops to work correctly. (See jump command)
variable fname index in.basal
######################################
# POTENTIAL VARIABLES
# lattice parameters and the minimum energy per atom which should be obtained with the current pair potential and homogeneous lattice
variable lx equal 3.181269601
variable b equal sqrt(3)
variable c equal sqrt(8/3)
variable ly equal ${b}*${lx}
variable lz equal ${c}*${lx}
variable pairlocation index almg.liu
variable pairstyle index eam/alloy/opt
######################################
# EQUILIBRATION/DEFORMATION VARIABLES
# eqpress = 10 bar = 1 MPa
# tstep (the timestep) is set to a default value of 0.001 (1 fs)
# seed randomizes the velocity
# srate is the rate of strain in 1/s
# Ndump is the number of timesteps in between each dump of the atom coordinates
variable tstep equal 0.001
variable seed equal 95812384
variable srate equal 1e9
######################################
# INITIALIZATION
units metal
dimension 3
boundary s s s
atom_style atomic
######################################
# ATOM BUILD
atom_modify map array
# lattice custom scale a1 "coordinates of a1" a2 "coordinates of a2" a3 "coordinates of a3" basis "atom1 coordinates" basis "atom2 coordinates" basis "atom3 coordinates" basis "atom4 coordinates" orient x "crystallagraphic orientation of x axis" orient y "crystallagraphic orientation of y axis" z "crystallagraphic orientation of z axis"
lattice custom 3.181269601 a1 1 0 0 a2 0 1.732050808 0 a3 0 0 1.632993162 basis 0.0 0.0 0.0 basis 0.5 0.5 0 basis 0 0.3333333 0.5 basis 0.5 0.833333 0.5 orient x 0 1 1 orient y 1 0 0 orient z 0 1 -1
variable multiple equal 20
variable mx equal "v_lx*v_multiple"
variable my equal "v_ly*v_multiple"
variable mz equal "v_lz*v_multiple"
# the simulation region should be from 0 to a multiple of the periodic boundary in x, y and z.
region whole block 0 ${mz} 0 ${mx} 0 ${my} units box
create_box 2 whole
create_atoms 1 box basis 1 1 basis 2 1 basis 3 1 basis 4 1
region fixed1 block INF INF INF INF INF 10 units box
region fixed2 block INF INF INF INF 100 INF units box
group lower region fixed1
group upper region fixed2
group boundary union upper lower
group mobile subtract all boundary
variable natoms equal "count(all)"
print "# of atoms are: ${natoms}"
######################################
# INTERATOMIC POTENTIAL
pair_style ${pairstyle}
pair_coeff * * ${pairlocation} Mg Mg
######################################
# COMPUTES REQUIRED
compute csym all centro/atom 12
compute eng all pe/atom
compute eatoms all reduce sum c_eng
compute basal all basal/atom
######################################
# MINIMIZATION
# Primarily adjusts the c/a ratio to value predicted by EAM potential
reset_timestep 0
thermo 1
thermo_style custom step pe c_eatoms
min_style cg
minimize 1e-15 1e-15 1000 2000
variable eminimum equal "c_eatoms / count(all)"
print "%%e(it,1)=${eminimum}"
######################################
# EQUILIBRATION
reset_timestep 0
timestep ${tstep}
# atoms are given a random velocity based on a temperature of 100K.
velocity all create 100 ${seed} mom yes rot no
# temperature and pressure are set to 100 and 0
fix 1 all nve
# Set thermo output
thermo 100
thermo_style custom step lx ly lz press pxx pyy pzz pe temp
# Run for at least 2 picosecond (assuming 1 fs timestep)
run 2000
# Loop to run until pressure is below the variable eqpress (defined at beginning of file)
label loopeq
variable eq loop 100
run 250
variable converge equal press
if "${converge} <= 0" then "variable converge equal -press" else "variable converge equal press"
if "${converge} <= 50" then "jump ${fname} breakeq"
next eq
jump ${fname} loopeq
label breakeq
# Store length for strain rate calculations
variable tmp equal "lx"
variable L0 equal ${tmp}
print "Initial Length, L0: ${L0}"
unfix 1
######################################
# DEFORMATION
reset_timestep 0
timestep ${tstep}
# Impose constant strain rate
variable srate1 equal "v_srate / 1.0e10"
velocity upper set 0.0 NULL 0.0 units box
velocity lower set 0.0 NULL 0.0 units box
fix 2 upper setforce 0.0 NULL 0.0
fix 3 lower setforce 0.0 NULL 0.0
fix 1 all nve
# Output strain and stress info to file
# for units metal, pressure is in [bars] = 100 [kPa] = 1/10000 [GPa]
# p2 is in GPa
variable strain equal "(lx - v_L0)/v_L0"
variable p1 equal "v_strain"
variable p2 equal "-pxz/10000"
variable p3 equal "lx"
variable p4 equal "temp"
variable p5 equal "pe"
variable p6 equal "ke"
fix def1 all print 100 "${p1} ${p2} ${p3} ${p4} ${p5} ${p6}" file output.def1.txt screen no
# Dump coordinates to file (for void size calculations)
dump 1 all custom 1000 output.dump.* id x y z c_basal[1] c_basal[2] c_basal[3]
# Display thermo
thermo_style custom step v_strain pxz lx temp pe ke
restart 50000 output.restart
# run deformation for 100000 timesteps (10% strain assuming 1 fs timestep and 1e9/s strainrate)
variable runtime equal 0
label loop
displace_atoms all ramp x 0.0 ${srate1} z 10 100 units box
run 100
variable runtime equal ${runtime}+100
if "${runtime} < 100000" then "jump ${fname} loop"
######################################
# SIMULATION DONE
print "All done"

View File

@ -15,6 +15,7 @@ bond_style harmonic
bond_coeff * 225.0 0.85
comm_modify vel yes
comm_modify cutoff 3.6
# must use pair hybrid, since srp bond particles
# do not interact with other atoms types

10
examples/mscg/README Normal file
View File

@ -0,0 +1,10 @@
Running this example requires that LAMMPS be built with the MSCG
package and its fix mscg command. The fix uses the Multi-Scale
Coarse-Graining (MS-CG) library, freely available at
https://github.com/uchicago-voth/MSCG-release, to compute optimized
coarse-grained force field parameters. The MS-CG library was
developed by Jacob Wagner in Greg Voth's group at the University of
Chicago.
See the lib/mscg/README file for instructions on how to download and
install the MS-CG library for use with LAMMPS.

12
examples/mscg/control.in Normal file
View File

@ -0,0 +1,12 @@
block_size 1
start_frame 1
n_frames 19
nonbonded_cutoff 10.0
basis_type 0
primary_output_style 0
output_solution_flag 1
output_spline_coeffs_flag 1
pair_nonbonded_bspline_basis_order 6
pair_nonbonded_basis_set_resolution 0.7
pair_nonbonded_output_binwidth 0.1
matrix_type 0

1015
examples/mscg/data.meoh Normal file

File diff suppressed because it is too large Load Diff

20180
examples/mscg/dump.meoh Normal file

File diff suppressed because it is too large Load Diff

22
examples/mscg/in.mscg Normal file
View File

@ -0,0 +1,22 @@
units real
atom_style full
pair_style zero 10.0
read_data data.meoh
pair_coeff * *
thermo 1
thermo_style custom step
# Test 1a: range finder functionality
fix 1 all mscg 1 range on
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
print "TEST_1a mscg range finder"
unfix 1
# Test 1b: force matching functionality
fix 1 all mscg 1
rerun dump.meoh first 0 last 4500 every 250 dump x y z fx fy fz
print "TEST_1b mscg force matching"
print TEST_DONE

View File

@ -0,0 +1,77 @@
2.500000 5.670970817963099e+02
2.600000 2.404059283529051e+02
2.700000 9.157060823529977e+01
2.800000 3.428273061369140e+01
2.900000 1.619868149395266e+01
3.000000 1.039607214301755e+01
3.100000 6.830187514267188e+00
3.200000 3.861970842349535e+00
3.300000 1.645948643278161e+00
3.400000 2.395428971623918e-01
3.500000 -4.276763637833773e-01
3.600000 -5.132022977965877e-01
3.700000 -2.208024961234051e-01
3.800000 2.402697744243800e-01
3.900000 6.956064296165573e-01
4.000000 1.034070044257954e+00
4.100000 1.205997975111669e+00
4.200000 1.209501102128581e+00
4.300000 1.076304670380924e+00
4.400000 8.575891319958883e-01
4.500000 6.098309880892070e-01
4.600000 3.807992942746473e-01
4.700000 1.995994191469442e-01
4.800000 7.699059877424269e-02
4.900000 9.750744163981299e-03
5.000000 -1.480308769532222e-02
5.100000 -1.429422279228416e-02
5.200000 -6.765899050869768e-03
5.300000 -6.214398421078919e-03
5.400000 -1.951586041390797e-02
5.500000 -4.689090237947263e-02
5.600000 -8.376292122940529e-02
5.700000 -1.226699982917263e-01
5.800000 -1.551768041657136e-01
5.900000 -1.737865035767736e-01
6.000000 -1.738272491408507e-01
6.100000 -1.546779867768825e-01
6.200000 -1.193171291488982e-01
6.300000 -7.321054075616322e-02
6.400000 -2.317411193286228e-02
6.500000 2.376366715221714e-02
6.600000 6.149913249600215e-02
6.700000 8.597538938112201e-02
6.800000 9.590170060736655e-02
6.900000 9.245100462148878e-02
7.000000 7.855487875847664e-02
7.100000 5.818301960249692e-02
7.200000 3.562272334783877e-02
7.300000 1.475836615985744e-02
7.400000 -1.639617536128255e-03
7.500000 -1.237881063914745e-02
7.600000 -1.768202571195587e-02
7.700000 -1.877757119362295e-02
7.800000 -1.748001968416543e-02
7.900000 -1.577097622918088e-02
8.000000 -1.537984660448136e-02
8.100000 -1.737044400054951e-02
8.200000 -2.187939410237979e-02
8.300000 -2.823987455760605e-02
8.400000 -3.525715284001425e-02
8.500000 -4.148996251287761e-02
8.600000 -4.553187949229211e-02
8.700000 -4.629269831051163e-02
8.800000 -4.327548798226762e-02
8.900000 -3.674131754868225e-02
9.000000 -2.758883541814894e-02
9.100000 -1.712151838480657e-02
9.200000 -6.810600249997737e-03
9.300000 1.941999556272785e-03
9.400000 8.040747353879739e-03
9.500000 1.092691524686838e-02
9.600000 1.063606620723048e-02
9.700000 7.416550438142138e-03
9.800000 1.175066786686231e-03
9.900000 -9.084427187675534e-03
10.000000 -2.582180514463068e-02
10.100000 -5.352186189454393e-02

View File

@ -0,0 +1,82 @@
# Header information on force file
1_1
N 77 R 2.500000 10.100000
1 2.500000 69.428523 567.097082
2 2.600000 29.053372 240.405928
3 2.700000 12.454545 91.570608
4 2.800000 6.161878 34.282731
5 2.900000 3.637808 16.198681
6 3.000000 2.308070 10.396072
7 3.100000 1.446757 6.830188
8 3.200000 0.912149 3.861971
9 3.300000 0.636753 1.645949
10 3.400000 0.542478 0.239543
11 3.500000 0.551885 -0.427676
12 3.600000 0.598929 -0.513202
13 3.700000 0.635629 -0.220802
14 3.800000 0.634656 0.240270
15 3.900000 0.587862 0.695606
16 4.000000 0.501378 1.034070
17 4.100000 0.389375 1.205998
18 4.200000 0.268600 1.209501
19 4.300000 0.154310 1.076305
20 4.400000 0.057615 0.857589
21 4.500000 -0.015756 0.609831
22 4.600000 -0.065288 0.380799
23 4.700000 -0.094307 0.199599
24 4.800000 -0.108137 0.076991
25 4.900000 -0.112474 0.009751
26 5.000000 -0.112221 -0.014803
27 5.100000 -0.110767 -0.014294
28 5.200000 -0.109714 -0.006766
29 5.300000 -0.109065 -0.006214
30 5.400000 -0.107778 -0.019516
31 5.500000 -0.104458 -0.046891
32 5.600000 -0.097925 -0.083763
33 5.700000 -0.087603 -0.122670
34 5.800000 -0.073711 -0.155177
35 5.900000 -0.057263 -0.173787
36 6.000000 -0.039882 -0.173827
37 6.100000 -0.023457 -0.154678
38 6.200000 -0.009757 -0.119317
39 6.300000 -0.000131 -0.073211
40 6.400000 0.004688 -0.023174
41 6.500000 0.004659 0.023764
42 6.600000 0.000396 0.061499
43 6.700000 -0.006978 0.085975
44 6.800000 -0.016072 0.095902
45 6.900000 -0.025489 0.092451
46 7.000000 -0.034040 0.078555
47 7.100000 -0.040877 0.058183
48 7.200000 -0.045567 0.035623
49 7.300000 -0.048086 0.014758
50 7.400000 -0.048742 -0.001640
51 7.500000 -0.048041 -0.012379
52 7.600000 -0.046538 -0.017682
53 7.700000 -0.044715 -0.018778
54 7.800000 -0.042902 -0.017480
55 7.900000 -0.041239 -0.015771
56 8.000000 -0.039682 -0.015380
57 8.100000 -0.038044 -0.017370
58 8.200000 -0.036082 -0.021879
59 8.300000 -0.033576 -0.028240
60 8.400000 -0.030401 -0.035257
61 8.500000 -0.026564 -0.041490
62 8.600000 -0.022213 -0.045532
63 8.700000 -0.017621 -0.046293
64 8.800000 -0.013143 -0.043275
65 8.900000 -0.009142 -0.036741
66 9.000000 -0.005926 -0.027589
67 9.100000 -0.003690 -0.017122
68 9.200000 -0.002494 -0.006811
69 9.300000 -0.002250 0.001942
70 9.400000 -0.002749 0.008041
71 9.500000 -0.003698 0.010927
72 9.600000 -0.004776 0.010636
73 9.700000 -0.005678 0.007417
74 9.800000 -0.006108 0.001175
75 9.900000 -0.005712 -0.009084
76 10.000000 -0.003967 -0.025822
77 10.100000 0.000000 -0.053522

View File

@ -0,0 +1,2 @@
n: 1 1 6 12 2.400000000000002e+00 1.010000000000000e+01
1.200460787805587e+03 2.169623423326193e+01 2.388396964379328e+01 -1.197754948555067e+01 6.472482422420378e+00 -1.483711824891365e+00 7.768139601662113e-01 -7.869494711740244e-01 4.830820182054661e-01 -1.892989444995645e-01 1.021275453070386e-01 -1.637649039972671e-01 5.570978712841167e-02 7.637188693695119e-03 -4.109175461195019e-03 -5.352186189455146e-02

View File

@ -0,0 +1 @@
1 1 2.852369 10.000000 fm

View File

View File

@ -0,0 +1,18 @@
fm_matrix_rows:3000; fm_matrix_columns:16;
Singular vector:
2.442317e+00
2.105009e+00
1.433251e+00
1.184602e+00
9.739627e-01
6.944898e-01
5.376709e-01
4.616070e-01
3.257062e-01
2.683729e-01
1.530153e-01
9.336288e-02
5.042150e-02
2.126912e-02
1.446682e-02
4.167763e-05

View File

@ -0,0 +1 @@
<EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD>@47h<<3C>5@<40><><EFBFBD><EFBFBD>K<EFBFBD>7@<40>R<EFBFBD>]<5D><>'<27><><EFBFBD><EFBFBD>n<EFBFBD><6E>@݌I<DD8C>H<EFBFBD><48><EFBFBD><19>?<3F><><EFBFBD><EFBFBD>?r<>I<EFBFBD><49>.<2E><><11>^<5E><><EFBFBD><EFBFBD>?W<57><7F><EFBFBD>:ȿ(O<1D>%<25>?<3F>Ns<4E>?<3F>Ŀ<EFBFBD>:<3A>C<EFBFBD><43><EFBFBD>?<3F><><EFBFBD>:,H?<3F>}<7D>c<EFBFBD><63>p<EFBFBD><70><EFBFBD><EFBFBD><EFBFBD>7g<37><67>

View File

@ -78,7 +78,7 @@ run 100
# only output atoms near vacancy
compute coord all coord/atom $r
compute coord all coord/atom cutoff $r
#dump events all custom 1 dump.prd id type x y z
#dump_modify events thresh c_coord != 4

View File

@ -80,7 +80,7 @@ velocity all zero linear
# only output atoms near vacancy
compute coord all coord/atom $r
compute coord all coord/atom cutoff $r
#dump events all custom 1 dump.prd id type x y z
#dump_modify events thresh c_coord != 4

10
examples/voronoi/README Normal file
View File

@ -0,0 +1,10 @@
Running this example requires that LAMMPS be built with the VORONOI
package and its compute voronoi command. The compute uses the Voro++
library, freely available at http://math.lbl.gov/voro++, to compute
the Voronoi tesselation locally on each processor. Voro++ was
developed by Chris H. Rycroft while at UC Berkeley / Lawrence Berkeley
Laboratory.
See the lib/voronoi/README file for instructions on how to download
and install the Voro++ library for use with LAMMPS.

View File

@ -39,6 +39,8 @@ meam modified embedded atom method (MEAM) potential, MEAM package
from Greg Wagner (Sandia)
molfile hooks to VMD molfile plugins, used by the USER-MOLFILE package
from Axel Kohlmeyer (Temple U) and the VMD development team
mscg hooks to the MSCG library, used by fix_mscg command
from Jacob Wagner and Greg Voth group (U Chicago)
python hooks to the system Python library, used by the PYTHON package
from the LAMMPS development team
qmmm quantum mechanics/molecular mechanics coupling interface

View File

@ -1,8 +0,0 @@
# Standard ignores
*~
*.pyc
\#*#
.#*
.*.swp
.cproject
.project

284
lib/kokkos/CHANGELOG.md Normal file
View File

@ -0,0 +1,284 @@
# Change Log
## [2.02.07](https://github.com/kokkos/kokkos/tree/2.02.07) (2016-12-16)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.01...2.02.07)
**Implemented enhancements:**
- Add CMake option to enable Cuda Lambda support [\#589](https://github.com/kokkos/kokkos/issues/589)
- Add CMake option to enable Cuda RDC support [\#588](https://github.com/kokkos/kokkos/issues/588)
- Add Initial Intel Sky Lake Xeon-HPC Compiler Support to Kokkos Make System [\#584](https://github.com/kokkos/kokkos/issues/584)
- Building Tutorial Examples [\#582](https://github.com/kokkos/kokkos/issues/582)
- Internal way for using ThreadVectorRange without TeamHandle [\#574](https://github.com/kokkos/kokkos/issues/574)
- Testing: Add testing for uvm and rdc [\#571](https://github.com/kokkos/kokkos/issues/571)
- Profiling: Add Memory Tracing and Region Markers [\#557](https://github.com/kokkos/kokkos/issues/557)
- nvcc\_wrapper not installed with Kokkos built with CUDA through CMake [\#543](https://github.com/kokkos/kokkos/issues/543)
- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
- Benchmarks: Add Gather benchmark [\#536](https://github.com/kokkos/kokkos/issues/536)
- Testing: add spot\_check option to test\_all\_sandia [\#535](https://github.com/kokkos/kokkos/issues/535)
- Deprecate Kokkos::Impl::VerifyExecutionCanAccessMemorySpace [\#527](https://github.com/kokkos/kokkos/issues/527)
- Add AtomicAdd support for 64bit float for Pascal [\#522](https://github.com/kokkos/kokkos/issues/522)
- Add Restrict and Aligned memory trait [\#517](https://github.com/kokkos/kokkos/issues/517)
- Kokkos Tests are Not Run using Compiler Optimization [\#501](https://github.com/kokkos/kokkos/issues/501)
- Add support for clang 3.7 w/ openmp backend [\#393](https://github.com/kokkos/kokkos/issues/393)
- Provide an error throw class [\#79](https://github.com/kokkos/kokkos/issues/79)
**Fixed bugs:**
- Cuda UVM Allocation test broken with UVM as default space [\#586](https://github.com/kokkos/kokkos/issues/586)
- Bug \(develop branch only\): multiple tests are now failing when forcing uvm usage. [\#570](https://github.com/kokkos/kokkos/issues/570)
- Error in generate\_makefile.sh for Kokkos when Compiler is Empty String/Fails [\#568](https://github.com/kokkos/kokkos/issues/568)
- XL 13.1.4 incorrect C++11 flag [\#553](https://github.com/kokkos/kokkos/issues/553)
- Improve DynRankView debug check [\#541](https://github.com/kokkos/kokkos/issues/541)
- Installing Library on MAC broken due to cp -u [\#539](https://github.com/kokkos/kokkos/issues/539)
- Intel Nightly Testing with Debug enabled fails [\#534](https://github.com/kokkos/kokkos/issues/534)
## [2.02.01](https://github.com/kokkos/kokkos/tree/2.02.01) (2016-11-01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.02.00...2.02.01)
**Implemented enhancements:**
- Add Changelog generation to our process. [\#506](https://github.com/kokkos/kokkos/issues/506)
**Fixed bugs:**
- Test scratch\_request fails in Serial with Debug enabled [\#520](https://github.com/kokkos/kokkos/issues/520)
- Bug In BoundsCheck for DynRankView [\#516](https://github.com/kokkos/kokkos/issues/516)
## [2.02.00](https://github.com/kokkos/kokkos/tree/2.02.00) (2016-10-30)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.10...2.02.00)
**Implemented enhancements:**
- Add PowerPC assembly for grabbing clock register in memory pool [\#511](https://github.com/kokkos/kokkos/issues/511)
- Add GCC 6.x support [\#508](https://github.com/kokkos/kokkos/issues/508)
- Test install and build against installed library [\#498](https://github.com/kokkos/kokkos/issues/498)
- Makefile.kokkos adds expt-extended-lambda to cuda build with clang [\#490](https://github.com/kokkos/kokkos/issues/490)
- Add top-level makefile option to just test kokkos-core unit-test [\#485](https://github.com/kokkos/kokkos/issues/485)
- Split and harmonize Object Files of Core UnitTests to increase build parallelism [\#484](https://github.com/kokkos/kokkos/issues/484)
- LayoutLeft to LayoutLeft subview for 3D and 4D views [\#473](https://github.com/kokkos/kokkos/issues/473)
- Add official Cuda 8.0 support [\#468](https://github.com/kokkos/kokkos/issues/468)
- Allow C++1Z Flag for Class Lambda capture [\#465](https://github.com/kokkos/kokkos/issues/465)
- Add Clang 4.0+ compilation of Cuda code [\#455](https://github.com/kokkos/kokkos/issues/455)
- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
- Add name of view to "View bounds error" [\#432](https://github.com/kokkos/kokkos/issues/432)
- Move Sort Binning Operators into Kokkos namespace [\#421](https://github.com/kokkos/kokkos/issues/421)
- TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396)
- Import WithoutInitializing and AllowPadding into Kokkos namespace [\#325](https://github.com/kokkos/kokkos/issues/325)
- TeamThreadRange requires begin, end to be the same type [\#305](https://github.com/kokkos/kokkos/issues/305)
- CudaUVMSpace should track \# allocations, due to CUDA limit on \# UVM allocations [\#300](https://github.com/kokkos/kokkos/issues/300)
- Remove old View and its infrastructure [\#259](https://github.com/kokkos/kokkos/issues/259)
**Fixed bugs:**
- Bug in TestCuda\_Other.cpp: most likely assembly inserted into Device code [\#515](https://github.com/kokkos/kokkos/issues/515)
- Cuda Compute Capability check of GPU is outdated [\#509](https://github.com/kokkos/kokkos/issues/509)
- multi\_scratch test with hwloc and pthreads seg-faults. [\#504](https://github.com/kokkos/kokkos/issues/504)
- generate\_makefile.bash: "make install" is broken [\#503](https://github.com/kokkos/kokkos/issues/503)
- make clean in Out of Source Build/Tests Does Not Work Correctly [\#502](https://github.com/kokkos/kokkos/issues/502)
- Makefiles for test and examples have issues in Cuda when CXX is not explicitly specified [\#497](https://github.com/kokkos/kokkos/issues/497)
- Dispatch lambda test directly inside GTEST macro doesn't work with nvcc [\#491](https://github.com/kokkos/kokkos/issues/491)
- UnitTests with HWLOC enabled fail if run with mpirun bound to a single core [\#489](https://github.com/kokkos/kokkos/issues/489)
- Failing Reducer Test on Mac with Pthreads [\#479](https://github.com/kokkos/kokkos/issues/479)
- make test Dumps Error with Clang Not Found [\#471](https://github.com/kokkos/kokkos/issues/471)
- OpenMP TeamPolicy member broadcast not using correct volatile shared variable [\#424](https://github.com/kokkos/kokkos/issues/424)
- TaskPolicy - generate error when attempt to use uninitialized [\#396](https://github.com/kokkos/kokkos/issues/396)
- New task policy implementation is pulling in old experimental code. [\#372](https://github.com/kokkos/kokkos/issues/372)
- MemoryPool unit test hangs on Power8 with GCC 6.1.0 [\#298](https://github.com/kokkos/kokkos/issues/298)
## [2.01.10](https://github.com/kokkos/kokkos/tree/2.01.10) (2016-09-27)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.06...2.01.10)
**Implemented enhancements:**
- Enable Profiling by default in Tribits build [\#438](https://github.com/kokkos/kokkos/issues/438)
- parallel\_reduce\(0\), parallel\_scan\(0\) unit tests [\#436](https://github.com/kokkos/kokkos/issues/436)
- data\(\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
- Fix tutorials to track new Kokkos::View [\#323](https://github.com/kokkos/kokkos/issues/323)
- Rename team policy set\_scratch\_size. [\#195](https://github.com/kokkos/kokkos/issues/195)
**Fixed bugs:**
- Possible Issue with Intel 17.0.098 and GCC 6.1.0 in Develop Branch [\#445](https://github.com/kokkos/kokkos/issues/445)
- Makefile spits syntax error [\#435](https://github.com/kokkos/kokkos/issues/435)
- Kokkos::sort fails for view with all the same values [\#422](https://github.com/kokkos/kokkos/issues/422)
- Generic Reducers: can't accept inline constructed reducer [\#404](https://github.com/kokkos/kokkos/issues/404)
- data\\(\\)==NULL after realloc with LayoutStride [\#351](https://github.com/kokkos/kokkos/issues/351)
- const subview of const view with compile time dimensions on Cuda backend [\#310](https://github.com/kokkos/kokkos/issues/310)
- Kokkos \(in Trilinos\) Causes Internal Compiler Error on CUDA 8.0.21-EA on POWER8 [\#307](https://github.com/kokkos/kokkos/issues/307)
- Core Oversubscription Detection Broken? [\#159](https://github.com/kokkos/kokkos/issues/159)
## [2.01.06](https://github.com/kokkos/kokkos/tree/2.01.06) (2016-09-02)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.01.00...2.01.06)
**Implemented enhancements:**
- Add "standard" reducers for lambda-supportable customized reduce [\#411](https://github.com/kokkos/kokkos/issues/411)
- TaskPolicy - single thread back-end execution [\#390](https://github.com/kokkos/kokkos/issues/390)
- Kokkos master clone tag [\#387](https://github.com/kokkos/kokkos/issues/387)
- Query memory requirements from task policy [\#378](https://github.com/kokkos/kokkos/issues/378)
- Output order of test\_atomic.cpp is confusing [\#373](https://github.com/kokkos/kokkos/issues/373)
- Missing testing for atomics [\#341](https://github.com/kokkos/kokkos/issues/341)
- Feature request for Kokkos to provide Kokkos::atomic\_fetch\_max and atomic\_fetch\_min [\#336](https://github.com/kokkos/kokkos/issues/336)
- TaskPolicy\<Cuda\> performance requires teams mapped to warps [\#218](https://github.com/kokkos/kokkos/issues/218)
**Fixed bugs:**
- Reduce with Teams broken for custom initialize [\#407](https://github.com/kokkos/kokkos/issues/407)
- Failing Kokkos build on Debian [\#402](https://github.com/kokkos/kokkos/issues/402)
- Failing Tests on NVIDIA Pascal GPUs [\#398](https://github.com/kokkos/kokkos/issues/398)
- Algorithms: fill\_random assumes dimensions fit in unsigned int [\#389](https://github.com/kokkos/kokkos/issues/389)
- Kokkos::subview with RandomAccess Memory Trait [\#385](https://github.com/kokkos/kokkos/issues/385)
- Build warning \(signed / unsigned comparison\) in Cuda implementation [\#365](https://github.com/kokkos/kokkos/issues/365)
- wrong results for a parallel\_reduce with CUDA8 / Maxwell50 [\#352](https://github.com/kokkos/kokkos/issues/352)
- Hierarchical parallelism - 3 level unit test [\#344](https://github.com/kokkos/kokkos/issues/344)
- Can I allocate a View w/ both WithoutInitializing & AllowPadding? [\#324](https://github.com/kokkos/kokkos/issues/324)
- subview View layout determination [\#309](https://github.com/kokkos/kokkos/issues/309)
- Unit tests with Cuda - Maxwell [\#196](https://github.com/kokkos/kokkos/issues/196)
## [2.01.00](https://github.com/kokkos/kokkos/tree/2.01.00) (2016-07-21)
[Full Changelog](https://github.com/kokkos/kokkos/compare/End_C++98...2.01.00)
**Implemented enhancements:**
- Edit ViewMapping so assigning Views with the same custom layout compiles when const casting [\#327](https://github.com/kokkos/kokkos/issues/327)
- DynRankView: Performance improvement for operator\(\) [\#321](https://github.com/kokkos/kokkos/issues/321)
- Interoperability between static and dynamic rank views [\#295](https://github.com/kokkos/kokkos/issues/295)
- subview member function ? [\#280](https://github.com/kokkos/kokkos/issues/280)
- Inter-operatibility between View and DynRankView. [\#245](https://github.com/kokkos/kokkos/issues/245)
- \(Trilinos\) build warning in atomic\_assign, with Kokkos::complex [\#177](https://github.com/kokkos/kokkos/issues/177)
- View\<\>::shmem\_size should runtime check for number of arguments equal to rank [\#176](https://github.com/kokkos/kokkos/issues/176)
- Custom reduction join via lambda argument [\#99](https://github.com/kokkos/kokkos/issues/99)
- DynRankView with 0 dimensions passed in at construction [\#293](https://github.com/kokkos/kokkos/issues/293)
- Inject view\_alloc and friends into Kokkos namespace [\#292](https://github.com/kokkos/kokkos/issues/292)
- Less restrictive TeamPolicy reduction on Cuda [\#286](https://github.com/kokkos/kokkos/issues/286)
- deep\_copy using remap with source execution space [\#267](https://github.com/kokkos/kokkos/issues/267)
- Suggestion: Enable opt-in L1 caching via nvcc-wrapper [\#261](https://github.com/kokkos/kokkos/issues/261)
- More flexible create\_mirror functions [\#260](https://github.com/kokkos/kokkos/issues/260)
- Rename View::memory\_span to View::required\_allocation\_size [\#256](https://github.com/kokkos/kokkos/issues/256)
- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
- Use of subviews and views with compile-time dimensions [\#237](https://github.com/kokkos/kokkos/issues/237)
- Kokkos::Timer [\#234](https://github.com/kokkos/kokkos/issues/234)
- Fence CudaUVMSpace allocations [\#230](https://github.com/kokkos/kokkos/issues/230)
- View::operator\(\) accept std::is\_integral and std::is\_enum [\#227](https://github.com/kokkos/kokkos/issues/227)
- Allocating zero size View [\#216](https://github.com/kokkos/kokkos/issues/216)
- Thread scalable memory pool [\#212](https://github.com/kokkos/kokkos/issues/212)
- Add a way to disable memory leak output [\#194](https://github.com/kokkos/kokkos/issues/194)
- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
- Runtime rank wrapper for View [\#189](https://github.com/kokkos/kokkos/issues/189)
- Profiling Interface [\#158](https://github.com/kokkos/kokkos/issues/158)
- Fix View assignment \(of managed to unmanaged\) [\#153](https://github.com/kokkos/kokkos/issues/153)
- Add unit test for assignment of managed View to unmanaged View [\#152](https://github.com/kokkos/kokkos/issues/152)
- Check for oversubscription of threads with MPI in Kokkos::initialize [\#149](https://github.com/kokkos/kokkos/issues/149)
- Dynamic resizeable 1dimensional view [\#143](https://github.com/kokkos/kokkos/issues/143)
- Develop TaskPolicy for CUDA [\#142](https://github.com/kokkos/kokkos/issues/142)
- New View : Test Compilation Downstream [\#138](https://github.com/kokkos/kokkos/issues/138)
- New View Implementation [\#135](https://github.com/kokkos/kokkos/issues/135)
- Add variant of subview that lets users add traits [\#134](https://github.com/kokkos/kokkos/issues/134)
- NVCC-WRAPPER: Add --host-only flag [\#121](https://github.com/kokkos/kokkos/issues/121)
- Address gtest issue with TriBITS Kokkos build outside of Trilinos [\#117](https://github.com/kokkos/kokkos/issues/117)
- Make tests pass with -expt-extended-lambda on CUDA [\#108](https://github.com/kokkos/kokkos/issues/108)
- Dynamic scheduling for parallel\_for and parallel\_reduce [\#106](https://github.com/kokkos/kokkos/issues/106)
- Runtime or compile time error when reduce functor's join is not properly specified as const member function or with volatile arguments [\#105](https://github.com/kokkos/kokkos/issues/105)
- Error out when the number of threads is modified after kokkos is initialized [\#104](https://github.com/kokkos/kokkos/issues/104)
- Porting to POWER and remove assumption of X86 default [\#103](https://github.com/kokkos/kokkos/issues/103)
- Dynamic scheduling option for RangePolicy [\#100](https://github.com/kokkos/kokkos/issues/100)
- SharedMemory Support for Lambdas [\#81](https://github.com/kokkos/kokkos/issues/81)
- Recommended TeamSize for Lambdas [\#80](https://github.com/kokkos/kokkos/issues/80)
- Add Aggressive Vectorization Compilation mode [\#72](https://github.com/kokkos/kokkos/issues/72)
- Dynamic scheduling team execution policy [\#53](https://github.com/kokkos/kokkos/issues/53)
- UVM allocations in multi-GPU systems [\#50](https://github.com/kokkos/kokkos/issues/50)
- Synchronic in Kokkos::Impl [\#44](https://github.com/kokkos/kokkos/issues/44)
- index and dimension types in for loops [\#28](https://github.com/kokkos/kokkos/issues/28)
- Subview assign of 1D Strided with stride 1 to LayoutLeft/Right [\#1](https://github.com/kokkos/kokkos/issues/1)
**Fixed bugs:**
- misspelled variable name in Kokkos\_Atomic\_Fetch + missing unit tests [\#340](https://github.com/kokkos/kokkos/issues/340)
- seg fault Kokkos::Impl::CudaInternal::print\_configuration [\#338](https://github.com/kokkos/kokkos/issues/338)
- Clang compiler error with named parallel\_reduce, tags, and TeamPolicy. [\#335](https://github.com/kokkos/kokkos/issues/335)
- Shared Memory Allocation Error at parallel\_reduce [\#311](https://github.com/kokkos/kokkos/issues/311)
- DynRankView: Fix resize and realloc [\#303](https://github.com/kokkos/kokkos/issues/303)
- Scratch memory and dynamic scheduling [\#279](https://github.com/kokkos/kokkos/issues/279)
- MemoryPool infinite loop when out of memory [\#312](https://github.com/kokkos/kokkos/issues/312)
- Kokkos DynRankView changes break Sacado and Panzer [\#299](https://github.com/kokkos/kokkos/issues/299)
- MemoryPool fails to compile on non-cuda non-x86 [\#297](https://github.com/kokkos/kokkos/issues/297)
- Random Number Generator Fix [\#296](https://github.com/kokkos/kokkos/issues/296)
- View template parameter ordering Bug [\#282](https://github.com/kokkos/kokkos/issues/282)
- Serial task policy broken. [\#281](https://github.com/kokkos/kokkos/issues/281)
- deep\_copy with LayoutStride should not memcpy [\#262](https://github.com/kokkos/kokkos/issues/262)
- DualView::need\_sync should be a const method [\#248](https://github.com/kokkos/kokkos/issues/248)
- Arbitrary-sized atomics on GPUs broken; loop forever [\#238](https://github.com/kokkos/kokkos/issues/238)
- boolean reduction value\_type changes answer [\#225](https://github.com/kokkos/kokkos/issues/225)
- Custom init\(\) function for parallel\_reduce with array value\_type [\#210](https://github.com/kokkos/kokkos/issues/210)
- unit\_test Makefile is Broken - Recursively Calls itself until Machine Apocalypse. [\#202](https://github.com/kokkos/kokkos/issues/202)
- nvcc\_wrapper Does Not Support -Xcompiler \<compiler option\> [\#198](https://github.com/kokkos/kokkos/issues/198)
- Kokkos exec space init should init Kokkos profiling [\#192](https://github.com/kokkos/kokkos/issues/192)
- Kokkos Threads Backend impl\_shared\_alloc Broken on Intel 16.1 \(Shepard Haswell\) [\#186](https://github.com/kokkos/kokkos/issues/186)
- pthread back end hangs if used uninitialized [\#182](https://github.com/kokkos/kokkos/issues/182)
- parallel\_reduce of size 0, not calling init/join [\#175](https://github.com/kokkos/kokkos/issues/175)
- Bug in Threads with OpenMP enabled [\#173](https://github.com/kokkos/kokkos/issues/173)
- KokkosExp\_SharedAlloc, m\_team\_work\_index inaccessible [\#166](https://github.com/kokkos/kokkos/issues/166)
- 128-bit CAS without Assembly Broken? [\#161](https://github.com/kokkos/kokkos/issues/161)
- fatal error: Cuda/Kokkos\_Cuda\_abort.hpp: No such file or directory [\#157](https://github.com/kokkos/kokkos/issues/157)
- Power8: Fix OpenMP backend [\#139](https://github.com/kokkos/kokkos/issues/139)
- Data race in Kokkos OpenMP initialization [\#131](https://github.com/kokkos/kokkos/issues/131)
- parallel\_launch\_local\_memory and cuda 7.5 [\#125](https://github.com/kokkos/kokkos/issues/125)
- Resize can fail with Cuda due to asynchronous dispatch [\#119](https://github.com/kokkos/kokkos/issues/119)
- Qthread taskpolicy initialization bug. [\#92](https://github.com/kokkos/kokkos/issues/92)
- Windows: sys/mman.h [\#89](https://github.com/kokkos/kokkos/issues/89)
- Windows: atomic\_fetch\_sub\(\) [\#88](https://github.com/kokkos/kokkos/issues/88)
- Windows: snprintf [\#87](https://github.com/kokkos/kokkos/issues/87)
- Parallel\_Reduce with TeamPolicy and league size of 0 returns garbage [\#85](https://github.com/kokkos/kokkos/issues/85)
- Throw with Cuda when using \(2D\) team\_policy parallel\_reduce with less than a warp size [\#76](https://github.com/kokkos/kokkos/issues/76)
- Scalar views don't work with Kokkos::Atomic memory trait [\#69](https://github.com/kokkos/kokkos/issues/69)
- Reduce the number of threads per team for Cuda [\#63](https://github.com/kokkos/kokkos/issues/63)
- Named Kernels fail for reductions with CUDA [\#60](https://github.com/kokkos/kokkos/issues/60)
- Kokkos View dimension\_\(\) for long returning unsigned int [\#20](https://github.com/kokkos/kokkos/issues/20)
- atomic test hangs with LLVM [\#6](https://github.com/kokkos/kokkos/issues/6)
- OpenMP Test should set omp\_set\_num\_threads to 1 [\#4](https://github.com/kokkos/kokkos/issues/4)
**Closed issues:**
- develop branch broken with CUDA 8 and --expt-extended-lambda [\#354](https://github.com/kokkos/kokkos/issues/354)
- --arch=KNL with Intel 2016 build failure [\#349](https://github.com/kokkos/kokkos/issues/349)
- Error building with Cuda when passing -DKOKKOS\_CUDA\_USE\_LAMBDA to generate\_makefile.bash [\#343](https://github.com/kokkos/kokkos/issues/343)
- Can I safely use int indices in a 2-D View with capacity \> 2B? [\#318](https://github.com/kokkos/kokkos/issues/318)
- Kokkos::ViewAllocateWithoutInitializing is not working [\#317](https://github.com/kokkos/kokkos/issues/317)
- Intel build on Mac OS X [\#277](https://github.com/kokkos/kokkos/issues/277)
- deleted [\#271](https://github.com/kokkos/kokkos/issues/271)
- Broken Mira build [\#268](https://github.com/kokkos/kokkos/issues/268)
- 32-bit build [\#246](https://github.com/kokkos/kokkos/issues/246)
- parallel\_reduce with RDC crashes linker [\#232](https://github.com/kokkos/kokkos/issues/232)
- build of Kokkos\_Sparse\_MV\_impl\_spmv\_Serial.cpp.o fails if you use nvcc and have cuda disabled [\#209](https://github.com/kokkos/kokkos/issues/209)
- Kokkos Serial execution space is not tested with TeamPolicy. [\#207](https://github.com/kokkos/kokkos/issues/207)
- Unit test failure on Hansen KokkosCore\_UnitTest\_Cuda\_MPI\_1 [\#200](https://github.com/kokkos/kokkos/issues/200)
- nvcc compiler warning: calling a \_\_host\_\_ function from a \_\_host\_\_ \_\_device\_\_ function is not allowed [\#180](https://github.com/kokkos/kokkos/issues/180)
- Intel 15 build error with defaulted "move" operators [\#171](https://github.com/kokkos/kokkos/issues/171)
- missing libkokkos.a during Trilinos 12.4.2 build, yet other libkokkos\*.a libs are there [\#165](https://github.com/kokkos/kokkos/issues/165)
- Tie atomic updates to execution space or even to thread team? \(speculation\) [\#144](https://github.com/kokkos/kokkos/issues/144)
- New View: Compiletime/size Test [\#137](https://github.com/kokkos/kokkos/issues/137)
- New View : Performance Test [\#136](https://github.com/kokkos/kokkos/issues/136)
- Signed/unsigned comparison warning in CUDA parallel [\#130](https://github.com/kokkos/kokkos/issues/130)
- Kokkos::complex: Need op\* w/ std::complex & real [\#126](https://github.com/kokkos/kokkos/issues/126)
- Use uintptr\_t for casting pointers [\#110](https://github.com/kokkos/kokkos/issues/110)
- Default thread mapping behavior between P and Q threads. [\#91](https://github.com/kokkos/kokkos/issues/91)
- Windows: Atomic\_Fetch\_Exchange\(\) return type [\#90](https://github.com/kokkos/kokkos/issues/90)
- Synchronic unit test is way too long [\#84](https://github.com/kokkos/kokkos/issues/84)
- nvcc\_wrapper -\> $\(NVCC\_WRAPPER\) [\#42](https://github.com/kokkos/kokkos/issues/42)
- Check compiler version and print helpful message [\#39](https://github.com/kokkos/kokkos/issues/39)
- Kokkos shared memory on Cuda uses a lot of registers [\#31](https://github.com/kokkos/kokkos/issues/31)
- Can not pass unit test `cuda.space` without a GT 720 [\#25](https://github.com/kokkos/kokkos/issues/25)
- Makefile.kokkos lacks bounds checking option that CMake has [\#24](https://github.com/kokkos/kokkos/issues/24)
- Kokkos can not complete unit tests with CUDA UVM enabled [\#23](https://github.com/kokkos/kokkos/issues/23)
- Simplify teams + shared memory histogram example to remove vectorization [\#21](https://github.com/kokkos/kokkos/issues/21)
- Kokkos needs to rever to ${PROJECT\_NAME}\_ENABLE\_CXX11 not Trilinos\_ENABLE\_CXX11 [\#17](https://github.com/kokkos/kokkos/issues/17)
- Kokkos Base Makefile adds AVX to KNC Build [\#16](https://github.com/kokkos/kokkos/issues/16)
- MS Visual Studio 2013 Build Errors [\#9](https://github.com/kokkos/kokkos/issues/9)
- subview\(X, ALL\(\), j\) for 2-D LayoutRight View X: should it view a column? [\#5](https://github.com/kokkos/kokkos/issues/5)
## [End_C++98](https://github.com/kokkos/kokkos/tree/End_C++98) (2015-04-15)
\* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)*

View File

@ -34,8 +34,8 @@ TRIBITS_PACKAGE_DECL(Kokkos) # ENABLE_SHADOWING_WARNINGS)
# for compatibility with Kokkos' Makefile build system.
TRIBITS_ADD_OPTION_AND_DEFINE(
${PACKAGE_NAME}_ENABLE_DEBUG
${PACKAGE_NAME_UC}_HAVE_DEBUG
Kokkos_ENABLE_DEBUG
KOKKOS_HAVE_DEBUG
"Enable run-time debug checks. These checks may be expensive, so they are disabled by default in a release build."
${${PROJECT_NAME}_ENABLE_DEBUG}
)
@ -57,7 +57,21 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Cuda_UVM
KOKKOS_USE_CUDA_UVM
"Enable CUDA Unified Virtual Memory support in Kokkos."
"Enable CUDA Unified Virtual Memory as the default in Kokkos."
OFF
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Cuda_RDC
KOKKOS_HAVE_CUDA_RDC
"Enable CUDA Relocatable Device Code support in Kokkos."
OFF
)
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_Cuda_Lambda
KOKKOS_HAVE_CUDA_LAMBDA
"Enable CUDA LAMBDA support in Kokkos."
OFF
)
@ -72,6 +86,9 @@ ASSERT_DEFINED(TPL_ENABLE_Pthread)
IF (Kokkos_ENABLE_Pthread AND NOT TPL_ENABLE_Pthread)
MESSAGE(FATAL_ERROR "You set Kokkos_ENABLE_Pthread=ON, but Trilinos' support for Pthread(s) is not enabled (TPL_ENABLE_Pthread=OFF). This is not allowed. Please enable Pthreads in Trilinos before attempting to enable Kokkos' support for Pthreads.")
ENDIF ()
IF (NOT TPL_ENABLE_Pthread)
ADD_DEFINITIONS(-DGTEST_HAS_PTHREAD=0)
ENDIF()
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_OpenMP
@ -162,13 +179,28 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
#------------------------------------------------------------------------------
#
# C) Process the subpackages for Kokkos
# C) Install Kokkos' executable scripts
#
# nvcc_wrapper is Kokkos' wrapper for NVIDIA's NVCC CUDA compiler.
# Kokkos needs nvcc_wrapper in order to build. Other libraries and
# executables also need nvcc_wrapper. Thus, we need to install it.
# If the argument of DESTINATION is a relative path, CMake computes it
# as relative to ${CMAKE_INSTALL_PATH}.
INSTALL(PROGRAMS ${CMAKE_CURRENT_SOURCE_DIR}/bin/nvcc_wrapper DESTINATION bin)
#------------------------------------------------------------------------------
#
# D) Process the subpackages for Kokkos
#
TRIBITS_PROCESS_SUBPACKAGES()
#
# D) If Kokkos itself is enabled, process the Kokkos package
# E) If Kokkos itself is enabled, process the Kokkos package
#
TRIBITS_PACKAGE_DEF()

View File

@ -7,25 +7,26 @@ CXXFLAGS=$(CCFLAGS)
#Options: OpenMP,Serial,Pthreads,Cuda
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv8,BGQ,Power7,Power8,KNL,BDW
#Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,KNL,BDW,SKX
KOKKOS_ARCH ?= ""
#Options: yes,no
KOKKOS_DEBUG ?= "no"
#Options: hwloc,librt,experimental_memkind
KOKKOS_USE_TPLS ?= ""
#Options: c++11
#Options: c++11,c++1z
KOKKOS_CXX_STANDARD ?= "c++11"
#Options: aggressive_vectorization,disable_profiling
KOKKOS_OPTIONS ?= ""
#Default settings specific options
#Options: force_uvm,use_ldg,rdc,enable_lambda
KOKKOS_CUDA_OPTIONS ?= ""
KOKKOS_CUDA_OPTIONS ?= "enable_lambda"
# Check for general settings
KOKKOS_INTERNAL_ENABLE_DEBUG := $(strip $(shell echo $(KOKKOS_DEBUG) | grep "yes" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX11 := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++11" | wc -l))
KOKKOS_INTERNAL_ENABLE_CXX1Z := $(strip $(shell echo $(KOKKOS_CXX_STANDARD) | grep "c++1z" | wc -l))
# Check for external libraries
KOKKOS_INTERNAL_USE_HWLOC := $(strip $(shell echo $(KOKKOS_USE_TPLS) | grep "hwloc" | wc -l))
@ -53,23 +54,71 @@ ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 0)
endif
endif
# Check for other Execution Spaces
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
CUDA_PATH ?= $(KOKKOS_INTERNAL_NVCC_PATH:/bin/nvcc=)
KOKKOS_INTERNAL_COMPILER_NVCC_VERSION := $(shell nvcc --version 2>&1 | grep release | cut -d' ' -f5 | cut -d',' -f1 | tr -d .)
endif
# Check OS
KOKKOS_OS := $(shell uname -s)
KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname -s | grep CYGWIN | wc -l)
KOKKOS_INTERNAL_OS_LINUX := $(shell uname -s | grep Linux | wc -l)
KOKKOS_INTERNAL_OS_DARWIN := $(shell uname -s | grep Darwin | wc -l)
# Check compiler
KOKKOS_INTERNAL_COMPILER_INTEL := $(shell $(CXX) --version 2>&1 | grep "Intel Corporation" | wc -l)
KOKKOS_INTERNAL_COMPILER_PGI := $(shell $(CXX) --version 2>&1 | grep PGI | wc -l)
KOKKOS_INTERNAL_COMPILER_XL := $(shell $(CXX) -qversion 2>&1 | grep XL | wc -l)
KOKKOS_INTERNAL_COMPILER_CRAY := $(shell $(CXX) -craype-verbose 2>&1 | grep "CC-" | wc -l)
KOKKOS_INTERNAL_OS_CYGWIN := $(shell uname | grep CYGWIN | wc -l)
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(CXX) --version 2>&1 | grep "nvcc" | wc -l)
ifneq ($(OMPI_CXX),)
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(OMPI_CXX) --version 2>&1 | grep "nvcc" | wc -l)
endif
ifneq ($(MPICH_CXX),)
KOKKOS_INTERNAL_COMPILER_NVCC := $(shell $(MPICH_CXX) --version 2>&1 | grep "nvcc" | wc -l)
endif
KOKKOS_INTERNAL_COMPILER_CLANG := $(shell $(CXX) --version 2>&1 | grep "clang" | wc -l)
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
KOKKOS_INTERNAL_COMPILER_CLANG = 1
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 2)
KOKKOS_INTERNAL_COMPILER_XL = 1
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_CLANG_VERSION) -lt 400; echo $$?),0)
$(error Compiling Cuda code directly with Clang requires version 4.0.0 or higher)
endif
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := 1
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -mp
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp=libomp
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# OpenMP is turned on by default in Cray compiler environment
KOKKOS_INTERNAL_OPENMP_FLAG :=
ifeq ($(KOKKOS_INTERNAL_COMPILER_XL), 1)
KOKKOS_INTERNAL_OPENMP_FLAG := -qsmp=omp
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
# OpenMP is turned on by default in Cray compiler environment
KOKKOS_INTERNAL_OPENMP_FLAG :=
else
KOKKOS_INTERNAL_OPENMP_FLAG := -fopenmp
endif
endif
endif
endif
@ -84,13 +133,11 @@ else
KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
else
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
endif
endif
endif
# Check for other Execution Spaces
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
# Check for Kokkos Architecture settings
#Intel based
@ -98,6 +145,7 @@ KOKKOS_INTERNAL_USE_ARCH_KNC := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNC |
KOKKOS_INTERNAL_USE_ARCH_SNB := $(strip $(shell echo $(KOKKOS_ARCH) | grep SNB | wc -l))
KOKKOS_INTERNAL_USE_ARCH_HSW := $(strip $(shell echo $(KOKKOS_ARCH) | grep HSW | wc -l))
KOKKOS_INTERNAL_USE_ARCH_BDW := $(strip $(shell echo $(KOKKOS_ARCH) | grep BDW | wc -l))
KOKKOS_INTERNAL_USE_ARCH_SKX := $(strip $(shell echo $(KOKKOS_ARCH) | grep SKX | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KNL := $(strip $(shell echo $(KOKKOS_ARCH) | grep KNL | wc -l))
#NVIDIA based
@ -110,11 +158,13 @@ KOKKOS_INTERNAL_USE_ARCH_MAXWELL50 := $(strip $(shell echo $(KOKKOS_ARCH) | grep
KOKKOS_INTERNAL_USE_ARCH_MAXWELL52 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell52 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_MAXWELL53 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Maxwell53 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_PASCAL61 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal61 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_PASCAL60 := $(strip $(shell echo $(KOKKOS_ARCH) | grep Pascal60 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KEPLER30) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER32) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
@ -127,13 +177,16 @@ KOKKOS_INTERNAL_USE_ARCH_NVIDIA := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_AR
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER35) \
+ $(KOKKOS_INTERNAL_USE_ARCH_KEPLER37) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL61) \
+ $(KOKKOS_INTERNAL_USE_ARCH_PASCAL60) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52) \
+ $(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53) | bc))
endif
#ARM based
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_ARMV80 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv80 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_ARMV81 := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv81 | wc -l))
KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX := $(strip $(shell echo $(KOKKOS_ARCH) | grep ARMv8-ThunderX | wc -l))
#IBM based
KOKKOS_INTERNAL_USE_ARCH_BGQ := $(strip $(shell echo $(KOKKOS_ARCH) | grep BGQ | wc -l))
@ -145,17 +198,18 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
#Any AVX?
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX2 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX512MIC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
# Decide what ISA level we are able to support
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL) | bc ))
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8) | bc ))
#Incompatible flags?
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)>1" | bc ))
KOKKOS_INTERNAL_USE_ARCH_MULTIHOST := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_AVX)+$(KOKKOS_INTERNAL_USE_ARCH_AVX2)+$(KOKKOS_INTERNAL_USE_ARCH_KNC)+$(KOKKOS_INTERNAL_USE_ARCH_IBM)+$(KOKKOS_INTERNAL_USE_ARCH_AMDAVX)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV80)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV81)+$(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX)>1" | bc ))
KOKKOS_INTERNAL_USE_ARCH_MULTIGPU := $(strip $(shell echo "$(KOKKOS_INTERNAL_USE_ARCH_NVIDIA)>1" | bc))
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MULTIHOST), 1)
@ -207,15 +261,21 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_X86_64), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_X86_64" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_KNC), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_KNC" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCLE" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
@ -230,9 +290,15 @@ ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX1Z), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX1Z_FLAG)
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX11 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_HAVE_CXX1Z 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_ENABLE_DEBUG), 1)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_CXXFLAGS += -G
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_CXXFLAGS += -lineinfo
endif
KOKKOS_CXXFLAGS += -g
KOKKOS_LDFLAGS += -g -ldl
@ -273,13 +339,14 @@ endif
tmp := $(shell echo "/* Cuda Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LDG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LDG_INTRINSIC 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_UVM), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_UVM 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_CUDA_UVM 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
@ -289,27 +356,101 @@ ifeq ($(KOKKOS_INTERNAL_CUDA_USE_RELOC), 1)
endif
ifeq ($(KOKKOS_INTERNAL_CUDA_USE_LAMBDA), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -expt-extended-lambda
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
ifeq ($(shell test $(KOKKOS_INTERNAL_COMPILER_NVCC_VERSION) -gt 70; echo $$?),0)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -expt-extended-lambda
else
$(warning Warning: Cuda Lambda support was requested but NVCC version is too low. This requires NVCC for Cuda version 7.5 or higher. Disabling Lambda support now.)
endif
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
tmp := $(shell echo "\#define KOKKOS_CUDA_USE_LAMBDA 1" >> KokkosCore_config.tmp )
endif
endif
endif
#Add Architecture flags
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV80), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
KOKKOS_CXXFLAGS += -mavx
KOKKOS_LDFLAGS += -mavx
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
KOKKOS_CXXFLAGS += -march=armv8-a
KOKKOS_LDFLAGS += -march=armv8-a
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV81), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV81 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
KOKKOS_CXXFLAGS += -march=armv8.1-a
KOKKOS_LDFLAGS += -march=armv8.1-a
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_ARMV8_THUNDERX), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV80 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_ARMV8_THUNDERX 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_CXXFLAGS +=
KOKKOS_LDFLAGS +=
else
KOKKOS_CXXFLAGS += -march=armv8-a -mtune=thunderx
KOKKOS_LDFLAGS += -march=armv8-a -mtune=thunderx
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -mavx
KOKKOS_LDFLAGS += -mavx
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_CXXFLAGS += -tp=sandybridge
KOKKOS_LDFLAGS += -tp=sandybridge
else
# Assume that this is a really a GNU compiler
KOKKOS_CXXFLAGS += -mavx
KOKKOS_LDFLAGS += -mavx
endif
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
else
# Assume that this is a really a GNU compiler or it could be XL on P8
KOKKOS_CXXFLAGS += -mcpu=power8 -mtune=power8
KOKKOS_LDFLAGS += -mcpu=power8 -mtune=power8
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
@ -322,7 +463,8 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX2), 1)
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
KOKKOS_CXXFLAGS += -tp=haswell
KOKKOS_LDFLAGS += -tp=haswell
else
# Assume that this is a really a GNU compiler
KOKKOS_CXXFLAGS += -march=core-avx2 -mtune=core-avx2
@ -352,52 +494,85 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512MIC), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX512XEON), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_AVX512XEON 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_INTEL), 1)
KOKKOS_CXXFLAGS += -xCORE-AVX512
KOKKOS_LDFLAGS += -xCORE-AVX512
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
else
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
else
# Nothing here yet
KOKKOS_CXXFLAGS += -march=skylake-avx512
KOKKOS_LDFLAGS += -march=skylake-avx512
endif
endif
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KNC), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KNC 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -mmic
KOKKOS_LDFLAGS += -mmic
endif
#Figure out the architecture flag for Cuda
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-arch
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG=-x cuda --cuda-gpu-arch
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER30), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER30 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_30
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_30
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER32), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER32 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_32
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_32
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER35), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER35 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_35
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_35
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KEPLER37), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KEPLER37 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_37
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_37
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL50), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL50 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_50
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_50
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL52), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL52 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_52
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_52
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_MAXWELL53), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_MAXWELL53 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_53
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_53
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL61), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL61 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += -arch=sm_61
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_61
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_PASCAL60), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL 1" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_PASCAL60 1" >> KokkosCore_config.tmp )
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_COMPILER_CUDA_ARCH_FLAG)=sm_60
endif
endif
@ -424,6 +599,7 @@ KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/containers/src/impl/*.cpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/Cuda/*.hpp)
KOKKOS_CXXFLAGS += -I$(CUDA_PATH)/include
KOKKOS_LDFLAGS += -L$(CUDA_PATH)/lib64
KOKKOS_LIBS += -lcudart -lcuda
endif
@ -443,7 +619,7 @@ endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/OpenMP/*.hpp)
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
ifeq ($(KOKKOS_INTERNAL_COMPILER_NVCC), 1)
KOKKOS_CXXFLAGS += -Xcompiler $(KOKKOS_INTERNAL_OPENMP_FLAG)
else
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
@ -451,6 +627,14 @@ ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_OPENMP_FLAG)
endif
#Explicitly set the GCC Toolchain for Clang
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_GCC_PATH = $(shell which g++)
KOKKOS_INTERNAL_GCC_TOOLCHAIN = $(KOKKOS_INTERNAL_GCC_PATH:/bin/g++=)
KOKKOS_CXXFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN) -DKOKKOS_CUDA_CLANG_WORKAROUND -DKOKKOS_CUDA_USE_LDG_INTRINSIC
KOKKOS_LDFLAGS += --gcc-toolchain=$(KOKKOS_INTERNAL_GCC_TOOLCHAIN)
endif
#With Cygwin functions such as fdopen and fileno are not defined
#when strict ansi is enabled. strict ansi gets enabled with --std=c++11
#though. So we hard undefine it here. Not sure if that has any bad side effects
@ -471,7 +655,7 @@ KOKKOS_OBJ_LINK = $(notdir $(KOKKOS_OBJ))
include $(KOKKOS_PATH)/Makefile.targets
kokkos-clean:
-rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
rm -f $(KOKKOS_OBJ_LINK) KokkosCore_config.h KokkosCore_config.tmp libkokkos.a
libkokkos.a: $(KOKKOS_OBJ_LINK) $(KOKKOS_SRC) $(KOKKOS_HEADERS)
ar cr libkokkos.a $(KOKKOS_OBJ_LINK)

View File

@ -14,20 +14,16 @@ Kokkos_hwloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_hwloc.cpp
Kokkos_Serial.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial.cpp
Kokkos_Serial_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_TaskPolicy.cpp
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_Serial_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Serial_Task.cpp
Kokkos_Shape.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Shape.cpp
Kokkos_TaskQueue.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_TaskQueue.cpp
Kokkos_spinwait.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_spinwait.cpp
Kokkos_Profiling_Interface.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_Profiling_Interface.cpp
KokkosExp_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/KokkosExp_SharedAlloc.cpp
Kokkos_SharedAlloc.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_SharedAlloc.cpp
Kokkos_MemoryPool.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_MemoryPool.cpp
@ -38,8 +34,6 @@ Kokkos_CudaSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cu
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_CudaSpace.cpp
Kokkos_Cuda_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Task.cpp
Kokkos_Cuda_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
@ -47,8 +41,6 @@ Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
Kokkos_ThreadsExec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec.cpp
Kokkos_Threads_TaskPolicy.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_Threads_TaskPolicy.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_QTHREAD), 1)
@ -67,6 +59,4 @@ endif
Kokkos_HBWSpace.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWSpace.cpp
Kokkos_HBWAllocators.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/impl/Kokkos_HBWAllocators.cpp

View File

@ -45,31 +45,32 @@ Primary tested compilers on X86 are:
Intel 14.0.4
Intel 15.0.2
Intel 16.0.1
Intel 17.0.098
Clang 3.5.2
Clang 3.6.1
Clang 3.9.0
Primary tested compilers on Power 8 are:
IBM XL 13.1.3 (OpenMP,Serial)
GCC 4.9.2 (OpenMP,Serial)
GCC 5.3.0 (OpenMP,Serial)
GCC 5.4.0 (OpenMP,Serial)
IBM XL 13.1.3 (OpenMP, Serial) (There is a workaround in place to avoid a compiler bug)
Primary tested compilers on Intel KNL are:
Intel 16.2.181 (with gcc 4.7.2)
Intel 17.0.098 (with gcc 4.7.2)
Secondary tested compilers are:
CUDA 6.5 (with gcc 4.7.2)
CUDA 7.0 (with gcc 4.7.2)
CUDA 7.5 (with gcc 4.8.4)
CUDA 7.5 (with gcc 4.7.2)
CUDA 8.0 (with gcc 5.3.0 on X86 and gcc 5.4.0 on Power8)
CUDA/Clang 8.0 using Clang/Trunk compiler
Other compilers working:
X86:
Intel 17.0.042 (the FENL example causes internal compiler error)
PGI 15.4
Cygwin 2.1.0 64bit with gcc 4.9.3
KNL:
Intel 16.2.181 (the FENL example causes internal compiler error)
Intel 17.0.042 (the FENL example causes internal compiler error)
Known non-working combinations:
Power8:
GCC 6.1.0
Pthreads backend
@ -92,9 +93,10 @@ master branch, without -Werror and only for a select set of backends.
In the 'example/tutorial' directory you will find step by step tutorial
examples which explain many of the features of Kokkos. They work with
simple Makefiles. To build with g++ and OpenMP simply type 'make openmp'
simple Makefiles. To build with g++ and OpenMP simply type 'make'
in the 'example/tutorial' directory. This will build all examples in the
subfolders.
subfolders. To change the build options refer to the Programming Guide
in the compilation section.
============================================================================
====Running Unit Tests======================================================

View File

@ -476,54 +476,54 @@ namespace Kokkos {
};
template<class Generator>
struct rand<Generator, ::Kokkos::complex<float> > {
struct rand<Generator, Kokkos::complex<float> > {
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<float> max () {
return ::Kokkos::complex<float> (1.0, 1.0);
static Kokkos::complex<float> max () {
return Kokkos::complex<float> (1.0, 1.0);
}
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<float> draw (Generator& gen) {
static Kokkos::complex<float> draw (Generator& gen) {
const float re = gen.frand ();
const float im = gen.frand ();
return ::Kokkos::complex<float> (re, im);
return Kokkos::complex<float> (re, im);
}
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& range) {
static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& range) {
const float re = gen.frand (real (range));
const float im = gen.frand (imag (range));
return ::Kokkos::complex<float> (re, im);
return Kokkos::complex<float> (re, im);
}
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<float> draw (Generator& gen, const ::Kokkos::complex<float>& start, const ::Kokkos::complex<float>& end) {
static Kokkos::complex<float> draw (Generator& gen, const Kokkos::complex<float>& start, const Kokkos::complex<float>& end) {
const float re = gen.frand (real (start), real (end));
const float im = gen.frand (imag (start), imag (end));
return ::Kokkos::complex<float> (re, im);
return Kokkos::complex<float> (re, im);
}
};
template<class Generator>
struct rand<Generator, ::Kokkos::complex<double> > {
struct rand<Generator, Kokkos::complex<double> > {
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<double> max () {
return ::Kokkos::complex<double> (1.0, 1.0);
static Kokkos::complex<double> max () {
return Kokkos::complex<double> (1.0, 1.0);
}
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<double> draw (Generator& gen) {
static Kokkos::complex<double> draw (Generator& gen) {
const double re = gen.drand ();
const double im = gen.drand ();
return ::Kokkos::complex<double> (re, im);
return Kokkos::complex<double> (re, im);
}
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& range) {
static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& range) {
const double re = gen.drand (real (range));
const double im = gen.drand (imag (range));
return ::Kokkos::complex<double> (re, im);
return Kokkos::complex<double> (re, im);
}
KOKKOS_INLINE_FUNCTION
static ::Kokkos::complex<double> draw (Generator& gen, const ::Kokkos::complex<double>& start, const ::Kokkos::complex<double>& end) {
static Kokkos::complex<double> draw (Generator& gen, const Kokkos::complex<double>& start, const Kokkos::complex<double>& end) {
const double re = gen.drand (real (start), real (end));
const double im = gen.drand (imag (start), imag (end));
return ::Kokkos::complex<double> (re, im);
return Kokkos::complex<double> (re, im);
}
};
@ -670,8 +670,8 @@ namespace Kokkos {
double S = 2.0;
double U;
while(S>=1.0) {
U = drand();
const double V = drand();
U = 2.0*drand() - 1.0;
const double V = 2.0*drand() - 1.0;
S = U*U+V*V;
}
return U*sqrt(-2.0*log(S)/S);
@ -910,8 +910,8 @@ namespace Kokkos {
double S = 2.0;
double U;
while(S>=1.0) {
U = drand();
const double V = drand();
U = 2.0*drand() - 1.0;
const double V = 2.0*drand() - 1.0;
S = U*U+V*V;
}
return U*sqrt(-2.0*log(S)/S);
@ -1163,8 +1163,8 @@ namespace Kokkos {
double S = 2.0;
double U;
while(S>=1.0) {
U = drand();
const double V = drand();
U = 2.0*drand() - 1.0;
const double V = 2.0*drand() - 1.0;
S = U*U+V*V;
}
return U*sqrt(-2.0*log(S)/S);

View File

@ -51,7 +51,7 @@
namespace Kokkos {
namespace SortImpl {
namespace Impl {
template<class ValuesViewType, int Rank=ValuesViewType::Rank>
struct CopyOp;
@ -199,7 +199,7 @@ public:
parallel_for(values.dimension_0(),
bin_sort_sort_functor<ValuesViewType, offset_type,
SortImpl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
Impl::CopyOp<ValuesViewType> >(values,sorted_values,sort_order));
deep_copy(values,sorted_values);
}
@ -262,17 +262,15 @@ public:
}
};
namespace SortImpl {
template<class KeyViewType>
struct DefaultBinOp1D {
struct BinOp1D {
const int max_bins_;
const double mul_;
typename KeyViewType::const_value_type range_;
typename KeyViewType::const_value_type min_;
//Construct BinOp with number of bins, minimum value and maxuimum value
DefaultBinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
BinOp1D(int max_bins__, typename KeyViewType::const_value_type min,
typename KeyViewType::const_value_type max )
:max_bins_(max_bins__+1),mul_(1.0*max_bins__/(max-min)),range_(max-min),min_(min) {}
@ -298,13 +296,13 @@ struct DefaultBinOp1D {
};
template<class KeyViewType>
struct DefaultBinOp3D {
struct BinOp3D {
int max_bins_[3];
double mul_[3];
typename KeyViewType::non_const_value_type range_[3];
typename KeyViewType::non_const_value_type min_[3];
DefaultBinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
BinOp3D(int max_bins__[], typename KeyViewType::const_value_type min[],
typename KeyViewType::const_value_type max[] )
{
max_bins_[0] = max_bins__[0]+1;
@ -348,109 +346,11 @@ struct DefaultBinOp3D {
}
};
template<typename Scalar>
struct min_max {
Scalar min;
Scalar max;
bool init;
KOKKOS_INLINE_FUNCTION
min_max() {
min = 0;
max = 0;
init = 0;
}
KOKKOS_INLINE_FUNCTION
min_max (const min_max& val) {
min = val.min;
max = val.max;
init = val.init;
}
KOKKOS_INLINE_FUNCTION
min_max operator = (const min_max& val) {
min = val.min;
max = val.max;
init = val.init;
return *this;
}
KOKKOS_INLINE_FUNCTION
void operator+= (const Scalar& val) {
if(init) {
min = min<val?min:val;
max = max>val?max:val;
} else {
min = val;
max = val;
init = 1;
}
}
KOKKOS_INLINE_FUNCTION
void operator+= (const min_max& val) {
if(init && val.init) {
min = min<val.min?min:val.min;
max = max>val.max?max:val.max;
} else {
if(val.init) {
min = val.min;
max = val.max;
init = 1;
}
}
}
KOKKOS_INLINE_FUNCTION
void operator+= (volatile const Scalar& val) volatile {
if(init) {
min = min<val?min:val;
max = max>val?max:val;
} else {
min = val;
max = val;
init = 1;
}
}
KOKKOS_INLINE_FUNCTION
void operator+= (volatile const min_max& val) volatile {
if(init && val.init) {
min = min<val.min?min:val.min;
max = max>val.max?max:val.max;
} else {
if(val.init) {
min = val.min;
max = val.max;
init = 1;
}
}
}
};
template<class ViewType>
struct min_max_functor {
typedef typename ViewType::execution_space execution_space;
ViewType view;
typedef min_max<typename ViewType::non_const_value_type> value_type;
min_max_functor (const ViewType view_):view(view_) {
}
KOKKOS_INLINE_FUNCTION
void operator()(const size_t& i, value_type& val) const {
val += view(i);
}
};
namespace Impl {
template<class ViewType>
bool try_std_sort(ViewType view) {
bool possible = true;
#if ! KOKKOS_USING_EXP_VIEW
size_t stride[8];
view.stride(stride);
#else
size_t stride[8] = { view.stride_0()
, view.stride_1()
, view.stride_2()
@ -460,8 +360,7 @@ bool try_std_sort(ViewType view) {
, view.stride_6()
, view.stride_7()
};
#endif
possible = possible && Impl::is_same<typename ViewType::memory_space, HostSpace>::value;
possible = possible && std::is_same<typename ViewType::memory_space, HostSpace>::value;
possible = possible && (ViewType::Rank == 1);
possible = possible && (stride[0] == 1);
if(possible) {
@ -470,27 +369,39 @@ bool try_std_sort(ViewType view) {
return possible;
}
template<class ViewType>
struct min_max_functor {
typedef Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> minmax_scalar;
ViewType view;
min_max_functor(const ViewType& view_):view(view_) {}
KOKKOS_INLINE_FUNCTION
void operator() (const size_t& i, minmax_scalar& minmax) const {
if(view(i) < minmax.min_val) minmax.min_val = view(i);
if(view(i) > minmax.max_val) minmax.max_val = view(i);
}
};
}
template<class ViewType>
void sort(ViewType view, bool always_use_kokkos_sort = false) {
if(!always_use_kokkos_sort) {
if(SortImpl::try_std_sort(view)) return;
if(Impl::try_std_sort(view)) return;
}
typedef BinOp1D<ViewType> CompType;
typedef SortImpl::DefaultBinOp1D<ViewType> CompType;
SortImpl::min_max<typename ViewType::non_const_value_type> val;
parallel_reduce(view.dimension_0(),SortImpl::min_max_functor<ViewType>(view),val);
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,val.min,val.max),true);
Kokkos::Experimental::MinMaxScalar<typename ViewType::non_const_value_type> result;
Kokkos::Experimental::MinMax<typename ViewType::non_const_value_type> reducer(result);
parallel_reduce(Kokkos::RangePolicy<typename ViewType::execution_space>(0,view.dimension_0()),
Impl::min_max_functor<ViewType>(view),reducer);
if(result.min_val == result.max_val) return;
BinSort<ViewType, CompType> bin_sort(view,CompType(view.dimension_0()/2,result.min_val,result.max_val),true);
bin_sort.create_permute_vector();
bin_sort.sort(view);
}
/*template<class ViewType, class Comparator>
void sort(ViewType view, Comparator comp, bool always_use_kokkos_sort = false) {
}*/
}
#endif

View File

@ -1,6 +1,6 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(REQUIRED_DURING_INSTALLATION_TESTING ${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}/../src )
SET(SOURCES

View File

@ -7,21 +7,18 @@ vpath %.cpp ${KOKKOS_PATH}/algorithms/unit_tests
default: build_all
echo "End Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = $(KOKKOS_PATH)/config/nvcc_wrapper
else
CXX = g++
endif
CXXFLAGS = -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
include $(KOKKOS_PATH)/Makefile.kokkos
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
CXX = $(NVCC_WRAPPER)
CXXFLAGS ?= -O3
LINK = $(CXX)
LDFLAGS ?= -lpthread
else
CXX ?= g++
CXXFLAGS ?= -O3
LINK ?= $(CXX)
LDFLAGS ?= -lpthread
endif
KOKKOS_CXXFLAGS += -I$(GTEST_PATH) -I${KOKKOS_PATH}/algorithms/unit_tests
TEST_TARGETS =

View File

@ -131,6 +131,10 @@ void test_1D_sort(unsigned int n,bool force_kokkos) {
typedef Kokkos::View<KeyType*,ExecutionSpace> KeyViewType;
KeyViewType keys("Keys",n);
// Test sorting array with all numbers equal
Kokkos::deep_copy(keys,KeyType(1));
Kokkos::sort(keys,force_kokkos);
Kokkos::Random_XorShift64_Pool<ExecutionSpace> g(1931);
Kokkos::fill_random(keys,g,Kokkos::Random_XorShift64_Pool<ExecutionSpace>::generator_type::MAX_URAND);
@ -174,7 +178,7 @@ void test_3D_sort(unsigned int n) {
typename KeyViewType::value_type min[3] = {0,0,0};
typename KeyViewType::value_type max[3] = {100,100,100};
typedef Kokkos::SortImpl::DefaultBinOp3D< KeyViewType > BinOp;
typedef Kokkos::BinOp3D< KeyViewType > BinOp;
BinOp bin_op(bin_max,min,max);
Kokkos::BinSort< KeyViewType , BinOp >
Sorter(keys,bin_op,false);

View File

@ -0,0 +1,43 @@
KOKKOS_PATH = ${HOME}/kokkos
SRC = $(wildcard *.cpp)
KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda
default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
EXE = bytes_and_flops.cuda
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
EXE = bytes_and_flops.host
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
CXXFLAGS = -O3 -g
DEPFLAGS = -M
LINK = ${CXX}
LINKFLAGS =
OBJ = $(SRC:.cpp=.o)
LIB =
include $(KOKKOS_PATH)/Makefile.kokkos
build: $(EXE)
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) bench.hpp bench_unroll_stride.hpp bench_stride.hpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -0,0 +1,99 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include<Kokkos_Core.hpp>
#include<impl/Kokkos_Timer.hpp>
template<class Scalar, int Unroll,int Stride>
struct Run {
static void run(int N, int K, int R, int F, int T, int S);
};
template<class Scalar, int Stride>
struct RunStride {
static void run_1(int N, int K, int R, int F, int T, int S);
static void run_2(int N, int K, int R, int F, int T, int S);
static void run_3(int N, int K, int R, int F, int T, int S);
static void run_4(int N, int K, int R, int F, int T, int S);
static void run_5(int N, int K, int R, int F, int T, int S);
static void run_6(int N, int K, int R, int F, int T, int S);
static void run_7(int N, int K, int R, int F, int T, int S);
static void run_8(int N, int K, int R, int F, int T, int S);
static void run(int N, int K, int R, int U, int F, int T, int S);
};
#define STRIDE 1
#include<bench_stride.hpp>
#undef STRIDE
#define STRIDE 2
#include<bench_stride.hpp>
#undef STRIDE
#define STRIDE 4
#include<bench_stride.hpp>
#undef STRIDE
#define STRIDE 8
#include<bench_stride.hpp>
#undef STRIDE
#define STRIDE 16
#include<bench_stride.hpp>
#undef STRIDE
#define STRIDE 32
#include<bench_stride.hpp>
#undef STRIDE
template<class Scalar>
void run_stride_unroll(int N, int K, int R, int D, int U, int F, int T, int S) {
if(D == 1)
RunStride<Scalar,1>::run(N,K,R,U,F,T,S);
if(D == 2)
RunStride<Scalar,2>::run(N,K,R,U,F,T,S);
if(D == 4)
RunStride<Scalar,4>::run(N,K,R,U,F,T,S);
if(D == 8)
RunStride<Scalar,8>::run(N,K,R,U,F,T,S);
if(D == 16)
RunStride<Scalar,16>::run(N,K,R,U,F,T,S);
if(D == 32)
RunStride<Scalar,32>::run(N,K,R,U,F,T,S);
}

View File

@ -0,0 +1,124 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#define UNROLL 1
#include<bench_unroll_stride.hpp>
#undef UNROLL
#define UNROLL 2
#include<bench_unroll_stride.hpp>
#undef UNROLL
#define UNROLL 3
#include<bench_unroll_stride.hpp>
#undef UNROLL
#define UNROLL 4
#include<bench_unroll_stride.hpp>
#undef UNROLL
#define UNROLL 5
#include<bench_unroll_stride.hpp>
#undef UNROLL
#define UNROLL 6
#include<bench_unroll_stride.hpp>
#undef UNROLL
#define UNROLL 7
#include<bench_unroll_stride.hpp>
#undef UNROLL
#define UNROLL 8
#include<bench_unroll_stride.hpp>
#undef UNROLL
template<class Scalar>
struct RunStride<Scalar,STRIDE> {
static void run_1(int N, int K, int R, int F, int T, int S) {
Run<Scalar,1,STRIDE>::run(N,K,R,F,T,S);
}
static void run_2(int N, int K, int R, int F, int T, int S) {
Run<Scalar,2,STRIDE>::run(N,K,R,F,T,S);
}
static void run_3(int N, int K, int R, int F, int T, int S) {
Run<Scalar,3,STRIDE>::run(N,K,R,F,T,S);
}
static void run_4(int N, int K, int R, int F, int T, int S) {
Run<Scalar,4,STRIDE>::run(N,K,R,F,T,S);
}
static void run_5(int N, int K, int R, int F, int T, int S) {
Run<Scalar,5,STRIDE>::run(N,K,R,F,T,S);
}
static void run_6(int N, int K, int R, int F, int T, int S) {
Run<Scalar,6,STRIDE>::run(N,K,R,F,T,S);
}
static void run_7(int N, int K, int R, int F, int T, int S) {
Run<Scalar,7,STRIDE>::run(N,K,R,F,T,S);
}
static void run_8(int N, int K, int R, int F, int T, int S) {
Run<Scalar,8,STRIDE>::run(N,K,R,F,T,S);
}
static void run(int N, int K, int R, int U, int F, int T, int S) {
if(U==1) {
run_1(N,K,R,F,T,S);
}
if(U==2) {
run_2(N,K,R,F,T,S);
}
if(U==3) {
run_3(N,K,R,F,T,S);
}
if(U==4) {
run_4(N,K,R,F,T,S);
}
if(U==5) {
run_5(N,K,R,F,T,S);
}
if(U==6) {
run_6(N,K,R,F,T,S);
}
if(U==7) {
run_7(N,K,R,F,T,S);
}
if(U==8) {
run_8(N,K,R,F,T,S);
}
}
};

View File

@ -0,0 +1,148 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
template<class Scalar>
struct Run<Scalar,UNROLL,STRIDE> {
static void run(int N, int K, int R, int F, int T, int S) {
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> A("A",N,K);
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> B("B",N,K);
Kokkos::View<Scalar**[STRIDE],Kokkos::LayoutRight> C("C",N,K);
Kokkos::deep_copy(A,Scalar(1.5));
Kokkos::deep_copy(B,Scalar(2.5));
Kokkos::deep_copy(C,Scalar(3.5));
Kokkos::Timer timer;
Kokkos::parallel_for("BenchmarkKernel",Kokkos::TeamPolicy<>(N,T).set_scratch_size(0,Kokkos::PerTeam(S)),
KOKKOS_LAMBDA ( const Kokkos::TeamPolicy<>::member_type& team) {
const int n = team.league_rank();
for(int r=0; r<R; r++) {
Kokkos::parallel_for(Kokkos::TeamThreadRange(team,0,K), [&] (const int& i) {
Scalar a1 = A(n,i,0);
const Scalar b = B(n,i,0);
#if(UNROLL>1)
Scalar a2 = a1*1.3;
#endif
#if(UNROLL>2)
Scalar a3 = a2*1.1;
#endif
#if(UNROLL>3)
Scalar a4 = a3*1.1;
#endif
#if(UNROLL>4)
Scalar a5 = a4*1.3;
#endif
#if(UNROLL>5)
Scalar a6 = a5*1.1;
#endif
#if(UNROLL>6)
Scalar a7 = a6*1.1;
#endif
#if(UNROLL>7)
Scalar a8 = a7*1.1;
#endif
for(int f = 0; f<F; f++) {
a1 += b*a1;
#if(UNROLL>1)
a2 += b*a2;
#endif
#if(UNROLL>2)
a3 += b*a3;
#endif
#if(UNROLL>3)
a4 += b*a4;
#endif
#if(UNROLL>4)
a5 += b*a5;
#endif
#if(UNROLL>5)
a6 += b*a6;
#endif
#if(UNROLL>6)
a7 += b*a7;
#endif
#if(UNROLL>7)
a8 += b*a8;
#endif
}
#if(UNROLL==1)
C(n,i,0) = a1;
#endif
#if(UNROLL==2)
C(n,i,0) = a1+a2;
#endif
#if(UNROLL==3)
C(n,i,0) = a1+a2+a3;
#endif
#if(UNROLL==4)
C(n,i,0) = a1+a2+a3+a4;
#endif
#if(UNROLL==5)
C(n,i,0) = a1+a2+a3+a4+a5;
#endif
#if(UNROLL==6)
C(n,i,0) = a1+a2+a3+a4+a5+a6;
#endif
#if(UNROLL==7)
C(n,i,0) = a1+a2+a3+a4+a5+a6+a7;
#endif
#if(UNROLL==8)
C(n,i,0) = a1+a2+a3+a4+a5+a6+a7+a8;
#endif
});
}
});
Kokkos::fence();
double seconds = timer.seconds();
double bytes = 1.0*N*K*R*3*sizeof(Scalar);
double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
printf("NKRUFTS: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf\n",N,K,R,UNROLL,F,T,S,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds);
}
};

View File

@ -0,0 +1,96 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include<Kokkos_Core.hpp>
#include<impl/Kokkos_Timer.hpp>
#include<bench.hpp>
int main(int argc, char* argv[]) {
Kokkos::initialize();
if(argc<10) {
printf("Arguments: N K R D U F T S\n");
printf(" P: Precision (1==float, 2==double)\n");
printf(" N,K: dimensions of the 2D array to allocate\n");
printf(" R: how often to loop through the K dimension with each team\n");
printf(" D: distance between loaded elements (stride)\n");
printf(" U: how many independent flops to do per load\n");
printf(" F: how many times to repeat the U unrolled operations before reading next element\n");
printf(" T: team size\n");
printf(" S: shared memory per team (used to control occupancy on GPUs)\n");
printf("Example Input GPU:\n");
printf(" Bandwidth Bound : 2 100000 1024 1 1 1 1 256 6000\n");
printf(" Cache Bound : 2 100000 1024 64 1 1 1 512 20000\n");
printf(" Compute Bound : 2 100000 1024 1 1 8 64 256 6000\n");
printf(" Load Slots Used : 2 20000 256 32 16 1 1 256 6000\n");
printf(" Inefficient Load: 2 20000 256 32 2 1 1 256 20000\n");
Kokkos::finalize();
return 0;
}
int P = atoi(argv[1]);
int N = atoi(argv[2]);
int K = atoi(argv[3]);
int R = atoi(argv[4]);
int D = atoi(argv[5]);
int U = atoi(argv[6]);
int F = atoi(argv[7]);
int T = atoi(argv[8]);
int S = atoi(argv[9]);
if(U>8) {printf("U must be 1-8\n"); return 0;}
if( (D!=1) && (D!=2) && (D!=4) && (D!=8) && (D!=16) && (D!=32)) {printf("D must be one of 1,2,4,8,16,32\n"); return 0;}
if( (P!=1) && (P!=2) ) {printf("P must be one of 1,2\n"); return 0;}
if(P==1) {
run_stride_unroll<float>(N,K,R,D,U,F,T,S);
}
if(P==2) {
run_stride_unroll<double>(N,K,R,D,U,F,T,S);
}
Kokkos::finalize();
}

View File

@ -0,0 +1,44 @@
KOKKOS_PATH = ${HOME}/kokkos
SRC = $(wildcard *.cpp)
KOKKOS_DEVICES=Cuda
KOKKOS_CUDA_OPTIONS=enable_lambda
default: build
echo "Start Build"
ifneq (,$(findstring Cuda,$(KOKKOS_DEVICES)))
CXX = ${KOKKOS_PATH}/config/nvcc_wrapper
EXE = gather.cuda
KOKKOS_DEVICES = "Cuda,OpenMP"
KOKKOS_ARCH = "SNB,Kepler35"
else
CXX = g++
EXE = gather.host
KOKKOS_DEVICES = "OpenMP"
KOKKOS_ARCH = "SNB"
endif
CXXFLAGS = -O3 -g
DEPFLAGS = -M
LINK = ${CXX}
LINKFLAGS =
OBJ = $(SRC:.cpp=.o)
LIB =
include $(KOKKOS_PATH)/Makefile.kokkos
$(warning ${KOKKOS_CPPFLAGS})
build: $(EXE)
$(EXE): $(OBJ) $(KOKKOS_LINK_DEPENDS)
$(LINK) $(KOKKOS_LDFLAGS) $(LINKFLAGS) $(EXTRA_PATH) $(OBJ) $(KOKKOS_LIBS) $(LIB) -o $(EXE)
clean: kokkos-clean
rm -f *.o *.cuda *.host
# Compilation rules
%.o:%.cpp $(KOKKOS_CPP_DEPENDS) gather_unroll.hpp gather.hpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) $(EXTRA_INC) -c $<

View File

@ -0,0 +1,92 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
template<class Scalar, int UNROLL>
struct RunGather {
static void run(int N, int K, int D, int R, int F);
};
#define UNROLL 1
#include<gather_unroll.hpp>
#undef UNROLL
#define UNROLL 2
#include<gather_unroll.hpp>
#undef UNROLL
#define UNROLL 3
#include<gather_unroll.hpp>
#undef UNROLL
#define UNROLL 4
#include<gather_unroll.hpp>
#undef UNROLL
#define UNROLL 5
#include<gather_unroll.hpp>
#undef UNROLL
#define UNROLL 6
#include<gather_unroll.hpp>
#undef UNROLL
#define UNROLL 7
#include<gather_unroll.hpp>
#undef UNROLL
#define UNROLL 8
#include<gather_unroll.hpp>
#undef UNROLL
template<class Scalar>
void run_gather_test(int N, int K, int D, int R, int U, int F) {
if(U == 1)
RunGather<Scalar,1>::run(N,K,D,R,F);
if(U == 2)
RunGather<Scalar,2>::run(N,K,D,R,F);
if(U == 3)
RunGather<Scalar,3>::run(N,K,D,R,F);
if(U == 4)
RunGather<Scalar,4>::run(N,K,D,R,F);
if(U == 5)
RunGather<Scalar,5>::run(N,K,D,R,F);
if(U == 6)
RunGather<Scalar,6>::run(N,K,D,R,F);
if(U == 7)
RunGather<Scalar,7>::run(N,K,D,R,F);
if(U == 8)
RunGather<Scalar,8>::run(N,K,D,R,F);
}

View File

@ -0,0 +1,169 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include<Kokkos_Core.hpp>
#include<Kokkos_Random.hpp>
template<class Scalar>
struct RunGather<Scalar,UNROLL> {
static void run(int N, int K, int D, int R, int F) {
Kokkos::View<int**> connectivity("Connectivity",N,K);
Kokkos::View<Scalar*> A_in("Input",N);
Kokkos::View<Scalar*> B_in("Input",N);
Kokkos::View<Scalar*> C("Output",N);
Kokkos::Random_XorShift64_Pool<> rand_pool(12313);
Kokkos::deep_copy(A_in,1.5);
Kokkos::deep_copy(B_in,2.0);
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > A(A_in);
Kokkos::View<const Scalar*, Kokkos::MemoryTraits<Kokkos::RandomAccess> > B(B_in);
Kokkos::parallel_for("InitKernel",N,
KOKKOS_LAMBDA (const int& i) {
auto rand_gen = rand_pool.get_state();
for( int jj=0; jj<K; jj++) {
connectivity(i,jj) = (rand_gen.rand(D) + i - D/2 + N)%N;
}
rand_pool.free_state(rand_gen);
});
Kokkos::fence();
Kokkos::Timer timer;
for(int r = 0; r<R; r++) {
Kokkos::parallel_for("BenchmarkKernel",N,
KOKKOS_LAMBDA (const int& i) {
Scalar c = Scalar(0.0);
for( int jj=0; jj<K; jj++) {
const int j = connectivity(i,jj);
Scalar a1 = A(j);
const Scalar b = B(j);
#if(UNROLL>1)
Scalar a2 = a1*Scalar(1.3);
#endif
#if(UNROLL>2)
Scalar a3 = a2*Scalar(1.1);
#endif
#if(UNROLL>3)
Scalar a4 = a3*Scalar(1.1);
#endif
#if(UNROLL>4)
Scalar a5 = a4*Scalar(1.3);
#endif
#if(UNROLL>5)
Scalar a6 = a5*Scalar(1.1);
#endif
#if(UNROLL>6)
Scalar a7 = a6*Scalar(1.1);
#endif
#if(UNROLL>7)
Scalar a8 = a7*Scalar(1.1);
#endif
for(int f = 0; f<F; f++) {
a1 += b*a1;
#if(UNROLL>1)
a2 += b*a2;
#endif
#if(UNROLL>2)
a3 += b*a3;
#endif
#if(UNROLL>3)
a4 += b*a4;
#endif
#if(UNROLL>4)
a5 += b*a5;
#endif
#if(UNROLL>5)
a6 += b*a6;
#endif
#if(UNROLL>6)
a7 += b*a7;
#endif
#if(UNROLL>7)
a8 += b*a8;
#endif
}
#if(UNROLL==1)
c += a1;
#endif
#if(UNROLL==2)
c += a1+a2;
#endif
#if(UNROLL==3)
c += a1+a2+a3;
#endif
#if(UNROLL==4)
c += a1+a2+a3+a4;
#endif
#if(UNROLL==5)
c += a1+a2+a3+a4+a5;
#endif
#if(UNROLL==6)
c += a1+a2+a3+a4+a5+a6;
#endif
#if(UNROLL==7)
c += a1+a2+a3+a4+a5+a6+a7;
#endif
#if(UNROLL==8)
c += a1+a2+a3+a4+a5+a6+a7+a8;
#endif
}
C(i) = c ;
});
Kokkos::fence();
}
double seconds = timer.seconds();
double bytes = 1.0*N*K*R*(2*sizeof(Scalar)+sizeof(int)) + 1.0*N*R*sizeof(Scalar);
double flops = 1.0*N*K*R*(F*2*UNROLL + 2*(UNROLL-1));
double gather_ops = 1.0*N*K*R*2;
printf("SNKDRUF: %i %i %i %i %i %i %i Time: %lfs Bandwidth: %lfGiB/s GFlop/s: %lf GGather/s: %lf\n",sizeof(Scalar)/4,N,K,D,R,UNROLL,F,seconds,1.0*bytes/seconds/1024/1024/1024,1.e-9*flops/seconds,1.e-9*gather_ops/seconds);
}
};

View File

@ -1,13 +1,13 @@
/*
//@HEADER
// ************************************************************************
//
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
@ -36,73 +36,58 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
//
// ************************************************************************
//@HEADER
*/
#include <Kokkos_HostSpace.hpp>
#include<Kokkos_Core.hpp>
#include<impl/Kokkos_Timer.hpp>
#include<gather.hpp>
#include <impl/Kokkos_HBWAllocators.hpp>
#include <impl/Kokkos_Error.hpp>
int main(int argc, char* argv[]) {
Kokkos::initialize(argc,argv);
#include <stdint.h> // uintptr_t
#include <cstdlib> // for malloc, realloc, and free
#include <cstring> // for memcpy
#if defined(KOKKOS_POSIX_MEMALIGN_AVAILABLE)
#include <sys/mman.h> // for mmap, munmap, MAP_ANON, etc
#include <unistd.h> // for sysconf, _SC_PAGE_SIZE, _SC_PHYS_PAGES
#endif
#include <sstream>
#include <iostream>
#ifdef KOKKOS_HAVE_HBWSPACE
#include <memkind.h>
namespace Kokkos {
namespace Experimental {
namespace Impl {
#define MEMKIND_TYPE MEMKIND_HBW //hbw_get_kind(HBW_PAGESIZE_4KB)
/*--------------------------------------------------------------------------*/
void* HBWMallocAllocator::allocate( size_t size )
{
std::cout<< "Allocate HBW: " << 1.0e-6*size << "MB" << std::endl;
void * ptr = NULL;
if (size) {
ptr = memkind_malloc(MEMKIND_TYPE,size);
if (!ptr)
{
std::ostringstream msg ;
msg << name() << ": allocate(" << size << ") FAILED";
Kokkos::Impl::throw_runtime_exception( msg.str() );
}
if(argc<8) {
printf("Arguments: S N K D\n");
printf(" S: Scalar Type Size (1==float, 2==double, 4=complex<double>)\n");
printf(" N: Number of entities\n");
printf(" K: Number of things to gather per entity\n");
printf(" D: Max distance of gathered things of an entity\n");
printf(" R: how often to loop through the K dimension with each team\n");
printf(" U: how many independent flops to do per load\n");
printf(" F: how many times to repeat the U unrolled operations before reading next element\n");
printf("Example Input GPU:\n");
printf(" Bandwidth Bound : 2 10000000 1 1 10 1 1\n");
printf(" Cache Bound : 2 10000000 64 1 10 1 1\n");
printf(" Cache Gather : 2 10000000 64 256 10 1 1\n");
printf(" Global Gather : 2 100000000 16 100000000 1 1 1\n");
printf(" Typical MD : 2 100000 32 512 1000 8 2\n");
Kokkos::finalize();
return 0;
}
return ptr;
int S = atoi(argv[1]);
int N = atoi(argv[2]);
int K = atoi(argv[3]);
int D = atoi(argv[4]);
int R = atoi(argv[5]);
int U = atoi(argv[6]);
int F = atoi(argv[7]);
if( (S!=1) && (S!=2) && (S!=4)) {printf("S must be one of 1,2,4\n"); return 0;}
if( N<D ) {printf("N must be larger or equal to D\n"); return 0; }
if(S==1) {
run_gather_test<float>(N,K,D,R,U,F);
}
if(S==2) {
run_gather_test<double>(N,K,D,R,U,F);
}
if(S==4) {
run_gather_test<Kokkos::complex<double> >(N,K,D,R,U,F);
}
Kokkos::finalize();
}
void HBWMallocAllocator::deallocate( void * ptr, size_t /*size*/ )
{
if (ptr) {
memkind_free(MEMKIND_TYPE,ptr);
}
}
void * HBWMallocAllocator::reallocate(void * old_ptr, size_t /*old_size*/, size_t new_size)
{
void * ptr = memkind_realloc(MEMKIND_TYPE, old_ptr, new_size);
if (new_size > 0u && ptr == NULL) {
Kokkos::Impl::throw_runtime_exception("Error: Malloc Allocator could not reallocate memory");
}
return ptr;
}
} // namespace Impl
} // namespace Experimental
} // namespace Kokkos
#endif

284
lib/kokkos/bin/nvcc_wrapper Executable file
View File

@ -0,0 +1,284 @@
#!/bin/bash
#
# This shell script (nvcc_wrapper) wraps both the host compiler and
# NVCC, if you are building legacy C or C++ code with CUDA enabled.
# The script remedies some differences between the interface of NVCC
# and that of the host compiler, in particular for linking.
# It also means that a legacy code doesn't need separate .cu files;
# it can just use .cpp files.
#
# Default settings: change those according to your machine. For
# example, you may have have two different wrappers with either icpc
# or g++ as their back-end compiler. The defaults can be overwritten
# by using the usual arguments (e.g., -arch=sm_30 -ccbin icpc).
default_arch="sm_35"
#default_arch="sm_50"
#
# The default C++ compiler.
#
host_compiler=${NVCC_WRAPPER_DEFAULT_COMPILER:-"g++"}
#host_compiler="icpc"
#host_compiler="/usr/local/gcc/4.8.3/bin/g++"
#host_compiler="/usr/local/gcc/4.9.1/bin/g++"
#
# Internal variables
#
# C++ files
cpp_files=""
# Host compiler arguments
xcompiler_args=""
# Cuda (NVCC) only arguments
cuda_args=""
# Arguments for both NVCC and Host compiler
shared_args=""
# Linker arguments
xlinker_args=""
# Object files passable to NVCC
object_files=""
# Link objects for the host linker only
object_files_xlinker=""
# Shared libraries with version numbers are not handled correctly by NVCC
shared_versioned_libraries_host=""
shared_versioned_libraries=""
# Does the User set the architecture
arch_set=0
# Does the user overwrite the host compiler
ccbin_set=0
#Error code of compilation
error_code=0
# Do a dry run without actually compiling
dry_run=0
# Skip NVCC compilation and use host compiler directly
host_only=0
# Enable workaround for CUDA 6.5 for pragma ident
replace_pragma_ident=0
# Mark first host compiler argument
first_xcompiler_arg=1
temp_dir=${TMPDIR:-/tmp}
# Check if we have an optimization argument already
optimization_applied=0
#echo "Arguments: $# $@"
while [ $# -gt 0 ]
do
case $1 in
#show the executed command
--show|--nvcc-wrapper-show)
dry_run=1
;;
#run host compilation only
--host-only)
host_only=1
;;
#replace '#pragma ident' with '#ident' this is needed to compile OpenMPI due to a configure script bug and a non standardized behaviour of pragma with macros
--replace-pragma-ident)
replace_pragma_ident=1
;;
#handle source files to be compiled as cuda files
*.cpp|*.cxx|*.cc|*.C|*.c++|*.cu)
cpp_files="$cpp_files $1"
;;
# Ensure we only have one optimization flag because NVCC doesn't allow muliple
-O*)
if [ $optimization_applied -eq 1 ]; then
echo "nvcc_wrapper - *warning* you have set multiple optimization flags (-O*), only the first is used because nvcc can only accept a single optimization setting."
else
shared_args="$shared_args $1"
optimization_applied=1
fi
;;
#Handle shared args (valid for both nvcc and the host compiler)
-D*|-c|-I*|-L*|-l*|-g|--help|--version|-E|-M|-shared)
shared_args="$shared_args $1"
;;
#Handle shared args that have an argument
-o|-MT)
shared_args="$shared_args $1 $2"
shift
;;
#Handle known nvcc args
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args
--expt-extended-lambda|--expt-relaxed-constexpr)
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument
-rdc|-maxrregcount|--default-stream)
cuda_args="$cuda_args $1 $2"
shift
;;
#Handle c++11 setting
--std=c++11|-std=c++11)
shared_args="$shared_args $1"
;;
#strip of -std=c++98 due to nvcc warnings and Tribits will place both -std=c++11 and -std=c++98
-std=c++98|--std=c++98)
;;
#strip of pedantic because it produces endless warnings about #LINE added by the preprocessor
-pedantic|-Wpedantic|-ansi)
;;
#strip -Xcompiler because we add it
-Xcompiler)
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$2"
fi
shift
;;
#strip of "-x cu" because we add that
-x)
if [[ $2 != "cu" ]]; then
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args="-x,$2"
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,-x,$2"
fi
fi
shift
;;
#Handle -ccbin (if its not set we can set it to a default value)
-ccbin)
cuda_args="$cuda_args $1 $2"
ccbin_set=1
host_compiler=$2
shift
;;
#Handle -arch argument (if its not set use a default
-arch*)
cuda_args="$cuda_args $1"
arch_set=1
;;
#Handle -Xcudafe argument
-Xcudafe)
cuda_args="$cuda_args -Xcudafe $2"
shift
;;
#Handle args that should be sent to the linker
-Wl*)
xlinker_args="$xlinker_args -Xlinker ${1:4:${#1}}"
host_linker_args="$host_linker_args ${1:4:${#1}}"
;;
#Handle object files: -x cu applies to all input files, so give them to linker, except if only linking
*.a|*.so|*.o|*.obj)
object_files="$object_files $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle object files which always need to use "-Xlinker": -x cu applies to all input files, so give them to linker, except if only linking
*.dylib)
object_files="$object_files -Xlinker $1"
object_files_xlinker="$object_files_xlinker -Xlinker $1"
;;
#Handle shared libraries with *.so.* names which nvcc can't do.
*.so.*)
shared_versioned_libraries_host="$shared_versioned_libraries_host $1"
shared_versioned_libraries="$shared_versioned_libraries -Xlinker $1"
;;
#All other args are sent to the host compiler
*)
if [ $first_xcompiler_arg -eq 1 ]; then
xcompiler_args=$1
first_xcompiler_arg=0
else
xcompiler_args="$xcompiler_args,$1"
fi
;;
esac
shift
done
#Add default host compiler if necessary
if [ $ccbin_set -ne 1 ]; then
cuda_args="$cuda_args -ccbin $host_compiler"
fi
#Add architecture command
if [ $arch_set -ne 1 ]; then
cuda_args="$cuda_args -arch=$default_arch"
fi
#Compose compilation command
nvcc_command="nvcc $cuda_args $shared_args $xlinker_args $shared_versioned_libraries"
if [ $first_xcompiler_arg -eq 0 ]; then
nvcc_command="$nvcc_command -Xcompiler $xcompiler_args"
fi
#Compose host only command
host_command="$host_compiler $shared_args $xcompiler_args $host_linker_args $shared_versioned_libraries_host"
#nvcc does not accept '#pragma ident SOME_MACRO_STRING' but it does accept '#ident SOME_MACRO_STRING'
if [ $replace_pragma_ident -eq 1 ]; then
cpp_files2=""
for file in $cpp_files
do
var=`grep pragma ${file} | grep ident | grep "#"`
if [ "${#var}" -gt 0 ]
then
sed 's/#[\ \t]*pragma[\ \t]*ident/#ident/g' $file > $temp_dir/nvcc_wrapper_tmp_$file
cpp_files2="$cpp_files2 $temp_dir/nvcc_wrapper_tmp_$file"
else
cpp_files2="$cpp_files2 $file"
fi
done
cpp_files=$cpp_files2
#echo $cpp_files
fi
if [ "$cpp_files" ]; then
nvcc_command="$nvcc_command $object_files_xlinker -x cu $cpp_files"
else
nvcc_command="$nvcc_command $object_files"
fi
if [ "$cpp_files" ]; then
host_command="$host_command $object_files $cpp_files"
else
host_command="$host_command $object_files"
fi
#Print command for dryrun
if [ $dry_run -eq 1 ]; then
if [ $host_only -eq 1 ]; then
echo $host_command
else
echo $nvcc_command
fi
exit 0
fi
#Run compilation command
if [ $host_only -eq 1 ]; then
$host_command
else
$nvcc_command
fi
error_code=$?
#Report error code
exit $error_code

View File

@ -53,12 +53,12 @@
# ************************************************************************
# @HEADER
include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
#include(${TRIBITS_DEPS_DIR}/CUDA.cmake)
IF (TPL_ENABLE_CUDA)
GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
ENDIF()
#IF (TPL_ENABLE_CUDA)
# GLOBAL_SET(TPL_CUSPARSE_LIBRARY_DIRS)
# GLOBAL_SET(TPL_CUSPARSE_INCLUDE_DIRS ${TPL_CUDA_INCLUDE_DIRS})
# GLOBAL_SET(TPL_CUSPARSE_LIBRARIES ${CUDA_cusparse_LIBRARY})
# TIBITS_CREATE_IMPORTED_TPL_LIBRARY(CUSPARSE)
#ENDIF()

View File

@ -1,6 +1,16 @@
INCLUDE(CMakeParseArguments)
INCLUDE(CTest)
cmake_policy(SET CMP0054 NEW)
IF(NOT DEFINED ${PROJECT_NAME})
project(Kokkos)
ENDIF()
IF(NOT DEFINED ${${PROJECT_NAME}_ENABLE_DEBUG}})
SET(${PROJECT_NAME}_ENABLE_DEBUG OFF)
ENDIF()
FUNCTION(ASSERT_DEFINED VARS)
FOREACH(VAR ${VARS})
IF(NOT DEFINED ${VAR})
@ -75,6 +85,13 @@ MACRO(TRIBITS_ADD_EXAMPLE_DIRECTORIES)
ENDMACRO()
function(INCLUDE_DIRECTORIES)
cmake_parse_arguments(INCLUDE_DIRECTORIES "REQUIRED_DURING_INSTALLATION_TESTING" "" "" ${ARGN})
_INCLUDE_DIRECTORIES(${INCLUDE_DIRECTORIES_UNPARSED_ARGUMENTS})
endfunction()
MACRO(TARGET_TRANSFER_PROPERTY TARGET_NAME PROP_IN PROP_OUT)
SET(PROP_VALUES)
FOREACH(TARGET_X ${ARGN})
@ -271,6 +288,11 @@ ENDFUNCTION()
ADD_CUSTOM_TARGET(check COMMAND ${CMAKE_CTEST_COMMAND} -VV -C ${CMAKE_CFG_INTDIR})
FUNCTION(TRIBITS_ADD_TEST)
ENDFUNCTION()
FUNCTION(TRIBITS_TPL_TENTATIVELY_ENABLE)
ENDFUNCTION()
FUNCTION(TRIBITS_ADD_EXECUTABLE_AND_TEST EXE_NAME)
SET(options STANDARD_PASS_OUTPUT WILL_FAIL)

0
lib/kokkos/config/configure_compton_cpu.sh Executable file → Normal file
View File

0
lib/kokkos/config/configure_compton_mic.sh Executable file → Normal file
View File

0
lib/kokkos/config/configure_kokkos.sh Executable file → Normal file
View File

0
lib/kokkos/config/configure_kokkos_nvidia.sh Executable file → Normal file
View File

0
lib/kokkos/config/configure_shannon.sh Executable file → Normal file
View File

View File

@ -91,9 +91,20 @@ Step 3:
// -------------------------------------------------------------------------------- //
Step 4:
4.1. Once all Trilinos tests pass promote Kokkos develop branch to master on Github
Step 4: Once all Trilinos tests pass promote Kokkos develop branch to master on Github
4.1. Generate Changelog (You need a github API token)
Close all Open issues with "InDevelop" tag on github
(Not from kokkos directory)
gitthub_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
(Copy the new section from the generated CHANGELOG.md to the kokkos/CHANGELOG.md)
(Make desired changes to CHANGELOG.md to enhance clarity)
(Commit and push the CHANGELOG to develop)
4.2 Merge develop into Master
- DO NOT fast-forward the merge!!!!
(From kokkos directory):
@ -103,7 +114,7 @@ Step 4:
git reset --hard origin/master
git merge --no-ff origin/develop
4.2. Update the tag in kokkos/config/master_history.txt
4.3. Update the tag in kokkos/config/master_history.txt
Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
Tag format: #.#.##

View File

@ -1,3 +1,6 @@
tag: 2.01.00 date: 07:21:2016 master: xxxxxxxx develop: fa6dfcc4
tag: 2.01.06 date: 09:02:2016 master: 9afaa87f develop: 555f1a3a
tag: 2.01.10 date: 09:27:2016 master: e4119325 develop: e6cda11e
tag: 2.02.00 date: 10:30:2016 master: 6c90a581 develop: ca3dd56e
tag: 2.02.01 date: 11:01:2016 master: 9c698c86 develop: b0072304
tag: 2.02.07 date: 12:16:2016 master: 4b4cc4ba develop: 382c0966

View File

@ -121,6 +121,10 @@ do
-gencode*|--dryrun|--verbose|--keep|--keep-dir*|-G|--relocatable-device-code*|-lineinfo|-expt-extended-lambda|--resource-usage|-Xptxas*)
cuda_args="$cuda_args $1"
;;
#Handle more known nvcc args
--expt-extended-lambda|--expt-relaxed-constexpr)
cuda_args="$cuda_args $1"
;;
#Handle known nvcc args that have an argument
-rdc|-maxrregcount|--default-stream)
cuda_args="$cuda_args $1 $2"

View File

@ -16,6 +16,8 @@ elif [[ "$HOSTNAME" =~ .*bowman.* ]]; then
MACHINE=bowman
elif [[ "$HOSTNAME" =~ node.* ]]; then # Warning: very generic name
MACHINE=shepard
elif [[ "$HOSTNAME" =~ apollo ]]; then
MACHINE=apollo
elif [ ! -z "$SEMS_MODULEFILES_ROOT" ]; then
MACHINE=sems
else
@ -28,6 +30,7 @@ IBM_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
INTEL_BUILD_LIST="OpenMP,Pthread,Serial,OpenMP_Serial,Pthread_Serial"
CLANG_BUILD_LIST="Pthread,Serial,Pthread_Serial"
CUDA_BUILD_LIST="Cuda_OpenMP,Cuda_Pthread,Cuda_Serial"
CUDA_IBM_BUILD_LIST="Cuda_OpenMP,Cuda_Serial"
GCC_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wignored-qualifiers,-Wempty-body,-Wclobbered,-Wuninitialized"
IBM_WARNING_FLAGS="-Wall,-Wshadow,-pedantic,-Werror,-Wsign-compare,-Wtype-limits,-Wuninitialized"
@ -44,102 +47,12 @@ BUILD_ONLY=False
declare -i NUM_JOBS_TO_RUN_IN_PARALLEL=3
TEST_SCRIPT=False
SKIP_HWLOC=False
SPOT_CHECK=False
ARCH_FLAG=""
PRINT_HELP=False
OPT_FLAG=""
KOKKOS_OPTIONS=""
#
# Machine specific config
#
if [ "$MACHINE" = "sems" ]; then
source /projects/modulefiles/utils/sems-modules-init.sh
source /projects/modulefiles/utils/kokkos-modules-init.sh
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>/base,hwloc/1.10.1/<COMPILER_NAME>/<COMPILER_VERSION>/base"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.7.2/base"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/6.5.14 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
elif [ "$MACHINE" = "white" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/4.9.2"
# Don't do pthread on white
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.9.2 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
)
ARCH_FLAG="--arch=Power8"
NUM_JOBS_TO_RUN_IN_PARALLEL=8
elif [ "$MACHINE" = "bowman" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
ARCH_FLAG="--arch=KNL"
NUM_JOBS_TO_RUN_IN_PARALLEL=8
elif [ "$MACHINE" = "shepard" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.064 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
ARCH_FLAG="--arch=HSW"
NUM_JOBS_TO_RUN_IN_PARALLEL=8
else
echo "Unhandled machine $MACHINE" >&2
exit 1
fi
export OMP_NUM_THREADS=4
declare -i NUM_RESULTS_TO_KEEP=7
RESULT_ROOT_PREFIX=TestAll
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
#
# Handle arguments
@ -173,7 +86,211 @@ NUM_JOBS_TO_RUN_IN_PARALLEL="${key#*=}"
--dry-run*)
DRYRUN=True
;;
--help)
--spot-check*)
SPOT_CHECK=True
;;
--arch*)
ARCH_FLAG="--arch=${key#*=}"
;;
--opt-flag*)
OPT_FLAG="${key#*=}"
;;
--with-cuda-options*)
KOKKOS_CUDA_OPTIONS="--with-cuda-options=${key#*=}"
;;
--help*)
PRINT_HELP=True
;;
*)
# args, just append
ARGS="$ARGS $1"
;;
esac
shift
done
SCRIPT_KOKKOS_ROOT=$( cd "$( dirname "$0" )" && cd .. && pwd )
# set kokkos path
if [ -z "$KOKKOS_PATH" ]; then
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
# Ensure KOKKOS_PATH is abs path
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
fi
#
# Machine specific config
#
if [ "$MACHINE" = "sems" ]; then
source /projects/sems/modulefiles/utils/sems-modules-init.sh
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG=""
fi
if [ "$SPOT_CHECK" = "True" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
"cuda/8.0.44 $CUDA8_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
else
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/8.0.44 $CUDA8_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
fi
elif [ "$MACHINE" = "white" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>"
IBM_MODULE_LIST="<COMPILER_NAME>/xl/<COMPILER_VERSION>"
CUDA_MODULE_LIST="<COMPILER_NAME>/<COMPILER_VERSION>,gcc/5.4.0"
# Don't do pthread on white
GCC_BUILD_LIST="OpenMP,Serial,OpenMP_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/5.4.0 $BASE_MODULE_LIST $IBM_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"ibm/13.1.3 $IBM_MODULE_LIST $IBM_BUILD_LIST xlC $IBM_WARNING_FLAGS"
"cuda/8.0.44 $CUDA_MODULE_LIST $CUDA_IBM_BUILD_LIST ${KOKKOS_PATH}/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=Power8,Kepler37"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
elif [ "$MACHINE" = "bowman" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=KNL"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
elif [ "$MACHINE" = "shepard" ]; then
source /etc/profile.d/modules.sh
SKIP_HWLOC=True
export SLURM_TASKS_PER_NODE=32
BASE_MODULE_LIST="<COMPILER_NAME>/compilers/<COMPILER_VERSION>"
OLD_INTEL_BUILD_LIST="Pthread,Serial,Pthread_Serial"
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("intel/16.2.181 $BASE_MODULE_LIST $OLD_INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.098 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
)
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=HSW"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
elif [ "$MACHINE" = "apollo" ]; then
source /projects/sems/modulefiles/utils/sems-modules-init.sh
module use /home/projects/modulefiles/local/x86-64
module load kokkos-env
module load sems-git
module load sems-tex
module load sems-cmake/3.5.2
module load sems-gdb
SKIP_HWLOC=True
BASE_MODULE_LIST="sems-env,kokkos-env,sems-<COMPILER_NAME>/<COMPILER_VERSION>,kokkos-hwloc/1.10.1/base"
CUDA_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/4.8.4,kokkos-hwloc/1.10.1/base"
CUDA8_MODULE_LIST="sems-env,kokkos-env,kokkos-<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0,kokkos-hwloc/1.10.1/base"
CLANG_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,cuda/8.0.44"
NVCC_MODULE_LIST="sems-env,kokkos-env,sems-git,sems-cmake/3.5.2,<COMPILER_NAME>/<COMPILER_VERSION>,sems-gcc/5.3.0"
BUILD_LIST_CUDA_NVCC="Cuda_Serial,Cuda_OpenMP"
BUILD_LIST_CUDA_CLANG="Cuda_Serial,Cuda_Pthread"
BUILD_LIST_CLANG="Serial,Pthread,OpenMP"
if [ "$SPOT_CHECK" = "True" ]; then
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("gcc/4.7.2 $BASE_MODULE_LIST "OpenMP,Pthread" g++ $GCC_WARNING_FLAGS"
"gcc/5.1.0 $BASE_MODULE_LIST "Serial" g++ $GCC_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST "OpenMP" icpc $INTEL_WARNING_FLAGS"
"clang/3.9.0 $BASE_MODULE_LIST "Pthread_Serial" clang++ $CLANG_WARNING_FLAGS"
"clang/head $CLANG_MODULE_LIST "Cuda_Pthread" clang++ $CUDA_WARNING_FLAGS"
"cuda/8.0.44 $CUDA_MODULE_LIST "Cuda_OpenMP" $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
else
# Format: (compiler module-list build-list exe-name warning-flag)
COMPILERS=("cuda/8.0.44 $CUDA8_MODULE_LIST $BUILD_LIST_CUDA_NVCC $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"clang/head $CLANG_MODULE_LIST $BUILD_LIST_CUDA_CLANG clang++ $CUDA_WARNING_FLAGS"
"clang/3.9.0 $CLANG_MODULE_LIST $BUILD_LIST_CLANG clang++ $CLANG_WARNING_FLAGS"
"gcc/4.7.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.8.4 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/4.9.2 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/5.3.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"gcc/6.1.0 $BASE_MODULE_LIST $GCC_BUILD_LIST g++ $GCC_WARNING_FLAGS"
"intel/14.0.4 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.5.2 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"cuda/7.0.28 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
"cuda/7.5.18 $CUDA_MODULE_LIST $CUDA_BUILD_LIST $KOKKOS_PATH/config/nvcc_wrapper $CUDA_WARNING_FLAGS"
)
fi
if [ -z "$ARCH_FLAG" ]; then
ARCH_FLAG="--arch=SNB,Kepler35"
fi
NUM_JOBS_TO_RUN_IN_PARALLEL=2
else
echo "Unhandled machine $MACHINE" >&2
exit 1
fi
export OMP_NUM_THREADS=4
declare -i NUM_RESULTS_TO_KEEP=7
RESULT_ROOT_PREFIX=TestAll
if [ "$PRINT_HELP" = "True" ]; then
echo "test_all_sandia <ARGS> <OPTIONS>:"
echo "--kokkos-path=/Path/To/Kokkos: Path to the Kokkos root directory"
echo " Defaults to root repo containing this script"
@ -183,6 +300,9 @@ echo "--skip-hwloc: Do not do hwloc tests"
echo "--num=N: Number of jobs to run in parallel "
echo "--dry-run: Just print what would be executed"
echo "--build-only: Just do builds, don't run anything"
echo "--opt-flag=FLAG: Optimization flag (default: -O3)"
echo "--arch=ARCHITECTURE: overwrite architecture flags"
echo "--with-cuda-options=OPT: set KOKKOS_CUDA_OPTIONS"
echo "--build-list=BUILD,BUILD,BUILD..."
echo " Provide a comma-separated list of builds instead of running all builds"
echo " Valid items:"
@ -220,21 +340,6 @@ echo " hit ctrl-z"
echo " % kill -9 %1"
echo
exit 0
;;
*)
# args, just append
ARGS="$ARGS $1"
;;
esac
shift
done
# set kokkos path
if [ -z "$KOKKOS_PATH" ]; then
KOKKOS_PATH=$SCRIPT_KOKKOS_ROOT
else
# Ensure KOKKOS_PATH is abs path
KOKKOS_PATH=$( cd $KOKKOS_PATH && pwd )
fi
# set build type
@ -381,11 +486,15 @@ single_build_and_test() {
local extra_args=--with-hwloc=$(dirname $(dirname $(which hwloc-info)))
fi
if [[ "$OPT_FLAG" = "" ]]; then
OPT_FLAG="-O3"
fi
if [[ "$build_type" = *debug* ]]; then
local extra_args="$extra_args --debug"
local cxxflags="-g $compiler_warning_flags"
else
local cxxflags="-O3 $compiler_warning_flags"
local cxxflags="$OPT_FLAG $compiler_warning_flags"
fi
if [[ "$compiler" == cuda* ]]; then
@ -393,7 +502,9 @@ single_build_and_test() {
export TMPDIR=$(pwd)
fi
# cxxflags="-DKOKKOS_USING_EXP_VIEW=1 $cxxflags"
if [[ "$KOKKOS_CUDA_OPTIONS" != "" ]]; then
local extra_args="$extra_args $KOKKOS_CUDA_OPTIONS"
fi
echo " Starting job $desc"
@ -440,13 +551,14 @@ run_in_background() {
local compiler=$1
local -i num_jobs=$NUM_JOBS_TO_RUN_IN_PARALLEL
if [[ "$BUILD_ONLY" == True ]]; then
num_jobs=8
else
# don't override command line input
# if [[ "$BUILD_ONLY" == True ]]; then
# num_jobs=8
# else
if [[ "$compiler" == cuda* ]]; then
num_jobs=1
fi
fi
# fi
wait_for_jobs $num_jobs
single_build_and_test $* &

View File

@ -0,0 +1,50 @@
#!/bin/bash -le
export TRILINOS_UPDATED_PATH=${PWD}/trilinos-update
export TRILINOS_PRISTINE_PATH=${PWD}/trilinos-pristine
#rm -rf ${KOKKOS_PATH}
#rm -rf ${TRILINOS_UPDATED_PATH}
#rm -rf ${TRILINOS_PRISTINE_PATH}
#Already done:
if [ ! -d "${TRILINOS_UPDATED_PATH}" ]; then
git clone https://github.com/trilinos/trilinos ${TRILINOS_UPDATED_PATH}
fi
if [ ! -d "${TRILINOS_PRISTINE_PATH}" ]; then
git clone https://github.com/trilinos/trilinos ${TRILINOS_PRISTINE_PATH}
fi
cd ${TRILINOS_UPDATED_PATH}
git checkout develop
git reset --hard origin/develop
git pull
cd ..
python kokkos/config/snapshot.py ${KOKKOS_PATH} ${TRILINOS_UPDATED_PATH}/packages
cd ${TRILINOS_UPDATED_PATH}
echo ""
echo ""
echo "Trilinos State:"
git log --pretty=oneline --since=2.days
SHA=`git log --pretty=oneline --since=2.days | head -n 2 | tail -n 1 | awk '{print $1}'`
cd ..
cd ${TRILINOS_PRISTINE_PATH}
git status
git log --pretty=oneline --since=2.days
echo "Checkout develop"
git checkout develop
echo "Pull"
git pull
echo "Checkout SHA"
git checkout ${SHA}
cd ..
cd ${TRILINOS_PRISTINE_PATH}
echo ""
echo ""
echo "Trilinos Pristine State:"
git log --pretty=oneline --since=2.days
cd ..

Some files were not shown because too many files have changed in this diff Show More