Compare commits

...

203 Commits

Author SHA1 Message Date
bb11344424 Merge pull request #1522 from akohlmey/next_version
Step version string for next patch release
2019-06-18 22:01:42 -04:00
e754fb80a4 Merge pull request #1518 from akohlmey/collected-small-changes
Collected small changes and fixes for the next patch release
2019-06-18 16:21:41 -04:00
98fbaef406 workaround for ICE issue with gcc 4.8.x 2019-06-18 15:00:27 -04:00
d98c105d34 step version string to 18 June 2019 2019-06-18 11:52:09 -04:00
961dcfc261 mention alternate build environment generators for cmake 2019-06-18 11:50:06 -04:00
81cdce9b04 flag and document that USER-TALLY computes are not compatible with dynamic groups 2019-06-18 00:14:48 -04:00
995b6b31a2 flag a couple more packages that contain sub-builds of downloaded code 2019-06-17 16:05:54 -04:00
e9666f585f update log files for kolmogorov/crespi/full potential 2019-06-17 15:46:54 -04:00
f4f4a7c850 adapt for the Ninja build tool, which cannot handle sub-builds and Fortran,
so builds of downloaded libraries are not supported right now
2019-06-17 14:24:51 -04:00
5fc3081a55 make building tools (msi2lmp, chain.x) an option, which is off by default 2019-06-17 14:23:30 -04:00
a39a35af20 Merge pull request #1517 from jrgissing/bond/react-clarify-template-check-messages
Bond/react: clarify reaction-template warning messages
2019-06-17 14:14:53 -04:00
1452d3d25b Merge pull request #1516 from gtribello/master
Using PLUMED to compute committor with LAMMPS
2019-06-17 14:14:37 -04:00
2dea4eb0e9 Merge pull request #1513 from akohlmey/user-tally-vs-hybrid
Make USER-TALLY compute styles compatible with hybrid pair styles
2019-06-17 14:14:06 -04:00
5d73b0790f Merge pull request #1510 from evoyiatzis/respa_class_2
Implementation of inner/middle/outer compute methods for lj/class2
2019-06-17 14:13:47 -04:00
93fd33aad9 Merge pull request #1507 from athomps/yarray
Back-porting of Zombie SNAP improvements
2019-06-17 14:13:28 -04:00
6536399fc3 Merge pull request #1476 from uihsnv/log_spacing
A function logfreq3(), for logarithmic spacing
2019-06-17 14:12:59 -04:00
2f29bd29f4 Merge pull request #1430 from ndtrung81/gpu-neigh-hybrid
Enable neighbor build on the device for pair hybrid substyles
2019-06-17 14:12:40 -04:00
b1458ceebf fix typo 2019-06-17 07:42:18 -04:00
92615bda3a update kolmogorov/crespi/full example input for recent change in REBO 2019-06-17 07:38:11 -04:00
4c0cd0a019 remove memory leak by freeing custom MPI data types 2019-06-17 07:36:03 -04:00
fd93c27dcb make valgrind happy by fully initializing line buffer for thermo output 2019-06-17 07:35:30 -04:00
4178c5167b clarify template error/warning message 2019-06-16 14:47:07 -06:00
fd20a0606a Merge pull request #49 from lammps/master
rebase
2019-06-16 14:19:50 -06:00
75e2981bda Using force_timeout call instead of throwing error for plumed stop 2019-06-16 17:52:20 +01:00
98d9a9a4d2 Added call to PLUMED with setStopFlag so that PLUMED can stop LAMMPS if it needs to. This would be needed if you were computing committors, for example 2019-06-16 16:09:32 +01:00
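Editor's note: a minimal sketch of the stop-flag handshake described in the two commits above, simplified and not the actual LAMMPS fix_plumed code. The helper name couple_stop_flag is hypothetical; PLMD::Plumed::cmd() and the "setStopFlag" key are part of the public PLUMED interface.

// Hedged sketch: the MD code hands PLUMED a pointer to an int;
// PLUMED sets it nonzero when, e.g., a committor criterion fires.
#include "Plumed.h"   // PLUMED C++ wrapper header (assumed available)

void couple_stop_flag(PLMD::Plumed &p, int &plumed_stop)
{
  plumed_stop = 0;
  p.cmd("setStopFlag", &plumed_stop);   // PLUMED will write here
}

// ... later, inside the MD loop, after p.cmd("calc"):
//   if (plumed_stop) trigger_timeout();   // e.g. LAMMPS's force_timeout path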
e73b34a5b1 do not run tally callback setup multiple times per time step 2019-06-14 15:46:28 -04:00
ac57b41b4d fix line endings and missing EOL at end 2019-06-14 07:13:06 -04:00
43e3c1520b cleaning pair_lj_class2.h of windows line endings
removing DOS/Windows style CR/LF line endings
2019-06-14 11:50:56 +02:00
e7116c8680 Update pair_class2.txt
Addition of a few lines in the documentation file
2019-06-14 09:04:03 +02:00
4d7d3a5d53 Switched algorithm for compute_yi to one based on zlist ordering 2019-06-13 15:56:18 -06:00
c1550ba29b implementation of inner/middle/outer for lj/class2
Implementation of inner/middle/outer functions in style lj/class2 to enable integration with respa
2019-06-13 22:23:01 +02:00
5fb505ca8c Fixed typo 2019-06-13 10:24:18 -06:00
67a1a63f5f Removed old text 2019-06-13 10:10:37 -06:00
65b87fa278 Updated SNAP in KOKKOS package so it compiles and runs 2019-06-13 09:54:56 -06:00
c5c03230cb Not part of this pull request 2019-06-12 17:05:47 -06:00
be5d3d6a19 Not part of this pull request 2019-06-12 17:04:22 -06:00
f8e257d219 Not part of this pull request 2019-06-12 17:04:05 -06:00
3f523ea906 Not part of this pull request 2019-06-12 17:02:59 -06:00
a973700295 Completed back-porting of Zombie SNAP improvements, particularly noteworthy is reduction in memory footprint, elimination of most multidimensional arrays, elimination of diagonal_style, elimination of Z array in force calculation. 2019-06-12 16:42:28 -06:00
daa53e3008 Merge pull request #1469 from julient31/pppm_spin
Adding PPPM and Ewald solvers for electric dipoles and magnetic spins
2019-06-12 14:50:18 -04:00
7a33d1e328 Code cleanup 2019-06-12 11:36:42 -06:00
0559e155f2 Implemented lists instead of multidim arrays 2019-06-11 18:24:02 -06:00
0b25d2feef Merge pull request #1504 from akohlmey/collected-small-fixes
Collected small bugfixes and changes
2019-06-11 20:20:19 -04:00
1c1491eebf Merge pull request #1506 from sniblett402/patch-1
Bugfix for bond_style table with MPI
2019-06-11 19:10:41 -04:00
400751f30f remove redundant call to broadcast r0 2019-06-11 19:09:23 -04:00
4fbf96a353 Bugfix for bond_style table with MPI
Summary

Very small bug fix - an incorrect MPI datatype was causing undefined behaviour for tabulated bond potentials (bond_style table).

Author(s)

Sam Niblett, LBNL

Licensing

By submitting this pull request, I agree, that my contribution will be included in LAMMPS and redistributed under either the GNU General Public License version 2 (GPL v2) or the GNU Lesser General Public License version 2.1 (LGPL v2.1).

Backward Compatibility

No impact

Implementation Notes

The equilibrium bond length of the tabulated potential (tb->r0) was incorrectly specified as an MPI_INT during a broadcast. Therefore, all non-root processes received a truncated value of this parameter. This simple fix produced the expected behaviour for me.
2019-06-11 13:48:01 -07:00
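Editor's note: a minimal sketch (not the LAMMPS source) of the bug class fixed in this pull request — broadcasting a double while declaring it as MPI_INT transfers the wrong number of bytes, so non-root ranks receive a truncated value:

// Hypothetical minimal example of the mismatched-datatype bug and its fix.
#include <mpi.h>
#include <cstdio>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);

  double r0 = (rank == 0) ? 1.54 : 0.0;   // equilibrium bond length, set on root only

  // Buggy: datatype does not match the buffer type, non-root ranks get garbage
  // MPI_Bcast(&r0, 1, MPI_INT, 0, MPI_COMM_WORLD);

  // Fixed: declare the buffer as what it actually is
  MPI_Bcast(&r0, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD);

  printf("rank %d: r0 = %g\n", rank, r0);
  MPI_Finalize();
  return 0;
}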
1dc8bb163d Merge pull request #1485 from Adrian-Diaz/memcpy2memmove
replace some calls to `memcpy()` with calls to `memmove()`
2019-06-11 14:07:46 -04:00
bb0240d8eb Merge pull request #1501 from uihsnv/vim_files
Simplify and update vim syntax highlighting files
2019-06-11 14:06:58 -04:00
fd592d510f Merge branch 'collected-small-fixes' of github.com:akohlmey/lammps into collected-small-fixes
# Conflicts:
#	cmake/CMakeLists.txt
2019-06-11 14:03:27 -04:00
dbafb92dd5 cmake minor cleanup and removal of redundant code and empty lines 2019-06-11 14:02:41 -04:00
83060f0902 Merge branch 'master' into collected-small-fixes 2019-06-11 13:35:42 -04:00
fd55d7d367 update list of known LAMMPS keywords 2019-06-11 11:15:04 -04:00
3d5db63381 minor cleanup 2019-06-11 10:36:04 -04:00
42b0cb5e3e Merge pull request #1502 from rbberger/cmake_cleanup
Refactor CMake configuration
2019-06-11 10:19:07 -04:00
4cee333c07 need to use C++ compiler when checking for includes
this is the same bugfix as in PR #1504
2019-06-11 07:12:39 -04:00
7f4c611e21 must use C++ compiler to check for include files 2019-06-11 06:51:03 -04:00
fe29572737 Merge pull request #1496 from akkamesh/enh-ext-reaxc
reaxc/qeq optimization - using kokkos hierarchical parallelism
2019-06-10 21:37:12 -04:00
9421466f57 Merge branch 'master' into enh-ext-reaxc
Resolved Merge Conflict in src/KOKKOS/kokkos.cpp
2019-06-10 20:14:40 -04:00
8d985e53f4 Merge pull request #1494 from mkanski/extep_hybrid
Fix for extep when NULL or only some elements from potential file are used.
2019-06-10 20:04:24 -04:00
e72ac92a7f Merge pull request #1422 from stanmoore1/team_opt
Optimize KOKKOS package for small systems
2019-06-10 20:03:53 -04:00
3e2f3a8058 avoid a case of mixing malloc()/free() with new/delete 2019-06-10 18:22:04 -04:00
72b295d7f4 add support for internal style variables to info command 2019-06-10 18:21:17 -04:00
fa76472135 Add Kamesh as contributing author to fix_qeq_reax_kokkos 2019-06-10 15:48:53 -06:00
b368b11d60 Move ShowHelp test into Testing.cmake 2019-06-09 22:31:56 -04:00
591e6836fd Move CMake configuration for USER-H5MD into its own file 2019-06-09 22:24:59 -04:00
4ac100fe69 Define additional LAMMPS_*_DIRS to remove relative paths later 2019-06-09 22:05:53 -04:00
4ecd81f81b Move CMake configuration of PYTHON package into its own file 2019-06-09 21:48:05 -04:00
203c6d122b Move KSPACE CMake configuration into its own file 2019-06-09 21:37:34 -04:00
e788d32678 Move autogen check into utility function 2019-06-09 04:02:08 -04:00
ae59ffe83e Move more CMake configurations to own files 2019-06-09 03:54:52 -04:00
99a8d3c2ed Create CMake files for several packages
This removes the CMake configuration of several larger packages and places
it into separate files in the Modules/Packages folder.
 - COMPRESS
 - KIM
 - LATTE
 - MESSAGE
 - MSCG
 - USER-MOLFILE
 - USER-NETCDF
 - USER-PLUMED
 - USER-QMMM
 - USER-QUIP
 - USER-SCAFACOS
 - USER-SMD
 - USER-VTK
2019-06-09 03:35:25 -04:00
ca1445788e Move CMake configuration of VORONOI package into its own file 2019-06-09 03:23:23 -04:00
0908bd7aaf Move code coverage and testing into their own files 2019-06-09 03:18:20 -04:00
239dfe163c Simplify vim-files
Resolves #1500
2019-06-08 17:11:20 +05:30
cea1bd9cd9 Merging to stay up-to-date after patch 5Jun2019 2019-06-08 16:47:02 +05:30
31dc5dbb51 Fix variable type 2019-06-07 17:00:43 -06:00
9e3dc26599 Fix name in pair_exp6_rx_kokkos 2019-06-07 16:41:41 -06:00
8da75c2040 Merge branch 'master' of github.com:lammps/lammps into enh-ext-reaxc 2019-06-07 16:33:44 -06:00
85999fc4a7 Restore original compute_h in fix_qeq_reax_kokkos 2019-06-07 16:31:31 -06:00
73fa8d4055 Rename Kokkos variables 2019-06-07 16:30:02 -06:00
bd237a05b8 Tweak scalar view allocation 2019-06-07 15:46:04 -06:00
4941f11ad4 Remove unused tag 2019-06-07 15:37:49 -06:00
afab5ef303 Move CMake part on coverage into its own file 2019-06-07 12:24:35 -04:00
a7c02e699e Move CMake utilities into LAMMPSUtils.cmake 2019-06-07 12:18:49 -04:00
8fed39d726 Move CMake documentation build section into its own file 2019-06-07 12:16:01 -04:00
b9e10d55e2 Merge pull request #1492 from akohlmey/collected-small-changes
Collected small changes for the next patch
2019-06-07 11:07:10 -04:00
1f1a0f95b2 Merge pull request #1497 from junghans/check_for_immintrin
cmake: check for immintrin.h
2019-06-07 10:57:56 -04:00
61e9dc4c8d more accurate checking for styles using utils::strmatch() instead of strcmp() or strncmp() 2019-06-07 07:14:57 -04:00
b53df3dd63 disable optimization on functions building factories for many entries
this will speed up compilation and also avoid spurious warnings with gcc 4.4 and later
2019-06-06 20:37:17 -04:00
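Editor's note: a generic sketch of the technique named in the commit above; the exact mechanism used in LAMMPS may differ. A function that only fills a large factory map gains nothing from optimization, but costs a lot of compile time and can trigger spurious variable-tracking warnings on gcc 4.4 and later.

#include <map>
#include <string>
#include <functional>

struct Pair { virtual ~Pair() = default; };
struct PairLJ : Pair {};
struct PairCoul : Pair {};

using Factory = std::map<std::string, std::function<Pair *()>>;

#if defined(__GNUC__) && !defined(__clang__)
__attribute__((optimize("O0")))   // skip optimizing this one function (GCC >= 4.4)
#endif
Factory build_pair_factory()
{
  Factory f;
  f["lj/cut"]   = [] { return static_cast<Pair *>(new PairLJ); };
  f["coul/cut"] = [] { return static_cast<Pair *>(new PairCoul); };
  // ... hundreds more entries in the real code ...
  return f;
}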
56e3b1d1f4 remove dead code 2019-06-06 20:22:08 -04:00
fde7e2de3c switch to use alternate flag 2019-06-06 15:08:26 -04:00
e2391edce6 turn off only variable tracking and make people wait again 2019-06-06 14:44:28 -04:00
f7026491f1 Code reformat 2019-06-06 09:59:41 -06:00
40a2f275c2 Fix a few more issues resulting from "nelements != atom->types"
this also detects more safely whether there is data in the second part of the input file that is still formatted for the first part.
2019-06-05 17:11:53 -04:00
e549f911f7 turn off variable tracking through turning off optimization for GCC 4.4 and later
This will avoid a difficult to interpret warning and in
addition speed up compilation of this one file by avoiding
to try to optimize something, that needs no optimization.
2019-06-05 14:36:08 -04:00
34dca6dc79 advance warning message about collecting styles and packages info to an earlier slot in the process 2019-06-05 14:32:02 -04:00
2ebc40deb3 cmake: check for immintrin.h 2019-06-05 11:13:48 -06:00
e9b4ab7363 Fixed potential file parsing when NULL or only some elements are used. 2019-06-05 16:12:24 +02:00
8e43a45925 Detect and error out on invalid kspace mesh settings. make coulomb and dispersion settings consistent 2019-06-05 01:10:44 -04:00
92f078cfff nicer typesetting of "none, zero, hybrid" potential styles in commands lists 2019-06-04 22:29:25 -04:00
708052dc81 reaxc/qeq optimization - using kokkos hierarchical parallelism 2019-06-04 14:22:43 -07:00
960a975e2a Added compact arrays, removed unused openmp stuff 2019-06-03 19:50:40 -06:00
3b7c15a8b2 Merging to stay up-to-date
Merge remote-tracking branch 'lammps_upstream/master' into log_spacing
2019-06-01 10:33:27 +05:30
76900b8325 avoids the possibility of undefined behavior from memcpy() being called with the same source and destination arrays; this does not cover every instance of this issue in the source code 2019-05-31 11:53:58 -06:00
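Editor's note: a minimal sketch of the issue behind this commit — memcpy() has undefined behavior when source and destination overlap (including being identical), while memmove() is defined for overlap. The helper compact() below is hypothetical, not LAMMPS code.

#include <cstring>

void compact(double *data, int first, int n)
{
  // Undefined behavior when first == 0 (src == dst) or when the ranges overlap:
  // memcpy(data, data + first, n * sizeof(double));

  // Safe for any overlap, with essentially the same cost otherwise:
  memmove(data, data + first, n * sizeof(double));
}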
b88158fc3b Fix issue in npair_kokkos 2019-05-29 11:16:38 -06:00
3b60686827 Small tweak to verlet_kokkos 2019-05-29 09:43:50 -06:00
35be1724e3 Commit JT 052819
- corrected examples in examples/SPIN/dipole_spin
- modified warning message in src/SPIN/pair_spin_dipole_*.cpp
2019-05-28 15:31:03 -06:00
d717101e94 Merge branch 'master' of https://github.com/lammps/lammps into team_opt 2019-05-28 14:02:22 -06:00
e44c877738 Add missing tag sync in fix_qeq_reax_kokkos 2019-05-28 10:21:29 -06:00
c2a200fe85 A function logfreq3(), for logarithmic spacing
Unlike logfreq(), this also allows for fractional ratios
Resolves #1471
2019-05-26 14:00:21 +05:30
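Editor's note: a hedged sketch of what logarithmic spacing with a fractional ratio means; logspace_steps is a hypothetical helper, not the LAMMPS implementation of logfreq3(). With npoints values (npoints >= 2) from lo to hi, the successive ratio r = (hi/lo)^(1/(npoints-1)) need not be an integer, unlike a fixed integer factor.

#include <cmath>
#include <cstdio>
#include <vector>

std::vector<long> logspace_steps(double lo, double hi, int npoints)
{
  std::vector<long> steps;
  const double ratio = std::pow(hi / lo, 1.0 / (npoints - 1));
  double v = lo;
  for (int i = 0; i < npoints; ++i) {
    steps.push_back(std::lround(v));   // round to usable timesteps
    v *= ratio;                        // fractional ratios allowed
  }
  return steps;
}

int main()
{
  for (long s : logspace_steps(10, 1000, 7)) printf("%ld\n", s);
  // 10 22 46 100 215 464 1000  (approximately)
}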
eea67bf3bf Add sync/modify for growing dvector 2019-05-22 08:52:57 -06:00
439e7da03f Need auto-sync on for initialization 2019-05-21 11:47:55 -06:00
ed7c09ac81 Add missing character 2019-05-21 10:30:33 -06:00
0ee1daa46d Add Lenz to false-positive list 2019-05-21 10:24:24 -06:00
2fbc4f504d Per-atom virial is not yet supported with pppm/dipole 2019-05-21 10:16:13 -06:00
95ab056576 Add PPPM dipole reference 2019-05-21 10:07:41 -06:00
a0bc619550 Need to call atomKK version of sync/modified in Kokkos atom_vec styles 2019-05-21 09:21:55 -06:00
82b50706bd Commit2 JT 052019
- some corrections in the examples
- deleted an old doc files (now redundant)
2019-05-20 22:09:59 -06:00
e90eed9120 Commit JT 052019
- deleted old doc
- renamed new doc files
2019-05-20 21:48:05 -06:00
5f2f7d1575 Merge branch 'pppm_spin' of github.com:julient31/lammps into pppm_spin 2019-05-20 21:35:52 -06:00
3e60ec9be1 Merge branch 'clean-master2' of github.com:julient31/lammps into pppm_spin 2019-05-20 21:35:16 -06:00
dc8b43a95f update docs and sources for changes in this branch 2019-05-18 15:16:10 -04:00
601746b565 restore lost changes to fix gpu from upstream 2019-05-18 13:09:25 -04:00
fbb78e7b78 Commit JT 051719
- removed qsymp pair style
- cleaned doc (pair/spin/dipole and kspace_style)
- cleaned kspace .cpp/h files
2019-05-17 15:04:14 -06:00
803e0631c5 Added bispectrum compute 2019-05-16 22:11:06 -06:00
51a6bfd579 Added bispectrum compute 2019-05-16 22:01:45 -06:00
5b71b3fc57 Added bispectrum compute 2019-05-16 21:51:24 -06:00
e2ed9330b7 Merge branch 'master' into team_opt 2019-05-16 12:31:26 -06:00
41872e37e6 Merge branch 'pppm_spin' of github.com:julient31/lammps into pppm_spin
Conflicts:
	doc/src/pair_spin_dipole.txt
2019-05-16 10:34:33 -06:00
0c0b106924 Commit2 JT 051519
- started doc pair_spin_dipole.txt
- renamed all pair/spin/dipole
- created and tested example pair/spin/dipole/cut
2019-05-15 20:49:05 -06:00
98d9c45ad9 compute_bispectrum 2019-05-15 17:18:24 -06:00
084bb3c35b Commit JT 051519
- start doc pair_spin_dipole
2019-05-15 08:34:12 -06:00
31789ad03b Commit JT 051419
- added beginning doc
- removed a remaining dipolar
2019-05-14 17:44:35 -06:00
fd168068a1 Merge branch 'clean-master2' of github.com:julient31/lammps into pppm_spin
Conflicts:
	src/SPIN/fix_nve_spin.h
2019-05-14 17:41:58 -06:00
a0cc6b5b59 Forgot to change pair style to nn/snap 2019-05-11 14:04:21 -06:00
f2d881470d Added placeholder for neural network SNAP potential 2019-05-11 12:55:11 -06:00
e13c661f77 Added placeholder for neural network SNAP potential 2019-05-11 12:54:18 -06:00
a1f421cd54 Moved compute_beta outside of main force loop 2019-05-11 12:41:54 -06:00
6d84bd6138 Added compute_beta() 2019-05-10 10:34:01 -06:00
cb6b498127 Commit JT 042219
- change ntot -> nlocal
2019-04-22 14:43:01 -06:00
a243be2dc9 Added bare-bones yarray algorithm, 2x speedup 2019-04-21 22:10:03 -06:00
a696b26301 Merge branch 'clean-master2' of github.com:julient31/lammps into pppm_spin
Conflicts:
	src/SPIN/pair_spin_exchange.cpp
	src/atom.cpp
	src/pair.cpp
2019-04-19 15:02:25 -06:00
4a4297591e Did some more cleanups 2019-04-17 12:04:31 -05:00
1f43efc111 Cleaned up the changes in Device and the base class of the pair styles 2019-04-17 00:09:49 -05:00
c55009a0ac Enabled neighbor list build on the device with pair_style hybrid and hybrid/overlay 2019-04-16 23:30:25 -05:00
e4e2249b63 Fix issue in comm_kokkos 2019-04-10 10:17:14 -06:00
cf35ebe5fa Revert optimization that is causing regression tests to fail 2019-04-09 16:17:37 -06:00
073f003470 Doc tweak 2019-04-09 15:17:40 -06:00
618547b72e Reduce DELTA and revert subview change 2019-04-09 14:40:39 -06:00
a01bce46bb Reduce GPU/CPU data transfer 2019-04-09 10:23:37 -06:00
82be3ee32c Only use team with full neigh list 2019-04-09 09:17:07 -06:00
16b17f812c Update docs 2019-04-09 08:51:24 -06:00
22b7e12baf Merge branch 'kk_binsize' into team_opt 2019-04-08 16:40:20 -06:00
b8d3c9e01b Rename team option to neigh/thread 2019-04-08 16:02:18 -06:00
149a57f3ae Add threshold for using Kokkos teams 2019-04-08 15:21:42 -06:00
7f2c81657b Merge from upstream 2019-04-08 14:14:52 -06:00
f2ef02b6d9 Comm exchange is a no-op for 1 MPI rank 2019-03-21 09:27:18 -06:00
c3adfcbc15 Add missing sync in comm_kokkos 2019-03-21 08:56:12 -06:00
744a8215dd Fix compile error in comm_kokkos and indent in atom_vec_kokkos 2019-03-20 15:08:08 -06:00
1f44dc2498 Remove unused array in comm_kokkos 2019-03-20 15:01:47 -06:00
d1e751d717 Fix thread safety issue in fused forward comm 2019-03-20 14:32:03 -06:00
08273c40d7 Fix compile issue in comm_kokkos 2019-03-19 14:29:45 -06:00
e2d28f5160 Only copy pbc info in comm setup 2019-03-18 15:27:35 -06:00
b50ef59a19 Optimize Kokkos comm for small systems 2019-03-18 13:17:32 -06:00
36836598b1 Reduce data transfer in exchange 2019-03-18 10:45:14 -06:00
8c4baac3f1 Only copy force on ghost atoms if newton on 2019-03-15 14:25:24 -06:00
179026dd44 Reduce GPU data movement in npair_kokkos 2019-03-14 17:13:12 -06:00
71a6227240 Optimize KOKKOS package for small system sizes 2019-03-14 15:43:50 -06:00
aecef752e8 Remove unnecessary data movement in fix_nve_kokkos 2019-03-11 13:41:20 -06:00
e422e886de Add error check for team on and full neighborlist 2019-03-08 11:33:29 -07:00
069853fd10 Merge branch 'master' into team 2019-03-08 11:32:31 -07:00
84773f9d1d Merge branch 'master' of github.com:stanmoore1/lammps into team 2019-03-08 08:31:48 -07:00
ff7276e494 Clean up the fused comm 2019-03-07 08:56:13 -07:00
553714b0bb Merge branch 'master' into team 2019-03-07 08:42:24 -07:00
5d8e3c6cb4 Optimize reneighbor for small systems 2019-02-28 10:14:35 -07:00
29073f7d0e Merge branch 'team' into comm_squash 2019-02-25 09:20:13 -07:00
38c6c1907b Merge branch 'master' into team 2019-02-25 09:19:54 -07:00
85a14ebcb8 Fix issue with comm squash 2019-02-25 09:17:34 -07:00
0a02097e20 Add squashed comm forward for Kokkos 2019-02-25 08:39:54 -07:00
a72f3cfb58 Merge branch 'master' into team 2019-02-11 14:45:34 -07:00
58905525bf Add team-based calcs to some KOKKOS package pair_styles 2019-02-06 14:42:37 -07:00
7a2d326103 Commit JT 010819
- commit before co
2019-01-08 09:19:49 -07:00
ddd5e61254 Commit JT 111418
- removed muk table (size kmax3d, mem fault)
2018-11-14 09:46:16 -07:00
d66a1ac054 Commit JT 111318
- corrections pair/spin/dipolar/long
2018-11-13 17:03:32 -07:00
9727fdc473 Commit JT 110818
- correct bug (match ewald/disp results for vir)
- started correct mag. part
2018-11-08 16:17:43 -07:00
d5fe8857cc Commit JT 100518
- correction torque ewald_dipole
- idem ewald_dipole_spin to check
2018-10-05 14:01:29 -06:00
a745a0aed0 Commit JT 100318
- correction forces ewald_dipole
- correction mag. dipolar energy
2018-10-03 10:23:58 -06:00
19aaf294e5 Commit JT 092718
- renamed pair/spin/long functions
- started to work on debugging ewald_dipole (force errors)
2018-09-27 10:46:52 -06:00
6b4303c405 Commit2 JT 092418
- initialized g_ewald before Newton solver
- mu2 is now adim in ewald_dipole_spin
2018-09-24 16:40:59 -06:00
53a779067e Commit JT 092418
- ewald_dipole_spin added
- accuracy problem (with eval of gewald and Newton solver)
2018-09-24 10:59:17 -06:00
cce9fe4a34 Commit2 JT 092118
- created pair_spin_dipolar_cut
- real-space short-range calc of the spin dipolar interaction
- run and check valgrind ok
2018-09-21 09:55:41 -06:00
407392f6bf Commit JT 092118
- ewald_dipole with virial, torque and slabcorr
- run and valgrind test ok

Merge branch 'pppm_spin' of github.com:julient31/lammps into pppm_spin

Conflicts:
	src/KSPACE/ewald_dipole.cpp
2018-09-21 08:33:02 -06:00
cdde878d60 Commit JT 091618
- merge with modifs Stan 1
- energy correction

Merge branch 'pppm_spin' of github.com:julient31/lammps into pppm_spin

Conflicts:
	src/KSPACE/ewald_dipole.cpp
2018-09-16 09:42:27 -06:00
b9e33e631f Fix bug in ewald_dipole forces 2018-09-15 13:34:24 -06:00
82a5346ab1 Commit JT 091418
- created pair_spin_long_qsymp
- modified ewald_dipole
2018-09-14 15:09:59 -06:00
a76457ef22 Fix bug in ewald_dipole structure factor 2018-09-14 13:05:48 -06:00
e6b5112ddc Fix issues in ewald_dipole 2018-09-13 14:36:54 -06:00
16911adcea Commit1 JT 083018
- started to work on ewald_dipole (not yet triclinic)
- compiles and runs (no memory issue)
- check the energy accuracy
2018-08-30 07:33:25 -06:00
cf1d421e10 Commit JT 082318
- corrected memory errors in pppm_dipole and pppm_dipole_spin
- created fm_long in atom_vec_spin
- fm_long added to fm in initial_integrate (in ComputeInteractionsSpin)
2018-08-23 15:18:30 -06:00
8d79db03d3 Commit1 JT 082118
- created pppm_dipole_spin.h/cpp (child-class of pppm_dipole)
- improved pair_spin_long.h/cpp
- created documentation for pair_spin_long
- new 3xN fm_long vector in atom_vec_spin (with associated comm)
2018-08-21 13:47:38 -06:00
5e287033f7 Commit1 JT 081618
- converted pppm_spin for long range spin-spin interactions
- modified kspace, pair,and pair_hybrid to add spinflag
2018-08-16 10:13:18 -06:00
e1ab38439b Commit2 JT 081418
- converted pppm_dipole toward spin quantities
- need to check if can handle ferrimagnets
2018-08-14 17:09:44 -06:00
062c1a04fc Commit JT 081418
- initial commit pppm_spin branch
- copied short_range spin files (src/SPIN)
- copied/renamed Stan's file (from pppm_dipole branch)
2018-08-14 14:42:01 -06:00
197 changed files with 12220 additions and 4739 deletions

File diff suppressed because it is too large.

@@ -0,0 +1,28 @@
###############################################################################
# Coverage
#
# Requires latest gcovr (for GCC 8.1 support):
# pip install git+https://github.com/gcovr/gcovr.git
###############################################################################
if(ENABLE_COVERAGE)
find_program(GCOVR_BINARY gcovr)
find_package_handle_standard_args(GCOVR DEFAULT_MSG GCOVR_BINARY)
if(GCOVR_FOUND)
get_filename_component(ABSOLUTE_LAMMPS_SOURCE_DIR ${LAMMPS_SOURCE_DIR} ABSOLUTE)
add_custom_target(
gen_coverage_xml
COMMAND ${GCOVR_BINARY} -s -x -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.xml
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "Generating XML Coverage Report..."
)
add_custom_target(
gen_coverage_html
COMMAND ${GCOVR_BINARY} -s --html --html-details -r ${ABSOLUTE_LAMMPS_SOURCE_DIR} --object-directory=${CMAKE_BINARY_DIR} -o coverage.html
WORKING_DIRECTORY ${CMAKE_BINARY_DIR}
COMMENT "Generating HTML Coverage Report..."
)
endif()
endif()

@@ -0,0 +1,59 @@
###############################################################################
# Build documentation
###############################################################################
option(BUILD_DOC "Build LAMMPS documentation" OFF)
if(BUILD_DOC)
include(ProcessorCount)
ProcessorCount(NPROCS)
find_package(PythonInterp 3 REQUIRED)
set(VIRTUALENV ${PYTHON_EXECUTABLE} -m virtualenv)
file(GLOB DOC_SOURCES ${LAMMPS_DOC_DIR}/src/[^.]*.txt)
file(GLOB PDF_EXTRA_SOURCES ${LAMMPS_DOC_DIR}/src/lammps_commands*.txt ${LAMMPS_DOC_DIR}/src/lammps_support.txt ${LAMMPS_DOC_DIR}/src/lammps_tutorials.txt)
list(REMOVE_ITEM DOC_SOURCES ${PDF_EXTRA_SOURCES})
add_custom_command(
OUTPUT docenv
COMMAND ${VIRTUALENV} docenv
)
set(DOCENV_BINARY_DIR ${CMAKE_BINARY_DIR}/docenv/bin)
add_custom_command(
OUTPUT requirements.txt
DEPENDS docenv
COMMAND ${CMAKE_COMMAND} -E copy ${LAMMPS_DOC_DIR}/utils/requirements.txt requirements.txt
COMMAND ${DOCENV_BINARY_DIR}/pip install -r requirements.txt --upgrade
COMMAND ${DOCENV_BINARY_DIR}/pip install --upgrade ${LAMMPS_DOC_DIR}/utils/converters
)
set(RST_FILES "")
set(RST_DIR ${CMAKE_BINARY_DIR}/rst)
file(MAKE_DIRECTORY ${RST_DIR})
foreach(TXT_FILE ${DOC_SOURCES})
get_filename_component(FILENAME ${TXT_FILE} NAME_WE)
set(RST_FILE ${RST_DIR}/${FILENAME}.rst)
list(APPEND RST_FILES ${RST_FILE})
add_custom_command(
OUTPUT ${RST_FILE}
DEPENDS requirements.txt docenv ${TXT_FILE}
COMMAND ${DOCENV_BINARY_DIR}/txt2rst -o ${RST_DIR} ${TXT_FILE}
)
endforeach()
add_custom_command(
OUTPUT html
DEPENDS ${RST_FILES}
COMMAND ${CMAKE_COMMAND} -E copy_directory ${LAMMPS_DOC_DIR}/src ${RST_DIR}
COMMAND ${DOCENV_BINARY_DIR}/sphinx-build -j ${NPROCS} -b html -c ${LAMMPS_DOC_DIR}/utils/sphinx-config -d ${CMAKE_BINARY_DIR}/doctrees ${RST_DIR} html
)
add_custom_target(
doc ALL
DEPENDS html
SOURCES ${LAMMPS_DOC_DIR}/utils/requirements.txt ${DOC_SOURCES}
)
install(DIRECTORY ${CMAKE_BINARY_DIR}/html DESTINATION ${CMAKE_INSTALL_DOCDIR})
endif()

@@ -0,0 +1,71 @@
# Utility functions
function(list_to_bulletpoints result)
list(REMOVE_AT ARGV 0)
set(temp "")
foreach(item ${ARGV})
set(temp "${temp}* ${item}\n")
endforeach()
set(${result} "${temp}" PARENT_SCOPE)
endfunction(list_to_bulletpoints)
function(validate_option name values)
string(TOLOWER ${${name}} needle_lower)
string(TOUPPER ${${name}} needle_upper)
list(FIND ${values} ${needle_lower} IDX_LOWER)
list(FIND ${values} ${needle_upper} IDX_UPPER)
if(${IDX_LOWER} LESS 0 AND ${IDX_UPPER} LESS 0)
list_to_bulletpoints(POSSIBLE_VALUE_LIST ${${values}})
message(FATAL_ERROR "\n########################################################################\n"
"Invalid value '${${name}}' for option ${name}\n"
"\n"
"Possible values are:\n"
"${POSSIBLE_VALUE_LIST}"
"########################################################################")
endif()
endfunction(validate_option)
function(get_lammps_version version_header variable)
file(READ ${version_header} line)
set(MONTHS x Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)
string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\1" day "${line}")
string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\2" month "${line}")
string(REGEX REPLACE "#define LAMMPS_VERSION \"([0-9]+) ([A-Za-z]+) ([0-9]+)\"" "\\3" year "${line}")
string(STRIP ${day} day)
string(STRIP ${month} month)
string(STRIP ${year} year)
list(FIND MONTHS "${month}" month)
string(LENGTH ${day} day_length)
string(LENGTH ${month} month_length)
if(day_length EQUAL 1)
set(day "0${day}")
endif()
if(month_length EQUAL 1)
set(month "0${month}")
endif()
set(${variable} "${year}${month}${day}" PARENT_SCOPE)
endfunction()
function(check_for_autogen_files source_dir)
message(STATUS "Running check for auto-generated files from make-based build system")
file(GLOB SRC_AUTOGEN_FILES ${source_dir}/style_*.h)
file(GLOB SRC_AUTOGEN_PACKAGES ${source_dir}/packages_*.h)
list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h)
foreach(_SRC ${SRC_AUTOGEN_FILES})
get_filename_component(FILENAME "${_SRC}" NAME)
if(EXISTS ${source_dir}/${FILENAME})
message(FATAL_ERROR "\n########################################################################\n"
"Found header file(s) generated by the make-based build system\n"
"\n"
"Please run\n"
"make -C ${source_dir} purge\n"
"to remove\n"
"########################################################################")
endif()
endforeach()
endfunction()
macro(pkg_depends PKG1 PKG2)
if(PKG_${PKG1} AND NOT (PKG_${PKG2} OR BUILD_${PKG2}))
message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}")
endif()
endmacro()

@@ -0,0 +1,5 @@
if(PKG_COMPRESS)
find_package(ZLIB REQUIRED)
include_directories(${ZLIB_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${ZLIB_LIBRARIES})
endif()

@@ -0,0 +1,13 @@
if(PKG_CORESHELL)
set(CORESHELL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/CORESHELL)
set(CORESHELL_SOURCES)
set_property(GLOBAL PROPERTY "CORESHELL_SOURCES" "${CORESHELL_SOURCES}")
# detects styles which have a CORESHELL version
RegisterStylesExt(${CORESHELL_SOURCES_DIR} cs CORESHELL_SOURCES)
get_property(CORESHELL_SOURCES GLOBAL PROPERTY CORESHELL_SOURCES)
list(APPEND LIB_SOURCES ${CORESHELL_SOURCES})
include_directories(${CORESHELL_SOURCES_DIR})
endif()

@@ -0,0 +1,194 @@
if(PKG_GPU)
if (CMAKE_VERSION VERSION_LESS "3.1")
message(FATAL_ERROR "For the GPU package you need at least cmake-3.1")
endif()
set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h
${GPU_SOURCES_DIR}/fix_gpu.h
${GPU_SOURCES_DIR}/fix_gpu.cpp)
set(GPU_API "opencl" CACHE STRING "API used by GPU package")
set(GPU_API_VALUES opencl cuda)
set_property(CACHE GPU_API PROPERTY STRINGS ${GPU_API_VALUES})
validate_option(GPU_API GPU_API_VALUES)
string(TOUPPER ${GPU_API} GPU_API)
set(GPU_PREC "mixed" CACHE STRING "LAMMPS GPU precision")
set(GPU_PREC_VALUES double mixed single)
set_property(CACHE GPU_PREC PROPERTY STRINGS ${GPU_PREC_VALUES})
validate_option(GPU_PREC GPU_PREC_VALUES)
string(TOUPPER ${GPU_PREC} GPU_PREC)
if(GPU_PREC STREQUAL "DOUBLE")
set(GPU_PREC_SETTING "DOUBLE_DOUBLE")
elseif(GPU_PREC STREQUAL "MIXED")
set(GPU_PREC_SETTING "SINGLE_DOUBLE")
elseif(GPU_PREC STREQUAL "SINGLE")
set(GPU_PREC_SETTING "SINGLE_SINGLE")
endif()
file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cpp)
file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu)
if(GPU_API STREQUAL "CUDA")
find_package(CUDA REQUIRED)
find_program(BIN2C bin2c)
if(NOT BIN2C)
message(FATAL_ERROR "Could not find bin2c, use -DBIN2C=/path/to/bin2c to help cmake finding it.")
endif()
option(CUDPP_OPT "Enable CUDPP_OPT" ON)
option(CUDA_MPS_SUPPORT "Enable tweaks to support CUDA Multi-process service (MPS)" OFF)
if(CUDA_MPS_SUPPORT)
set(GPU_CUDA_MPS_FLAGS "-DCUDA_PROXY")
endif()
set(GPU_ARCH "sm_30" CACHE STRING "LAMMPS GPU CUDA SM primary architecture (e.g. sm_60)")
file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu ${CMAKE_CURRENT_SOURCE_DIR}/gpu/[^.]*.cu)
list(REMOVE_ITEM GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)
cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu)
if(CUDPP_OPT)
cuda_include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cpp)
file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/[^.]*.cu)
endif()
# build arch/gencode commands for nvcc based on CUDA toolkit version and use choice
# --arch translates directly instead of JIT, so this should be for the preferred or most common architecture
set(GPU_CUDA_GENCODE "-arch=${GPU_ARCH} ")
# Fermi (GPU Arch 2.x) is supported by CUDA 3.2 to CUDA 8.0
if((CUDA_VERSION VERSION_GREATER "3.1") AND (CUDA_VERSION VERSION_LESS "9.0"))
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_20,code=[sm_20,compute_20] ")
endif()
# Kepler (GPU Arch 3.x) is supported by CUDA 5 and later
if(CUDA_VERSION VERSION_GREATER "4.9")
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_30,code=[sm_30,compute_30] -gencode arch=compute_35,code=[sm_35,compute_35] ")
endif()
# Maxwell (GPU Arch 5.x) is supported by CUDA 6 and later
if(CUDA_VERSION VERSION_GREATER "5.9")
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_50,code=[sm_50,compute_50] -gencode arch=compute_52,code=[sm_52,compute_52] ")
endif()
# Pascal (GPU Arch 6.x) is supported by CUDA 8 and later
if(CUDA_VERSION VERSION_GREATER "7.9")
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_60,code=[sm_60,compute_60] -gencode arch=compute_61,code=[sm_61,compute_61] ")
endif()
# Volta (GPU Arch 7.0) is supported by CUDA 9 and later
if(CUDA_VERSION VERSION_GREATER "8.9")
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_70,code=[sm_70,compute_70] ")
endif()
# Turing (GPU Arch 7.5) is supported by CUDA 10 and later
if(CUDA_VERSION VERSION_GREATER "9.9")
string(APPEND GPU_CUDA_GENCODE "-gencode arch=compute_75,code=[sm_75,compute_75] ")
endif()
cuda_compile_fatbin(GPU_GEN_OBJS ${GPU_LIB_CU} OPTIONS
-DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DNV_KERNEL -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING})
cuda_compile(GPU_OBJS ${GPU_LIB_CUDPP_CU} OPTIONS ${CUDA_REQUEST_PIC}
-DUNIX -O3 --use_fast_math -Wno-deprecated-gpu-targets -DUCL_CUDADR ${GPU_CUDA_GENCODE} -D_${GPU_PREC_SETTING})
foreach(CU_OBJ ${GPU_GEN_OBJS})
get_filename_component(CU_NAME ${CU_OBJ} NAME_WE)
string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}")
add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
DEPENDS ${CU_OBJ}
COMMENT "Generating ${CU_NAME}_cubin.h")
list(APPEND GPU_LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h)
endforeach()
set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h")
add_library(gpu STATIC ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS})
target_link_libraries(gpu ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_BINARY_DIR}/gpu ${CUDA_INCLUDE_DIRS})
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -DMPI_GERYON -DUCL_NO_EXIT ${GPU_CUDA_MPS_FLAGS})
if(CUDPP_OPT)
target_include_directories(gpu PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
target_compile_definitions(gpu PRIVATE -DUSE_CUDPP)
endif()
list(APPEND LAMMPS_LINK_LIBS gpu)
add_executable(nvc_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
target_compile_definitions(nvc_get_devices PRIVATE -DUCL_CUDADR)
target_link_libraries(nvc_get_devices PRIVATE ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
target_include_directories(nvc_get_devices PRIVATE ${CUDA_INCLUDE_DIRS})
elseif(GPU_API STREQUAL "OPENCL")
find_package(OpenCL REQUIRED)
set(OCL_TUNE "generic" CACHE STRING "OpenCL Device Tuning")
set(OCL_TUNE_VALUES intel fermi kepler cypress generic)
set_property(CACHE OCL_TUNE PROPERTY STRINGS ${OCL_TUNE_VALUES})
validate_option(OCL_TUNE OCL_TUNE_VALUES)
string(TOUPPER ${OCL_TUNE} OCL_TUNE)
include(OpenCLUtils)
set(OCL_COMMON_HEADERS ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_preprocessor.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_aux_fun1.h)
file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/[^.]*.cu)
list(REMOVE_ITEM GPU_LIB_CU
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu
${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu
)
foreach(GPU_KERNEL ${GPU_LIB_CU})
get_filename_component(basename ${GPU_KERNEL} NAME_WE)
string(SUBSTRING ${basename} 4 -1 KERNEL_NAME)
GenerateOpenCLHeader(${KERNEL_NAME} ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h ${OCL_COMMON_HEADERS} ${GPU_KERNEL})
list(APPEND GPU_LIB_SOURCES ${CMAKE_CURRENT_BINARY_DIR}/gpu/${KERNEL_NAME}_cl.h)
endforeach()
GenerateOpenCLHeader(gayberne ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne.cu)
GenerateOpenCLHeader(gayberne_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_gayberne_lj.cu)
GenerateOpenCLHeader(re_squared ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared.cu)
GenerateOpenCLHeader(re_squared_lj ${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_ellipsoid_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_re_squared_lj.cu)
GenerateOpenCLHeader(tersoff ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff.cu)
GenerateOpenCLHeader(tersoff_zbl ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_zbl.cu)
GenerateOpenCLHeader(tersoff_mod ${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h ${OCL_COMMON_HEADERS} ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod_extra.h ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_tersoff_mod.cu)
list(APPEND GPU_LIB_SOURCES
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/gayberne_lj_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/re_squared_lj_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_zbl_cl.h
${CMAKE_CURRENT_BINARY_DIR}/gpu/tersoff_mod_cl.h
)
add_library(gpu STATIC ${GPU_LIB_SOURCES})
target_link_libraries(gpu ${OpenCL_LIBRARIES})
target_include_directories(gpu PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/gpu ${OpenCL_INCLUDE_DIRS})
target_compile_definitions(gpu PRIVATE -D_${GPU_PREC_SETTING} -D${OCL_TUNE}_OCL -DMPI_GERYON -DUCL_NO_EXIT)
target_compile_definitions(gpu PRIVATE -DUSE_OPENCL)
list(APPEND LAMMPS_LINK_LIBS gpu)
add_executable(ocl_get_devices ${LAMMPS_LIB_SOURCE_DIR}/gpu/geryon/ucl_get_devices.cpp)
target_compile_definitions(ocl_get_devices PRIVATE -DUCL_OPENCL)
target_link_libraries(ocl_get_devices PRIVATE ${OpenCL_LIBRARIES})
target_include_directories(ocl_get_devices PRIVATE ${OpenCL_INCLUDE_DIRS})
endif()
# GPU package
FindStyleHeaders(${GPU_SOURCES_DIR} FIX_CLASS fix_ FIX)
set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}")
# detects styles which have GPU version
RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)
get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES)
list(APPEND LIB_SOURCES ${GPU_SOURCES})
include_directories(${GPU_SOURCES_DIR})
endif()

@@ -0,0 +1,42 @@
if(PKG_KIM)
find_package(CURL)
if(CURL_FOUND)
include_directories(${CURL_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${CURL_LIBRARIES})
add_definitions(-DLMP_KIM_CURL)
endif()
find_package(KIM-API QUIET)
if(KIM-API_FOUND)
set(DOWNLOAD_KIM_DEFAULT OFF)
else()
set(DOWNLOAD_KIM_DEFAULT ON)
endif()
option(DOWNLOAD_KIM "Download KIM-API from OpenKIM instead of using an already installed one" ${DOWNLOAD_KIM_DEFAULT})
if(DOWNLOAD_KIM)
if(CMAKE_GENERATOR STREQUAL "Ninja")
message(FATAL_ERROR "Cannot build downloaded KIM-API library with Ninja build tool")
endif()
message(STATUS "KIM-API download requested - we will build our own")
enable_language(C)
enable_language(Fortran)
include(ExternalProject)
ExternalProject_Add(kim_build
URL https://s3.openkim.org/kim-api/kim-api-2.0.2.txz
URL_MD5 537d9c0abd30f85b875ebb584f9143fa
BINARY_DIR build
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
-DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER}
-DCMAKE_INSTALL_PREFIX=<INSTALL_DIR>
-DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
)
ExternalProject_get_property(kim_build INSTALL_DIR)
set(KIM-API_INCLUDE_DIRS ${INSTALL_DIR}/include/kim-api)
set(KIM-API_LDFLAGS ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/libkim-api${CMAKE_SHARED_LIBRARY_SUFFIX})
list(APPEND LAMMPS_DEPS kim_build)
else()
find_package(KIM-API REQUIRED)
endif()
list(APPEND LAMMPS_LINK_LIBS "${KIM-API_LDFLAGS}")
include_directories(${KIM-API_INCLUDE_DIRS})
endif()

@@ -0,0 +1,53 @@
if(PKG_KOKKOS)
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos)
add_definitions(-DLMP_KOKKOS)
add_subdirectory(${LAMMPS_LIB_KOKKOS_SRC_DIR} ${LAMMPS_LIB_KOKKOS_BIN_DIR})
set(Kokkos_INCLUDE_DIRS ${LAMMPS_LIB_KOKKOS_SRC_DIR}/core/src
${LAMMPS_LIB_KOKKOS_SRC_DIR}/containers/src
${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src
${LAMMPS_LIB_KOKKOS_BIN_DIR})
include_directories(${Kokkos_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS kokkos)
set(KOKKOS_PKG_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/KOKKOS)
set(KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/fix_nh_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp
${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp)
if(PKG_KSPACE)
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/gridcomm_kokkos.cpp)
endif()
set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}")
# detects styles which have KOKKOS version
RegisterStylesExt(${KOKKOS_PKG_SOURCES_DIR} kokkos KOKKOS_PKG_SOURCES)
# register kokkos-only styles
RegisterNBinStyle(${KOKKOS_PKG_SOURCES_DIR}/nbin_kokkos.h)
RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_kokkos.h)
if(PKG_USER-DPD)
get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES)
list(APPEND KOKKOS_PKG_SOURCES ${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.cpp)
RegisterNPairStyle(${KOKKOS_PKG_SOURCES_DIR}/npair_ssa_kokkos.h)
set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}")
endif()
get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES)
list(APPEND LIB_SOURCES ${KOKKOS_PKG_SOURCES})
include_directories(${KOKKOS_PKG_SOURCES_DIR})
endif()

@@ -0,0 +1,38 @@
if(PKG_KSPACE)
option(FFT_SINGLE "Use single precision FFT instead of double" OFF)
set(FFTW "FFTW3")
if(FFT_SINGLE)
set(FFTW "FFTW3F")
add_definitions(-DFFT_SINGLE)
endif()
find_package(${FFTW} QUIET)
if(${FFTW}_FOUND)
set(FFT "${FFTW}" CACHE STRING "FFT library for KSPACE package")
else()
set(FFT "KISS" CACHE STRING "FFT library for KSPACE package")
endif()
set(FFT_VALUES KISS ${FFTW} MKL)
set_property(CACHE FFT PROPERTY STRINGS ${FFT_VALUES})
validate_option(FFT FFT_VALUES)
string(TOUPPER ${FFT} FFT)
if(NOT FFT STREQUAL "KISS")
find_package(${FFT} REQUIRED)
if(NOT FFT STREQUAL "FFTW3F")
add_definitions(-DFFT_FFTW)
else()
add_definitions(-DFFT_${FFT})
endif()
include_directories(${${FFT}_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${${FFT}_LIBRARIES})
else()
add_definitions(-DFFT_KISS)
endif()
set(FFT_PACK "array" CACHE STRING "Optimization for FFT")
set(FFT_PACK_VALUES array pointer memcpy)
set_property(CACHE FFT_PACK PROPERTY STRINGS ${FFT_PACK_VALUES})
validate_option(FFT_PACK FFT_PACK_VALUES)
if(NOT FFT_PACK STREQUAL "array")
string(TOUPPER ${FFT_PACK} FFT_PACK)
add_definitions(-DFFT_PACK_${FFT_PACK})
endif()
endif()

@@ -0,0 +1,38 @@
if(PKG_LATTE)
enable_language(Fortran)
find_package(LATTE)
if(LATTE_FOUND)
set(DOWNLOAD_LATTE_DEFAULT OFF)
else()
set(DOWNLOAD_LATTE_DEFAULT ON)
endif()
option(DOWNLOAD_LATTE "Download the LATTE library instead of using an already installed one" ${DOWNLOAD_LATTE_DEFAULT})
if(DOWNLOAD_LATTE)
if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR
message(FATAL_ERROR "For downlading LATTE you need at least cmake-3.7")
endif()
if(CMAKE_GENERATOR STREQUAL "Ninja")
message(FATAL_ERROR "Cannot build downloaded LATTE library with Ninja build tool")
endif()
message(STATUS "LATTE download requested - we will build our own")
include(ExternalProject)
ExternalProject_Add(latte_build
URL https://github.com/lanl/LATTE/archive/v1.2.1.tar.gz
URL_MD5 85ac414fdada2d04619c8f936344df14
SOURCE_SUBDIR cmake
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC}
-DBLAS_LIBRARIES=${BLAS_LIBRARIES} -DLAPACK_LIBRARIES=${LAPACK_LIBRARIES}
-DCMAKE_Fortran_COMPILER=${CMAKE_Fortran_COMPILER} -DCMAKE_Fortran_FLAGS=${CMAKE_Fortran_FLAGS}
-DCMAKE_Fortran_FLAGS_${BTYPE}=${CMAKE_Fortran_FLAGS_${BTYPE}} -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}
)
ExternalProject_get_property(latte_build INSTALL_DIR)
set(LATTE_LIBRARIES ${INSTALL_DIR}/${CMAKE_INSTALL_LIBDIR}/liblatte.a)
list(APPEND LAMMPS_DEPS latte_build)
else()
find_package(LATTE)
if(NOT LATTE_FOUND)
message(FATAL_ERROR "LATTE library not found, help CMake to find it by setting LATTE_LIBRARY, or set DOWNLOAD_LATTE=ON to download it")
endif()
endif()
list(APPEND LAMMPS_LINK_LIBS ${LATTE_LIBRARIES} ${LAPACK_LIBRARIES})
endif()

@@ -0,0 +1,29 @@
if(PKG_MESSAGE)
option(MESSAGE_ZMQ "Use ZeroMQ in MESSAGE package" OFF)
file(GLOB_RECURSE cslib_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.F
${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.c
${LAMMPS_LIB_SOURCE_DIR}/message/cslib/[^.]*.cpp)
add_library(cslib STATIC ${cslib_SOURCES})
if(BUILD_MPI)
target_compile_definitions(cslib PRIVATE -DMPI_YES)
set_target_properties(cslib PROPERTIES OUTPUT_NAME "csmpi")
else()
target_compile_definitions(cslib PRIVATE -DMPI_NO)
target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_MPI)
set_target_properties(cslib PROPERTIES OUTPUT_NAME "csnompi")
endif()
if(MESSAGE_ZMQ)
target_compile_definitions(cslib PRIVATE -DZMQ_YES)
find_package(ZMQ REQUIRED)
target_include_directories(cslib PRIVATE ${ZMQ_INCLUDE_DIRS})
target_link_libraries(cslib PUBLIC ${ZMQ_LIBRARIES})
else()
target_compile_definitions(cslib PRIVATE -DZMQ_NO)
target_include_directories(cslib PRIVATE ${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src/STUBS_ZMQ)
endif()
list(APPEND LAMMPS_LINK_LIBS cslib)
include_directories(${LAMMPS_LIB_SOURCE_DIR}/message/cslib/src)
endif()

@@ -0,0 +1,45 @@
if(PKG_MSCG)
find_package(GSL REQUIRED)
find_package(MSCG QUIET)
if(MSCG_FOUND)
set(DOWNLOAD_MSCG_DEFAULT OFF)
else()
set(DOWNLOAD_MSCG_DEFAULT ON)
endif()
option(DOWNLOAD_MSCG "Download MSCG library instead of using an already installed one" ${DOWNLOAD_MSCG_DEFAULT})
if(DOWNLOAD_MSCG)
if (CMAKE_VERSION VERSION_LESS "3.7") # due to SOURCE_SUBDIR
message(FATAL_ERROR "For downlading MSCG you need at least cmake-3.7")
endif()
if(CMAKE_GENERATOR STREQUAL "Ninja")
message(FATAL_ERROR "Cannot build downloaded MSCG library with Ninja build tool")
endif()
include(ExternalProject)
if(NOT LAPACK_FOUND)
set(EXTRA_MSCG_OPTS "-DLAPACK_LIBRARIES=${CMAKE_CURRENT_BINARY_DIR}/liblinalg.a")
endif()
ExternalProject_Add(mscg_build
URL https://github.com/uchicago-voth/MSCG-release/archive/1.7.3.1.tar.gz
URL_MD5 8c45e269ee13f60b303edd7823866a91
SOURCE_SUBDIR src/CMake
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=<INSTALL_DIR> ${CMAKE_REQUEST_PIC} ${EXTRA_MSCG_OPTS}
BUILD_COMMAND make mscg INSTALL_COMMAND ""
)
ExternalProject_get_property(mscg_build BINARY_DIR)
set(MSCG_LIBRARIES ${BINARY_DIR}/libmscg.a)
ExternalProject_get_property(mscg_build SOURCE_DIR)
set(MSCG_INCLUDE_DIRS ${SOURCE_DIR}/src)
list(APPEND LAMMPS_DEPS mscg_build)
if(NOT LAPACK_FOUND)
file(MAKE_DIRECTORY ${MSCG_INCLUDE_DIRS})
add_dependencies(mscg_build linalg)
endif()
else()
find_package(MSCG)
if(NOT MSCG_FOUND)
message(FATAL_ERROR "MSCG not found, help CMake to find it by setting MSCG_LIBRARY and MSCG_INCLUDE_DIRS, or set DOWNLOAD_MSCG=ON to download it")
endif()
endif()
list(APPEND LAMMPS_LINK_LIBS ${MSCG_LIBRARIES} ${GSL_LIBRARIES} ${LAPACK_LIBRARIES})
include_directories(${MSCG_INCLUDE_DIRS})
endif()

@@ -0,0 +1,13 @@
if(PKG_OPT)
set(OPT_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/OPT)
set(OPT_SOURCES)
set_property(GLOBAL PROPERTY "OPT_SOURCES" "${OPT_SOURCES}")
# detects styles which have OPT version
RegisterStylesExt(${OPT_SOURCES_DIR} opt OPT_SOURCES)
get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES)
list(APPEND LIB_SOURCES ${OPT_SOURCES})
include_directories(${OPT_SOURCES_DIR})
endif()

@@ -0,0 +1,6 @@
if(PKG_PYTHON)
find_package(PythonLibs REQUIRED)
add_definitions(-DLMP_PYTHON)
include_directories(${PYTHON_INCLUDE_DIR})
list(APPEND LAMMPS_LINK_LIBS ${PYTHON_LIBRARY})
endif()

@@ -0,0 +1,20 @@
# Fix qeq/fire requires MANYBODY (i.e. COMB and COMB3) to be installed
if(PKG_QEQ)
set(QEQ_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/QEQ)
file(GLOB QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix*.h)
file(GLOB QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix*.cpp)
if(NOT PKG_MANYBODY)
list(REMOVE_ITEM QEQ_HEADERS ${QEQ_SOURCES_DIR}/fix_qeq_fire.h)
list(REMOVE_ITEM QEQ_SOURCES ${QEQ_SOURCES_DIR}/fix_qeq_fire.cpp)
endif()
set_property(GLOBAL PROPERTY "QEQ_SOURCES" "${QEQ_SOURCES}")
foreach(MY_HEADER ${QEQ_HEADERS})
AddStyleHeader(${MY_HEADER} FIX)
endforeach()
get_property(QEQ_SOURCES GLOBAL PROPERTY QEQ_SOURCES)
list(APPEND LIB_SOURCES ${QEQ_SOURCES})
include_directories(${QEQ_SOURCES_DIR})
endif()

@@ -0,0 +1,8 @@
if(PKG_USER-H5MD)
enable_language(C)
find_package(HDF5 REQUIRED)
target_link_libraries(h5md ${HDF5_LIBRARIES})
target_include_directories(h5md PRIVATE ${HDF5_INCLUDE_DIRS})
include_directories(${HDF5_INCLUDE_DIRS})
endif()

@@ -0,0 +1,118 @@
if(PKG_USER-INTEL)
check_include_file_cxx(immintrin.h FOUND_IMMINTRIN)
if(NOT FOUND_IMMINTRIN)
message(FATAL_ERROR "immintrin.h header not found, Intel package won't work without it")
endif()
add_definitions(-DLMP_USER_INTEL)
set(INTEL_ARCH "cpu" CACHE STRING "Architectures used by USER-INTEL (cpu or knl)")
set(INTEL_ARCH_VALUES cpu knl)
set_property(CACHE INTEL_ARCH PROPERTY STRINGS ${INTEL_ARCH_VALUES})
validate_option(INTEL_ARCH INTEL_ARCH_VALUES)
string(TOUPPER ${INTEL_ARCH} INTEL_ARCH)
find_package(Threads QUIET)
if(Threads_FOUND)
set(INTEL_LRT_MODE "threads" CACHE STRING "Long-range threads mode (none, threads, or c++11)")
else()
set(INTEL_LRT_MODE "none" CACHE STRING "Long-range threads mode (none, threads, or c++11)")
endif()
set(INTEL_LRT_VALUES none threads c++11)
set_property(CACHE INTEL_LRT_MODE PROPERTY STRINGS ${INTEL_LRT_VALUES})
validate_option(INTEL_LRT_MODE INTEL_LRT_VALUES)
string(TOUPPER ${INTEL_LRT_MODE} INTEL_LRT_MODE)
if(INTEL_LRT_MODE STREQUAL "THREADS")
if(Threads_FOUND)
add_definitions(-DLMP_INTEL_USELRT)
list(APPEND LAMMPS_LINK_LIBS ${CMAKE_THREAD_LIBS_INIT})
else()
message(FATAL_ERROR "Must have working threads library for Long-range thread support")
endif()
endif()
if(INTEL_LRT_MODE STREQUAL "C++11")
add_definitions(-DLMP_INTEL_USELRT -DLMP_INTEL_LRT11)
endif()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 16)
message(FATAL_ERROR "USER-INTEL needs at least a 2016 Intel compiler, found ${CMAKE_CXX_COMPILER_VERSION}")
endif()
else()
message(WARNING "USER-INTEL gives best performance with Intel compilers")
endif()
find_package(TBB QUIET)
if(TBB_FOUND)
list(APPEND LAMMPS_LINK_LIBS ${TBB_MALLOC_LIBRARIES})
else()
add_definitions(-DLMP_INTEL_NO_TBB)
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
message(WARNING "USER-INTEL with Intel compilers should use TBB malloc libraries")
endif()
endif()
find_package(MKL QUIET)
if(MKL_FOUND)
add_definitions(-DLMP_USE_MKL_RNG)
list(APPEND LAMMPS_LINK_LIBS ${MKL_LIBRARIES})
else()
message(STATUS "Pair style dpd/intel will be faster with MKL libraries")
endif()
if((NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "64") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "128") AND (NOT ${LAMMPS_MEMALIGN} STREQUAL "256"))
message(FATAL_ERROR "USER-INTEL only supports memory alignment of 64, 128 or 256 on this platform")
endif()
if(INTEL_ARCH STREQUAL "KNL")
if(NOT CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
message(FATAL_ERROR "Must use Intel compiler with USER-INTEL for KNL architecture")
endif()
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -xHost -qopenmp -qoffload")
set(MIC_OPTIONS "-qoffload-option,mic,compiler,\"-fp-model fast=2 -mGLOB_default_function_attrs=\\\"gather_scatter_loop_unroll=4\\\"\"")
add_compile_options(-xMIC-AVX512 -qoffload -fno-alias -ansi-alias -restrict -qoverride-limits ${MIC_OPTIONS})
add_definitions(-DLMP_INTEL_OFFLOAD)
else()
if(CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
if(CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.3 OR CMAKE_CXX_COMPILER_VERSION VERSION_EQUAL 17.4)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xCOMMON-AVX512")
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -xHost")
endif()
include(CheckCXXCompilerFlag)
foreach(_FLAG -O2 -fp-model fast=2 -no-prec-div -qoverride-limits -qopt-zmm-usage=high -qno-offload -fno-alias -ansi-alias -restrict)
check_cxx_compiler_flag("${__FLAG}" COMPILER_SUPPORTS${_FLAG})
if(COMPILER_SUPPORTS${_FLAG})
add_compile_options(${_FLAG})
endif()
endforeach()
else()
add_compile_options(-O3 -ffast-math)
endif()
endif()
# collect sources
set(USER-INTEL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-INTEL)
set(USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/fix_intel.cpp
${USER-INTEL_SOURCES_DIR}/fix_nh_intel.cpp
${USER-INTEL_SOURCES_DIR}/intel_buffers.cpp
${USER-INTEL_SOURCES_DIR}/nbin_intel.cpp
${USER-INTEL_SOURCES_DIR}/npair_intel.cpp)
set_property(GLOBAL PROPERTY "USER-INTEL_SOURCES" "${USER-INTEL_SOURCES}")
# detect styles which have a USER-INTEL version
RegisterStylesExt(${USER-INTEL_SOURCES_DIR} intel USER-INTEL_SOURCES)
RegisterNBinStyle(${USER-INTEL_SOURCES_DIR}/nbin_intel.h)
RegisterNPairStyle(${USER-INTEL_SOURCES_DIR}/npair_intel.h)
RegisterFixStyle(${USER-INTEL_SOURCES_DIR}/fix_intel.h)
get_property(USER-INTEL_SOURCES GLOBAL PROPERTY USER-INTEL_SOURCES)
if(PKG_KSPACE)
list(APPEND USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.cpp)
RegisterIntegrateStyle(${USER-INTEL_SOURCES_DIR}/verlet_lrt_intel.h)
endif()
list(APPEND LIB_SOURCES ${USER-INTEL_SOURCES})
include_directories(${USER-INTEL_SOURCES_DIR})
endif()

@@ -0,0 +1,10 @@
if(PKG_USER-MOLFILE)
set(MOLFILE_INCLUDE_DIRS "${LAMMPS_LIB_SOURCE_DIR}/molfile" CACHE STRING "Path to VMD molfile plugin headers")
add_library(molfile INTERFACE)
target_include_directories(molfile INTERFACE ${MOLFILE_INCLUDE_DIRS})
# no need to link with -ldl on windows
if(NOT ${CMAKE_SYSTEM_NAME} STREQUAL "Windows")
target_link_libraries(molfile INTERFACE ${CMAKE_DL_LIBS})
endif()
list(APPEND LAMMPS_LINK_LIBS molfile)
endif()

@@ -0,0 +1,6 @@
if(PKG_USER-NETCDF)
find_package(NetCDF REQUIRED)
include_directories(${NETCDF_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARIES})
add_definitions(-DLMP_HAS_NETCDF -DNC_64BIT_DATA=0x0020)
endif()

@@ -0,0 +1,42 @@
if(PKG_USER-OMP)
set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP)
set(USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/thr_data.cpp
${USER-OMP_SOURCES_DIR}/thr_omp.cpp
${USER-OMP_SOURCES_DIR}/fix_omp.cpp
${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp
${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp
${USER-OMP_SOURCES_DIR}/domain_omp.cpp)
add_definitions(-DLMP_USER_OMP)
set_property(GLOBAL PROPERTY "OMP_SOURCES" "${USER-OMP_SOURCES}")
# detects styles which have USER-OMP version
RegisterStylesExt(${USER-OMP_SOURCES_DIR} omp OMP_SOURCES)
RegisterFixStyle(${USER-OMP_SOURCES_DIR}/fix_omp.h)
get_property(USER-OMP_SOURCES GLOBAL PROPERTY OMP_SOURCES)
# manually add package dependent source files from USER-OMP that do not provide styles
if(PKG_ASPHERE)
list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_nh_asphere_omp.cpp)
endif()
if(PKG_RIGID)
list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/fix_rigid_nh_omp.cpp)
endif()
if(PKG_USER-REAXC)
list(APPEND USER-OMP_SOURCES ${USER-OMP_SOURCES_DIR}/reaxc_bond_orders_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_hydrogen_bonds_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_nonbonded_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_bonds_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_init_md_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_torsion_angles_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_forces_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_multi_body_omp.cpp
${USER-OMP_SOURCES_DIR}/reaxc_valence_angles_omp.cpp)
endif()
list(APPEND LIB_SOURCES ${USER-OMP_SOURCES})
include_directories(${USER-OMP_SOURCES_DIR})
endif()

@@ -0,0 +1,79 @@
if(PKG_USER-PLUMED)
find_package(GSL REQUIRED)
set(PLUMED_MODE "static" CACHE STRING "Linkage mode for Plumed2 library")
set(PLUMED_MODE_VALUES static shared runtime)
set_property(CACHE PLUMED_MODE PROPERTY STRINGS ${PLUMED_MODE_VALUES})
validate_option(PLUMED_MODE PLUMED_MODE_VALUES)
string(TOUPPER ${PLUMED_MODE} PLUMED_MODE)
find_package(PkgConfig QUIET)
set(DOWNLOAD_PLUMED_DEFAULT ON)
if(PKG_CONFIG_FOUND)
pkg_check_modules(PLUMED QUIET plumed)
if(PLUMED_FOUND)
set(DOWNLOAD_PLUMED_DEFAULT OFF)
endif()
endif()
option(DOWNLOAD_PLUMED "Download Plumed package instead of using an already installed one" ${DOWNLOAD_PLUMED_DEFAULT})
if(DOWNLOAD_PLUMED)
if(CMAKE_GENERATOR STREQUAL "Ninja")
message(FATAL_ERROR "Cannot build downloaded Plumed library with Ninja build tool")
endif()
if(BUILD_MPI)
set(PLUMED_CONFIG_MPI "--enable-mpi")
set(PLUMED_CONFIG_CC ${CMAKE_MPI_C_COMPILER})
set(PLUMED_CONFIG_CXX ${CMAKE_MPI_CXX_COMPILER})
else()
set(PLUMED_CONFIG_MPI "--disable-mpi")
set(PLUMED_CONFIG_CC ${CMAKE_C_COMPILER})
set(PLUMED_CONFIG_CXX ${CMAKE_CXX_COMPILER})
endif()
if(BUILD_OMP)
set(PLUMED_CONFIG_OMP "--enable-openmp")
else()
set(PLUMED_CONFIG_OMP "--disable-openmp")
endif()
message(STATUS "PLUMED download requested - we will build our own")
include(ExternalProject)
ExternalProject_Add(plumed_build
URL https://github.com/plumed/plumed2/releases/download/v2.5.1/plumed-src-2.5.1.tgz
URL_MD5 c2a7b519e32197a120cdf47e0f194f81
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR>
${CONFIGURE_REQUEST_PIC}
--enable-modules=all
${PLUMED_CONFIG_MPI}
${PLUMED_CONFIG_OMP}
CXX=${PLUMED_CONFIG_CXX}
CC=${PLUMED_CONFIG_CC}
)
ExternalProject_get_property(plumed_build INSTALL_DIR)
set(PLUMED_INSTALL_DIR ${INSTALL_DIR})
list(APPEND LAMMPS_DEPS plumed_build)
if(PLUMED_MODE STREQUAL "STATIC")
add_definitions(-D__PLUMED_WRAPPER_CXX=1)
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.a ${GSL_LIBRARIES} ${LAPACK_LIBRARIES} ${CMAKE_DL_LIBS})
elseif(PLUMED_MODE STREQUAL "SHARED")
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumed.so ${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so ${CMAKE_DL_LIBS})
elseif(PLUMED_MODE STREQUAL "RUNTIME")
add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_INSTALL_DIR}/lib/libplumedKernel.so)
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_INSTALL_DIR}/lib/libplumedWrapper.a -rdynamic ${CMAKE_DL_LIBS})
endif()
set(PLUMED_INCLUDE_DIRS "${PLUMED_INSTALL_DIR}/include")
else()
find_package(PkgConfig REQUIRED)
pkg_check_modules(PLUMED REQUIRED plumed)
if(PLUMED_MODE STREQUAL "STATIC")
add_definitions(-D__PLUMED_WRAPPER_CXX=1)
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.static)
elseif(PLUMED_MODE STREQUAL "SHARED")
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.shared)
elseif(PLUMED_MODE STREQUAL "RUNTIME")
add_definitions(-D__PLUMED_HAS_DLOPEN=1 -D__PLUMED_DEFAULT_KERNEL=${PLUMED_LIBDIR}/libplumedKernel.so)
include(${PLUMED_LIBDIR}/plumed/src/lib/Plumed.cmake.runtime)
endif()
list(APPEND LAMMPS_LINK_LIBS ${PLUMED_LOAD})
endif()
include_directories(${PLUMED_INCLUDE_DIRS})
endif()
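As a configuration sketch (assuming the usual out-of-source build in a "build" directory next to the cmake folder), the options defined in this fragment could be selected at configure time like this:

cmake -D PKG_USER-PLUMED=yes -D PLUMED_MODE=shared -D DOWNLOAD_PLUMED=on ../cmake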

View File

@ -0,0 +1,9 @@
if(PKG_USER-QMMM)
enable_language(Fortran)
enable_language(C)
message(WARNING "Building QMMM with CMake is still experimental")
find_package(QE REQUIRED)
include_directories(${QE_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${QE_LIBRARIES})
endif()

View File

@ -0,0 +1,5 @@
if(PKG_USER-QUIP)
enable_language(Fortran)
find_package(QUIP REQUIRED)
list(APPEND LAMMPS_LINK_LIBS ${QUIP_LIBRARIES} ${LAPACK_LIBRARIES})
endif()

View File

@ -0,0 +1,62 @@
if(PKG_USER-SCAFACOS)
enable_language(Fortran)
enable_language(C)
find_package(GSL REQUIRED)
find_package(PkgConfig QUIET)
set(DOWNLOAD_SCAFACOS_DEFAULT ON)
if(PKG_CONFIG_FOUND)
pkg_check_modules(SCAFACOS QUIET scafacos)
if(SCAFACOS_FOUND)
set(DOWNLOAD_SCAFACOS_DEFAULT OFF)
endif()
endif()
option(DOWNLOAD_SCAFACOS "Download ScaFaCoS library instead of using an already installed one" ${DOWNLOAD_SCAFACOS_DEFAULT})
if(DOWNLOAD_SCAFACOS)
if(CMAKE_GENERATOR STREQUAL "Ninja")
message(FATAL_ERROR "Cannot build downloaded ScaFaCoS library with Ninja build tool")
endif()
message(STATUS "ScaFaCoS download requested - we will build our own")
include(ExternalProject)
ExternalProject_Add(scafacos_build
URL https://github.com/scafacos/scafacos/releases/download/v1.0.1/scafacos-1.0.1.tar.gz
URL_MD5 bd46d74e3296bd8a444d731bb10c1738
CONFIGURE_COMMAND <SOURCE_DIR>/configure --prefix=<INSTALL_DIR> --disable-doc
--enable-fcs-solvers=fmm,p2nfft,direct,ewald,p3m
--with-internal-fftw --with-internal-pfft
--with-internal-pnfft ${CONFIGURE_REQUEST_PIC}
FC=${CMAKE_MPI_Fortran_COMPILER}
CXX=${CMAKE_MPI_CXX_COMPILER}
CC=${CMAKE_MPI_C_COMPILER}
F77=
)
ExternalProject_get_property(scafacos_build INSTALL_DIR)
set(SCAFACOS_BUILD_DIR ${INSTALL_DIR})
set(SCAFACOS_INCLUDE_DIRS ${SCAFACOS_BUILD_DIR}/include)
list(APPEND LAMMPS_DEPS scafacos_build)
# list and order from pkg_config file of ScaFaCoS build
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_direct.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_ewald.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fmm.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p2nfft.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_p3m.a)
list(APPEND LAMMPS_LINK_LIBS ${GSL_LIBRARIES})
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_near.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_gridsort.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_resort.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_redist.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_common.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pnfft.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_pfft.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3_mpi.a)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_BUILD_DIR}/lib/libfcs_fftw3.a)
list(APPEND LAMMPS_LINK_LIBS ${MPI_Fortran_LIBRARIES})
list(APPEND LAMMPS_LINK_LIBS ${MPI_C_LIBRARIES})
else()
find_package(PkgConfig REQUIRED)
pkg_check_modules(SCAFACOS REQUIRED scafacos)
list(APPEND LAMMPS_LINK_LIBS ${SCAFACOS_LDFLAGS})
endif()
include_directories(${SCAFACOS_INCLUDE_DIRS})
endif()

View File

@ -0,0 +1,13 @@
# Fix rigid/meso requires RIGID to be installed
if(PKG_USER-SDPD)
set(USER-SDPD_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-SDPD)
get_property(hlist GLOBAL PROPERTY FIX)
if(NOT PKG_RIGID)
list(REMOVE_ITEM hlist ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.h)
list(REMOVE_ITEM LIB_SOURCES ${USER-SDPD_SOURCES_DIR}/fix_rigid_meso.cpp)
endif()
set_property(GLOBAL PROPERTY FIX "${hlist}")
include_directories(${USER-SDPD_SOURCES_DIR})
endif()

View File

@ -0,0 +1,28 @@
if(PKG_USER-SMD)
find_package(Eigen3 NO_MODULE)
if(EIGEN3_FOUND)
set(DOWNLOAD_EIGEN3_DEFAULT OFF)
else()
set(DOWNLOAD_EIGEN3_DEFAULT ON)
endif()
option(DOWNLOAD_EIGEN3 "Download Eigen3 instead of using an already installed one" ${DOWNLOAD_EIGEN3_DEFAULT})
if(DOWNLOAD_EIGEN3)
message(STATUS "Eigen3 download requested - we will build our own")
include(ExternalProject)
ExternalProject_Add(Eigen3_build
URL http://bitbucket.org/eigen/eigen/get/3.3.7.tar.gz
URL_MD5 f2a417d083fe8ca4b8ed2bc613d20f07
CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND ""
)
ExternalProject_get_property(Eigen3_build SOURCE_DIR)
set(EIGEN3_INCLUDE_DIR ${SOURCE_DIR})
list(APPEND LAMMPS_DEPS Eigen3_build)
else()
find_package(Eigen3 NO_MODULE)
mark_as_advanced(Eigen3_DIR)
if(NOT EIGEN3_FOUND)
message(FATAL_ERROR "Eigen3 not found, help CMake to find it by setting EIGEN3_INCLUDE_DIR, or set DOWNLOAD_EIGEN3=ON to download it")
endif()
endif()
include_directories(${EIGEN3_INCLUDE_DIR})
endif()

View File

@ -0,0 +1,6 @@
if(PKG_USER-VTK)
find_package(VTK REQUIRED NO_MODULE)
include(${VTK_USE_FILE})
add_definitions(-DLAMMPS_VTK)
list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES})
endif()

View File

@ -0,0 +1,41 @@
if(PKG_VORONOI)
find_package(VORO)
if(VORO_FOUND)
set(DOWNLOAD_VORO_DEFAULT OFF)
else()
set(DOWNLOAD_VORO_DEFAULT ON)
endif()
option(DOWNLOAD_VORO "Download and compile the Voro++ library instead of using an already installed one" ${DOWNLOAD_VORO_DEFAULT})
if(DOWNLOAD_VORO)
if(CMAKE_GENERATOR STREQUAL "Ninja")
message(FATAL_ERROR "Cannot build downloaded Voro++ library with Ninja build tool")
endif()
message(STATUS "Voro++ download requested - we will build our own")
include(ExternalProject)
if(BUILD_SHARED_LIBS)
set(VORO_BUILD_CFLAGS "${CMAKE_SHARED_LIBRARY_CXX_FLAGS} ${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}")
else()
set(VORO_BUILD_CFLAGS "${CMAKE_CXX_FLAGS} ${CMAKE_CXX_FLAGS_${BTYPE}}")
endif()
string(APPEND VORO_BUILD_CFLAGS ${CMAKE_CXX_FLAGS})
set(VORO_BUILD_OPTIONS CXX=${CMAKE_CXX_COMPILER} CFLAGS=${VORO_BUILD_CFLAGS})
ExternalProject_Add(voro_build
URL https://download.lammps.org/thirdparty/voro++-0.4.6.tar.gz
URL_MD5 2338b824c3b7b25590e18e8df5d68af9
CONFIGURE_COMMAND "" BUILD_COMMAND make ${VORO_BUILD_OPTIONS} BUILD_IN_SOURCE 1 INSTALL_COMMAND ""
)
ExternalProject_get_property(voro_build SOURCE_DIR)
set(VORO_LIBRARIES ${SOURCE_DIR}/src/libvoro++.a)
set(VORO_INCLUDE_DIRS ${SOURCE_DIR}/src)
list(APPEND LAMMPS_DEPS voro_build)
else()
find_package(VORO)
if(NOT VORO_FOUND)
message(FATAL_ERROR "Voro++ library not found. Help CMake to find it by setting VORO_LIBRARY and VORO_INCLUDE_DIR, or set DOWNLOAD_VORO=ON to download it")
endif()
endif()
include_directories(${VORO_INCLUDE_DIRS})
list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES})
endif()

View File

@ -0,0 +1,52 @@
###############################################################################
# Testing
###############################################################################
option(ENABLE_TESTING "Enable testing" OFF)
if(ENABLE_TESTING AND BUILD_EXE)
enable_testing()
option(LAMMPS_TESTING_SOURCE_DIR "Location of lammps-testing source directory" "")
option(LAMMPS_TESTING_GIT_TAG "Git tag of lammps-testing" "master")
mark_as_advanced(LAMMPS_TESTING_SOURCE_DIR LAMMPS_TESTING_GIT_TAG)
if (CMAKE_VERSION VERSION_GREATER "3.10.3" AND NOT LAMMPS_TESTING_SOURCE_DIR)
include(FetchContent)
FetchContent_Declare(lammps-testing
GIT_REPOSITORY https://github.com/lammps/lammps-testing.git
GIT_TAG ${LAMMPS_TESTING_GIT_TAG}
)
FetchContent_GetProperties(lammps-testing)
if(NOT lammps-testing_POPULATED)
message(STATUS "Downloading tests...")
FetchContent_Populate(lammps-testing)
endif()
set(LAMMPS_TESTING_SOURCE_DIR ${lammps-testing_SOURCE_DIR})
elseif(NOT LAMMPS_TESTING_SOURCE_DIR)
message(WARNING "Full test-suite requires CMake >= 3.11 or copy of\n"
"https://github.com/lammps/lammps-testing in LAMMPS_TESTING_SOURCE_DIR")
endif()
add_test(ShowHelp ${CMAKE_BINARY_DIR}/${LAMMPS_BINARY} -help)
if(EXISTS ${LAMMPS_TESTING_SOURCE_DIR})
message(STATUS "Running test discovery...")
file(GLOB_RECURSE TEST_SCRIPTS ${LAMMPS_TESTING_SOURCE_DIR}/tests/core/*/in.*)
foreach(script_path ${TEST_SCRIPTS})
get_filename_component(TEST_NAME ${script_path} EXT)
get_filename_component(SCRIPT_NAME ${script_path} NAME)
get_filename_component(PARENT_DIR ${script_path} DIRECTORY)
string(SUBSTRING ${TEST_NAME} 1 -1 TEST_NAME)
string(REPLACE "-" "_" TEST_NAME ${TEST_NAME})
string(REPLACE "+" "_" TEST_NAME ${TEST_NAME})
set(TEST_NAME "test_core_${TEST_NAME}_serial")
add_test(${TEST_NAME} ${CMAKE_BINARY_DIR}/${LAMMPS_BINARY} -in ${SCRIPT_NAME})
set_tests_properties(${TEST_NAME} PROPERTIES WORKING_DIRECTORY ${PARENT_DIR})
endforeach()
list(LENGTH TEST_SCRIPTS NUM_TESTS)
message(STATUS "Found ${NUM_TESTS} tests.")
endif()
endif()
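A minimal sketch of exercising this test setup with the standard CMake/CTest workflow (the option name is taken from the fragment above; BUILD_EXE is assumed to be on, its default):

cmake -D ENABLE_TESTING=on ../cmake
make
ctest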

View File

@ -1,4 +1,4 @@
.TH LAMMPS "5 June 2019" "2019-06-05"
.TH LAMMPS "18 June 2019" "2019-06-18"
.SH NAME
.B LAMMPS
\- Molecular Dynamics Simulator.

View File

@ -32,10 +32,18 @@ cmake \[options ...\] ../cmake # configuration with (command-line) cmake
make # compilation :pre
The cmake command will detect available features, enable selected
packages and options, and will generate the build environment. The make
command will then compile and link LAMMPS, producing (by default) an
executable called "lmp" and a library called "liblammps.a" in the
"build" folder.
packages and options, and will generate the build environment. By default
this build environment will be created for "Unix Makefiles" on most
platforms and particularly on Linux. However, alternate build tools
(e.g. Ninja) and support files for Integrated Development Environments
(IDEs) like Eclipse, CodeBlocks, or Kate can be generated, too. This is
selected via the "-G" command line flag. For the rest of the documentation
we will assume that the build environment is generated for makefiles
and thus the make command will be used to compile and link LAMMPS as
indicated above, producing (by default) an executable called "lmp" and
a library called "liblammps.a" in the "build" folder. When generating
a build environment for the "Ninja" build tool, the build command would
be "ninja" instead of "make".
If your machine has multiple CPU cores (most do these days), using a
command like "make -jN" (with N being the number of available local

View File

@ -28,8 +28,12 @@ OPT.
"none"_bond_none.html,
"zero"_bond_zero.html,
"hybrid"_bond_hybrid.html :tb(c=3,ea=c)
"hybrid"_bond_hybrid.html,
,
,
,
,
,
"class2 (ko)"_bond_class2.html,
"fene (iko)"_bond_fene.html,
"fene/expand (o)"_bond_fene_expand.html,
@ -56,8 +60,12 @@ OPT.
"none"_angle_none.html,
"zero"_angle_zero.html,
"hybrid"_angle_hybrid.html :tb(c=3,ea=c)
"hybrid"_angle_hybrid.html,
,
,
,
,
,
"charmm (iko)"_angle_charmm.html,
"class2 (ko)"_angle_class2.html,
"class2/p6"_angle_class2.html,
@ -89,8 +97,12 @@ OPT.
"none"_dihedral_none.html,
"zero"_dihedral_zero.html,
"hybrid"_dihedral_hybrid.html :tb(c=3,ea=c)
"hybrid"_dihedral_hybrid.html,
,
,
,
,
,
"charmm (iko)"_dihedral_charmm.html,
"charmmfsw"_dihedral_charmm.html,
"class2 (ko)"_dihedral_class2.html,
@ -117,8 +129,12 @@ OPT.
"none"_improper_none.html,
"zero"_improper_zero.html,
"hybrid"_improper_hybrid.html :tb(c=3,ea=c)
"hybrid"_improper_hybrid.html,
,
,
,
,
,
"class2 (ko)"_improper_class2.html,
"cossq (o)"_improper_cossq.html,
"cvff (io)"_improper_cvff.html,

View File

@ -27,8 +27,11 @@ OPT.
"none"_pair_none.html,
"zero"_pair_zero.html,
"hybrid (k)"_pair_hybrid.html,
"hybrid/overlay (k)"_pair_hybrid.html :tb(c=4,ea=c)
"hybrid/overlay (k)"_pair_hybrid.html,
,
,
,
,
"adp (o)"_pair_adp.html,
"agni (o)"_pair_agni.html,
"airebo (io)"_pair_airebo.html,

Binary file not shown (new image, 38 KiB).

View File

@ -0,0 +1,42 @@
\documentclass[preview]{standalone}
\usepackage{varwidth}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amssymb,graphics,bm,setspace}
\begin{document}
\begin{varwidth}{50in}
\begin{equation}
\mathcal{H}_{\rm long}=
-\frac{\mu_{0} \left( \mu_B\right)^2}{4\pi}
\sum_{i,j,i\neq j}^{N}
\frac{g_i g_j}{r_{ij}^3}
\Big(3
\left(\bm{e}_{ij}\cdot \bm{s}_{i}\right)
\left(\bm{e}_{ij}\cdot \bm{s}_{j}\right)
-\bm{s}_i\cdot\bm{s}_j \Big)
\nonumber
\end{equation}
\begin{equation}
\bm{\omega}_i =
\frac{\mu_0 (\mu_B)^2}{4\pi\hbar}\sum_{j}
\frac{g_i g_j}{r_{ij}^3}
\, \Big(
3\,(\bm{e}_{ij}\cdot\bm{s}_{j})\bm{e}_{ij}
-\bm{s}_{j} \Big) \nonumber
\end{equation}
\begin{equation}
\bm{F}_i =
\frac{3\, \mu_0 (\mu_B)^2}{4\pi} \sum_j
\frac{g_i g_j}{r_{ij}^4}
\Big[\big( (\bm{s}_i\cdot\bm{s}_j)
-5(\bm{e}_{ij}\cdot\bm{s}_i)
(\bm{e}_{ij}\cdot\bm{s}_j)\big) \bm{e}_{ij}+
\big(
(\bm{e}_{ij}\cdot\bm{s}_i)\bm{s}_j+
(\bm{e}_{ij}\cdot\bm{s}_j)\bm{s}_i
\big)
\Big]
\nonumber
\end{equation}
\end{varwidth}
\end{document}

Binary file not shown (new image, 12 KiB).

View File

@ -0,0 +1,20 @@
\documentclass[preview]{standalone}
\usepackage{varwidth}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amssymb,graphics,bm,setspace}
\begin{document}
\begin{varwidth}{50in}
\begin{equation}
\mathcal{H}_{\rm long}=
-\frac{\mu_{0} \left( \mu_B\right)^2}{4\pi}
\sum_{i,j,i\neq j}^{N}
\frac{g_i g_j}{r_{ij}^3}
\Big(3
\left(\bm{e}_{ij}\cdot \bm{s}_{i}\right)
\left(\bm{e}_{ij}\cdot \bm{s}_{j}\right)
-\bm{s}_i\cdot\bm{s}_j \Big)
\nonumber
\end{equation}
\end{varwidth}
\end{document}

Binary file not shown (new image, 16 KiB).

View File

@ -0,0 +1,23 @@
\documentclass[preview]{standalone}
\usepackage{varwidth}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amssymb,graphics,bm,setspace}
\begin{document}
\begin{varwidth}{50in}
\begin{equation}
\bm{F}_i =
\frac{\mu_0 (\mu_B)^2}{4\pi} \sum_j
\frac{g_i g_j}{r_{ij}^4}
\Big[\big( (\bm{s}_i\cdot\bm{s}_j)
-5(\bm{e}_{ij}\cdot\bm{s}_i)
(\bm{e}_{ij}\cdot\bm{s}_j)\big) \bm{e}_{ij}+
\big(
(\bm{e}_{ij}\cdot\bm{s}_i)\bm{s}_j+
(\bm{e}_{ij}\cdot\bm{s}_j)\bm{s}_i
\big)
\Big]
\nonumber
\end{equation}
\end{varwidth}
\end{document}

Binary file not shown (new image, 9.2 KiB).

View File

@ -0,0 +1,17 @@
\documentclass[preview]{standalone}
\usepackage{varwidth}
\usepackage[utf8x]{inputenc}
\usepackage{amsmath,amssymb,graphics,bm,setspace}
\begin{document}
\begin{varwidth}{50in}
\begin{equation}
\bm{\omega}_i =
\frac{\mu_0 (\mu_B)^2}{4\pi\hbar}\sum_{j}
\frac{g_i g_j}{r_{ij}^3}
\, \Big(
3\,(\bm{e}_{ij}\cdot\bm{s}_{j})\bm{e}_{ij}
-\bm{s}_{j} \Big) \nonumber
\end{equation}
\end{varwidth}
\end{document}

View File

@ -636,12 +636,12 @@ Please ensure reaction map files are properly formatted. :dd
{Bond/react: Atom affected by reaction too close to template edge} :dt
This means an atom which changes type during the reaction is too close
to an 'edge' atom defined in the superimpose file. This could cause
incorrect assignment of bonds, angle, etc. Generally, this means you
must include more atoms in your templates, such that there are at
least two atoms between each atom involved in the reaction and an edge
atom. :dd
This means an atom which changes type or connectivity during the
reaction is too close to an 'edge' atom defined in the superimpose
file. This could cause incorrect assignment of bonds, angles, etc.
Generally, this means you must include more atoms in your templates,
such that there are at least two atoms between each atom involved in
the reaction and an edge atom. :dd
{Bond/react: Fix bond/react needs ghost atoms from farther away} :dt
@ -2202,10 +2202,6 @@ Self-explanatory. :dd
This is a current restriction in LAMMPS. :dd
{Cannot use pair hybrid with GPU neighbor list builds} :dt
Neighbor list builds must be done on the CPU for this pair style. :dd
{Cannot use pair tail corrections with 2d simulations} :dt
The correction factors are only currently defined for 3d systems. :dd
@ -5523,10 +5519,6 @@ Self-explanatory. :dd
For this pair style, you cannot run part of the force calculation on
the host. See the package command. :dd
{GPU split param must be positive for hybrid pair styles} :dt
See the package gpu command. :dd
{GPUs are requested but Kokkos has not been compiled for CUDA} :dt
Re-compile Kokkos with CUDA support to use GPUs. :dd
@ -7097,6 +7089,18 @@ Self-explanatory. :dd
One or more GPUs must be used when Kokkos is compiled for CUDA. :dd
{Kspace_modify mesh parameter must be all zero or all positive} :dt
Valid kspace mesh parameters are >0. The code will try to auto-detect
suitable values when all three mesh sizes are set to zero (the default). :dd
{Kspace_modify mesh/disp parameter must be all zero or all positive} :dt
Valid kspace mesh/disp parameters are >0. The code will try to auto-detect
suitable values when all three mesh sizes are set to zero [and]
the required accuracy via {force/disp/real} as well as
{force/disp/kspace} is set. :dd
{Kspace style does not support compute group/group} :dt
Self-explanatory. :dd

View File

@ -82,10 +82,14 @@ bond/angle/dihedral. LAMMPS computes this by taking the maximum bond
length, multiplying by the number of bonds in the interaction (e.g. 3
for a dihedral) and adding a small amount of stretch. :dd
{Bond/react: An atom in 'react #%d' changes bond connectivity but not atom type} :dt
{Bond/react: Atom affected by reaction too close to template edge} :dt
You may want to double-check that all atom types are properly assigned
in the post-reaction template. :dd
This means an atom which changes type or connectivity during the
reaction is too close to an 'edge' atom defined in the superimpose
file. This could cause incorrect assignment of bonds, angles, etc.
Generally, this means you must include more atoms in your templates,
such that there are at least two atoms between each atom involved in
the reaction and an edge atom. :dd
{Both groups in compute group/group have a net charge; the Kspace boundary correction to energy will be non-zero} :dt

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="5 Jun 2019 version">
<META NAME="docnumber" CONTENT="18 Jun 2019 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
:line
LAMMPS Documentation :c,h1
5 Jun 2019 version :c,h2
18 Jun 2019 version :c,h2
"What is a LAMMPS version?"_Manual_version.html

View File

@ -104,7 +104,7 @@ code (with a performance penalty due to having data transfers between
host and GPU). :ulb,l
The GPU package requires neighbor lists to be built on the CPU when using
exclusion lists, hybrid pair styles, or a triclinic simulation box. :l
exclusion lists, or a triclinic simulation box. :l
The GPU package can be compiled for CUDA or OpenCL and thus supports
both Nvidia and AMD GPUs well. On Nvidia hardware, using CUDA is typically

View File

@ -46,7 +46,7 @@ software version 7.5 or later must be installed on your system. See
the discussion for the "GPU package"_Speed_gpu.html for details of how
to check and do this.
NOTE: Kokkos with CUDA currently implicitly assumes, that the MPI
NOTE: Kokkos with CUDA currently implicitly assumes that the MPI
library is CUDA-aware and has support for GPU-direct. This is not
always the case, especially when using pre-compiled MPI libraries
provided by a Linux distribution. This is not a problem when using
@ -207,19 +207,21 @@ supports.
[Running on GPUs:]
Use the "-k" "command-line switch"_Run_options.html to
specify the number of GPUs per node. Typically the -np setting of the
mpirun command should set the number of MPI tasks/node to be equal to
the number of physical GPUs on the node. You can assign multiple MPI
tasks to the same GPU with the KOKKOS package, but this is usually
only faster if significant portions of the input script have not
been ported to use Kokkos. Using CUDA MPS is recommended in this
scenario. Using a CUDA-aware MPI library with support for GPU-direct
is highly recommended. GPU-direct use can be avoided by using
"-pk kokkos gpu/direct no"_package.html.
As above for multi-core CPUs (and no GPU), if N is the number of
physical cores/node, then the number of MPI tasks/node should not
exceed N.
Use the "-k" "command-line switch"_Run_options.html to specify the
number of GPUs per node. Typically the -np setting of the mpirun command
should set the number of MPI tasks/node to be equal to the number of
physical GPUs on the node. You can assign multiple MPI tasks to the same
GPU with the KOKKOS package, but this is usually only faster if some
portions of the input script have not been ported to use Kokkos. In this
case, also packing/unpacking communication buffers on the host may give
speedup (see the KOKKOS "package"_package.html command). Using CUDA MPS
is recommended in this scenario.
Using a CUDA-aware MPI library with
support for GPU-direct is highly recommended. GPU-direct use can be
avoided by using "-pk kokkos gpu/direct no"_package.html. As above for
multi-core CPUs (and no GPU), if N is the number of physical cores/node,
then the number of MPI tasks/node should not exceed N.
-k on g Ng :pre

View File

@ -24,12 +24,7 @@ twojmax = band limit for bispectrum components (non-negative integer) :l
R_1, R_2,... = list of cutoff radii, one for each type (distance units) :l
w_1, w_2,... = list of neighbor weights, one for each type :l
zero or more keyword/value pairs may be appended :l
keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l
{diagonal} value = {0} or {1} or {2} or {3}
{0} = all j1, j2, j <= twojmax, j2 <= j1
{1} = subset satisfying j1 == j2
{2} = subset satisfying j1 == j2 == j3
{3} = subset satisfying j2 <= j1 <= j
keyword = {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag} :l
{rmin0} value = parameter in distance to angle conversion (distance units)
{switchflag} value = {0} or {1}
{0} = do not use switching function
@ -44,7 +39,7 @@ keyword = {diagonal} or {rmin0} or {switchflag} or {bzeroflag} or {quadraticflag
[Examples:]
compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 diagonal 3 rmin0 0.0
compute b all sna/atom 1.4 0.99363 6 2.0 2.4 0.75 1.0 rmin0 0.0
compute db all sna/atom 1.4 0.95 6 2.0 1.0
compute vb all sna/atom 1.4 0.95 6 2.0 1.0 :pre
@ -151,7 +146,7 @@ The argument {rfac0} and the optional keyword {rmin0} define the
linear mapping from radial distance to polar angle {theta0} on the
3-sphere.
The argument {twojmax} and the keyword {diagonal} define which
The argument {twojmax} defines which
bispectrum components are generated. See section below on output for a
detailed explanation of the number of bispectrum components and the
order in which they are listed.
@ -192,23 +187,18 @@ command that includes all pairs in the neighbor list.
Compute {sna/atom} calculates a per-atom array, each column
corresponding to a particular bispectrum component. The total number
of columns and the identity of the bispectrum component contained in
each column depend on the values of {twojmax} and {diagonal}, as
each column depend on the value of {twojmax}, as
described by the following piece of python code:
for j1 in range(0,twojmax+1):
if(diagonal==2):
print j1/2.,j1/2.,j1/2.
elif(diagonal==1):
for j in range(0,min(twojmax,2*j1)+1,2):
print j1/2.,j1/2.,j/2.
elif(diagonal==0):
for j2 in range(0,j1+1):
for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
print j1/2.,j2/2.,j/2.
elif(diagonal==3):
for j2 in range(0,j1+1):
for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
if (j>=j1): print j1/2.,j2/2.,j/2. :pre
for j2 in range(0,j1+1):
for j in range(j1-j2,min(twojmax,j1+j2)+1,2):
if (j>=j1): print j1/2.,j2/2.,j/2. :pre
NOTE: the {diagonal} keyword allowing other possible choices
for the number of bispectrum components was removed in 2019,
since all potentials use the value of 3, corresponding to the
above set of bispectrum components.
Compute {snad/atom} evaluates a per-atom array. The columns are
arranged into {ntypes} blocks, listed in order of atom type {I}. Each
@ -259,7 +249,7 @@ package"_Build_package.html doc page for more info.
[Default:]
The optional keyword defaults are {diagonal} = 0, {rmin0} = 0,
The optional keyword defaults are {rmin0} = 0,
{switchflag} = 1, {bzeroflag} = 1, {quadraticflag} = 0,
:line

View File

@ -88,6 +88,8 @@ potentials only include the pair potential portion of the EAM
interaction when used by this compute, not the embedding term. Also
bonded or Kspace interactions do not contribute to this compute.
The computes in this package are not compatible with dynamic groups.
[Related commands:]
{compute group/group}_compute_group_group.html, {compute

View File

@ -392,7 +392,8 @@ boundaries can be set using "boundary"_boundary.html (the slab
approximation in not needed). The {slab} keyword is not currently
supported by Ewald or PPPM when using a triclinic simulation cell. The
slab correction has also been extended to point dipole interactions
"(Klapp)"_#Klapp in "kspace_style"_kspace_style.html {ewald/disp}.
"(Klapp)"_#Klapp in "kspace_style"_kspace_style.html {ewald/disp},
{ewald/dipole}, and {pppm/dipole}.
NOTE: If you wish to apply an electric field in the Z-direction, in
conjunction with the {slab} keyword, you should do it by adding

View File

@ -20,6 +20,10 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
accuracy = desired relative error in forces
{ewald/omp} value = accuracy
accuracy = desired relative error in forces
{ewald/dipole} value = accuracy
accuracy = desired relative error in forces
{ewald/dipole/spin} value = accuracy
accuracy = desired relative error in forces
{pppm} value = accuracy
accuracy = desired relative error in forces
{pppm/cg} values = accuracy (smallq)
@ -47,6 +51,10 @@ style = {none} or {ewald} or {ewald/disp} or {ewald/omp} or {pppm} or {pppm/cg}
accuracy = desired relative error in forces
{pppm/stagger} value = accuracy
accuracy = desired relative error in forces
{pppm/dipole} value = accuracy
accuracy = desired relative error in forces
{pppm/dipole/spin} value = accuracy
accuracy = desired relative error in forces
{msm} value = accuracy
accuracy = desired relative error in forces
{msm/cg} value = accuracy (smallq)
@ -105,9 +113,15 @@ The {ewald/disp} style adds a long-range dispersion sum option for
but in a more efficient manner than the {ewald} style. The 1/r^6
capability means that Lennard-Jones or Buckingham potentials can be
used without a cutoff, i.e. they become full long-range potentials.
The {ewald/disp} style can also be used with point-dipoles
"(Toukmaji)"_#Toukmaji and is currently the only kspace solver in
LAMMPS with this capability.
The {ewald/disp} style can also be used with point-dipoles, see
"(Toukmaji)"_#Toukmaji.
The {ewald/dipole} style adds long-range standard Ewald summations
for dipole-dipole interactions, see "(Toukmaji)"_#Toukmaji.
The {ewald/dipole/spin} style adds long-range standard Ewald
summations for magnetic dipole-dipole interactions between
magnetic spins.
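A minimal usage sketch (the accuracy value here is arbitrary, not a recommendation):

kspace_style ewald/dipole 1.0e-4 :pre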
:line
@ -128,6 +142,12 @@ The optional {smallq} argument defines the cutoff for the absolute
charge value which determines whether a particle is considered charged
or not. Its default value is 1.0e-5.
The {pppm/dipole} style invokes a particle-particle particle-mesh solver
for dipole-dipole interactions, following the method of "(Cerda)"_#Cerda2008.
The {pppm/dipole/spin} style invokes a particle-particle particle-mesh solver
for magnetic dipole-dipole interactions between magnetic spins.
The {pppm/tip4p} style is identical to the {pppm} style except that it
adds a charge at the massless 4th site in each TIP4P water molecule.
It should be used with "pair styles"_pair_style.html with a
@ -317,7 +337,10 @@ using ideas from chapter 3 of "(Hardy)"_#Hardy2006, with equation 3.197
of particular note. When using {msm} with non-periodic boundary
conditions, it is expected that the error estimation will be too
pessimistic. RMS force errors for dipoles when using {ewald/disp}
are estimated using equations 33 and 46 of "(Wang)"_#Wang.
or {ewald/dipole} are estimated using equations 33 and 46 of
"(Wang)"_#Wang. The RMS force errors for {pppm/dipole} are estimated
using the equations in "(Cerda)"_#Cerda2008.
See the "kspace_modify"_kspace_modify.html command for additional
options of the K-space solvers that can be set, including a {force}
@ -464,6 +487,9 @@ Illinois at Urbana-Champaign, (2006).
:link(Sutmann2013)
[(Sutmann)] Sutmann, Arnold, Fahrenberger, et. al., Physical review / E 88(6), 063308 (2013)
:link(Cerda2008)
[(Cerda)] Cerda, Ballenegger, Lenz, Holm, J Chem Phys 129, 234104 (2008)
:link(Who2012)
[(Who)] Who, Author2, Author3, J of Long Range Solvers, 35, 164-177
(2012).

View File

@ -64,13 +64,16 @@ args = arguments specific to the style :l
{no_affinity} values = none
{kokkos} args = keyword value ...
zero or more keyword/value pairs may be appended
keywords = {neigh} or {neigh/qeq} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct}
keywords = {neigh} or {neigh/qeq} or {neigh/thread} or {newton} or {binsize} or {comm} or {comm/exchange} or {comm/forward} or {comm/reverse} or {gpu/direct}
{neigh} value = {full} or {half}
full = full neighbor list
half = half neighbor list built in thread-safe manner
{neigh/qeq} value = {full} or {half}
full = full neighbor list
half = half neighbor list built in thread-safe manner
{neigh/thread} value = {off} or {on}
off = thread only over atoms
on = thread over both atoms and neighbors
{newton} = {off} or {on}
off = set Newton pairwise and bonded flags off
on = set Newton pairwise and bonded flags on
@ -173,12 +176,10 @@ computation will be built. If {neigh} is {yes}, which is the default,
neighbor list building is performed on the GPU. If {neigh} is {no},
neighbor list building is performed on the CPU. GPU neighbor list
building currently cannot be used with a triclinic box. GPU neighbor
list calculation currently cannot be used with
"hybrid"_pair_hybrid.html pair styles. GPU neighbor lists are not
compatible with commands that are not GPU-enabled. When a non-GPU
enabled command requires a neighbor list, it will also be built on the
CPU. In these cases, it will typically be more efficient to only use
CPU neighbor list builds.
lists are not compatible with commands that are not GPU-enabled. When
a non-GPU enabled command requires a neighbor list, it will also be
built on the CPU. In these cases, it will typically be more efficient
to only use CPU neighbor list builds.
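For instance, CPU neighbor list builds can be requested explicitly like this (a sketch assuming one GPU per node):

package gpu 1 neigh no :pre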
The {newton} keyword sets the Newton flags for pairwise (not bonded)
interactions to {off} or {on}, the same as the "newton"_newton.html
@ -442,7 +443,19 @@ running on CPUs, a {half} neighbor list is the default because it is
often faster, just as it is for non-accelerated pair styles. Similarly,
the {neigh/qeq} keyword determines how neighbor lists are built for "fix
qeq/reax/kk"_fix_qeq_reax.html. If not explicitly set, the value of
{neigh/qeq} will match {neigh}.
If the {neigh/thread} keyword is set to {off}, then the KOKKOS package
threads only over atoms. However, for small systems, this may not expose
enough parallelism to keep a GPU busy. When this keyword is set to {on},
the KOKKOS package threads over both atoms and neighbors of atoms. When
using {neigh/thread} {on}, a full neighbor list must also be used. Using
{neigh/thread} {on} may be slower for large systems, so this option
is turned on by default only when there are 16K atoms or fewer owned by
an MPI rank and when using a full neighbor list. Not all KOKKOS-enabled
potentials support this keyword yet; those that do not will thread only over atoms. Many
simple pair-wise potentials such as Lennard-Jones do support threading
over both atoms and neighbors.
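As an illustration, threading over both atoms and neighbors with a full neighbor list could be requested like this (a sketch):

package kokkos neigh full neigh/thread on :pre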
The {newton} keyword sets the Newton flags for pairwise and bonded
interactions to {off} or {on}, the same as the "newton"_newton.html
@ -475,10 +488,10 @@ are rebuilt. The data is only for atoms that migrate to new processors.
"Forward" communication happens every timestep. "Reverse" communication
happens every timestep if the {newton} option is on. The data is for
atom coordinates and any other atom properties that need to be updated
for ghost atoms owned by each processor.
The {comm} keyword is simply a short-cut to set the same value for
the {comm/exchange}, {comm/forward}, and {comm/reverse} keywords.
The value options for all 3 keywords are {no} or {host} or {device}. A
value of {no} means to use the standard non-KOKKOS method of
@ -486,26 +499,26 @@ packing/unpacking data for the communication. A value of {host} means to
use the host, typically a multi-core CPU, and perform the
packing/unpacking in parallel with threads. A value of {device} means to
use the device, typically a GPU, to perform the packing/unpacking
operation.
The optimal choice for these keywords depends on the input script and
the hardware used. The {no} value is useful for verifying that the
Kokkos-based {host} and {device} values are working correctly. It is the
default when running on CPUs since it is usually the fastest.
When running on CPUs or Xeon Phi, the {host} and {device} values work
identically. When using GPUs, the {device} value is the default since it
will typically be optimal if all of your styles used in your input
script are supported by the KOKKOS package. In this case data can stay
on the GPU for many timesteps without being moved between the host and
GPU, if you use the {device} value. This requires that your MPI is able
to access GPU memory directly. Currently that is true for OpenMPI 1.8
(or later versions), Mvapich2 1.9 (or later), and CrayMPI. If your
script uses styles (e.g. fixes) which are not yet supported by the
KOKKOS package, then data has to be move between the host and device
anyway, so it is typically faster to let the host handle communication,
by using the {host} value. Using {host} instead of {no} will enable use
of multiple threads to pack/unpack communicated data.
GPU, if you use the {device} value. If your script uses styles (e.g.
fixes) which are not yet supported by the KOKKOS package, then data has
to be moved between the host and device anyway, so it is typically faster
to let the host handle communication, by using the {host} value. Using
{host} instead of {no} will enable use of multiple threads to
pack/unpack communicated data. When running small systems on a GPU,
performing the exchange pack/unpack on the host CPU can give speedup
since it reduces the number of CUDA kernel launches.
The {gpu/direct} keyword chooses whether GPU-direct will be used. When
this keyword is set to {on}, buffers in GPU memory are passed directly
@ -518,7 +531,8 @@ the {gpu/direct} keyword is automatically set to {off} by default. When
the {gpu/direct} keyword is set to {off} while any of the {comm}
keywords are set to {device}, the value for these {comm} keywords will
be automatically changed to {host}. This setting has no effect if not
running on GPUs.
running on GPUs. GPU-direct is available for OpenMPI 1.8 (or later
versions), Mvapich2 1.9 (or later), and CrayMPI.
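For example, host-side packing with GPU-direct disabled could be selected like this (a sketch):

package kokkos comm host gpu/direct off :pre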
:line
@ -630,11 +644,12 @@ neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
value, comm = device, gpu/direct = on. When LAMMPS can safely detect
that GPU-direct is not available, the default value of gpu/direct
becomes "off". For CPUs or Xeon Phis, the option defaults are neigh =
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. These
settings are made automatically by the required "-k on" "command-line
switch"_Run_options.html. You can change them by using the package
kokkos command in your input script or via the "-pk kokkos command-line
switch"_Run_options.html.
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The
option neigh/thread = on when there are 16K atoms or fewer on an MPI
rank, otherwise it is "off". These settings are made automatically by
the required "-k on" "command-line switch"_Run_options.html. You can
change them by using the package kokkos command in your input script or
via the "-pk kokkos command-line switch"_Run_options.html.
For the OMP package, the default is Nthreads = 0 and the option
defaults are neigh = yes. These settings are made automatically if

View File

@ -155,9 +155,12 @@ All of the lj/class2 pair styles write their information to "binary
restart files"_restart.html, so pair_style and pair_coeff commands do
not need to be specified in an input script that reads a restart file.
All of the lj/class2 pair styles can only be used via the {pair}
keyword of the "run_style respa"_run_style.html command. They do not
support the {inner}, {middle}, {outer} keywords.
Only the {lj/class2} pair style supports the use of the
{inner}, {middle}, and {outer} keywords of the "run_style
respa"_run_style.html command, meaning the pairwise forces can be
partitioned by distance at different levels of the rRESPA hierarchy.
The other styles only support the {pair} keyword of run_style respa.
See the "run_style"_run_style.html command for details.
[Restrictions:]

View File

@ -38,7 +38,7 @@ where {B_k^i} is the {k}-th bispectrum component of atom {i},
and {beta_k^alpha_i} is the corresponding linear coefficient
that depends on {alpha_i}, the SNAP element of atom {i}. The
number of bispectrum components used and their definitions
depend on the values of {twojmax} and {diagonalstyle}
depend on the value of {twojmax}
defined in the SNAP parameter file described below.
The bispectrum calculation is described in more detail
in "compute sna/atom"_compute_sna_atom.html.
@ -125,14 +125,13 @@ This line is followed by {ncoeff} coefficients, one per line.
The SNAP parameter file can contain blank and comment lines (start
with #) anywhere. Each non-blank non-comment line must contain one
keyword/value pair. The required keywords are {rcutfac} and
{twojmax}. Optional keywords are {rfac0}, {rmin0}, {diagonalstyle},
{twojmax}. Optional keywords are {rfac0}, {rmin0},
{switchflag}, and {bzeroflag}.
The default values for these keywords are
{rfac0} = 0.99363
{rmin0} = 0.0
{diagonalstyle} = 3
{switchflag} = 0
{bzeroflag} = 1
{quadraticflag} = 1 :ul
@ -144,6 +143,9 @@ If {quadraticflag} is set to 1, then the SNAP energy expression includes the qua
The SNAP element file should contain {K}({K}+1)/2 additional coefficients
for each element, the upper-triangular elements of alpha.
NOTE: The previously used {diagonalstyle} keyword was removed in 2019,
since all known SNAP potentials use the default value of 3.
:line
[Mixing, shift, table, tail correction, restart, rRESPA info]:

View File

@ -0,0 +1,89 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Commands_all.html)
:line
pair_style spin/dipole/cut command :h3
pair_style spin/dipole/long command :h3
[Syntax:]
pair_style spin/dipole/cut cutoff
pair_style spin/dipole/long cutoff :pre
cutoff = global cutoff for magnetic dipole energy and forces
(optional) (distance units) :ulb,l
:ule
[Examples:]
pair_style spin/dipole/cut 10.0
pair_coeff * * 10.0
pair_coeff 2 3 8.0 :pre
pair_style spin/dipole/long 9.0
pair_coeff * * 1.0 1.0
pair_coeff 2 3 1.0 1.0 2.5 4.0 scale 0.5
pair_coeff 2 3 1.0 1.0 2.5 4.0 :pre
[Description:]
Style {spin/dipole/cut} computes a short-range dipole-dipole
interaction between pairs of magnetic particles that each
have a magnetic spin.
The magnetic dipole-dipole interactions are computed by the
following formulas for the magnetic energy, magnetic precession
vector omega and mechanical force between particles I and J.
:c,image(Eqs/pair_spin_dipole.jpg)
where si and sj are the spins of the two magnetic particles,
r is their separation distance, and the vector e = (Ri - Rj)/|Ri - Rj|
is the direction vector between the two particles.
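In LaTeX form (copied from the Eqs/pair_spin_dipole.tex source added earlier in this changeset), the magnetic energy reads:

\mathcal{H}_{\rm long} =
-\frac{\mu_{0} \left(\mu_B\right)^2}{4\pi}
\sum_{i,j,i\neq j}^{N}
\frac{g_i g_j}{r_{ij}^3}
\Big(3
\left(\bm{e}_{ij}\cdot \bm{s}_{i}\right)
\left(\bm{e}_{ij}\cdot \bm{s}_{j}\right)
-\bm{s}_i\cdot\bm{s}_j \Big) :pre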
Style {spin/dipole/long} computes long-range magnetic dipole-dipole
interaction.
A "kspace_style"_kspace_style.html must be defined to
use this pair style. Currently, "kspace_style
ewald/dipole/spin"_kspace_style.html and "kspace_style
pppm/dipole/spin"_kspace_style.html support long-range magnetic
dipole-dipole interactions.
:line
The "pair_modify"_pair_modify.html table option is not relevant
for this pair style.
This pair style does not support the "pair_modify"_pair_modify.html
tail option for adding long-range tail corrections to energy and
pressure.
This pair style writes its information to "binary restart
files"_restart.html, so pair_style and pair_coeff commands do not need
to be specified in an input script that reads a restart file.
[Restrictions:]
The {spin/dipole/cut} and {spin/dipole/long} styles are part of
the SPIN package. They are only enabled if LAMMPS was built with that
package. See the "Build package"_Build_package.html doc page for more
info.
Using dipole/spin pair styles with {electron} "units"_units.html is not
currently supported.
[Related commands:]
"pair_coeff"_pair_coeff.html, "kspace_style"_kspace_style.html
"fix nve/spin"_fix_nve_spin.html
[Default:] none
:line
:link(Allen2)
[(Allen)] Allen and Tildesley, Computer Simulation of Liquids,
Clarendon Press, Oxford, 1987.

View File

@ -52,7 +52,7 @@ style = {delete} or {index} or {loop} or {world} or {universe} or {uloop} or {st
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x),
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x)
ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z),
stride(x,y,z), stride2(x,y,z,a,b,c),
logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c),
vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)
group functions = count(group), mass(group), charge(group),
xcm(group,dim), vcm(group,dim), fcm(group,dim),
@ -459,8 +459,8 @@ Math functions: sqrt(x), exp(x), ln(x), log(x), abs(x), \
sin(x), cos(x), tan(x), asin(x), acos(x), atan(x), atan2(y,x), \
random(x,y,z), normal(x,y,z), ceil(x), floor(x), round(x), \
ramp(x,y), stagger(x,y), logfreq(x,y,z), logfreq2(x,y,z), \
stride(x,y,z), stride2(x,y,z,a,b,c), vdisplace(x,y), \
swiggle(x,y,z), cwiggle(x,y,z)
logfreq3(x,y,z), stride(x,y,z), stride2(x,y,z,a,b,c), \
vdisplace(x,y), swiggle(x,y,z), cwiggle(x,y,z)
Group functions: count(ID), mass(ID), charge(ID), xcm(ID,dim), \
vcm(ID,dim), fcm(ID,dim), bound(ID,dir), \
gyration(ID), ke(ID), angmom(ID,dim), torque(ID,dim), \
@ -670,6 +670,16 @@ sequence of output timesteps:
100,150,200,...950,1000,1500,2000,...9500,10000,15000,etc :pre
The logfreq3(x,y,z) function generates y points between x and z (inclusive)
that are separated by a multiplicative ratio: (z/x)^(1/(y-1)). Constraints
are: x,z > 0, y > 1, z-x >= y-1. For example, if logfreq3(10,25,1000) is used in
a variable by the "fix print"_fix_print.html command, then the interval
between 10 and 1000 is divided into 24 parts with a multiplicative
separation of ~1.21, and it will generate the following sequence of output
timesteps:
10, 13, 15, 18, 22, 27, 32,...384, 465, 563, 682, 826, 1000 :pre
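The spacing above can be reproduced with a short Python sketch (assuming ceiling rounding to integer timesteps, which matches the example sequence; the internal implementation may differ in detail):

import math

def logfreq3(x, y, z):
    # y points from x to z, constant multiplicative ratio (z/x)^(1/(y-1))
    ratio = (z / x) ** (1.0 / (y - 1))
    # the small epsilon guards against floating-point error at exact endpoints
    return [math.ceil(x * ratio**k - 1e-9) for k in range(y)]

print(logfreq3(10, 25, 1000))
# -> 10, 13, 15, 18, 22, 27, 32, ..., 682, 826, 1000 (abbreviated) :pre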
The stride(x,y,z) function uses the current timestep to generate a new
timestep. X,y >= 0 and z > 0 and x <= y are required. The generated
timesteps increase in increments of z, from x to y, i.e. it generates

View File

@ -1405,6 +1405,7 @@ Lenart
lennard
Lennard
Lenosky
Lenz
Lett
Leuven
Leven

View File

@ -25,16 +25,18 @@ velocity all create 100 4928459 rot yes dist gaussian
#pair_style hybrid/overlay eam/alloy spin/exchange 4.0 spin/neel 4.0
pair_style hybrid/overlay eam/alloy spin/exchange 4.0
pair_coeff * * eam/alloy Co_PurjaPun_2012.eam.alloy Co
pair_coeff * * spin/exchange exchange 4.0 0.3593 1.135028015e-05 1.064568567
pair_coeff * * eam/alloy ../examples/SPIN/cobalt_hcp/Co_PurjaPun_2012.eam.alloy Co
#pair_coeff * * eam/alloy Co_PurjaPun_2012.eam.alloy Co
pair_coeff * * spin/exchange exchange 4.0 -0.3593 1.135028015e-05 1.064568567
#pair_coeff * * spin/neel neel 4.0 0.0048 0.234 1.168 2.6905 0.705 0.652
neighbor 0.1 bin
neigh_modify every 10 check yes delay 20
#fix 1 all precession/spin zeeman 1.0 0.0 0.0 1.0
fix 1 all precession/spin zeeman 0.0 0.0 0.0 1.0
fix 2 all langevin/spin 0.0 0.0 21
fix 1 all precession/spin anisotropy 0.01 0.0 0.0 1.0
#fix 2 all langevin/spin 0.0 0.0 21
fix 2 all langevin/spin 0.0 0.1 21
fix 3 all nve/spin lattice yes
timestep 0.0001

View File

@ -0,0 +1 @@
../iron/Fe_Mishin2006.eam.alloy

View File

@ -0,0 +1,5 @@
2.4824 0.01948336
2.8665 0.01109
4.0538 -0.0002176
4.753 -0.001714
4.965 -0.001986

View File

@ -0,0 +1,32 @@
#Program fitting the exchange interaction
#Model curve: Bethe-Slater function
import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
print("Loop begin")
#Definition of the Bethe-Slater function
def func(x,a,b,c):
return 4*a*((x/c)**2)*(1-b*(x/c)**2)*np.exp(-(x/c)**2)
#Exchange coeff table (data to fit)
rdata, Jdata = np.loadtxt('exchange_bcc_iron.dat', usecols=(0,1), unpack=True)
plt.plot(rdata, Jdata, 'b-', label='data')
#Perform the fit
popt, pcov = curve_fit(func, rdata, Jdata, bounds=(0, [500.,5.,5.]))
plt.plot(rdata, func(rdata, *popt), 'r--', label='fit')
#Print the fitted params
print("Parameters: a={:.10} (in meV), b={:.10} (adim), c={:.10} (in Ang)".format(*popt))
#Plotting the result
plt.xlabel('r_ij')
plt.xlim([0,6.5])
plt.ylabel('J_ij')
plt.legend()
plt.show()
print("Loop end")

View File

@ -0,0 +1,19 @@
6 8
Optimal parameter set
1 4.100199340884814 F
2 1.565647547483517 F
1 0.9332056681088162 T 3.000000000000000
2 -1.162558782567700 T 2.866666666666670
3 -0.3502026949249225 T 2.733333333333330
4 0.4287820835430028 T 2.600000000000000
5 4.907925057809273 T 2.400000000000000
6 -5.307049068415304 T 2.300000000000000
1 -0.1960674387419232 F 4.100000000000000
2 0.3687525935422963 F 3.800000000000000
3 -1.505333614924853 F 3.500000000000000
4 4.948907078156191 T 3.200000000000000
5 -4.894613262753399 T 2.900000000000000
6 3.468897724782442 T 2.600000000000000
7 -1.792218099820337 T 2.400000000000000
8 80.22069592246987 T 2.300000000000000

View File

@ -0,0 +1,59 @@
# bcc iron in a 3d periodic box
clear
units metal
atom_style spin
dimension 3
boundary p p p
# necessary for the serial algorithm (sametag)
atom_modify map array
lattice bcc 2.8665
region box block 0.0 5.0 0.0 5.0 0.0 5.0
create_box 1 box
create_atoms 1 box
# setting mass, mag. moments, and interactions for bcc iron
mass 1 55.845
set group all spin 2.2 -1.0 0.0 0.0
velocity all create 100 4928459 rot yes dist gaussian
pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/cut 8.0
pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe
pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841
pair_coeff * * spin/dipole/cut 8.0
neighbor 0.1 bin
neigh_modify every 10 check yes delay 20
fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
fix_modify 1 energy yes
fix 2 all langevin/spin 0.0 0.0 21
fix 3 all nve/spin lattice yes
timestep 0.0001
# compute and output options
compute out_mag all spin
compute out_pe all pe
compute out_ke all ke
compute out_temp all temp
variable magx equal c_out_mag[1]
variable magy equal c_out_mag[2]
variable magz equal c_out_mag[3]
variable magnorm equal c_out_mag[4]
variable emag equal c_out_mag[5]
variable tmag equal c_out_mag[6]
thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal
thermo 50
compute outsp all property/atom spx spy spz sp fmx fmy fmz
dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3]
run 2000

View File

@ -0,0 +1,61 @@
# bcc iron in a 3d periodic box
clear
units metal
atom_style spin
dimension 3
boundary p p p
# necessary for the serial algorithm (sametag)
atom_modify map array
lattice bcc 2.8665
region box block 0.0 5.0 0.0 5.0 0.0 5.0
create_box 1 box
create_atoms 1 box
# setting mass, mag. moments, and interactions for bcc iron
mass 1 55.845
set group all spin 2.2 -1.0 0.0 0.0
velocity all create 100 4928459 rot yes dist gaussian
pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/long 8.0
pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe
pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841
pair_coeff * * spin/dipole/long 8.0
neighbor 0.1 bin
neigh_modify every 10 check yes delay 20
kspace_style ewald/dipole/spin 1.0e-4
fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
fix_modify 1 energy yes
fix 2 all langevin/spin 0.0 0.0 21
fix 3 all nve/spin lattice yes
timestep 0.0001
# compute and output options
compute out_mag all spin
compute out_pe all pe
compute out_ke all ke
compute out_temp all temp
variable magx equal c_out_mag[1]
variable magy equal c_out_mag[2]
variable magz equal c_out_mag[3]
variable magnorm equal c_out_mag[4]
variable emag equal c_out_mag[5]
variable tmag equal c_out_mag[6]
thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal
thermo 50
compute outsp all property/atom spx spy spz sp fmx fmy fmz
dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3]
run 2000

View File

@ -0,0 +1,62 @@
# bcc iron in a 3d periodic box
clear
units metal
atom_style spin
dimension 3
boundary p p p
# necessary for the serial algorithm (sametag)
atom_modify map array
lattice bcc 2.8665
region box block 0.0 5.0 0.0 5.0 0.0 5.0
create_box 1 box
create_atoms 1 box
# setting mass, mag. moments, and interactions for bcc iron
mass 1 55.845
set group all spin 2.2 -1.0 0.0 0.0
velocity all create 100 4928459 rot yes dist gaussian
pair_style hybrid/overlay eam/alloy spin/exchange 3.5 spin/dipole/long 8.0
pair_coeff * * eam/alloy Fe_Mishin2006.eam.alloy Fe
pair_coeff * * spin/exchange exchange 3.4 0.02726 0.2171 1.841
pair_coeff * * spin/dipole/long 8.0
neighbor 0.1 bin
neigh_modify every 10 check yes delay 20
kspace_style pppm/dipole/spin 1.0e-4
kspace_modify compute yes
fix 1 all precession/spin cubic 0.001 0.0005 1.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 1.0
fix_modify 1 energy yes
fix 2 all langevin/spin 0.0 0.0 21
fix 3 all nve/spin lattice yes
timestep 0.0001
# compute and output options
compute out_mag all spin
compute out_pe all pe
compute out_ke all ke
compute out_temp all temp
variable magx equal c_out_mag[1]
variable magy equal c_out_mag[2]
variable magz equal c_out_mag[3]
variable magnorm equal c_out_mag[4]
variable emag equal c_out_mag[5]
variable tmag equal c_out_mag[6]
thermo_style custom step time v_magx v_magy v_magz v_magnorm v_tmag v_emag pe etotal
thermo 50
compute outsp all property/atom spx spy spz sp fmx fmy fmz
dump 1 all custom 100 dump_iron.lammpstrj type x y z c_outsp[1] c_outsp[2] c_outsp[3]
run 2000

View File

@ -19,7 +19,8 @@ create_atoms 1 box
mass 1 55.845
set group all spin/random 31 2.2
#set group all spin/random 31 2.2
set group all spin 2.2 0.0 0.0 1.0
velocity all create 100 4928459 rot yes dist gaussian
pair_style hybrid/overlay eam/alloy spin/exchange 3.5

View File

@ -0,0 +1 @@
../../../../potentials/CH.rebo

View File

@ -18,7 +18,7 @@ group adsorbate type 2
######################## Potential definition ########################
pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0
####################################################################
pair_coeff * * rebo CH.airebo NULL C # chemical
pair_coeff * * rebo CH.rebo NULL C # chemical
pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range
####################################################################
# Neighbor update settings

View File

@ -1,4 +1,5 @@
LAMMPS (8 Mar 2018)
LAMMPS (5 Jun 2019)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:88)
using 1 OpenMP thread(s) per MPI task
# Initialization
units metal
@ -21,6 +22,8 @@ read_data Bi_gr_AB_stack_2L_noH_300K.data
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000353813 secs
read_data CPU = 0.0043292 secs
mass 1 12.0107 # carbon mass (g/mole) | membrane
mass 2 12.0107 # carbon mass (g/mole) | adsorbate
# Separate atom groups
@ -32,8 +35,8 @@ group adsorbate type 2
######################## Potential definition ########################
pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0
####################################################################
pair_coeff * * rebo CH.airebo NULL C # chemical
Reading potential file CH.airebo with DATE: 2011-10-25
pair_coeff * * rebo CH.rebo NULL C # chemical
Reading potential file CH.rebo with DATE: 2018-7-3
pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range
####################################################################
# Neighbor update settings
@ -92,32 +95,32 @@ Neighbor list info ...
bin: standard
Per MPI rank memory allocation (min/avg/max) = 16.96 | 16.96 | 16.96 Mbytes
Step TotEng PotEng KinEng v_REBO v_KC Temp v_adsxcom v_adsycom v_adszcom v_adsvxcom v_adsvycom v_adsvzcom
0 -5025.3867722725 -5040.0767391239 14.6899668514 -5011.2636297759 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
100 -5025.3962433293 -5041.3829775585 15.9867342292 -5012.5109377234 -28.8720398351 91.0071804888 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069014
200 -5025.3942568861 -5041.9638220670 16.5695651809 -5012.7804299195 -29.1833921475 94.3250439654 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265577
300 -5025.3919463074 -5040.9705419367 15.5785956293 -5012.0510295102 -28.9195124265 88.6837826830 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745908
400 -5025.3965376948 -5041.6929964127 16.2964587179 -5012.6418090677 -29.0511873450 92.7703393702 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704496
500 -5025.4050172900 -5042.1712310053 16.7662137153 -5013.1850218645 -28.9862091408 95.4444989087 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
600 -5025.3985715734 -5041.2158947893 15.8173232159 -5012.4875319345 -28.7283628548 90.0427797270 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001093
700 -5025.3997561572 -5041.6276721306 16.2279159734 -5012.7093581188 -28.9183140118 92.3801482386 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311564
800 -5025.3967603736 -5042.3401685987 16.9434082251 -5013.3044877099 -29.0356808888 96.4532085367 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492925
900 -5025.3984542801 -5042.2820667481 16.8836124680 -5013.4066841442 -28.8753826039 96.1128111061 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648638
1000 -5025.3988185618 -5041.9160822433 16.5172636815 -5012.8147737982 -29.1013084450 94.0273088606 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
Loop time of 156.142 on 1 procs for 1000 steps with 1360 atoms
0 -5025.3867727863 -5040.0767396377 14.6899668514 -5011.2636302897 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
100 -5025.3962438431 -5041.3829780735 15.9867342304 -5012.5109382383 -28.8720398352 91.0071804956 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069015
200 -5025.3942574000 -5041.9638225847 16.5695651847 -5012.7804304371 -29.1833921476 94.3250439874 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265578
300 -5025.3919468212 -5040.9705424499 15.5785956286 -5012.0510300232 -28.9195124266 88.6837826792 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745907
400 -5025.3965382086 -5041.6929969192 16.2964587107 -5012.6418095739 -29.0511873454 92.7703393292 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704497
500 -5025.4050178038 -5042.1712315208 16.7662137170 -5013.1850223792 -28.9862091417 95.4444989189 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
600 -5025.3985720873 -5041.2158953052 15.8173232179 -5012.4875324499 -28.7283628553 90.0427797386 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001092
700 -5025.3997566711 -5041.6276726420 16.2279159709 -5012.7093586298 -28.9183140122 92.3801482242 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311565
800 -5025.3967608874 -5042.3401691104 16.9434082230 -5013.3044882226 -29.0356808878 96.4532085250 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492926
900 -5025.3984547937 -5042.2820672614 16.8836124676 -5013.4066846579 -28.8753826035 96.1128111040 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648640
1000 -5025.3988190757 -5041.9160827657 16.5172636900 -5012.8147743212 -29.1013084444 94.0273089090 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
Loop time of 103.724 on 1 procs for 1000 steps with 1360 atoms
Performance: 0.553 ns/day, 43.373 hours/ns, 6.404 timesteps/s
99.6% CPU use with 1 MPI tasks x 1 OpenMP threads
Performance: 0.833 ns/day, 28.812 hours/ns, 9.641 timesteps/s
99.9% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 155.99 | 155.99 | 155.99 | 0.0 | 99.90
Bond | 0.00075769 | 0.00075769 | 0.00075769 | 0.0 | 0.00
Pair | 103.59 | 103.59 | 103.59 | 0.0 | 99.87
Bond | 0.00022388 | 0.00022388 | 0.00022388 | 0.0 | 0.00
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.084217 | 0.084217 | 0.084217 | 0.0 | 0.05
Output | 0.0016122 | 0.0016122 | 0.0016122 | 0.0 | 0.00
Modify | 0.034797 | 0.034797 | 0.034797 | 0.0 | 0.02
Other | | 0.02838 | | | 0.02
Comm | 0.082476 | 0.082476 | 0.082476 | 0.0 | 0.08
Output | 0.0010884 | 0.0010884 | 0.0010884 | 0.0 | 0.00
Modify | 0.032938 | 0.032938 | 0.032938 | 0.0 | 0.03
Other | | 0.01749 | | | 0.02
Nlocal: 1360 ave 1360 max 1360 min
Histogram: 1 0 0 0 0 0 0 0 0 0
@ -133,4 +136,4 @@ Ave neighs/atom = 195.004
Ave special neighs/atom = 0
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:02:36
Total wall time: 0:01:43

View File

@ -1,4 +1,5 @@
LAMMPS (8 Mar 2018)
LAMMPS (5 Jun 2019)
OMP_NUM_THREADS environment is not set. Defaulting to 1 thread. (src/comm.cpp:88)
using 1 OpenMP thread(s) per MPI task
# Initialization
units metal
@ -21,6 +22,8 @@ read_data Bi_gr_AB_stack_2L_noH_300K.data
0 = max # of 1-3 neighbors
0 = max # of 1-4 neighbors
1 = max # of special neighbors
special bonds CPU = 0.000187874 secs
read_data CPU = 0.00234103 secs
mass 1 12.0107 # carbon mass (g/mole) | membrane
mass 2 12.0107 # carbon mass (g/mole) | adsorbate
# Separate atom groups
@ -32,8 +35,8 @@ group adsorbate type 2
######################## Potential definition ########################
pair_style hybrid/overlay rebo kolmogorov/crespi/full 16.0
####################################################################
pair_coeff * * rebo CH.airebo NULL C # chemical
Reading potential file CH.airebo with DATE: 2011-10-25
pair_coeff * * rebo CH.rebo NULL C # chemical
Reading potential file CH.rebo with DATE: 2018-7-3
pair_coeff * * kolmogorov/crespi/full CC.KC-full C C # long range
####################################################################
# Neighbor update settings
@ -92,32 +95,32 @@ Neighbor list info ...
bin: standard
Per MPI rank memory allocation (min/avg/max) = 11.13 | 11.13 | 11.13 Mbytes
Step TotEng PotEng KinEng v_REBO v_KC Temp v_adsxcom v_adsycom v_adszcom v_adsvxcom v_adsvycom v_adsvzcom
0 -5025.3867722725 -5040.0767391239 14.6899668514 -5011.2636297759 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
100 -5025.3962433293 -5041.3829775585 15.9867342292 -5012.5109377234 -28.8720398351 91.0071804888 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069014
200 -5025.3942568861 -5041.9638220670 16.5695651809 -5012.7804299195 -29.1833921475 94.3250439654 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265577
300 -5025.3919463074 -5040.9705419367 15.5785956293 -5012.0510295103 -28.9195124265 88.6837826830 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745908
400 -5025.3965376948 -5041.6929964127 16.2964587179 -5012.6418090677 -29.0511873450 92.7703393702 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704496
500 -5025.4050172900 -5042.1712310053 16.7662137153 -5013.1850218645 -28.9862091408 95.4444989088 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
600 -5025.3985715734 -5041.2158947893 15.8173232159 -5012.4875319345 -28.7283628548 90.0427797270 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001093
700 -5025.3997561572 -5041.6276721306 16.2279159734 -5012.7093581188 -28.9183140118 92.3801482386 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311564
800 -5025.3967603736 -5042.3401685987 16.9434082251 -5013.3044877099 -29.0356808888 96.4532085367 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492925
900 -5025.3984542801 -5042.2820667481 16.8836124680 -5013.4066841442 -28.8753826039 96.1128111061 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648638
1000 -5025.3988185618 -5041.9160822433 16.5172636815 -5012.8147737983 -29.1013084450 94.0273088606 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
Loop time of 42.5422 on 4 procs for 1000 steps with 1360 atoms
0 -5025.3867727863 -5040.0767396377 14.6899668514 -5011.2636302897 -28.8131093480 83.6251135127 22.0155657205 20.2812150219 3.4623630945 0.0282287195 0.0535565745 0.2193320108
100 -5025.3962438431 -5041.3829780735 15.9867342304 -5012.5109382383 -28.8720398352 91.0071804956 22.0181858078 20.2867731676 3.4456714402 0.0241525932 0.0573807336 -0.5235069015
200 -5025.3942574000 -5041.9638225847 16.5695651847 -5012.7804304371 -29.1833921476 94.3250439874 22.0203529515 20.2926376511 3.3740502908 0.0186420748 0.0595018114 -0.7867265578
300 -5025.3919468212 -5040.9705424499 15.5785956286 -5012.0510300232 -28.9195124266 88.6837826792 22.0218424095 20.2984380400 3.3199036613 0.0106250874 0.0544668352 -0.1513745907
400 -5025.3965382086 -5041.6929969192 16.2964587107 -5012.6418095739 -29.0511873454 92.7703393291 22.0224243957 20.3034636122 3.3515794172 0.0006844935 0.0458598502 0.6967704497
500 -5025.4050178038 -5042.1712315208 16.7662137170 -5013.1850223792 -28.9862091417 95.4444989189 22.0220673443 20.3074634962 3.4286173278 -0.0078273439 0.0340764532 0.6845095066
600 -5025.3985720873 -5041.2158953052 15.8173232179 -5012.4875324499 -28.7283628553 90.0427797386 22.0209262700 20.3103065099 3.4653840648 -0.0141442608 0.0229602847 0.0009001092
700 -5025.3997566711 -5041.6276726420 16.2279159709 -5012.7093586298 -28.9183140122 92.3801482242 22.0191651506 20.3120184840 3.4291788224 -0.0208485646 0.0104216414 -0.6668311565
800 -5025.3967608874 -5042.3401691104 16.9434082230 -5013.3044882226 -29.0356808878 96.4532085250 22.0167259920 20.3122737443 3.3535033285 -0.0279747378 -0.0060833621 -0.7003492926
900 -5025.3984547938 -5042.2820672614 16.8836124676 -5013.4066846579 -28.8753826035 96.1128111040 22.0136711877 20.3107854823 3.3206430872 -0.0331979094 -0.0237440547 0.1335648640
1000 -5025.3988190757 -5041.9160827657 16.5172636900 -5012.8147743212 -29.1013084444 94.0273089090 22.0102627032 20.3075977018 3.3736867454 -0.0340065996 -0.0390649991 0.7872380119
Loop time of 33.7338 on 4 procs for 1000 steps with 1360 atoms
Performance: 2.031 ns/day, 11.817 hours/ns, 23.506 timesteps/s
98.9% CPU use with 4 MPI tasks x 1 OpenMP threads
Performance: 2.561 ns/day, 9.370 hours/ns, 29.644 timesteps/s
94.1% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 39.928 | 40.992 | 42.377 | 15.8 | 96.36
Bond | 0.0003643 | 0.00043392 | 0.00048113 | 0.0 | 0.00
Pair | 30.833 | 31.356 | 32.18 | 9.1 | 92.95
Bond | 0.00026059 | 0.00029182 | 0.00031185 | 0.0 | 0.00
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.12253 | 1.5076 | 2.5698 | 82.1 | 3.54
Output | 0.0012577 | 0.0013637 | 0.0016453 | 0.4 | 0.00
Modify | 0.010833 | 0.012247 | 0.013317 | 0.9 | 0.03
Other | | 0.02864 | | | 0.07
Comm | 1.443 | 2.2722 | 2.8091 | 34.3 | 6.74
Output | 0.00068855 | 0.00095087 | 0.0017185 | 0.0 | 0.00
Modify | 0.010187 | 0.011709 | 0.015284 | 1.9 | 0.03
Other | | 0.09241 | | | 0.27
Nlocal: 340 ave 344 max 334 min
Histogram: 1 0 0 0 0 0 1 0 1 1
@ -133,4 +136,4 @@ Ave neighs/atom = 195.004
Ave special neighs/atom = 0
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:42
Total wall time: 0:00:33

View File

@ -64,9 +64,12 @@ int BaseAtomicT::init_atomic(const int nlocal, const int nall,
} else
_nbor_data=&(nbor->dev_nbor);
int success=device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor,
maxspecial,_gpu_host,max_nbors,cell_size,false,
_threads_per_atom);
int success=device->init(*ans,false,false,nlocal,nall,maxspecial);
if (success!=0)
return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;
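// Editor's note: the five GPU base classes that follow (charge, dipole, DPD,
// ellipsoid, three-body) receive the same split as this file. Condensed, the
// old single device->init() call that set up both atom and neighbor storage
// becomes two phases (a sketch paraphrasing the diffs; the charge/rot flags
// and pre_cut value vary per class):
int success = device->init(*ans, /*charge=*/false, /*rot=*/false,
                           nlocal, nall, maxspecial);          // atom storage
if (success != 0) return success;
success = device->init_nbor(nbor, nlocal, host_nlocal, nall, maxspecial,
                            _gpu_host, max_nbors, cell_size,
                            /*pre_cut=*/false, _threads_per_atom); // nbor storage
if (success != 0) return success;
// Keeping the phases separate lets hybrid sub-styles share a single atom
// initialization while each registers its own neighbor build, consistent
// with the pair-hybrid restriction removed from fix_gpu.cpp further down.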

View File

@ -65,9 +65,12 @@ int BaseChargeT::init_atomic(const int nlocal, const int nall,
} else
_nbor_data=&(nbor->dev_nbor);
int success=device->init(*ans,true,false,nlocal,host_nlocal,nall,nbor,
maxspecial,_gpu_host,max_nbors,cell_size,false,
_threads_per_atom);
int success=device->init(*ans,true,false,nlocal,nall,maxspecial);
if (success!=0)
return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;

View File

@ -66,9 +66,12 @@ int BaseDipoleT::init_atomic(const int nlocal, const int nall,
} else
_nbor_data=&(nbor->dev_nbor);
int success=device->init(*ans,true,true,nlocal,host_nlocal,nall,nbor,
maxspecial,_gpu_host,max_nbors,cell_size,false,
_threads_per_atom);
int success=device->init(*ans,true,true,nlocal,nall,maxspecial);
if (success!=0)
return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;

View File

@ -65,9 +65,13 @@ int BaseDPDT::init_atomic(const int nlocal, const int nall,
} else
_nbor_data=&(nbor->dev_nbor);
int success=device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor,
maxspecial,_gpu_host,max_nbors,cell_size,false,
_threads_per_atom,true);
int success=device->init(*ans,false,false,nlocal,nall,maxspecial,true);
if (success!=0)
return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;

View File

@ -71,12 +71,15 @@ int BaseEllipsoidT::init_base(const int nlocal, const int nall,
_threads_per_atom=device->threads_per_atom();
int success=device->init(*ans,false,true,nlocal,host_nlocal,nall,nbor,
maxspecial,_gpu_host,max_nbors,cell_size,true,
1);
int success=device->init(*ans,false,true,nlocal,nall,maxspecial);
if (success!=0)
return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,true,1);
if (success!=0)
return success;
ucl_device=device->gpu;
atom=&device->atom;

View File

@ -78,9 +78,12 @@ int BaseThreeT::init_three(const int nlocal, const int nall,
if (_threads_per_atom*_threads_per_atom>device->warp_size())
return -10;
int success=device->init(*ans,false,false,nlocal,host_nlocal,nall,nbor,
maxspecial,_gpu_host,max_nbors,cell_size,false,
_threads_per_atom);
int success=device->init(*ans,false,false,nlocal,nall,maxspecial);
if (success!=0)
return success;
success = device->init_nbor(nbor,nlocal,host_nlocal,nall,maxspecial,_gpu_host,
max_nbors,cell_size,false,_threads_per_atom);
if (success!=0)
return success;

View File

@ -246,11 +246,8 @@ int DeviceT::set_ocl_params(char *ocl_vendor) {
template <class numtyp, class acctyp>
int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
const bool rot, const int nlocal,
const int host_nlocal, const int nall,
Neighbor *nbor, const int maxspecial,
const int gpu_host, const int max_nbors,
const double cell_size, const bool pre_cut,
const int threads_per_atom, const bool vel) {
const int nall, const int maxspecial,
const bool vel) {
if (!_device_init)
return -1;
if (sizeof(acctyp)==sizeof(double) && gpu->double_precision()==false)
@ -301,16 +298,6 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const bool charge,
if (!ans.init(ef_nlocal,charge,rot,*gpu))
return -3;
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
*gpu,gpu_nbor,gpu_host,pre_cut, _block_cell_2d,
_block_cell_id, _block_nbor_build, threads_per_atom,
_warp_size, _time_device, compile_string()))
return -3;
if (_cell_size<0.0)
nbor->cell_size(cell_size,cell_size);
else
nbor->cell_size(_cell_size,cell_size);
_init_count++;
return 0;
}
@ -338,6 +325,39 @@ int DeviceT::init(Answer<numtyp,acctyp> &ans, const int nlocal,
return 0;
}
template <class numtyp, class acctyp>
int DeviceT::init_nbor(Neighbor *nbor, const int nlocal,
const int host_nlocal, const int nall,
const int maxspecial, const int gpu_host,
const int max_nbors, const double cell_size,
const bool pre_cut, const int threads_per_atom) {
int ef_nlocal=nlocal;
if (_particle_split<1.0 && _particle_split>0.0)
ef_nlocal=static_cast<int>(_particle_split*nlocal);
int gpu_nbor=0;
if (_gpu_mode==Device<numtyp,acctyp>::GPU_NEIGH)
gpu_nbor=1;
else if (_gpu_mode==Device<numtyp,acctyp>::GPU_HYB_NEIGH)
gpu_nbor=2;
#ifndef USE_CUDPP
if (gpu_nbor==1)
gpu_nbor=2;
#endif
if (!nbor->init(&_neighbor_shared,ef_nlocal,host_nlocal,max_nbors,maxspecial,
*gpu,gpu_nbor,gpu_host,pre_cut,_block_cell_2d,
_block_cell_id, _block_nbor_build, threads_per_atom,
_warp_size, _time_device, compile_string()))
return -3;
if (_cell_size<0.0)
nbor->cell_size(cell_size,cell_size);
else
nbor->cell_size(_cell_size,cell_size);
return 0;
}
template <class numtyp, class acctyp>
void DeviceT::set_single_precompute
(PPPM<numtyp,acctyp,float,_lgpu_float4> *pppm) {
@ -614,7 +634,7 @@ void DeviceT::output_kspace_times(UCL_Timer &time_in,
if (screen && times[6]>0.0) {
fprintf(screen,"\n\n-------------------------------------");
fprintf(screen,"--------------------------------\n");
fprintf(screen," Device Time Info (average): ");
fprintf(screen," Device Time Info (average) for kspace: ");
fprintf(screen,"\n-------------------------------------");
fprintf(screen,"--------------------------------\n");

View File

@ -53,11 +53,43 @@ class Device {
const int t_per_atom, const double cell_size,
char *vendor_string, const int block_pair);
/// Initialize the device for Atom and Neighbor storage
/** \param rot True if quaternions need to be stored
/// Initialize the device for Atom storage
/** \param charge True if charges need to be stored
* \param rot True if quaternions need to be stored
* \param nlocal Total number of local particles to allocate memory for
* \param nall Total number of local+ghost particles
* \param maxspecial Maximum number of special bonded atoms per atom
* \param vel True if velocities need to be stored
*
* Returns:
* - 0 if successful
* - -1 if fix gpu not found
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
int init(Answer<numtyp,acctyp> &ans, const bool charge, const bool rot,
const int nlocal, const int nall, const int maxspecial,
const bool vel=false);
/// Initialize the device for Atom storage only
/** \param nlocal Total number of local particles to allocate memory for
* \param nall Total number of local+ghost particles
*
* Returns:
* - 0 if successful
* - -1 if fix gpu not found
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
/// Initialize the neighbor list storage
/** \param nbor Neighbor storage object to set up
* \param nlocal Total number of local particles to allocate memory for
* \param host_nlocal Initial number of host particles to allocate memory for
* \param nall Total number of local+ghost particles
* \param maxspecial Maximum number of special bonded atoms per atom
* \param gpu_host 0 if host will not perform force calculations,
* 1 if gpu_nbor is true, and host needs a half nbor list,
* 2 if gpu_nbor is true, and host needs a full nbor list
@ -73,23 +105,11 @@ class Device {
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
int init(Answer<numtyp,acctyp> &a, const bool charge, const bool rot,
const int nlocal, const int host_nlocal, const int nall,
Neighbor *nbor, const int maxspecial, const int gpu_host,
const int max_nbors, const double cell_size, const bool pre_cut,
const int threads_per_atom, const bool vel=false);
/// Initialize the device for Atom storage only
/** \param nlocal Total number of local particles to allocate memory for
* \param nall Total number of local+ghost particles
*
* Returns:
* - 0 if successful
* - -1 if fix gpu not found
* - -3 if there is an out of memory error
* - -4 if the GPU library was not compiled for GPU
* - -5 Double precision is not supported on card **/
int init(Answer<numtyp,acctyp> &ans, const int nlocal, const int nall);
int init_nbor(Neighbor *nbor, const int nlocal,
const int host_nlocal, const int nall,
const int maxspecial, const int gpu_host,
const int max_nbors, const double cell_size,
const bool pre_cut, const int threads_per_atom);
/// Output a message for pair_style acceleration with device stats
void init_message(FILE *screen, const char *name,
@ -173,7 +193,7 @@ class Device {
/// Return host memory usage in bytes
double host_memory_usage() const;
/// Return the number of procs sharing a device (size of device commincator)
/// Return the number of procs sharing a device (size of device communicator)
inline int procs_per_gpu() const { return _procs_per_gpu; }
/// Return the number of threads per proc
inline int num_threads() const { return _nthreads; }
@ -260,12 +280,12 @@ class Device {
/// Atom Data
Atom<numtyp,acctyp> atom;
// --------------------------- NBOR DATA ----------------------------
// --------------------------- NBOR SHARED KERNELS ----------------
/// Neighbor Data
/// Shared kernels for neighbor lists
NeighborShared _neighbor_shared;
// ------------------------ LONG RANGE DATA -------------------------
// ------------------------ LONG RANGE DATA -----------------------
// Long Range Data
int _long_range_precompute;

View File

@ -10,6 +10,5 @@ twojmax 6
rfac0 0.99363
rmin0 0
diagonalstyle 3
bzeroflag 0
quadraticflag 0

View File

@ -8,6 +8,5 @@ twojmax 8
rfac0 0.99363
rmin0 0
diagonalstyle 3
bzeroflag 0
quadraticflag 0

src/.gitignore
View File

@ -167,6 +167,10 @@
/pair_spin.h
/pair_spin_dmi.cpp
/pair_spin_dmi.h
/pair_spin_dipole_cut.cpp
/pair_spin_dipole_cut.h
/pair_spin_dipole_long.cpp
/pair_spin_dipole_long.h
/pair_spin_exchange.cpp
/pair_spin_exchange.h
/pair_spin_magelec.cpp
@ -428,6 +432,10 @@
/ewald.h
/ewald_cg.cpp
/ewald_cg.h
/ewald_dipole.cpp
/ewald_dipole.h
/ewald_dipole_spin.cpp
/ewald_dipole_spin.h
/ewald_disp.cpp
/ewald_disp.h
/ewald_n.cpp
@ -1027,6 +1035,10 @@
/pppm.h
/pppm_cg.cpp
/pppm_cg.h
/pppm_dipole.cpp
/pppm_dipole.h
/pppm_dipole_spin.cpp
/pppm_dipole_spin.h
/pppm_disp.cpp
/pppm_disp.h
/pppm_disp_tip4p.cpp

View File

@ -2,12 +2,10 @@
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
@ -19,7 +17,12 @@
#include "atom.h"
#include "comm.h"
#include "force.h"
#include "neighbor.h"
#include "neigh_list.h"
#include "neigh_request.h"
#include "update.h"
#include "integrate.h"
#include "respa.h"
#include "math_const.h"
#include "memory.h"
#include "error.h"
@ -31,6 +34,7 @@ using namespace MathConst;
PairLJClass2::PairLJClass2(LAMMPS *lmp) : Pair(lmp)
{
respa_enable = 1;
writedata = 1;
}
@ -133,6 +137,270 @@ void PairLJClass2::compute(int eflag, int vflag)
if (vflag_fdotr) virial_fdotr_compute();
}
/* ---------------------------------------------------------------------- */
void PairLJClass2::compute_inner()
{
int i,j,ii,jj,inum,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
double rsq,rinv,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
int *ilist,*jlist,*numneigh,**firstneigh;
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
double *special_lj = force->special_lj;
int newton_pair = force->newton_pair;
inum = list->inum_inner;
ilist = list->ilist_inner;
numneigh = list->numneigh_inner;
firstneigh = list->firstneigh_inner;
double cut_out_on = cut_respa[0];
double cut_out_off = cut_respa[1];
double cut_out_diff = cut_out_off - cut_out_on;
double cut_out_on_sq = cut_out_on*cut_out_on;
double cut_out_off_sq = cut_out_off*cut_out_off;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq < cut_out_off_sq) {
r2inv = 1.0/rsq;
rinv = sqrt(r2inv);
r3inv = r2inv*rinv;
r6inv = r3inv*r3inv;
jtype = type[j];
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
if (rsq > cut_out_on_sq) {
rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
fpair *= 1.0 - rsw*rsw*(3.0 - 2.0*rsw);
}
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
if (newton_pair || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
}
}
}
}
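// Editor's note: a minimal standalone sketch (hypothetical helper, not part
// of the patch) of the cubic switching polynomial used by compute_inner(),
// compute_middle() and compute_outer(); the form
// 1.0 + rsw*rsw*(2.0*rsw - 3.0) in compute_middle() is the same polynomial
// rearranged.
static double respa_switch_down(double r, double cut_on, double cut_off)
{
  if (r <= cut_on) return 1.0;            // force fully kept at this level
  if (r >= cut_off) return 0.0;           // force fully handed to next level
  double x = (r - cut_on) / (cut_off - cut_on);
  return 1.0 - x*x*(3.0 - 2.0*x);         // C1-continuous ramp from 1 to 0
}
// The complementary ramp-up applied at inner boundaries is x*x*(3.0 - 2.0*x).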
/* ---------------------------------------------------------------------- */
void PairLJClass2::compute_middle()
{
int i,j,ii,jj,inum,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,fpair;
double rsq,rinv,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
int *ilist,*jlist,*numneigh,**firstneigh;
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
double *special_lj = force->special_lj;
int newton_pair = force->newton_pair;
inum = list->inum_middle;
ilist = list->ilist_middle;
numneigh = list->numneigh_middle;
firstneigh = list->firstneigh_middle;
double cut_in_off = cut_respa[0];
double cut_in_on = cut_respa[1];
double cut_out_on = cut_respa[2];
double cut_out_off = cut_respa[3];
double cut_in_diff = cut_in_on - cut_in_off;
double cut_out_diff = cut_out_off - cut_out_on;
double cut_in_off_sq = cut_in_off*cut_in_off;
double cut_in_on_sq = cut_in_on*cut_in_on;
double cut_out_on_sq = cut_out_on*cut_out_on;
double cut_out_off_sq = cut_out_off*cut_out_off;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
if (rsq < cut_out_off_sq && rsq > cut_in_off_sq) {
r2inv = 1.0/rsq;
rinv = sqrt(r2inv);
r3inv = r2inv*rinv;
r6inv = r3inv*r3inv;
jtype = type[j];
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
if (rsq < cut_in_on_sq) {
rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
fpair *= rsw*rsw*(3.0 - 2.0*rsw);
}
if (rsq > cut_out_on_sq) {
rsw = (sqrt(rsq) - cut_out_on)/cut_out_diff;
fpair *= 1.0 + rsw*rsw*(2.0*rsw - 3.0);
}
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
if (newton_pair || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
}
}
}
}
/* ---------------------------------------------------------------------- */
void PairLJClass2::compute_outer(int eflag, int vflag)
{
int i,j,ii,jj,inum,jnum,itype,jtype;
double xtmp,ytmp,ztmp,delx,dely,delz,evdwl,fpair;
double rsq,rinv,r2inv,r3inv,r6inv,forcelj,factor_lj,rsw;
int *ilist,*jlist,*numneigh,**firstneigh;
evdwl = 0.0;
ev_init(eflag,vflag);
double **x = atom->x;
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
double *special_lj = force->special_lj;
int newton_pair = force->newton_pair;
inum = list->inum;
ilist = list->ilist;
numneigh = list->numneigh;
firstneigh = list->firstneigh;
double cut_in_off = cut_respa[2];
double cut_in_on = cut_respa[3];
double cut_in_diff = cut_in_on - cut_in_off;
double cut_in_off_sq = cut_in_off*cut_in_off;
double cut_in_on_sq = cut_in_on*cut_in_on;
// loop over neighbors of my atoms
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
xtmp = x[i][0];
ytmp = x[i][1];
ztmp = x[i][2];
itype = type[i];
jlist = firstneigh[i];
jnum = numneigh[i];
for (jj = 0; jj < jnum; jj++) {
j = jlist[jj];
factor_lj = special_lj[sbmask(j)];
j &= NEIGHMASK;
delx = xtmp - x[j][0];
dely = ytmp - x[j][1];
delz = ztmp - x[j][2];
rsq = delx*delx + dely*dely + delz*delz;
jtype = type[j];
if (rsq < cutsq[itype][jtype]) {
if (rsq > cut_in_off_sq) {
r2inv = 1.0/rsq;
rinv = sqrt(r2inv);
r3inv = r2inv*rinv;
r6inv = r3inv*r3inv;
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
if (rsq < cut_in_on_sq) {
rsw = (sqrt(rsq) - cut_in_off)/cut_in_diff;
fpair *= rsw*rsw*(3.0 - 2.0*rsw);
}
f[i][0] += delx*fpair;
f[i][1] += dely*fpair;
f[i][2] += delz*fpair;
if (newton_pair || j < nlocal) {
f[j][0] -= delx*fpair;
f[j][1] -= dely*fpair;
f[j][2] -= delz*fpair;
}
}
if (eflag) {
r2inv = 1.0/rsq;
rinv = sqrt(r2inv);
r3inv = r2inv*rinv;
r6inv = r3inv*r3inv;
evdwl = r6inv*(lj3[itype][jtype]*r3inv-lj4[itype][jtype]) -
offset[itype][jtype];
evdwl *= factor_lj;
}
if (vflag) {
if (rsq <= cut_in_off_sq) {
r2inv = 1.0/rsq;
rinv = sqrt(r2inv);
r3inv = r2inv*rinv;
r6inv = r3inv*r3inv;
forcelj = r6inv * (lj1[itype][jtype]*r3inv - lj2[itype][jtype]);
fpair = factor_lj*forcelj*r2inv;
} else if (rsq < cut_in_on_sq)
fpair = factor_lj*forcelj*r2inv;
}
if (evflag) ev_tally(i,j,nlocal,newton_pair,
evdwl,0.0,fpair,delx,dely,delz);
}
}
}
}
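// Editor's note: how the four cut_respa[] entries map onto the three methods
// above (indices inferred from the code; the array comes from Respa::cutoff
// via init_style() below):
//   cut_respa[0]..cut_respa[1] : inner<->middle handoff window;
//     compute_inner() ramps down across it, compute_middle() ramps up
//   cut_respa[2]..cut_respa[3] : middle<->outer handoff window;
//     compute_middle() ramps down across it, compute_outer() ramps up
// init_one() below additionally requires every pair cutoff to be at least
// cut_respa[3].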
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
@ -212,6 +480,38 @@ void PairLJClass2::coeff(int narg, char **arg)
if (count == 0) error->all(FLERR,"Incorrect args for pair coefficients");
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairLJClass2::init_style()
{
// request regular or rRESPA neighbor list
int irequest;
int respa = 0;
if (update->whichflag == 1 && strstr(update->integrate_style,"respa")) {
if (((Respa *) update->integrate)->level_inner >= 0) respa = 1;
if (((Respa *) update->integrate)->level_middle >= 0) respa = 2;
}
irequest = neighbor->request(this,instance_me);
if (respa >= 1) {
neighbor->requests[irequest]->respaouter = 1;
neighbor->requests[irequest]->respainner = 1;
}
if (respa == 2) neighbor->requests[irequest]->respamiddle = 1;
// set rRESPA cutoffs
if (strstr(update->integrate_style,"respa") &&
((Respa *) update->integrate)->level_inner >= 0)
cut_respa = ((Respa *) update->integrate)->cutoff;
else cut_respa = NULL;
}
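// Editor's note: the neighbor-request logic above reduces to a small
// decision table (a paraphrase, not patch code):
//   respa == 0 : plain run           -> one regular neighbor list
//   respa == 1 : rRESPA inner level  -> respainner + respaouter sub-lists
//   respa == 2 : plus a middle level -> respamiddle as well
// cut_respa then points at the four Respa switching radii, or stays NULL for
// plain runs, which is how init_one() decides whether to validate cutoffs
// against cut_respa[3].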
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
@ -246,6 +546,11 @@ double PairLJClass2::init_one(int i, int j)
lj4[j][i] = lj4[i][j];
offset[j][i] = offset[i][j];
// check interior rRESPA cutoff
if (cut_respa && cut[i][j] < cut_respa[3])
error->all(FLERR,"Pair cutoff < Respa interior cutoff");
// compute I,J contribution to long-range tail correction
// count total # of atoms of type I and J via Allreduce

View File

@ -2,12 +2,10 @@
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
http://lammps.sandia.gov, Sandia National Laboratories
Steve Plimpton, sjplimp@sandia.gov
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
@ -31,6 +29,7 @@ class PairLJClass2 : public Pair {
virtual void compute(int, int);
virtual void settings(int, char **);
void coeff(int, char **);
void init_style();
virtual double init_one(int, int);
void write_restart(FILE *);
void read_restart(FILE *);
@ -41,11 +40,16 @@ class PairLJClass2 : public Pair {
double single(int, int, int, int, double, double, double, double &);
void *extract(const char *, int &);
void compute_inner();
void compute_middle();
void compute_outer(int, int);
protected:
double cut_global;
double **cut;
double **epsilon,**sigma;
double **lj1,**lj2,**lj3,**lj4,**offset;
double *cut_respa;
virtual void allocate();
};
@ -56,15 +60,13 @@ class PairLJClass2 : public Pair {
#endif
/* ERROR/WARNING messages:
E: Illegal ... command
Self-explanatory. Check the input script syntax and compare to the
documentation for the command. You can use -echo screen as a
command-line option when running LAMMPS to see the offending line.
E: Incorrect args for pair coefficients
Self-explanatory. Check the input script or data file.
E: Pair cutoff < Respa interior cutoff
One or more pairwise cutoffs are too short to use with the specified
rRESPA cutoffs.
*/

View File

@ -44,7 +44,7 @@ using namespace MathConst;
PairLJCutDipoleLong::PairLJCutDipoleLong(LAMMPS *lmp) : Pair(lmp)
{
single_enable = 0;
ewaldflag = dipoleflag = 1;
ewaldflag = pppmflag = dipoleflag = 1;
respa_enable = 0;
}

View File

@ -1,9 +1,8 @@
This package implements GPU optimizations of various LAMMPS styles.
Section 5.3.1 on the manual gives details of what hardware and Cuda
Section 3.7 of the manual gives details of what hardware and Cuda
software is required on your system, and full details on how to build
and use this package. See the KOKKOS package, which also has
GPU-enabled styles.
and use this package. The KOKKOS package also has GPU-enabled styles.
This package uses an external library provided in lib/gpu which must
be compiled before making LAMMPS. See the lib/gpu/README file and the

View File

@ -219,17 +219,6 @@ void FixGPU::init()
error->all(FLERR,"GPU package does not (yet) work with "
"atom_style template");
// hybrid cannot be used with force/neigh option
if (_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH)
if (force->pair_match("^hybrid",0) != NULL)
error->all(FLERR,"Cannot use pair hybrid with GPU neighbor list builds");
if (_particle_split < 0)
if (force->pair_match("^hybrid",0) != NULL)
error->all(FLERR,"GPU split param must be positive "
"for hybrid pair styles");
// neighbor list builds on the GPU with triclinic box is not yet supported
if ((_gpu_mode == GPU_NEIGH || _gpu_mode == GPU_HYB_NEIGH) &&

View File

@ -65,14 +65,6 @@ E: GPU package does not (yet) work with atom_style template
Self-explanatory.
E: Cannot use pair hybrid with GPU neighbor list builds
Neighbor list builds must be done on the CPU for this pair style.
E: GPU split param must be positive for hybrid pair styles
See the package gpu command.
E: Cannot use package gpu neigh yes with triclinic box
This is a current restriction in LAMMPS.

View File

@ -22,6 +22,7 @@
#include "memory_kokkos.h"
#include "error.h"
#include "kokkos.h"
#include "atom_masks.h"
using namespace LAMMPS_NS;
@ -270,8 +271,10 @@ int AtomKokkos::add_custom(const char *name, int flag)
int n = strlen(name) + 1;
dname[index] = new char[n];
strcpy(dname[index],name);
this->sync(Device,DVECTOR_MASK);
memoryKK->grow_kokkos(k_dvector,dvector,ndvector,nmax,
"atom:dvector");
this->modified(Device,DVECTOR_MASK);
}
return index;
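// Editor's note: this hunk shows the dual-view discipline applied throughout
// the Kokkos changes that follow: sync the execution space you are about to
// touch, do the work there, then mark that space modified so the other copy
// is refreshed on its next sync. As above:
//   atomKK->sync(Device, DVECTOR_MASK);      // device copy made current
//   /* grow / write device-side data */
//   atomKK->modified(Device, DVECTOR_MASK);  // host copy now flagged stale
// The related mechanical change in these files replaces bare sync()/
// modified() calls with explicit atomKK->sync()/atomKK->modified().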

View File

@ -24,7 +24,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
/* ---------------------------------------------------------------------- */
@ -59,14 +59,15 @@ AtomVecAngleKokkos::AtomVecAngleKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
void AtomVecAngleKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atomKK->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
@ -98,7 +99,7 @@ void AtomVecAngleKokkos::grow(int n)
"atom:angle_atom3");
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@ -282,7 +283,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
atomKK->sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
@ -309,7 +310,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
}
}
} else {
sync(Device,X_MASK);
atomKK->sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
@ -397,8 +398,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
const int nfirst, const int &pbc_flag,
const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
@ -429,8 +430,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
}
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
atomKK->sync(Device,X_MASK);
atomKK->modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
@ -493,13 +494,13 @@ struct AtomVecAngleKokkos_UnpackComm {
void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
atomKK->sync(Device,X_MASK);
atomKK->modified(Device,X_MASK);
struct AtomVecAngleKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
}
@ -642,7 +643,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
atomKK->sync(Host,F_MASK);
int m = 0;
const int last = first + n;
@ -659,7 +660,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0)
modified(Host,F_MASK);
atomKK->modified(Host,F_MASK);
int m = 0;
for (int i = 0; i < n; i++) {
@ -960,9 +961,9 @@ struct AtomVecAngleKokkos_UnpackBorder {
void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space) {
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
if(space==Host) {
struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
@ -984,7 +985,7 @@ void AtomVecAngleKokkos::unpack_border(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -1010,7 +1011,7 @@ void AtomVecAngleKokkos::unpack_border_vel(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -1412,7 +1413,7 @@ int AtomVecAngleKokkos::unpack_exchange(double *buf)
{
int nlocal = atom->nlocal;
if (nlocal == nmax) grow(0);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | SPECIAL_MASK);
@ -1487,7 +1488,7 @@ int AtomVecAngleKokkos::size_restart()
int AtomVecAngleKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | SPECIAL_MASK);
@ -1541,7 +1542,7 @@ int AtomVecAngleKokkos::unpack_restart(double *buf)
if (atom->nextra_store)
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
}
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | SPECIAL_MASK);
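// Editor's note: the DELTA change at the top of this file (10000 -> 10,
// combined with step = MAX(DELTA, nmax*0.01)) recurs in every AtomVec*Kokkos
// grow() method below. A minimal sketch (hypothetical helper, not patch
// code) of the new growth policy:
static int next_nmax(int nmax, int delta = 10)
{
  int step = (nmax / 100 > delta) ? nmax / 100 : delta; // MAX(DELTA, 1% of nmax)
  return nmax + step;
}
// Small systems now grow by as few as 10 slots instead of jumping by 10000,
// while large systems still grow by about 1% so reallocation stays amortized.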

View File

@ -24,7 +24,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
/* ---------------------------------------------------------------------- */
@ -55,14 +55,15 @@ AtomVecAtomicKokkos::AtomVecAtomicKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
void AtomVecAtomicKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atomKK->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
@ -74,7 +75,7 @@ void AtomVecAtomicKokkos::grow(int n)
memoryKK->grow_kokkos(atomKK->k_f,atomKK->f,nmax,3,"atom:f");
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@ -393,9 +394,9 @@ struct AtomVecAtomicKokkos_UnpackBorder {
void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
while (first+n >= nmax) grow(0);
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
if(space==Host) {
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
Kokkos::parallel_for(n,f);
@ -415,7 +416,7 @@ void AtomVecAtomicKokkos::unpack_border(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -440,7 +441,7 @@ void AtomVecAtomicKokkos::unpack_border_vel(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -668,7 +669,7 @@ int AtomVecAtomicKokkos::unpack_exchange(double *buf)
{
int nlocal = atom->nlocal;
if (nlocal == nmax) grow(0);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK);
int m = 1;
@ -720,7 +721,7 @@ int AtomVecAtomicKokkos::size_restart()
int AtomVecAtomicKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK );
int m = 1;
@ -755,7 +756,7 @@ int AtomVecAtomicKokkos::unpack_restart(double *buf)
if (atom->nextra_store)
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
}
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK );
int m = 1;

View File

@ -24,7 +24,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
/* ---------------------------------------------------------------------- */
@ -58,14 +58,15 @@ AtomVecBondKokkos::AtomVecBondKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
void AtomVecBondKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atomKK->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
@ -84,7 +85,7 @@ void AtomVecBondKokkos::grow(int n)
memoryKK->grow_kokkos(atomKK->k_bond_atom,atomKK->bond_atom,nmax,atomKK->bond_per_atom,"atom:bond_atom");
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
if (atom->nextra_grow)
for (int iextra = 0; iextra < atomKK->nextra_grow; iextra++)
@ -468,9 +469,9 @@ struct AtomVecBondKokkos_UnpackBorder {
void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space) {
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
if(space==Host) {
struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
@ -492,7 +493,7 @@ void AtomVecBondKokkos::unpack_border(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -518,7 +519,7 @@ void AtomVecBondKokkos::unpack_border_vel(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -866,7 +867,7 @@ int AtomVecBondKokkos::unpack_exchange(double *buf)
{
int nlocal = atom->nlocal;
if (nlocal == nmax) grow(0);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
int k;
@ -934,7 +935,7 @@ int AtomVecBondKokkos::size_restart()
int AtomVecBondKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
int m = 1;
buf[m++] = h_x(i,0);
@ -978,7 +979,7 @@ int AtomVecBondKokkos::unpack_restart(double *buf)
if (atom->nextra_store)
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
}
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK | SPECIAL_MASK);
int m = 1;
h_x(nlocal,0) = buf[m++];

View File

@ -24,7 +24,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
/* ---------------------------------------------------------------------- */
@ -58,14 +58,15 @@ AtomVecChargeKokkos::AtomVecChargeKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
void AtomVecChargeKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atomKK->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
@ -79,7 +80,7 @@ void AtomVecChargeKokkos::grow(int n)
memoryKK->grow_kokkos(atomKK->k_q,atomKK->q,nmax,"atom:q");
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@ -494,7 +495,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
f(buf.view<LMPDeviceType>(),d_x,d_tag,d_type,d_mask,d_q,first);
Kokkos::parallel_for(n,f);
}
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
}
/* ---------------------------------------------------------------------- */
@ -510,7 +511,7 @@ void AtomVecChargeKokkos::unpack_border(int n, int first, double *buf)
if (i == nmax) {
grow(0);
}
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -536,7 +537,7 @@ void AtomVecChargeKokkos::unpack_border_vel(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -797,7 +798,7 @@ int AtomVecChargeKokkos::unpack_exchange(double *buf)
{
int nlocal = atom->nlocal;
if (nlocal == nmax) grow(0);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | Q_MASK);
int m = 1;
@ -850,7 +851,7 @@ int AtomVecChargeKokkos::size_restart()
int AtomVecChargeKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | Q_MASK);
int m = 1;
@ -888,7 +889,7 @@ int AtomVecChargeKokkos::unpack_restart(double *buf)
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
}
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | Q_MASK);
int m = 1;

View File

@ -24,7 +24,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
/* ---------------------------------------------------------------------- */
@ -60,14 +60,15 @@ AtomVecDPDKokkos::AtomVecDPDKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
void AtomVecDPDKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atomKK->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
@ -93,7 +94,7 @@ void AtomVecDPDKokkos::grow(int n)
modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
}
/* ----------------------------------------------------------------------
@ -158,7 +159,7 @@ void AtomVecDPDKokkos::grow_reset()
void AtomVecDPDKokkos::copy(int i, int j, int delflag)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
UCG_MASK | UCGNEW_MASK |
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
@ -184,7 +185,7 @@ void AtomVecDPDKokkos::copy(int i, int j, int delflag)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
UCG_MASK | UCGNEW_MASK |
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
@ -268,7 +269,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecDPDKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,
@ -303,7 +304,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
}
}
} else {
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,
@ -410,8 +411,8 @@ struct AtomVecDPDKokkos_PackCommSelf {
int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst, const int &pbc_flag, const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,
@ -446,8 +447,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
}
}
} else {
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,
@ -528,15 +529,15 @@ struct AtomVecDPDKokkos_UnpackComm {
void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
struct AtomVecDPDKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
struct AtomVecDPDKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
buf,first);
@ -552,7 +553,7 @@ int AtomVecDPDKokkos::pack_comm(int n, int *list, double *buf,
int i,j,m;
double dx,dy,dz;
sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
m = 0;
if (pbc_flag == 0) {
@ -598,7 +599,7 @@ int AtomVecDPDKokkos::pack_comm_vel(int n, int *list, double *buf,
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->sync(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
m = 0;
if (pbc_flag == 0) {
@ -685,7 +686,7 @@ void AtomVecDPDKokkos::unpack_comm(int n, int first, double *buf)
h_uChem[i] = buf[m++];
}
modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
}
/* ---------------------------------------------------------------------- */
@ -709,7 +710,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
h_uChem[i] = buf[m++];
}
modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Host,X_MASK|V_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
}
/* ---------------------------------------------------------------------- */
@ -717,7 +718,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
atomKK->sync(Host,F_MASK);
int m = 0;
const int last = first + n;
@ -734,8 +735,8 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0) {
sync(Host,F_MASK);
modified(Host,F_MASK);
atomKK->sync(Host,F_MASK);
atomKK->modified(Host,F_MASK);
}
int m = 0;
@ -819,7 +820,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
{
X_FLOAT dx,dy,dz;
sync(space,ALL_MASK);
atomKK->sync(space,ALL_MASK);
if (pbc_flag != 0) {
if (domain->triclinic == 0) {
@ -876,7 +877,7 @@ int AtomVecDPDKokkos::pack_border(int n, int *list, double *buf,
int i,j,m;
double dx,dy,dz;
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
m = 0;
if (pbc_flag == 0) {
@ -937,7 +938,7 @@ int AtomVecDPDKokkos::pack_border_vel(int n, int *list, double *buf,
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
m = 0;
if (pbc_flag == 0) {
@ -1032,7 +1033,7 @@ int AtomVecDPDKokkos::pack_comm_hybrid(int n, int *list, double *buf)
{
int i,j,m;
sync(Host,DPDTHETA_MASK | UCOND_MASK |
atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK);
m = 0;
@ -1052,7 +1053,7 @@ int AtomVecDPDKokkos::pack_border_hybrid(int n, int *list, double *buf)
{
int i,j,m;
sync(Host,DPDTHETA_MASK | UCOND_MASK |
atomKK->sync(Host,DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
m = 0;
@ -1127,11 +1128,11 @@ struct AtomVecDPDKokkos_UnpackBorder {
void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
UCG_MASK|UCGNEW_MASK);
while (first+n >= nmax) grow(0);
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
if(space==Host) {
@ -1179,7 +1180,7 @@ void AtomVecDPDKokkos::unpack_border(int n, int first, double *buf)
m += modify->fix[atom->extra_border[iextra]]->
unpack_border(n,first,&buf[m]);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
}
@ -1217,7 +1218,7 @@ void AtomVecDPDKokkos::unpack_border_vel(int n, int first, double *buf)
m += modify->fix[atom->extra_border[iextra]]->
unpack_border(n,first,&buf[m]);
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
}
@ -1237,7 +1238,7 @@ int AtomVecDPDKokkos::unpack_comm_hybrid(int n, int first, double *buf)
h_uChem(i) = buf[m++];
}
modified(Host,DPDTHETA_MASK | UCOND_MASK |
atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK );
return m;
@ -1260,7 +1261,7 @@ int AtomVecDPDKokkos::unpack_border_hybrid(int n, int first, double *buf)
h_uCGnew(i) = buf[m++];
}
modified(Host,DPDTHETA_MASK | UCOND_MASK |
atomKK->modified(Host,DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK);
return m;
@ -1384,7 +1385,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
DVECTOR_MASK);
@ -1402,7 +1403,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
int AtomVecDPDKokkos::pack_exchange(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
DVECTOR_MASK);
@ -1518,7 +1519,7 @@ int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nre
k_count.sync<LMPHostType>();
}
modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
DVECTOR_MASK);
@ -1556,7 +1557,7 @@ int AtomVecDPDKokkos::unpack_exchange(double *buf)
m += modify->fix[atom->extra_grow[iextra]]->
unpack_exchange(nlocal,&buf[m]);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
DVECTOR_MASK);
@ -1593,7 +1594,7 @@ int AtomVecDPDKokkos::size_restart()
int AtomVecDPDKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
@ -1658,7 +1659,7 @@ int AtomVecDPDKokkos::unpack_restart(double *buf)
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
}
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | DPDTHETA_MASK |
UCG_MASK | UCGNEW_MASK |
UCOND_MASK | UMECH_MASK | UCHEM_MASK | DVECTOR_MASK);
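
Every hunk in this file follows one pattern: the bare sync()/modified() calls inherited by the atom-vec class are rewritten as explicit atomKK->sync()/atomKK->modified() calls, so the host/device dirty-flag bookkeeping is routed through the single AtomKokkos instance. A rough sketch of that mask-based bookkeeping, with illustrative names rather than the real LAMMPS types:

#include <cstdint>
#include <cstdio>

enum Space { Host, Device };
constexpr uint32_t X_MASK = 1u << 0;   // per-atom position array
constexpr uint32_t V_MASK = 1u << 1;   // per-atom velocity array

struct AtomContainer {
  uint32_t host_dirty = 0, device_dirty = 0;

  // record that arrays in 'mask' were written in 'space'
  void modified(Space space, uint32_t mask) {
    (space == Host ? host_dirty : device_dirty) |= mask;
  }
  // make 'space' current: copy any array dirty on the other side
  void sync(Space space, uint32_t mask) {
    uint32_t &other = (space == Host) ? device_dirty : host_dirty;
    if (other & mask) {
      std::printf("transfer mask 0x%x to %s\n", (unsigned)(other & mask),
                  space == Host ? "host" : "device");
      other &= ~mask;
    }
  }
};

int main() {
  AtomContainer atomKK;                      // one shared bookkeeping object
  atomKK.modified(Device, X_MASK | V_MASK);  // a kernel wrote x and v
  atomKK.sync(Host, X_MASK);                 // host read of x: one transfer
  atomKK.sync(Host, X_MASK);                 // already clean: no transfer
}
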

src/KOKKOS/atom_vec_full_kokkos.cpp View File

@ -24,7 +24,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
/* ---------------------------------------------------------------------- */
@ -58,14 +58,15 @@ AtomVecFullKokkos::AtomVecFullKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
void AtomVecFullKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atomKK->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
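
This grow() change, repeated below for the molecular and sphere variants, swaps a flat 10000-slot increment for max(DELTA, 1% of nmax) with DELTA now 10: small systems grow in small steps, large systems grow geometrically. A sketch of the resulting schedule, assuming the same MAX semantics as the macro:

#include <algorithm>
#include <cstdio>

int main() {
  const int DELTA = 10;                 // new floor (was a flat 10000 step)
  int nmax = 0, reallocs = 0;
  const int target = 1000000;           // eventual per-processor atom count
  while (nmax < target) {
    int step = std::max(DELTA, static_cast<int>(nmax * 0.01));
    nmax += step;                       // grow by max(DELTA, 1% of nmax)
    ++reallocs;
  }
  // past nmax = 1000 the growth is geometric (about 1% per realloc), so the
  // number of reallocations stays logarithmic in the final size while the
  // over-allocation is bounded by ~1% instead of up to 10000 unused slots
  std::printf("nmax = %d after %d reallocations\n", nmax, reallocs);
}
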
@ -123,7 +124,7 @@ void AtomVecFullKokkos::grow(int n)
atomKK->improper_per_atom,"atom:improper_atom4");
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@ -608,9 +609,9 @@ struct AtomVecFullKokkos_UnpackBorder {
void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space) {
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
if(space==Host) {
struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
@ -632,7 +633,7 @@ void AtomVecFullKokkos::unpack_border(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -659,7 +660,7 @@ void AtomVecFullKokkos::unpack_border_vel(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -1204,7 +1205,7 @@ int AtomVecFullKokkos::unpack_exchange(double *buf)
{
int nlocal = atom->nlocal;
if (nlocal == nmax) grow(0);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
@ -1297,7 +1298,7 @@ int AtomVecFullKokkos::size_restart()
int AtomVecFullKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
@ -1370,10 +1371,10 @@ int AtomVecFullKokkos::unpack_restart(double *buf)
if (atom->nextra_store)
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
}
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | Q_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);

src/KOKKOS/atom_vec_hybrid_kokkos.cpp View File

@ -307,7 +307,7 @@ int AtomVecHybridKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf, int
int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
sync(Host,X_MASK);
atomKK->sync(Host,X_MASK);
int i,j,k,m;
double dx,dy,dz;
@ -351,7 +351,7 @@ int AtomVecHybridKokkos::pack_comm(int n, int *list, double *buf,
int AtomVecHybridKokkos::pack_comm_vel(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
int i,j,k,m;
double dx,dy,dz,dvx,dvy,dvz;
@ -463,7 +463,7 @@ void AtomVecHybridKokkos::unpack_comm(int n, int first, double *buf)
h_x(i,2) = buf[m++];
}
modified(Host,X_MASK);
atomKK->modified(Host,X_MASK);
// unpack sub-style contributions as contiguous chunks
@ -500,7 +500,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf)
}
}
modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
// unpack sub-style contributions as contiguous chunks
@ -512,7 +512,7 @@ void AtomVecHybridKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecHybridKokkos::pack_reverse(int n, int first, double *buf)
{
sync(Host,F_MASK);
atomKK->sync(Host,F_MASK);
int i,k,m,last;
@ -546,7 +546,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf)
h_f(j,2) += buf[m++];
}
modified(Host,F_MASK);
atomKK->modified(Host,F_MASK);
// unpack sub-style contributions as contiguous chunks
@ -559,7 +559,7 @@ void AtomVecHybridKokkos::unpack_reverse(int n, int *list, double *buf)
int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
int i,j,k,m;
double dx,dy,dz;
@ -613,7 +613,7 @@ int AtomVecHybridKokkos::pack_border(int n, int *list, double *buf,
int AtomVecHybridKokkos::pack_border_vel(int n, int *list, double *buf,
int pbc_flag, int *pbc)
{
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
int i,j,k,m;
double dx,dy,dz,dvx,dvy,dvz;
int omega_flag = atom->omega_flag;
@ -741,7 +741,7 @@ void AtomVecHybridKokkos::unpack_border(int n, int first, double *buf)
h_mask[i] = (int) ubuf(buf[m++]).i;
}
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
// unpack sub-style contributions as contiguous chunks
@ -787,7 +787,7 @@ void AtomVecHybridKokkos::unpack_border_vel(int n, int first, double *buf)
}
}
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
// unpack sub-style contributions as contiguous chunks
@ -969,7 +969,7 @@ void AtomVecHybridKokkos::create_atom(int itype, double *coord)
void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **values)
{
sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
atomKK->sync(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
int nlocal = atom->nlocal;
if (nlocal == nmax) grow(0);
@ -1000,7 +1000,7 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val
h_angmom(nlocal,2) = 0.0;
}
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|IMAGE_MASK|MASK_MASK|V_MASK|OMEGA_MASK/*|ANGMOM_MASK*/);
// each sub-style parses sub-style specific values
@ -1017,13 +1017,13 @@ void AtomVecHybridKokkos::data_atom(double *coord, imageint imagetmp, char **val
void AtomVecHybridKokkos::data_vel(int m, char **values)
{
sync(Host,V_MASK);
atomKK->sync(Host,V_MASK);
h_v(m,0) = atof(values[0]);
h_v(m,1) = atof(values[1]);
h_v(m,2) = atof(values[2]);
modified(Host,V_MASK);
atomKK->modified(Host,V_MASK);
// each sub-style parses sub-style specific values
@ -1038,7 +1038,7 @@ void AtomVecHybridKokkos::data_vel(int m, char **values)
void AtomVecHybridKokkos::pack_data(double **buf)
{
sync(Host,TAG_MASK|TYPE_MASK|X_MASK);
atomKK->sync(Host,TAG_MASK|TYPE_MASK|X_MASK);
int k,m;
@ -1089,7 +1089,7 @@ void AtomVecHybridKokkos::write_data(FILE *fp, int n, double **buf)
void AtomVecHybridKokkos::pack_vel(double **buf)
{
sync(Host,V_MASK);
atomKK->sync(Host,V_MASK);
int k,m;

src/KOKKOS/atom_vec_kokkos.cpp View File

@ -267,6 +267,114 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
return n*3;
}
/* ---------------------------------------------------------------------- */
template<class DeviceType,int TRICLINIC>
struct AtomVecKokkos_PackCommSelfFused {
typedef DeviceType device_type;
typename ArrayTypes<DeviceType>::t_x_array_randomread _x;
typename ArrayTypes<DeviceType>::t_x_array _xw;
typename ArrayTypes<DeviceType>::t_int_2d_const _list;
typename ArrayTypes<DeviceType>::t_int_2d_const _pbc;
typename ArrayTypes<DeviceType>::t_int_1d_const _pbc_flag;
typename ArrayTypes<DeviceType>::t_int_1d_const _firstrecv;
typename ArrayTypes<DeviceType>::t_int_1d_const _sendnum_scan;
typename ArrayTypes<DeviceType>::t_int_1d_const _g2l;
X_FLOAT _xprd,_yprd,_zprd,_xy,_xz,_yz;
AtomVecKokkos_PackCommSelfFused(
const typename DAT::tdual_x_array &x,
const typename DAT::tdual_int_2d &list,
const typename DAT::tdual_int_2d &pbc,
const typename DAT::tdual_int_1d &pbc_flag,
const typename DAT::tdual_int_1d &firstrecv,
const typename DAT::tdual_int_1d &sendnum_scan,
const typename DAT::tdual_int_1d &g2l,
const X_FLOAT &xprd, const X_FLOAT &yprd, const X_FLOAT &zprd,
const X_FLOAT &xy, const X_FLOAT &xz, const X_FLOAT &yz):
_x(x.view<DeviceType>()),_xw(x.view<DeviceType>()),
_list(list.view<DeviceType>()),
_pbc(pbc.view<DeviceType>()),
_pbc_flag(pbc_flag.view<DeviceType>()),
_firstrecv(firstrecv.view<DeviceType>()),
_sendnum_scan(sendnum_scan.view<DeviceType>()),
_g2l(g2l.view<DeviceType>()),
_xprd(xprd),_yprd(yprd),_zprd(zprd),
_xy(xy),_xz(xz),_yz(yz) {};
KOKKOS_INLINE_FUNCTION
void operator() (const int& ii) const {
int iswap = 0;
while (ii >= _sendnum_scan[iswap]) iswap++;
int i = ii;
if (iswap > 0)
i = ii - _sendnum_scan[iswap-1];
const int _nfirst = _firstrecv[iswap];
const int nlocal = _firstrecv[0];
int j = _list(iswap,i);
if (j >= nlocal)
j = _g2l(j-nlocal);
if (_pbc_flag(ii) == 0) {
_xw(i+_nfirst,0) = _x(j,0);
_xw(i+_nfirst,1) = _x(j,1);
_xw(i+_nfirst,2) = _x(j,2);
} else {
if (TRICLINIC == 0) {
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd;
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd;
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
} else {
_xw(i+_nfirst,0) = _x(j,0) + _pbc(ii,0)*_xprd + _pbc(ii,5)*_xy + _pbc(ii,4)*_xz;
_xw(i+_nfirst,1) = _x(j,1) + _pbc(ii,1)*_yprd + _pbc(ii,3)*_yz;
_xw(i+_nfirst,2) = _x(j,2) + _pbc(ii,2)*_zprd;
}
}
}
};
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan,
const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc,
const DAT::tdual_int_1d &g2l) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(domain->triclinic) {
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(domain->triclinic) {
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
} else {
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,0> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
Kokkos::parallel_for(n,f);
}
}
return n*3;
}
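
pack_comm_self_fused flattens every swap into one index range so a single kernel launch covers the whole forward exchange: the swap is recovered from an inclusive prefix scan of the send counts, and a send-list entry that is itself a ghost is remapped to its owning local atom through the g2l table. A serial sketch of that indexing on toy data (PBC shifts omitted):

#include <cstdio>
#include <vector>

int main() {
  // toy setup: two swaps sending 3 and 2 atoms -> inclusive scan {3, 5}
  std::vector<int> sendnum_scan = {3, 5};
  std::vector<int> firstrecv   = {4, 7};    // firstrecv[0] == nlocal
  const int nlocal = firstrecv[0];
  std::vector<std::vector<int>> list = {{0, 1, 2}, {3, 4}};
  std::vector<int> g2l = {1, 2, 0};         // ghost index -> owning local atom
  std::vector<double> x(9, 0.0);            // 1-D positions, locals + ghosts

  const int totalsend = sendnum_scan.back();
  for (int ii = 0; ii < totalsend; ii++) {  // one flat range over all swaps
    int iswap = 0;
    while (ii >= sendnum_scan[iswap]) iswap++;        // recover the swap
    const int i = (iswap > 0) ? ii - sendnum_scan[iswap - 1] : ii;
    int j = list[iswap][i];
    if (j >= nlocal) j = g2l[j - nlocal];             // ghost -> owner
    x[firstrecv[iswap] + i] = x[j];   // copy position (PBC shift omitted)
  }
  std::printf("packed %d ghost positions in one pass\n", totalsend);
}
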
/* ---------------------------------------------------------------------- */
template<class DeviceType>

src/KOKKOS/atom_vec_kokkos.h View File

@ -51,6 +51,14 @@ class AtomVecKokkos : public AtomVec {
const int & iswap, const int nfirst,
const int &pbc_flag, const int pbc[]);
virtual int
pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list,
const DAT::tdual_int_1d &sendnum_scan,
const DAT::tdual_int_1d &firstrecv,
const DAT::tdual_int_1d &pbc_flag,
const DAT::tdual_int_2d &pbc,
const DAT::tdual_int_1d &g2l);
virtual int
pack_comm_kokkos(const int &n, const DAT::tdual_int_2d &list,
const int & iswap, const DAT::tdual_xfloat_2d &buf,

src/KOKKOS/atom_vec_molecular_kokkos.cpp View File

@ -24,7 +24,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
/* ---------------------------------------------------------------------- */
@ -58,14 +58,15 @@ AtomVecMolecularKokkos::AtomVecMolecularKokkos(LAMMPS *lmp) : AtomVecKokkos(lmp)
void AtomVecMolecularKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atomKK->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
@ -121,7 +122,7 @@ void AtomVecMolecularKokkos::grow(int n)
atomKK->improper_per_atom,"atom:improper_atom4");
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
if (atom->nextra_grow)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
@ -361,7 +362,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
atomKK->sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
@ -388,7 +389,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
}
}
} else {
sync(Device,X_MASK);
atomKK->sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
@ -477,8 +478,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
const int nfirst, const int &pbc_flag,
const int* const pbc) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1>
@ -505,8 +506,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
}
}
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
atomKK->sync(Device,X_MASK);
atomKK->modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1>
@ -565,13 +566,13 @@ struct AtomVecMolecularKokkos_UnpackComm {
void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
atomKK->sync(Device,X_MASK);
atomKK->modified(Device,X_MASK);
struct AtomVecMolecularKokkos_UnpackComm<LMPDeviceType> f(atomKK->k_x,buf,first);
Kokkos::parallel_for(n,f);
}
@ -714,7 +715,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK);
atomKK->sync(Host,F_MASK);
int m = 0;
const int last = first + n;
@ -731,7 +732,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0)
modified(Host,F_MASK);
atomKK->modified(Host,F_MASK);
int m = 0;
for (int i = 0; i < n; i++) {
@ -1032,9 +1033,9 @@ struct AtomVecMolecularKokkos_UnpackBorder {
void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,
ExecutionSpace space) {
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
if(space==Host) {
struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
@ -1056,7 +1057,7 @@ void AtomVecMolecularKokkos::unpack_border(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -1082,7 +1083,7 @@ void AtomVecMolecularKokkos::unpack_border_vel(int n, int first, double *buf)
last = first + n;
for (i = first; i < last; i++) {
if (i == nmax) grow(0);
modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
atomKK->modified(Host,X_MASK|V_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
h_x(i,0) = buf[m++];
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
@ -1615,7 +1616,7 @@ int AtomVecMolecularKokkos::unpack_exchange(double *buf)
{
int nlocal = atom->nlocal;
if (nlocal == nmax) grow(0);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
@ -1707,7 +1708,7 @@ int AtomVecMolecularKokkos::size_restart()
int AtomVecMolecularKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);
@ -1780,7 +1781,7 @@ int AtomVecMolecularKokkos::unpack_restart(double *buf)
memory->grow(atom->extra,nmax,atom->nextra_store,"atom:extra");
}
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | MOLECULE_MASK | BOND_MASK |
ANGLE_MASK | DIHEDRAL_MASK | IMPROPER_MASK | SPECIAL_MASK);

src/KOKKOS/atom_vec_sphere_kokkos.cpp View File

@ -30,7 +30,7 @@
using namespace LAMMPS_NS;
#define DELTA 10000
#define DELTA 10
static const double MY_PI = 3.14159265358979323846; // pi
@ -93,14 +93,15 @@ void AtomVecSphereKokkos::init()
void AtomVecSphereKokkos::grow(int n)
{
if (n == 0) nmax += DELTA;
int step = MAX(DELTA,nmax*0.01);
if (n == 0) nmax += step;
else nmax = n;
atom->nmax = nmax;
if (nmax < 0 || nmax > MAXSMALLINT)
error->one(FLERR,"Per-processor system is too big");
sync(Device,ALL_MASK);
modified(Device,ALL_MASK);
atomKK->sync(Device,ALL_MASK);
atomKK->modified(Device,ALL_MASK);
memoryKK->grow_kokkos(atomKK->k_tag,atomKK->tag,nmax,"atom:tag");
memoryKK->grow_kokkos(atomKK->k_type,atomKK->type,nmax,"atom:type");
@ -120,7 +121,7 @@ void AtomVecSphereKokkos::grow(int n)
modify->fix[atom->extra_grow[iextra]]->grow_arrays(nmax);
grow_reset();
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
}
/* ----------------------------------------------------------------------
@ -172,7 +173,7 @@ void AtomVecSphereKokkos::grow_reset()
void AtomVecSphereKokkos::copy(int i, int j, int delflag)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | RADIUS_MASK |
RMASS_MASK | OMEGA_MASK);
@ -197,7 +198,7 @@ void AtomVecSphereKokkos::copy(int i, int j, int delflag)
for (int iextra = 0; iextra < atom->nextra_grow; iextra++)
modify->fix[atom->extra_grow[iextra]]->copy_arrays(i,j,delflag);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | RADIUS_MASK |
RMASS_MASK | OMEGA_MASK);
}
@ -277,7 +278,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
@ -316,7 +317,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
}
}
} else {
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
@ -464,7 +465,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
const int* const pbc)
{
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
if(pbc_flag) {
if(deform_vremap) {
if(domain->triclinic) {
@ -595,7 +596,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
}
}
} else {
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
if(pbc_flag) {
if(deform_vremap) {
if(domain->triclinic) {
@ -795,8 +796,8 @@ int AtomVecSphereKokkos::pack_comm_self(
if (radvary == 0)
return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
if(commKK->forward_comm_on_host) {
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
@ -835,8 +836,8 @@ int AtomVecSphereKokkos::pack_comm_self(
}
}
} else {
sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
@ -926,14 +927,14 @@ void AtomVecSphereKokkos::unpack_comm_kokkos(
return;
}
if(commKK->forward_comm_on_host) {
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
buf,first);
Kokkos::parallel_for(n,f);
} else {
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
struct AtomVecSphereKokkos_UnpackComm<LMPDeviceType> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -998,7 +999,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
if (radvary == 0) {
struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
atomKK->k_x,
@ -1015,7 +1016,7 @@ void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
Kokkos::parallel_for(n,f);
}
} else {
modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
if (radvary == 0) {
struct AtomVecSphereKokkos_UnpackCommVel<LMPDeviceType,0> f(
atomKK->k_x,
@ -1044,7 +1045,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf,
if (radvary == 0) {
// Not sure if we need to call sync for X here
sync(Host,X_MASK);
atomKK->sync(Host,X_MASK);
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
@ -1071,7 +1072,7 @@ int AtomVecSphereKokkos::pack_comm(int n, int *list, double *buf,
}
}
} else {
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
@ -1115,7 +1116,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf,
double dx,dy,dz,dvx,dvy,dvz;
if (radvary == 0) {
sync(Host,X_MASK|V_MASK|OMEGA_MASK);
atomKK->sync(Host,X_MASK|V_MASK|OMEGA_MASK);
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
@ -1178,7 +1179,7 @@ int AtomVecSphereKokkos::pack_comm_vel(int n, int *list, double *buf,
}
}
} else {
sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
m = 0;
if (pbc_flag == 0) {
for (i = 0; i < n; i++) {
@ -1257,7 +1258,7 @@ int AtomVecSphereKokkos::pack_comm_hybrid(int n, int *list, double *buf)
{
if (radvary == 0) return 0;
sync(Host,RADIUS_MASK|RMASS_MASK);
atomKK->sync(Host,RADIUS_MASK|RMASS_MASK);
int m = 0;
for (int i = 0; i < n; i++) {
@ -1280,7 +1281,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf)
h_x(i,1) = buf[m++];
h_x(i,2) = buf[m++];
}
modified(Host,X_MASK);
atomKK->modified(Host,X_MASK);
} else {
int m = 0;
const int last = first + n;
@ -1291,7 +1292,7 @@ void AtomVecSphereKokkos::unpack_comm(int n, int first, double *buf)
h_radius[i] = buf[m++];
h_rmass[i] = buf[m++];
}
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
}
}
@ -1313,7 +1314,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf)
h_omega(i,1) = buf[m++];
h_omega(i,2) = buf[m++];
}
modified(Host,X_MASK|V_MASK|OMEGA_MASK);
atomKK->modified(Host,X_MASK|V_MASK|OMEGA_MASK);
} else {
int m = 0;
const int last = first + n;
@ -1330,7 +1331,7 @@ void AtomVecSphereKokkos::unpack_comm_vel(int n, int first, double *buf)
h_omega(i,1) = buf[m++];
h_omega(i,2) = buf[m++];
}
modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
}
}
@ -1346,7 +1347,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
h_radius[i] = buf[m++];
h_rmass[i] = buf[m++];
}
modified(Host,RADIUS_MASK|RMASS_MASK);
atomKK->modified(Host,RADIUS_MASK|RMASS_MASK);
return m;
}
@ -1355,7 +1356,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
sync(Host,F_MASK|TORQUE_MASK);
atomKK->sync(Host,F_MASK|TORQUE_MASK);
int m = 0;
const int last = first + n;
@ -1375,7 +1376,7 @@ int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
{
if(n > 0)
sync(Host,TORQUE_MASK);
atomKK->sync(Host,TORQUE_MASK);
int m = 0;
const int last = first + n;
@ -1392,7 +1393,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0) {
modified(Host,F_MASK|TORQUE_MASK);
atomKK->modified(Host,F_MASK|TORQUE_MASK);
}
int m = 0;
@ -1412,7 +1413,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf)
{
if(n > 0) {
modified(Host,TORQUE_MASK);
atomKK->modified(Host,TORQUE_MASK);
}
int m = 0;
@ -1492,7 +1493,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
X_FLOAT dx,dy,dz;
// This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
sync(space,ALL_MASK);
atomKK->sync(space,ALL_MASK);
if (pbc_flag != 0) {
if (domain->triclinic == 0) {
@ -1549,7 +1550,7 @@ int AtomVecSphereKokkos::pack_border(
int i,j,m;
double dx,dy,dz;
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
m = 0;
if (pbc_flag == 0) {
@ -1686,7 +1687,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
X_FLOAT dvx=0,dvy=0,dvz=0;
// This was in atom_vec_dpd_kokkos but doesn't appear in any other atom_vec
sync(space,ALL_MASK);
atomKK->sync(space,ALL_MASK);
if (pbc_flag != 0) {
if (domain->triclinic == 0) {
@ -1776,7 +1777,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf,
int i,j,m;
double dx,dy,dz,dvx,dvy,dvz;
sync(Host,ALL_MASK);
atomKK->sync(Host,ALL_MASK);
m = 0;
if (pbc_flag == 0) {
@ -1866,7 +1867,7 @@ int AtomVecSphereKokkos::pack_border_vel(int n, int *list, double *buf,
int AtomVecSphereKokkos::pack_border_hybrid(int n, int *list, double *buf)
{
sync(Host,RADIUS_MASK|RMASS_MASK);
atomKK->sync(Host,RADIUS_MASK|RMASS_MASK);
int m = 0;
for (int i = 0; i < n; i++) {
@ -1942,7 +1943,7 @@ void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first,
Kokkos::parallel_for(n,f);
}
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
RADIUS_MASK|RMASS_MASK);
}
@ -1969,7 +1970,7 @@ void AtomVecSphereKokkos::unpack_border(int n, int first, double *buf)
m += modify->fix[atom->extra_border[iextra]]->
unpack_border(n,first,&buf[m]);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK);
}
@ -2052,7 +2053,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos(
Kokkos::parallel_for(n,f);
}
modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
}
@ -2085,7 +2086,7 @@ void AtomVecSphereKokkos::unpack_border_vel(int n, int first, double *buf)
m += modify->fix[atom->extra_border[iextra]]->
unpack_border(n,first,&buf[m]);
modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
atomKK->modified(Host,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
}
/* ---------------------------------------------------------------------- */
@ -2098,7 +2099,7 @@ int AtomVecSphereKokkos::unpack_border_hybrid(int n, int first, double *buf)
h_radius[i] = buf[m++];
h_rmass[i] = buf[m++];
}
modified(Host,RADIUS_MASK|RMASS_MASK);
atomKK->modified(Host,RADIUS_MASK|RMASS_MASK);
return m;
}
@ -2218,7 +2219,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
OMEGA_MASK);
@ -2239,7 +2240,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
int AtomVecSphereKokkos::pack_exchange(int i, double *buf)
{
sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
OMEGA_MASK);
@ -2354,7 +2355,7 @@ int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int
k_count.sync<LMPHostType>();
}
modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(space,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
OMEGA_MASK);
@ -2391,7 +2392,7 @@ int AtomVecSphereKokkos::unpack_exchange(double *buf)
m += modify->fix[atom->extra_grow[iextra]]->
unpack_exchange(nlocal,&buf[m]);
modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | V_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | RADIUS_MASK | RMASS_MASK |
OMEGA_MASK);
@ -2427,7 +2428,7 @@ int AtomVecSphereKokkos::size_restart()
int AtomVecSphereKokkos::pack_restart(int i, double *buf)
{
sync(Host,X_MASK | TAG_MASK | TYPE_MASK |
atomKK->sync(Host,X_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | V_MASK |
RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
@ -2494,7 +2495,7 @@ int AtomVecSphereKokkos::unpack_restart(double *buf)
for (int i = 0; i < size; i++) extra[nlocal][i] = buf[m++];
}
modified(Host,X_MASK | TAG_MASK | TYPE_MASK |
atomKK->modified(Host,X_MASK | TAG_MASK | TYPE_MASK |
MASK_MASK | IMAGE_MASK | V_MASK |
RADIUS_MASK | RMASS_MASK | OMEGA_MASK);
@ -2616,14 +2617,14 @@ int AtomVecSphereKokkos::data_atom_hybrid(int nlocal, char **values)
void AtomVecSphereKokkos::data_vel(int m, char **values)
{
sync(Host,V_MASK|OMEGA_MASK);
atomKK->sync(Host,V_MASK|OMEGA_MASK);
h_v(m,0) = atof(values[0]);
h_v(m,1) = atof(values[1]);
h_v(m,2) = atof(values[2]);
h_omega(m,0) = atof(values[3]);
h_omega(m,1) = atof(values[4]);
h_omega(m,2) = atof(values[5]);
modified(Host,V_MASK|OMEGA_MASK);
atomKK->modified(Host,V_MASK|OMEGA_MASK);
}
/* ----------------------------------------------------------------------
@ -2632,11 +2633,11 @@ void AtomVecSphereKokkos::data_vel(int m, char **values)
int AtomVecSphereKokkos::data_vel_hybrid(int m, char **values)
{
sync(Host,OMEGA_MASK);
atomKK->sync(Host,OMEGA_MASK);
omega[m][0] = atof(values[0]);
omega[m][1] = atof(values[1]);
omega[m][2] = atof(values[2]);
modified(Host,OMEGA_MASK);
atomKK->modified(Host,OMEGA_MASK);
return 3;
}
@ -2711,7 +2712,7 @@ int AtomVecSphereKokkos::write_data_hybrid(FILE *fp, double *buf)
void AtomVecSphereKokkos::pack_vel(double **buf)
{
sync(Host,TAG_MASK|V_MASK|OMEGA_MASK);
atomKK->sync(Host,TAG_MASK|V_MASK|OMEGA_MASK);
int nlocal = atom->nlocal;
for (int i = 0; i < nlocal; i++) {
@ -2731,7 +2732,7 @@ void AtomVecSphereKokkos::pack_vel(double **buf)
int AtomVecSphereKokkos::pack_vel_hybrid(int i, double *buf)
{
sync(Host,OMEGA_MASK);
atomKK->sync(Host,OMEGA_MASK);
buf[0] = h_omega(i,0);
buf[1] = h_omega(i,1);

src/KOKKOS/comm_kokkos.cpp View File

@ -57,10 +57,9 @@ CommKokkos::CommKokkos(LAMMPS *lmp) : CommBrick(lmp)
memory->destroy(buf_recv);
buf_recv = NULL;
k_exchange_sendlist = DAT::
tdual_int_1d("comm:k_exchange_sendlist",100);
k_exchange_copylist = DAT::
tdual_int_1d("comm:k_exchange_copylist",100);
k_exchange_lists = DAT::tdual_int_2d("comm:k_exchange_lists",2,100);
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
k_count = DAT::tdual_int_scalar("comm:k_count");
k_sendflag = DAT::tdual_int_1d("comm:k_sendflag",100);
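
Storing the send and copy lists as two subviews of one 2-D k_exchange_lists view means a single resize keeps both the same length, which the retry loop in exchange_device below depends on. A plain C++ analog of the layout (ExchangeLists is a hypothetical stand-in, not LAMMPS API):

#include <algorithm>
#include <cstdio>
#include <vector>

// Hypothetical analog of k_exchange_lists: two logical lists stored as two
// rows of one buffer so a single resize grows both in lockstep.
struct ExchangeLists {
  std::vector<int> storage;   // row 0 = sendlist, row 1 = copylist
  int cap = 0;
  void resize(int n) {
    std::vector<int> next(2 * n, 0);
    for (int r = 0; r < 2; r++)   // preserve old rows, like a Kokkos resize
      std::copy_n(storage.begin() + r * cap, std::min(cap, n),
                  next.begin() + r * n);
    storage.swap(next);
    cap = n;
  }
  int *sendlist() { return storage.data(); }        // like subview(...,0,ALL)
  int *copylist() { return storage.data() + cap; }  // like subview(...,1,ALL)
};

int main() {
  ExchangeLists lists;
  lists.resize(100);
  lists.sendlist()[0] = 42;
  lists.copylist()[0] = 7;
  lists.resize(110);  // one call; both lists stay the same length
  std::printf("cap %d, sendlist[0]=%d, copylist[0]=%d\n",
              lists.cap, lists.sendlist()[0], lists.copylist()[0]);
}
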
@ -187,71 +186,80 @@ void CommKokkos::forward_comm_device(int dummy)
k_sendlist.sync<DeviceType>();
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,X_MASK);
for (int iswap = 0; iswap < nswap; iswap++) {
if (sendproc[iswap] != me) {
if (comm_x_only) {
if (size_forward_recv[iswap]) {
buf = atomKK->k_x.view<DeviceType>().data() +
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
}
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n) {
MPI_Send(k_buf_send.view<DeviceType>().data(),
n,MPI_DOUBLE,sendproc[iswap],0,world);
}
if (comm->nprocs == 1 && !ghost_velocity) {
k_swap.sync<DeviceType>();
k_swap2.sync<DeviceType>();
k_pbc.sync<DeviceType>();
n = avec->pack_comm_self_fused(totalsend,k_sendlist,k_sendnum_scan,
k_firstrecv,k_pbc_flag,k_pbc,k_g2l);
} else {
if (size_forward_recv[iswap]) {
MPI_Wait(&request,MPI_STATUS_IGNORE);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,X_MASK);
for (int iswap = 0; iswap < nswap; iswap++) {
if (sendproc[iswap] != me) {
if (comm_x_only) {
if (size_forward_recv[iswap]) {
buf = atomKK->k_x.view<DeviceType>().data() +
firstrecv[iswap]*atomKK->k_x.view<DeviceType>().extent(1);
MPI_Irecv(buf,size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
}
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,
iswap,k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n) {
MPI_Send(k_buf_send.view<DeviceType>().data(),
n,MPI_DOUBLE,sendproc[iswap],0,world);
}
if (size_forward_recv[iswap]) {
MPI_Wait(&request,MPI_STATUS_IGNORE);
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::
space,X_MASK);
}
} else if (ghost_velocity) {
if (size_forward_recv[iswap]) {
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
}
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n) {
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
MPI_DOUBLE,sendproc[iswap],0,world);
}
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
DeviceType::fence();
} else {
if (size_forward_recv[iswap])
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n)
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
DeviceType::fence();
}
} else if (ghost_velocity) {
if (size_forward_recv[iswap]) {
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
}
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n) {
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
MPI_DOUBLE,sendproc[iswap],0,world);
}
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
DeviceType::fence();
} else {
if (size_forward_recv[iswap])
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),
size_forward_recv[iswap],MPI_DOUBLE,
recvproc[iswap],0,world,&request);
n = avec->pack_comm_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
if (n)
MPI_Send(k_buf_send.view<DeviceType>().data(),n,
MPI_DOUBLE,sendproc[iswap],0,world);
if (size_forward_recv[iswap]) MPI_Wait(&request,MPI_STATUS_IGNORE);
avec->unpack_comm_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_recv);
DeviceType::fence();
}
} else {
if (!ghost_velocity) {
if (sendnum[iswap])
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
} else {
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
DeviceType::fence();
if (!ghost_velocity) {
if (sendnum[iswap])
n = avec->pack_comm_self(sendnum[iswap],k_sendlist,iswap,
firstrecv[iswap],pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
} else {
n = avec->pack_comm_vel_kokkos(sendnum[iswap],k_sendlist,iswap,
k_buf_send,pbc_flag[iswap],pbc[iswap]);
DeviceType::fence();
avec->unpack_comm_vel_kokkos(recvnum[iswap],firstrecv[iswap],k_buf_send);
DeviceType::fence();
}
}
}
}
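
The restructured loop above adds a fast path: on a single MPI rank with ghost velocities disabled, the per-swap pack/send/recv/unpack sequence collapses into one fused self-pack over all swaps, trading nswap kernel launches for one. A toy sketch of that dispatch decision (launch() stands in for Kokkos::parallel_for):

#include <cstdio>

// launch() is a toy stand-in for a Kokkos::parallel_for kernel launch
static void launch(const char *what, int n) {
  std::printf("kernel '%s' over %d items\n", what, n);
}

int main() {
  const int nswap = 6;
  int sendnum[nswap] = {10, 10, 8, 8, 12, 12};
  bool single_rank = true, ghost_velocity = false;

  if (single_rank && !ghost_velocity) {
    int totalsend = 0;
    for (int s : sendnum) totalsend += s;
    launch("pack_comm_self_fused", totalsend);   // one launch for all swaps
  } else {
    for (int iswap = 0; iswap < nswap; iswap++)  // old path: nswap launches
      launch("pack_comm_self", sendnum[iswap]);
  }
}
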
@ -496,9 +504,8 @@ void CommKokkos::exchange()
}
atomKK->sync(Host,ALL_MASK);
atomKK->modified(Host,ALL_MASK);
CommBrick::exchange();
atomKK->modified(Host,ALL_MASK);
}
/* ---------------------------------------------------------------------- */
@ -565,146 +572,149 @@ void CommKokkos::exchange_device()
atom->nghost = 0;
atom->avec->clear_bonus();
// subbox bounds for orthogonal or triclinic
if (comm->nprocs > 1) { // otherwise no-op
if (triclinic == 0) {
sublo = domain->sublo;
subhi = domain->subhi;
} else {
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
// subbox bounds for orthogonal or triclinic
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
if (triclinic == 0) {
sublo = domain->sublo;
subhi = domain->subhi;
} else {
sublo = domain->sublo_lamda;
subhi = domain->subhi_lamda;
}
// loop over dimensions
for (int dim = 0; dim < 3; dim++) {
atomKK->sync(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
// fill buffer with atoms leaving my box, using < and >=
// when atom is deleted, fill it in with last atom
// loop over dimensions
for (int dim = 0; dim < 3; dim++) {
x = atom->x;
lo = sublo[dim];
hi = subhi[dim];
nlocal = atom->nlocal;
i = nsend = 0;
// fill buffer with atoms leaving my box, using < and >=
// when atom is deleted, fill it in with last atom
if (true) {
if (k_sendflag.h_view.extent(0)<nlocal) k_sendflag.resize(nlocal);
k_sendflag.sync<DeviceType>();
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
k_count.h_view() = 0;
k_count.modify<LMPHostType>();
k_count.sync<DeviceType>();
x = atom->x;
lo = sublo[dim];
hi = subhi[dim];
nlocal = atom->nlocal;
i = nsend = 0;
BuildExchangeListFunctor<DeviceType>
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
nlocal,dim,lo,hi);
Kokkos::parallel_for(nlocal,f);
k_exchange_sendlist.modify<DeviceType>();
k_sendflag.modify<DeviceType>();
k_count.modify<DeviceType>();
if (true) {
if (k_sendflag.h_view.extent(0)<nlocal) k_sendflag.resize(nlocal);
k_sendflag.sync<DeviceType>();
k_count.h_view() = k_exchange_sendlist.h_view.extent(0);
while (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
k_count.h_view() = 0;
k_count.modify<LMPHostType>();
k_count.sync<DeviceType>();
k_count.sync<LMPHostType>();
if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
k_exchange_sendlist.resize(k_count.h_view()*1.1);
k_exchange_copylist.resize(k_count.h_view()*1.1);
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
BuildExchangeListFunctor<DeviceType>
f(atomKK->k_x,k_exchange_sendlist,k_count,k_sendflag,
nlocal,dim,lo,hi);
Kokkos::parallel_for(nlocal,f);
k_exchange_sendlist.modify<DeviceType>();
k_sendflag.modify<DeviceType>();
k_count.modify<DeviceType>();
k_count.sync<LMPHostType>();
if (k_count.h_view()>=k_exchange_sendlist.h_view.extent(0)) {
k_exchange_lists.resize(2,k_count.h_view()*1.1);
k_exchange_sendlist = Kokkos::subview(k_exchange_lists,0,Kokkos::ALL);
k_exchange_copylist = Kokkos::subview(k_exchange_lists,1,Kokkos::ALL);
k_count.h_view()=k_exchange_sendlist.h_view.extent(0);
}
}
k_exchange_lists.sync<LMPHostType>();
k_sendflag.sync<LMPHostType>();
int sendpos = nlocal-1;
nlocal -= k_count.h_view();
for(int i = 0; i < k_count.h_view(); i++) {
if (k_exchange_sendlist.h_view(i)<nlocal) {
while (k_sendflag.h_view(sendpos)) sendpos--;
k_exchange_copylist.h_view(i) = sendpos;
sendpos--;
} else
k_exchange_copylist.h_view(i) = -1;
}
k_exchange_copylist.modify<LMPHostType>();
k_exchange_copylist.sync<DeviceType>();
nsend = k_count.h_view();
if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend =
avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>::space,
dim,lo,hi);
DeviceType::fence();
} else {
while (i < nlocal) {
if (x[i][dim] < lo || x[i][dim] >= hi) {
if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend += avec->pack_exchange(i,&buf_send[nsend]);
avec->copy(nlocal-1,i,1);
nlocal--;
} else i++;
}
}
k_exchange_copylist.sync<LMPHostType>();
k_exchange_sendlist.sync<LMPHostType>();
k_sendflag.sync<LMPHostType>();
atom->nlocal = nlocal;
int sendpos = nlocal-1;
nlocal -= k_count.h_view();
for(int i = 0; i < k_count.h_view(); i++) {
if (k_exchange_sendlist.h_view(i)<nlocal) {
while (k_sendflag.h_view(sendpos)) sendpos--;
k_exchange_copylist.h_view(i) = sendpos;
sendpos--;
} else
k_exchange_copylist.h_view(i) = -1;
}
// send/recv atoms in both directions
// if 1 proc in dimension, no send/recv, set recv buf to send buf
// if 2 procs in dimension, single send/recv
// if more than 2 procs in dimension, send/recv to both neighbors
k_exchange_copylist.modify<LMPHostType>();
k_exchange_copylist.sync<DeviceType>();
nsend = k_count.h_view();
if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend =
avec->pack_exchange_kokkos(k_count.h_view(),k_buf_send,
k_exchange_sendlist,k_exchange_copylist,
ExecutionSpaceFromDevice<DeviceType>::space,
dim,lo,hi);
DeviceType::fence();
} else {
while (i < nlocal) {
if (x[i][dim] < lo || x[i][dim] >= hi) {
if (nsend > maxsend) grow_send_kokkos(nsend,1);
nsend += avec->pack_exchange(i,&buf_send[nsend]);
avec->copy(nlocal-1,i,1);
nlocal--;
} else i++;
}
}
atom->nlocal = nlocal;
if (procgrid[dim] == 1) {
nrecv = nsend;
if (nrecv) {
atom->nlocal=avec->
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
} else {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
nrecv = nrecv1;
if (procgrid[dim] > 2) {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE);
nrecv += nrecv2;
}
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
// send/recv atoms in both directions
// if 1 proc in dimension, no send/recv, set recv buf to send buf
// if 2 procs in dimension, single send/recv
// if more than 2 procs in dimension, send/recv to both neighbors
if (procgrid[dim] == 1) {
nrecv = nsend;
if (nrecv) {
atom->nlocal=avec->
unpack_exchange_kokkos(k_buf_send,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
} else {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][0],0,
&nrecv1,1,MPI_INT,procneigh[dim][1],0,world,MPI_STATUS_IGNORE);
nrecv = nrecv1;
if (procgrid[dim] > 2) {
MPI_Sendrecv(&nsend,1,MPI_INT,procneigh[dim][1],0,
&nrecv2,1,MPI_INT,procneigh[dim][0],0,world,MPI_STATUS_IGNORE);
nrecv += nrecv2;
}
if (nrecv > maxrecv) grow_recv_kokkos(nrecv);
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nrecv1,
MPI_DOUBLE,procneigh[dim][1],0,
world,&request);
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
MPI_DOUBLE,procneigh[dim][0],0,world);
MPI_Wait(&request,MPI_STATUS_IGNORE);
if (procgrid[dim] > 2) {
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nrecv1,
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
MPI_Irecv(k_buf_recv.view<DeviceType>().data(),nrecv1,
MPI_DOUBLE,procneigh[dim][1],0,
world,&request);
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
MPI_DOUBLE,procneigh[dim][1],0,world);
MPI_DOUBLE,procneigh[dim][0],0,world);
MPI_Wait(&request,MPI_STATUS_IGNORE);
if (procgrid[dim] > 2) {
MPI_Irecv(k_buf_recv.view<DeviceType>().data()+nrecv1,
nrecv2,MPI_DOUBLE,procneigh[dim][0],0,
world,&request);
MPI_Send(k_buf_send.view<DeviceType>().data(),nsend,
MPI_DOUBLE,procneigh[dim][1],0,world);
MPI_Wait(&request,MPI_STATUS_IGNORE);
}
if (nrecv) {
atom->nlocal = avec->
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
}
if (nrecv) {
atom->nlocal = avec->
unpack_exchange_kokkos(k_buf_recv,nrecv,atom->nlocal,dim,lo,hi,
ExecutionSpaceFromDevice<DeviceType>::space);
DeviceType::fence();
}
// check incoming atoms to see if they are in my box
// if so, add to my list
}
// check incoming atoms to see if they are in my box
// if so, add to my list
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
}
atomKK->modified(ExecutionSpaceFromDevice<DeviceType>::space,ALL_MASK);
if (atom->firstgroupname) {
/* this is not yet implemented with Kokkos */
atomKK->sync(Host,ALL_MASK);
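
The counting loop in exchange_device above runs the flagging functor at whatever capacity the exchange list currently has; when the atomic counter reports overflow, the list is resized with roughly 10% headroom and the counter is reset to force another pass, until every departing atom fits. A serial sketch of that retry pattern:

#include <cstdio>
#include <vector>

int main() {
  // positions along one dimension; atoms outside [lo,hi) must leave
  std::vector<double> x = {0.1, 5.2, -3.0, 9.9, 4.4, -0.5};
  const double lo = 0.0, hi = 5.0;
  std::vector<int> sendlist(2);               // deliberately undersized
  int count = (int)sendlist.size();

  while (count >= (int)sendlist.size()) {     // rerun until the list fits
    count = 0;
    for (int i = 0; i < (int)x.size(); i++)
      if (x[i] < lo || x[i] >= hi) {
        if (count < (int)sendlist.size()) sendlist[count] = i;
        count++;                              // keeps counting past capacity
      }
    if (count >= (int)sendlist.size()) {
      sendlist.resize(count + count / 10 + 1);  // ~10% headroom
      count = (int)sendlist.size();             // force one more pass
    }
  }
  std::printf("%d atoms flagged for exchange\n", count);
}
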
@ -742,14 +752,15 @@ void CommKokkos::borders()
if (!exchange_comm_classic) {
if (exchange_comm_on_host) borders_device<LMPHostType>();
else borders_device<LMPDeviceType>();
return;
} else {
atomKK->sync(Host,ALL_MASK);
CommBrick::borders();
k_sendlist.modify<LMPHostType>();
atomKK->modified(Host,ALL_MASK);
}
atomKK->sync(Host,ALL_MASK);
k_sendlist.sync<LMPHostType>();
CommBrick::borders();
k_sendlist.modify<LMPHostType>();
atomKK->modified(Host,ALL_MASK);
if (comm->nprocs == 1 && !ghost_velocity && !forward_comm_classic)
copy_swap_info();
}
/* ---------------------------------------------------------------------- */
@ -817,7 +828,6 @@ void CommKokkos::borders_device() {
AtomVecKokkos *avec = (AtomVecKokkos *) atom->avec;
ExecutionSpace exec_space = ExecutionSpaceFromDevice<DeviceType>::space;
k_sendlist.sync<DeviceType>();
atomKK->sync(exec_space,ALL_MASK);
// do swaps over all 3 dimensions
@ -1037,6 +1047,69 @@ void CommKokkos::borders_device() {
atom->map_set();
}
}
/* ----------------------------------------------------------------------
copy swap info
------------------------------------------------------------------------- */
void CommKokkos::copy_swap_info()
{
if (nswap > k_swap.extent(1)) {
k_swap = DAT::tdual_int_2d("comm:swap",2,nswap);
k_firstrecv = Kokkos::subview(k_swap,0,Kokkos::ALL);
k_sendnum_scan = Kokkos::subview(k_swap,1,Kokkos::ALL);
}
int scan = 0;
for (int iswap = 0; iswap < nswap; iswap++) {
scan += sendnum[iswap];
k_sendnum_scan.h_view[iswap] = scan;
k_firstrecv.h_view[iswap] = firstrecv[iswap];
}
totalsend = scan;
// create map of ghost to local atom id
// store periodic boundary transform from local to ghost
k_sendlist.sync<LMPHostType>();
if (totalsend > k_pbc.extent(0)) {
k_pbc = DAT::tdual_int_2d("comm:pbc",totalsend,6);
k_swap2 = DAT::tdual_int_2d("comm:swap2",2,totalsend);
k_pbc_flag = Kokkos::subview(k_swap2,0,Kokkos::ALL);
k_g2l = Kokkos::subview(k_swap2,1,Kokkos::ALL);
}
for (int iswap = 0; iswap < nswap; iswap++) {
for (int i = 0; i < sendnum[iswap]; i++) {
int source = sendlist[iswap][i] - atom->nlocal;
int dest = firstrecv[iswap] + i - atom->nlocal;
k_pbc_flag.h_view(dest) = pbc_flag[iswap];
k_pbc.h_view(dest,0) = pbc[iswap][0];
k_pbc.h_view(dest,1) = pbc[iswap][1];
k_pbc.h_view(dest,2) = pbc[iswap][2];
k_pbc.h_view(dest,3) = pbc[iswap][3];
k_pbc.h_view(dest,4) = pbc[iswap][4];
k_pbc.h_view(dest,5) = pbc[iswap][5];
k_g2l.h_view(dest) = atom->nlocal + source;
if (source >= 0) {
k_pbc_flag.h_view(dest) = k_pbc_flag.h_view(dest) || k_pbc_flag.h_view(source);
k_pbc.h_view(dest,0) += k_pbc.h_view(source,0);
k_pbc.h_view(dest,1) += k_pbc.h_view(source,1);
k_pbc.h_view(dest,2) += k_pbc.h_view(source,2);
k_pbc.h_view(dest,3) += k_pbc.h_view(source,3);
k_pbc.h_view(dest,4) += k_pbc.h_view(source,4);
k_pbc.h_view(dest,5) += k_pbc.h_view(source,5);
k_g2l.h_view(dest) = k_g2l.h_view(source);
}
}
}
k_swap.modify<LMPHostType>();
k_swap2.modify<LMPHostType>();
k_pbc.modify<LMPHostType>();
}
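
The source >= 0 branch in copy_swap_info handles ghosts built from other ghosts: the intermediate ghost's periodic shift is folded into the new entry and g2l is chased back to the real local owner, so the fused pack always copies straight from a local atom. A one-dimensional sketch of that composition:

#include <cstdio>
#include <vector>

int main() {
  const int nlocal = 2;
  // dest ghost 0 comes from local atom 1 with shift +1 in x;
  // dest ghost 1 comes from ghost 0 (sendlist entry nlocal+0) with shift +1.
  std::vector<int> g2l(2), shift(2);
  int sendlist[2] = {1, nlocal + 0};
  int pbc_x[2] = {1, 1};

  for (int dest = 0; dest < 2; dest++) {
    int source = sendlist[dest] - nlocal;      // >= 0 means "was a ghost"
    shift[dest] = pbc_x[dest];
    g2l[dest] = sendlist[dest];
    if (source >= 0) {
      shift[dest] += shift[source];            // compose periodic shifts
      g2l[dest] = g2l[source];                 // chase back to the owner
    }
  }
  // ghost 1 resolves to local atom 1 with a total shift of 2 box lengths
  std::printf("ghost 1 -> local %d, shift %d\n", g2l[1], shift[1]);
}
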
/* ----------------------------------------------------------------------
realloc the size of the send buffer as needed with BUFFACTOR and bufextra
if flag = 1, realloc

Some files were not shown because too many files have changed in this diff