Merge conflicts + intel

This commit is contained in:
jtclemm
2023-10-22 22:34:33 -06:00
236 changed files with 9314 additions and 2704 deletions

1
.github/CODEOWNERS vendored
View File

@ -135,6 +135,7 @@ src/timer.* @akohlmey
src/utils.* @akohlmey @rbberger
src/verlet.* @sjplimp @stanmoore1
src/math_eigen_impl.h @jewettaij
src/fix_press_langevin.* @Bibobu
# tools
tools/coding_standard/* @akohlmey @rbberger

View File

@ -1,6 +1,6 @@
set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.01.3.fix.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
set(PACELIB_URL "https://github.com/ICAMS/lammps-user-pace/archive/refs/tags/v.2023.10.04.tar.gz" CACHE STRING "URL for PACE evaluator library sources")
set(PACELIB_MD5 "4f0b3b5b14456fe9a73b447de3765caa" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
set(PACELIB_MD5 "70ff79f4e59af175e55d24f3243ad1ff" CACHE STRING "MD5 checksum of PACE evaluator library tarball")
mark_as_advanced(PACELIB_URL)
mark_as_advanced(PACELIB_MD5)
GetFallbackURL(PACELIB_URL PACELIB_FALLBACK)

View File

@ -21,6 +21,13 @@ file(WRITE qtdeploy.bat "@ECHO OFF\r\nset VSCMD_DEBUG=0\r\nCALL ${VC_INIT} x64\r
execute_process(COMMAND cmd.exe /c qtdeploy.bat COMMAND_ECHO STDERR)
file(REMOVE qtdeploy.bat)
# download and uncompress static FFMpeg and gzip binaries
file(DOWNLOAD "https://download.lammps.org/thirdparty/ffmpeg-gzip.zip" ffmpeg-gzip.zip)
file(WRITE unpackzip.ps1 "Expand-Archive -Path ffmpeg-gzip.zip -DestinationPath LAMMPS_GUI")
execute_process(COMMAND powershell -ExecutionPolicy Bypass -File unpackzip.ps1)
file(REMOVE unpackzip.ps1)
file(REMOVE ffmpeg-gzip.zip)
# create zip archive
file(WRITE makearchive.ps1 "Compress-Archive -Path LAMMPS_GUI -CompressionLevel Optimal -DestinationPath LAMMPS_GUI-Win10-amd64.zip")
execute_process(COMMAND powershell -ExecutionPolicy Bypass -File makearchive.ps1)

View File

@ -10,5 +10,3 @@ set(CMAKE_CXX_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
set(CMAKE_C_FLAGS_RELEASE "-O3 -DNDEBUG" CACHE STRING "" FORCE)
set(BUILD_MPI FALSE CACHE BOOL "" FORCE)
set(BUILD_SHARED_LIBS FALSE CACHE BOOL "" FORCE)
set(LAMMPS_EXCEPTIONS TRUE CACHE BOOL "" FORCE)

View File

@ -69,7 +69,7 @@ OPT.
* :doc:`drude/transform/inverse <fix_drude_transform>`
* :doc:`dt/reset (k) <fix_dt_reset>`
* :doc:`edpd/source <fix_dpd_source>`
* :doc:`efield <fix_efield>`
* :doc:`efield (k) <fix_efield>`
* :doc:`efield/tip4p <fix_efield>`
* :doc:`ehex <fix_ehex>`
* :doc:`electrode/conp (i) <fix_electrode>`
@ -181,6 +181,7 @@ OPT.
* :doc:`pour <fix_pour>`
* :doc:`precession/spin <fix_precession_spin>`
* :doc:`press/berendsen <fix_press_berendsen>`
* :doc:`press/langevin <fix_press_langevin>`
* :doc:`print <fix_print>`
* :doc:`propel/self <fix_propel_self>`
* :doc:`property/atom (k) <fix_property_atom>`
@ -232,7 +233,7 @@ OPT.
* :doc:`spring <fix_spring>`
* :doc:`spring/chunk <fix_spring_chunk>`
* :doc:`spring/rg <fix_spring_rg>`
* :doc:`spring/self <fix_spring_self>`
* :doc:`spring/self (k) <fix_spring_self>`
* :doc:`srd <fix_srd>`
* :doc:`store/force <fix_store_force>`
* :doc:`store/state <fix_store_state>`

View File

@ -265,7 +265,7 @@ OPT.
* :doc:`smd/tri_surface <pair_smd_triangulated_surface>`
* :doc:`smd/ulsph <pair_smd_ulsph>`
* :doc:`smtbq <pair_smtbq>`
* :doc:`snap (k) <pair_snap>`
* :doc:`snap (ik) <pair_snap>`
* :doc:`soft (go) <pair_soft>`
* :doc:`sph/heatconduction <pair_sph_heatconduction>`
* :doc:`sph/idealgas <pair_sph_idealgas>`
@ -305,5 +305,5 @@ OPT.
* :doc:`wf/cut <pair_wf_cut>`
* :doc:`ylz <pair_ylz>`
* :doc:`yukawa (gko) <pair_yukawa>`
* :doc:`yukawa/colloid (go) <pair_yukawa_colloid>`
* :doc:`yukawa/colloid (gko) <pair_yukawa_colloid>`
* :doc:`zbl (gko) <pair_zbl>`

View File

@ -5,48 +5,61 @@ This document describes **LAMMPS GUI version 1.5**.
-----
LAMMPS GUI is a simple graphical text editor that is linked to the
:ref:`LAMMPS library <lammps_c_api>` and thus can run LAMMPS directly
using the contents of the editor's text buffer as input. It can
retrieve and display information from LAMMPS while it is running and is
adapted in multiple ways specifically for editing LAMMPS input files.
LAMMPS GUI is a graphical text editor customized for editing LAMMPS
input files that is linked to the :ref:`LAMMPS library <lammps_c_api>`
and thus can run LAMMPS directly using the contents of the editor's text
buffer as input. It can retrieve and display information from LAMMPS
while it is running, display visualizations created with the :doc:`dump
image command <dump_image>`, and is adapted specifically for editing
LAMMPS input files through text completion and reformatting, and linking
to the online LAMMPS documentation for known LAMMPS commands and styles.
.. note::
Pre-compiled, ready-to-use LAMMPS GUI executables for Linux (Ubuntu
20.04LTS or later and compatible), macOS (version 11 aka Big Sur or
later), and Windows (version 10 or later) :ref:`are available
<lammps-gui-install>` for download. The executables are linked to
a current version of LAMMPS as well. The source code for the
LAMMPS GUI is included in the ``tools/lammps-gui`` folder of the
LAMMPS distribution and it can be compiled alongside LAMMPS with
CMake.
<lammps_gui_install>` for download. They may be linked to a
development version of LAMMPS in case they need features not yet
available in a released version. Serial LAMMPS executables of the
same LAMMPS version are included as well. The source code for the
LAMMPS GUI is included in the LAMMPS source code and can be found in
the ``tools/lammps-gui`` folder. It can be compiled alongside LAMMPS
when :doc:`compiling with CMake <Build_cmake>`.
LAMMPS GUI tries to be similar to what people traditionally would do
to run LAMMPS using a command line window: editing inputs with a text
editor, run LAMMPS on the input with selected command line flags, and
then extract data from the created files and view them. That
procedure is quite effective and often required when running LAMMPS on
high-performance computing facilities, or for people proficient in
using the command line, as that allows them to use tools for the
individual steps which they are most comfortable with.
LAMMPS GUI tries to provide an experience similar to what people
traditionally would do to run LAMMPS using a command line window:
The main benefit of a GUI application is that many basic tasks can be
done directly from the GUI without switching to a text console or
requiring external programs, let alone scripts to extract data from
the generated output. It also integrates well with graphical desktop
environments.
- editing inputs with a text editor
- run LAMMPS on the input with selected command line flags
- and then use or extract data from the created files and visualize it
That procedure is quite effective for people proficient in using the
command line, as that allows them to use tools for the individual steps
which they are most comfortable with. It is often required when running
LAMMPS on high-performance computing facilities.
The main benefit of using the LAMMPS GUI application instead is that
many basic tasks can be done directly from the GUI without switching to
a text console window or using external programs, let alone writing
scripts to extract data from the generated output. It also integrates
well with graphical desktop environments.
LAMMPS GUI thus makes it easier for beginners to get started running
simple LAMMPS simulations. It is very suitable for tutorials on
LAMMPS since you only need to learn how to use a single program. It
is also designed to keep the barrier low when you decide to switch to
a full featured, standalone programming editor and more sophisticated
simple LAMMPS simulations. It is very suitable for tutorials on LAMMPS
since you only need to learn how to use a single program for most tasks
and thus time can be saved and people can focus on learning LAMMPS. It
is also designed to keep the barrier low when you decide to switch to a
full featured, standalone programming editor and more sophisticated
visualization and analysis tools and run LAMMPS from a command line.
The following text provides a detailed tour of the features and
functionality of the LAMMPS GUI.
Suggestions for new features and reports of bugs are always welcome.
You can use :doc:`the same channels as for LAMMPS itself
<Errors_bugs>` for that purpose.
-----
Main window
@ -86,9 +99,9 @@ save them.
Running LAMMPS
^^^^^^^^^^^^^^
From within the LAMMPS GUI main window LAMMPS can be started either
from the ``Run`` menu using the ``Run LAMMPS from Editor Buffer``
entry, by the hotkey `Ctrl-Enter` (`Command-Enter` on macOS), or by
From within the LAMMPS GUI main window LAMMPS can be started either from
the ``Run`` menu using the ``Run LAMMPS from Editor Buffer`` entry, by
the keyboard shortcut `Ctrl-Enter` (`Command-Enter` on macOS), or by
clicking on the green "Run" button in the status bar. All of these
operations will cause LAMMPS to process the entire input script, which
may contain multiple :doc:`run <run>` or :doc:`minimize <minimize>`
@ -147,10 +160,10 @@ More information on those windows and how to adjust their behavior and
contents is given below.
An active LAMMPS run can be stopped cleanly by using either the ``Stop
LAMMPS`` entry in the ``Run`` menu, the hotkey `Ctrl-/` (`Command-/`
on macOS), or by clicking on the red button in the status bar. This
will cause the running LAMMPS process to complete the current timestep
(or iteration for energy minimization) and then complete the
LAMMPS`` entry in the ``Run`` menu, the keyboard shortcut `Ctrl-/`
(`Command-/` on macOS), or by clicking on the red button in the status
bar. This will cause the running LAMMPS process to complete the current
timestep (or iteration for energy minimization) and then complete the
processing of the buffer while skipping all run or minimize commands.
This is equivalent to the input script command :doc:`timer timeout 0
<timer>` and is implemented by calling the
@ -172,17 +185,20 @@ be seen in the command line window, as shown below.
LAMMPS GUI captures the screen output as it is generated and updates
the log window regularly during a run.
By default, there will be a new window for each run, so that it is
possible to visually compare outputs from different runs. It is also
possible to change the behavior of LAMMPS GUI in the preferences dialog
to *replace* an existing log window for a new run or to not show the log
window by default. It is also possible to show or hide the current log
window from the ``View`` menu.
By default, the log window will be replaced each time a run is started.
The runs are counted and the run number for the current run is displayed
in the window title. It is possible to change the behavior of LAMMPS
GUI in the preferences dialog to create a *new* log window for every run
or to not show the current log window. It is also possible to show or
hide the *current* log window from the ``View`` menu.
The text in the log window is read-only and cannot be modified, but
editor commands to select and copy all or parts of the text can be used.
The "Select All" and "Copy" functions are also available via a context
menu by clicking with the right mouse button.
keyboard shortcuts to select and copy all or parts of the text can be
used to transfer text to another program. Also, the keyboard shortcut
`Ctrl-S` (`Command-S` on macOS) is available to save the log buffer to a
file. The "Select All" and "Copy" functions, as well as a "Save Log to
File" option are also available from a context menu by clicking with the
right mouse button into the log window text area.
Chart Window
------------
@ -199,10 +215,16 @@ The drop down menu on the top right allows selection of different
properties that are computed and written to thermo output. Only one
property can be shown at a time. The plots will be updated with new
data as the run progresses, so they can be used to visually monitor the
evolution of available properties. From the ``File`` menu on the top
left, it is possible to save an image of the currently displayed plot or
export the data in either plain text columns (for use by plotting tools
like `gnuplot <http://www.gnuplot.info/>`_ or `grace
evolution of available properties. The window title will show the
current run number that this chart window corresponds to. Same as
explained for the log window above, by default, the chart window will
be replaced on each new run, but the behavior can be changed in the
preferences dialog.
From the ``File`` menu on the top left, it is possible to save an image
of the currently displayed plot or export the data in either plain text
columns (for use by plotting tools like `gnuplot
<http://www.gnuplot.info/>`_ or `grace
<https://plasma-gate.weizmann.ac.il/Grace/>`_), or as CSV data which can
be imported for further processing with Microsoft Excel or `pandas
<https://pandas.pydata.org/>`_
@ -225,19 +247,20 @@ displays the images created by LAMMPS as they are written.
:align: center
:scale: 50%
The various buttons at the bottom right of the window allow either
single stepping through the sequence of images or playing an animation
(as a continuous loop or once from first to last). It is also possible
to zoom in or zoom out of the displayed images. The slide show window
will be closed when a new file is loaded.
The various buttons at the bottom right of the window allow single
stepping through the sequence of images or playing an animation (as a
continuous loop or once from first to last). It is also possible to
zoom in or zoom out of the displayed images, and to export the slide
show animation to a movie file, if `ffmpeg <https://ffmpeg.org/>`_ is
installed.
Variable Info
-------------
During a run, it may be of interest to monitor the value of input
script variables, for example to monitor the progress of loops. This
can be done by enabling the "Variables Window" in the ``View`` menu or
by using the `Ctrl-Shift-W` hotkey. This will show info similar to
During a run, it may be of interest to monitor the value of input script
variables, for example to monitor the progress of loops. This can be
done by enabling the "Variables Window" in the ``View`` menu or by using
the `Ctrl-Shift-W` keyboard shortcut. This will show info similar to
the :doc:`info variables <info>` command in a separate window as shown
below.
@ -250,16 +273,27 @@ during a run. It will show "(none)" if there are no variables
defined. Note that it is also possible to *set* :doc:`index style
variables <variable>`, that would normally be set via command line
flags, via the "Set Variables..." dialog from the ``Run`` menu.
LAMMPS GUI will automatically set the variable "gui_run" to the
current value of the run counter. That way it would be possible
to automatically record a log for each run attempt by using the
command
.. code-block:: LAMMPS
log logfile-${gui_run}.txt
at the beginning of an input file. That would record logs to files
``logfile-1.txt``, ``logfile-2.txt``, and so on for successive runs.
Viewing Snapshot Images
-----------------------
By selecting the ``Create Image`` entry in the ``Run`` menu, or by
hitting the `Ctrl-I` (`Command-I` on macOS) hotkey, or by clicking on
the "palette" button in the status bar, LAMMPS GUI will send a custom
:doc:`write_dump image <dump_image>` command to LAMMPS and read the
resulting snapshot image with the current state of the system into an
image viewer window. This functionality is not available *during* an
hitting the `Ctrl-I` (`Command-I` on macOS) keyboard shortcut, or by
clicking on the "palette" button in the status bar, LAMMPS GUI will send
a custom :doc:`write_dump image <dump_image>` command to LAMMPS and read
the resulting snapshot image with the current state of the system into
an image viewer window. This functionality is not available *during* an
ongoing run. When LAMMPS is not yet initialized, LAMMPS GUI will try to
identify the line with the first run or minimize command and execute all
commands up to that line from the input buffer and then add a "run 0"
@ -306,34 +340,41 @@ contents to a file.
Context Specific Word Completion
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
By default, LAMMPS GUI will display a small popup frame with possible
completions for LAMMPS input script commands or styles after 2
characters of a word have been typed. The word can then be completed
through selecting an entry by scrolling down with the cursor keys and
selecting with the 'Enter' key or by clicking on the entry with the
mouse. The automatic completion popup can be disabled in the
``Preferences`` dialog, but the completion can still be requested
manually by either hitting the 'Shift-TAB' key or by right-clicking with
the mouse and selecting the option from the context menu. Most of the
completion information is taken from the LAMMPS instance and thus it
will be adjusted to only show options available that have been enabled
while compiling LAMMPS, however that excludes accelerated styles and
commands, only non-suffix versions are shown.
By default, LAMMPS GUI will display a small pop-up frame with possible
choices for LAMMPS input script commands or styles after 2 characters of
a word have been typed.
.. image:: JPG/lammps-gui-complete.png
:align: center
:scale: 75%
The word can then be completed through selecting an entry by scrolling
up and down with the cursor keys and selecting with the 'Enter' key or
by clicking on the entry with the mouse. The automatic completion
pop-up can be disabled in the ``Preferences`` dialog, but the completion
can still be requested manually by either hitting the 'Shift-TAB' key or
by right-clicking with the mouse and selecting the option from the
context menu. Most of the completion information is taken from the
LAMMPS instance and thus it will be adjusted to only show available
options that have been enabled while compiling LAMMPS. That, however,
excludes accelerated styles and commands; for improved clarity, only the
non-suffix version of styles are shown.
Line Reformatting
^^^^^^^^^^^^^^^^^
The editor supports reformatting lines according to the syntax in
order to have consistently aligned lines. This primarily means adding
space padding to commands, type specifiers, IDs and names. This
The editor supports reformatting lines according to the syntax in order
to have consistently aligned lines. This primarily means adding
whitespace padding to commands, type specifiers, IDs and names. This
reformatting is performed by default when hitting the 'Enter' key to
start a new line. This feature can be turned off in the
start a new line. This feature can be turned on or off in the
``Preferences`` dialog, but it can still be manually performed by
hitting the 'TAB' key.
hitting the 'TAB' key. The amount of padding can also be changed in the
``Preferences`` dialog.
Internally this functionality is achieved by splitting the line into
"words" and then putting it back together with padding added where the
context can be detected; otherwise a single space is used.
context can be detected; otherwise a single space is used between words.
Context Specific Help
^^^^^^^^^^^^^^^^^^^^^
@ -343,23 +384,23 @@ Context Specific Help
:scale: 50%
A unique feature of the LAMMPS GUI is the option to look up the
documentation for the command in the current line. This can be done
by either clicking the right mouse button or by using the `Ctrl-?`
hotkey. When clicking the mouse there are additional entries in the
context menu that will open the corresponding documentation page in
the online LAMMPS documentation. When using the hotkey, the first of
documentation for the command in the current line. This can be done by
either clicking the right mouse button or by using the `Ctrl-?` keyboard
shortcut. When clicking the mouse there are additional entries in the
context menu that will open the corresponding documentation page in the
online LAMMPS documentation. When using the keyboard, the first of
those entries will be chosen directly.
Menu
----
The menu bar has entries ``File``, ``Edit``, ``Run``, ``View``, and
``About``. Instead of using the mouse to click on them, the
individual menus can also be activated by hitting the `Alt` key
together with the corresponding underlined letter, that is `Alt-F`
will activate the ``File`` menu. For the corresponding activated
sub-menus, the underlined letter together with the `Alt` key can again
be used to select entries instead of using the mouse.
``About``. Instead of using the mouse to click on them, the individual
menus can also be activated by hitting the `Alt` key together with the
corresponding underlined letter, that is `Alt-F` will activate the
``File`` menu. For the corresponding activated sub-menus, the key
corresponding to the underlined letters can again be used to select entries
instead of using the mouse.
File
^^^^
@ -385,8 +426,8 @@ Edit
The ``Edit`` menu offers the usual editor functions like ``Undo``,
``Redo``, ``Cut``, ``Copy``, ``Paste``. It can also open a
``Preferences`` dialog (hotkey `Ctrl-P`) and allows deletion of all
stored preferences so they will be reset to default values.
``Preferences`` dialog (keyboard shortcut `Ctrl-P`) and allows deletion
of all stored preferences so they will be reset to default values.
Run
^^^
@ -516,7 +557,7 @@ General Settings:
- *Replace image window on new render:* when checked, an existing
chart window will be replaced when a new snapshot image is requested,
otherwise each command will create a new image window.
- *Path to LAMMPS Shared Library File:* this options is only available
- *Path to LAMMPS Shared Library File:* this option is only visible
when LAMMPS GUI was compiled to load the LAMMPS library at run time
instead of being linked to it directly. With the ``Browse..`` button
or by changing the text, a different shared library file with a
@ -574,26 +615,26 @@ the range between 1 and 32.
The two settings which follow enable or disable the automatic
reformatting when hitting the 'Enter' key and the automatic display of
the completion popup window.
the completion pop-up window.
-----------
Hotkeys
-------
Keyboard Shortcuts
------------------
Almost all functionality is accessible from the menu or via hotkeys.
The following hotkeys are available (On macOS use the Command key
instead of Ctrl/Control).
Almost all functionality is accessible from the menu of the editor
window or through keyboard shortcuts. The following shortcuts are
available (On macOS use the Command key instead of Ctrl/Control).
.. list-table::
:header-rows: 1
:widths: auto
* - Hotkey
* - Shortcut
- Function
- Hotkey
- Shortcut
- Function
- Hotkey
- Shortcut
- Function
* - Ctrl+N
- New File
@ -620,7 +661,7 @@ instead of Ctrl/Control).
- Ctrl+I
- Snapshot Image
* - Ctrl+Q
- Quit
- Quit Application
- Ctrl+V
- Paste text
- Ctrl+L
@ -653,3 +694,7 @@ instead of Ctrl/Control).
Further editing keybindings `are documented with the Qt documentation
<https://doc.qt.io/qt-5/qplaintextedit.html#editing-key-bindings>`_. In
case of conflicts the list above takes precedence.
All other windows only support a subset of keyboard shortcuts listed
above. Typically, the shortcuts `Ctrl-/` (Stop Run), `Ctrl-W` (Close
Window), and `Ctrl-Q` (Quit Application) are supported.

View File

@ -1,7 +1,7 @@
Output from LAMMPS (thermo, dumps, computes, fixes, variables)
==============================================================
There are four basic kinds of LAMMPS output:
There are four basic forms of LAMMPS output:
* :doc:`Thermodynamic output <thermo_style>`, which is a list of
quantities printed every few timesteps to the screen and logfile.
@ -20,18 +20,17 @@ output files, depending on what :doc:`dump <dump>` and :doc:`fix <fix>`
commands you specify.
As discussed below, LAMMPS gives you a variety of ways to determine
what quantities are computed and printed when the thermodynamics,
what quantities are calculated and printed when the thermodynamics,
dump, or fix commands listed above perform output. Throughout this
discussion, note that users can also :doc:`add their own computes and
fixes to LAMMPS <Modify>` which can then generate values that can then
be output with these commands.
fixes to LAMMPS <Modify>` which can generate values that can then be
output with these commands.
The following subsections discuss different LAMMPS commands related
to output and the kind of data they operate on and produce:
* :ref:`Global/per-atom/local/per-grid data <global>`
* :ref:`Scalar/vector/array data <scalar>`
* :ref:`Per-grid data <grid>`
* :ref:`Disambiguation <disambiguation>`
* :ref:`Thermodynamic output <thermo>`
* :ref:`Dump file output <dump>`
@ -48,34 +47,65 @@ to output and the kind of data they operate on and produce:
Global/per-atom/local/per-grid data
-----------------------------------
Various output-related commands work with four different styles of
Various output-related commands work with four different "styles" of
data: global, per-atom, local, and per-grid. A global datum is one or
more system-wide values, e.g. the temperature of the system. A
per-atom datum is one or more values per atom, e.g. the kinetic energy
of each atom. Local datums are calculated by each processor based on
the atoms it owns, but there may be zero or more per atom, e.g. a list
the atoms it owns, and there may be zero or more per atom, e.g. a list
of bond distances.
A per-grid datum is one or more values per grid cell, for a grid which
overlays the simulation domain. The grid cells and the data they
store are distributed across processors; each processor owns the grid
cells whose center point falls within its subdomain.
overlays the simulation domain. Similar to atoms and per-atom data,
the grid cells and the data they store are distributed across
processors; each processor owns the grid cells whose center points
fall within its subdomain.
.. _scalar:
Scalar/vector/array data
------------------------
Global, per-atom, and local datums can come in three kinds: a single
scalar value, a vector of values, or a 2d array of values. The doc
page for a "compute" or "fix" or "variable" that generates data will
specify both the style and kind of data it produces, e.g. a per-atom
vector.
Global, per-atom, local, and per-grid datums can come in three
"kinds": a single scalar value, a vector of values, or a 2d array of
values. More specifically these are the valid kinds for each style:
When a quantity is accessed, as in many of the output commands
discussed below, it can be referenced via the following bracket
notation, where ID in this case is the ID of a compute. The leading
"c\_" would be replaced by "f\_" for a fix, or "v\_" for a variable:
* global scalar
* global vector
* global array
* per-atom vector
* per-atom array
* local vector
* local array
* per-grid vector
* per-grid array
A per-atom vector means a single value per atom; the "vector" is the
length of the number of atoms. A per-atom array means multiple values
per atom. Similarly a local vector or array means one or multiple
values per entity (e.g. per bond in the system). And a per-grid
vector or array means one or multiple values per grid cell.
The doc page for a compute or fix or variable that generates data will
specify both the styles and kinds of data it produces, e.g. a per-atom
vector. Note that a compute or fix may generate multiple styles and
kinds of output. However, for per-atom data only a vector or array is
output, never both. Likewise for per-local and per-grid data. An
example of a fix which generates multiple styles and kinds of data is
the :doc:`fix mdi/qm <fix_mdi_qm>` command. It outputs a global
scalar, global vector, and per-atom array for the quantum mechanical
energy and virial of the system and forces on each atom.
By contrast, different variable styles generate only a single kind of
data: a global scalar for an equal-style variable, global vector for a
vector-style variable, and a per-atom vector for an atom-style
variable.
When data is accessed by another command, as in many of the output
commands discussed below, it can be referenced via the following
bracket notation, where ID in this case is the ID of a compute. The
leading "c\_" would be replaced by "f\_" for a fix, or "v\_" for a
variable (and ID would be the name of the variable):
+-------------+--------------------------------------------+
| c_ID | entire scalar, vector, or array |
@ -85,40 +115,56 @@ notation, where ID in this case is the ID of a compute. The leading
| c_ID[I][J] | one element of array |
+-------------+--------------------------------------------+
In other words, using one bracket reduces the dimension of the data
once (vector -> scalar, array -> vector). Using two brackets reduces
the dimension twice (array -> scalar). Thus a command that uses
scalar values as input can typically also process elements of a vector
or array.
Note that using one bracket reduces the dimension of the data once
(vector -> scalar, array -> vector). Using two brackets reduces the
dimension twice (array -> scalar). Thus a command that uses scalar
values as input can also conceptually operate on an element of a
vector or array.
.. _grid:
Per-grid data
------------------------
Per-grid data can come in two kinds: a vector of values (one per grid
cell), or a 2d array of values (multiple values per grid cell). The
doc page for a "compute" or "fix" that generates data will specify
names for both the grid(s) and datum(s) it produces, e.g. per-grid
vectors or arrays, which can be referenced by other commands. See the
:doc:`Howto grid <Howto_grid>` doc page for more details.
Per-grid vectors or arrays are accessed similarly, except that the ID
for the compute or fix includes a grid name and a data name. This is
because a fix or compute can create multiple grids (of different
sizes) and multiple sets of data (for each grid). The fix or compute
defines names for each grid and for each data set, so that all of them
can be accessed by other commands. See the :doc:`Howto grid
<Howto_grid>` doc page for more details.
.. _disambiguation:
Disambiguation
--------------
Some computes and fixes produce data in multiple styles, e.g. a global
scalar and a per-atom vector. Usually the context in which the input
script references the data determines which style is meant. Example:
if a compute provides both a global scalar and a per-atom vector, the
former will be accessed by using ``c_ID`` in an equal-style variable,
while the latter will be accessed by using ``c_ID`` in an atom-style
variable. Note that atom-style variable formulas can also access
global scalars, but in this case it is not possible to do this
directly because of the ambiguity. Instead, an equal-style variable
can be defined which accesses the global scalar, and that variable can
be used in the atom-style variable formula in place of ``c_ID``.
When a compute or fix produces data in multiple styles, e.g. global
and per-atom, a reference to the data can sometimes be ambiguous.
Usually the context in which the input script references the data
determines which style is meant.
For example, if a compute outputs a global vector and a per-atom
array, an element of the global vector will be accessed by using
``c_ID[I]`` in :doc:`thermodynamic output <thermo_style>`, while a
column of the per-atom array will be accessed by using ``c_ID[I]`` in
a :doc:`dump custom <dump>` command.
However, if an :doc:`atom-style variable <variable>` references
``c_ID[I]``, then it could be intended to refer to a single element of
the global vector or a column of the per-atom array. The doc page for
any command that has a potential ambiguity (variables are the most
common) will explain how to resolve the ambiguity.
In this case, an atom-style variable references per-atom data if it
exists. If access to an element of a global vector is needed (as in
this example), an equal-style variable which references the value can
be defined and used in the atom-style variable formula instead.
Similarly, :doc:`thermodynamic output <thermo_style>` can only
reference global data from a compute or fix. But you can indirectly
access per-atom data as follows. The reference ``c_ID[245][2]`` for
the ID of a :doc:`compute displace/atom <compute_displace_atom>`
command, refers to the y-component of displacement for the atom with
ID 245. While you cannot use that reference directly in the
:doc:`thermo_style <thermo_style>` command, you can use it in an
equal-style variable formula, and then reference the variable in
thermodynamic output.
.. _thermo:
@ -389,7 +435,7 @@ output and input data types must match, e.g. global/per-atom/local
data and scalar/vector/array data.
Also note that, as described above, when a command takes a scalar as
input, that could be an element of a vector or array. Likewise a
input, that could also be an element of a vector or array. Likewise a
vector input could be a column of an array.
+--------------------------------------------------------+----------------------------------------------+----------------------------------------------------+

View File

@ -12,7 +12,8 @@ is created, e.g. by the :doc:`create_box <create_box>` or
:doc:`read_data <read_data>` or :doc:`read_restart <read_restart>`
commands. Additionally, LAMMPS defines box size parameters lx,ly,lz
where lx = xhi-xlo, and similarly in the y and z dimensions. The 6
parameters, as well as lx,ly,lz, can be output via the :doc:`thermo_style custom <thermo_style>` command.
parameters, as well as lx,ly,lz, can be output via the
:doc:`thermo_style custom <thermo_style>` command.
LAMMPS also allows simulations to be performed in triclinic
(non-orthogonal) simulation boxes shaped as a parallelepiped with

View File

@ -5,7 +5,7 @@ LAMMPS is designed to be a fast, parallel engine for molecular
dynamics (MD) simulations. It provides only a modest amount of
functionality for setting up simulations and analyzing their output.
Specifically, LAMMPS was not conceived and designed for:
Originally, LAMMPS was not conceived and designed for:
* being run through a GUI
* building molecular systems, or building molecular topologies
@ -14,9 +14,10 @@ Specifically, LAMMPS was not conceived and designed for:
* visualize your MD simulation interactively
* plot your output data
Over the years some of these limitations have been reduced or
removed, through features added to LAMMPS or external tools
that either closely interface with LAMMPS or extend LAMMPS.
Over the years many of these limitations have been reduced or
removed. In part through features added to LAMMPS and in part
through external tools that either closely interface with LAMMPS
or extend LAMMPS.
Here are suggestions on how to perform these tasks:
@ -24,8 +25,9 @@ Here are suggestions on how to perform these tasks:
wraps the library interface is provided. Thus, GUI interfaces can be
written in Python or C/C++ that run LAMMPS and visualize or plot its
output. Examples of this are provided in the python directory and
described on the :doc:`Python <Python_head>` doc page. Also, there
are several external wrappers or GUI front ends.
described on the :doc:`Python <Python_head>` doc page. As of version
2 August 2023 :ref:`a GUI tool <lammps_gui>` is included in LAMMPS.
Also, there are several external wrappers or GUI front ends.
* **Builder:** Several pre-processing tools are packaged with LAMMPS.
Some of them convert input files in formats produced by other MD codes
such as CHARMM, AMBER, or Insight into LAMMPS input formats. Some of

Binary file not shown.

Before

Width:  |  Height:  |  Size: 73 KiB

After

Width:  |  Height:  |  Size: 105 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 36 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 120 KiB

After

Width:  |  Height:  |  Size: 95 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 91 KiB

After

Width:  |  Height:  |  Size: 90 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 119 KiB

After

Width:  |  Height:  |  Size: 130 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 36 KiB

After

Width:  |  Height:  |  Size: 37 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

After

Width:  |  Height:  |  Size: 44 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 75 KiB

After

Width:  |  Height:  |  Size: 81 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 50 KiB

After

Width:  |  Height:  |  Size: 50 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 60 KiB

After

Width:  |  Height:  |  Size: 67 KiB

View File

@ -645,9 +645,14 @@ LAMMPS GUI
Overview
^^^^^^^^
LAMMPS GUI is a simple graphical text editor that is linked to the
:ref:`LAMMPS C-library interface <lammps_c_api>` and thus can run LAMMPS
directly using the contents of the editor's text buffer as input.
LAMMPS GUI is a graphical text editor customized for editing LAMMPS
input files that is linked to the :ref:`LAMMPS C-library <lammps_c_api>`
and thus can run LAMMPS directly using the contents of the editor's text
buffer as input. It can retrieve and display information from LAMMPS
while it is running, display visualizations created with the :doc:`dump
image command <dump_image>`, and is adapted specifically for editing
LAMMPS input files through text completion and reformatting, and linking
to the online LAMMPS documentation for known LAMMPS commands and styles.
This is similar to what people traditionally would do to run LAMMPS:
using a regular text editor to edit the input and run the necessary
@ -656,9 +661,9 @@ terminal window. This similarity is a design goal. While making it easy
for beginners to start with LAMMPS, it is also the intention to simplify
the transition to workflows like most experienced LAMMPS users do.
All features have been extensively exposed to hotkeys, so that there is
also appeal for experienced LAMMPS users, too, especially for
prototyping and testing simulations setups.
All features have been extensively exposed to keyboard shortcuts, so
that there is also appeal for experienced LAMMPS users for prototyping
and testing simulation setups.
Features
^^^^^^^^
@ -697,22 +702,26 @@ Prerequisites and portability
LAMMPS GUI is programmed in C++ based on the C++11 standard and using
the `Qt GUI framework <https://www.qt.io/product/framework>`_.
Currently, Qt version 5.12 or later is required; Qt 5.15LTS is
recommended; Qt 6.x not (yet) supported. Building LAMMPS with CMake is
required. The LAMMPS GUI has been successfully compiled and tested on:
recommended; support for Qt version 6.x is under active development and
thus far only tested with Qt 6.5LTS on Linux. Building LAMMPS with
CMake is required.
The LAMMPS GUI has been successfully compiled and tested on:
- Ubuntu Linux 20.04LTS x86_64 using GCC 9, Qt version 5.12
- Fedora Linux 38 x86\_64 using GCC 13 and Clang 16, Qt version 5.15LTS
- Fedora Linux 38 x86\_64 using GCC 13, Qt version 6.5LTS
- Apple macOS 12 (Monterey) and macOS 13 (Ventura) with Xcode on arm64 and x86\_64, Qt version 5.15LTS
- Windows 10 and 11 x86_64 with Visual Studio 2022 and Visual C++ 14.36, Qt version 5.15LTS
- Windows 10 and 11 x86_64 with MinGW / GCC 10.0 cross-compiler on Fedora 38, Qt version 5.15LTS
.. _lammps-gui-install:
.. _lammps_gui_install:
Pre-compiled executables
^^^^^^^^^^^^^^^^^^^^^^^^
Pre-compiled LAMMPS executables including the GUI are currently
Pre-compiled LAMMPS executable packages that include the GUI are currently
available from https://download.lammps.org/static or
https://github.com/lammps/lammps/releases. You can unpack the archives
(or mount the macOS disk image) and run the GUI directly in place. The
@ -737,7 +746,10 @@ stored in a location where CMake can find them without additional help.
Otherwise, the location of the Qt library installation must be indicated
by setting ``-D Qt5_DIR=/path/to/qt5/lib/cmake/Qt5``, which is a path to
a folder inside the Qt installation that contains the file
``Qt5Config.cmake``.
``Qt5Config.cmake``. Similarly, for Qt6 the location of the Qt library
installation can be indicated by setting ``-D Qt6_DIR=/path/to/qt6/lib/cmake/Qt6``,
if necessary. When both Qt5 and Qt6 are available, Qt6 will be preferred
unless ``-D LAMMPS_GUI_USE_QT5=yes`` is set.
It should be possible to build the LAMMPS GUI as a standalone
compilation (e.g. when LAMMPS has been compiled with traditional make),

View File

@ -65,6 +65,11 @@ switch. This is described on the :doc:`Build_settings <Build_settings>`
doc page. If atom IDs are not used, they must be specified as 0 for
all atoms, e.g. in a data or restart file.
.. note::
If a :doc:`triclinic simulation box <Howto_triclinic>` is used,
atom IDs are required, due to how neighbor lists are built.
The *map* keyword determines how atoms with specific IDs are found
when required. An example are the bond (angle, etc) methods which
need to find the local index of an atom with a specific global ID

View File

@ -27,58 +27,62 @@ Examples
Description
"""""""""""
Define a computation that will be performed on a group of atoms.
Quantities calculated by a compute are instantaneous values, meaning
they are calculated from information about atoms on the current
timestep or iteration, though a compute may internally store some
information about a previous state of the system. Defining a compute
does not perform a computation. Instead computes are invoked by other
LAMMPS commands as needed (e.g., to calculate a temperature needed for
a thermostat fix or to generate thermodynamic or dump file output).
See the :doc:`Howto output <Howto_output>` page for a summary of
various LAMMPS output options, many of which involve computes.
Define a diagnostic computation that will be performed on a group of
atoms. Quantities calculated by a compute are instantaneous values,
meaning they are calculated from information about atoms on the
current timestep or iteration, though internally a compute may store
some information about a previous state of the system. Defining a
compute does not perform the computation. Instead computes are
invoked by other LAMMPS commands as needed (e.g., to calculate a
temperature needed for a thermostat fix or to generate thermodynamic
or dump file output). See the :doc:`Howto output <Howto_output>` page
for a summary of various LAMMPS output options, many of which involve
computes.
The ID of a compute can only contain alphanumeric characters and
underscores.
----------
Computes calculate one or more of four styles of quantities: global,
per-atom, local, or per-grid. A global quantity is one or more
system-wide values, e.g. the temperature of the system. A per-atom
quantity is one or more values per atom, e.g. the kinetic energy of
each atom. Per-atom values are set to 0.0 for atoms not in the
specified compute group. Local quantities are calculated by each
processor based on the atoms it owns, but there may be zero or more
per atom, e.g. a list of bond distances. Per-grid quantities are
calculated on a regular 2d or 3d grid which overlays a 2d or 3d
simulation domain. The grid points and the data they store are
distributed across processors; each processor owns the grid points
which fall within its subdomain.
Computes calculate and store any of four *styles* of quantities:
global, per-atom, local, or per-grid.
Computes that produce per-atom quantities have the word "atom" at the
end of their style, e.g. *ke/atom*\ . Computes that produce local
quantities have the word "local" at the end of their style,
e.g. *bond/local*\ . Computes that produce per-grid quantities have
the word "grid" at the end of their style, e.g. *property/grid*\ .
Styles with neither "atom" or "local" or "grid" at the end of their
style name produce global quantities.
A global quantity is one or more system-wide values, e.g. the
temperature of the system. A per-atom quantity is one or more values
per atom, e.g. the kinetic energy of each atom. Per-atom values are
set to 0.0 for atoms not in the specified compute group. Local
quantities are calculated by each processor based on the atoms it
owns, but there may be zero or more per atom, e.g. a list of bond
distances. Per-grid quantities are calculated on a regular 2d or 3d
grid which overlays a 2d or 3d simulation domain. The grid points and
the data they store are distributed across processors; each processor
owns the grid points which fall within its subdomain.
Note that a single compute typically produces either global or
per-atom or local or per-grid values. It does not compute both global
and per-atom values. It can produce local values or per-grid values
in tandem with global or per-atom quantities. The compute doc page
will explain the details.
As a general rule of thumb, computes that produce per-atom quantities
have the word "atom" at the end of their style, e.g. *ke/atom*\ .
Computes that produce local quantities have the word "local" at the
end of their style, e.g. *bond/local*\ . Computes that produce
per-grid quantities have the word "grid" at the end of their style,
e.g. *property/grid*\ . And styles with neither "atom" or "local" or
"grid" at the end of their style name produce global quantities.
Global, per-atom, local, and per-grid quantities come in three kinds:
a single scalar value, a vector of values, or a 2d array of values.
The doc page for each compute describes the style and kind of values
it produces, e.g. a per-atom vector. Some computes produce more than
one kind of a single style, e.g. a global scalar and a global vector.
Global, per-atom, local, and per-grid quantities can also be of three
*kinds*: a single scalar value (global only), a vector of values, or a
2d array of values. For per-atom, local, and per-grid quantities, a
"vector" means a single value for each atom, each local entity
(e.g. bond), or grid cell. Likewise an "array", means multiple values
for each atom, each local entity, or each grid cell.
When a compute quantity is accessed, as in many of the output commands
discussed below, it can be referenced via the following bracket
notation, where ID is the ID of the compute:
Note that a single compute can produce any combination of global,
per-atom, local, or per-grid values. Likewise it can produce any
combination of scalar, vector, or array output for each style. The
exception is that for per-atom, local, and per-grid output, either a
vector or array can be produced, but not both. The doc page for each
compute explains the values it produces.
When a compute output is accessed by another input script command it
is referenced via the following bracket notation, where ID is the ID
of the compute:
+-------------+--------------------------------------------+
| c_ID | entire scalar, vector, or array |
@ -89,17 +93,23 @@ notation, where ID is the ID of the compute:
+-------------+--------------------------------------------+
In other words, using one bracket reduces the dimension of the
quantity once (vector :math:`\to` scalar, array :math:`\to` vector). Using two
brackets reduces the dimension twice (array :math:`\to` scalar). Thus a
command that uses scalar compute values as input can also process elements of a
vector or array.
quantity once (vector :math:`\to` scalar, array :math:`\to` vector).
Using two brackets reduces the dimension twice (array :math:`\to`
scalar). Thus, for example, a command that uses global scalar compute
values as input can also process elements of a vector or array.
Depending on the command, this can either be done directly using the
syntax in the table, or by first defining a :doc:`variable <variable>`
of the appropriate style to store the quantity, then using the
variable as an input to the command.
Note that commands and :doc:`variables <variable>` which use compute
quantities typically do not allow for all kinds (e.g., a command may
require a vector of values, not a scalar). This means there is no
ambiguity about referring to a compute quantity as c_ID even if it
produces, for example, both a scalar and vector. The doc pages for
various commands explain the details.
Note that commands and :doc:`variables <variable>` which take compute
outputs as input typically do not allow for all styles and kinds of
data (e.g., a command may require global but not per-atom values, or
it may require a vector of values, not a scalar). This means there is
typically no ambiguity about referring to a compute output as c_ID
even if it produces, for example, both a scalar and vector. The doc
pages for various commands explain the details, including how any
ambiguities are resolved.
----------

View File

@ -37,13 +37,16 @@ Syntax
v_name = per-atom vector calculated by an atom-style variable with name
* zero or more keyword/args pairs may be appended
* keyword = *replace*
* keyword = *replace* or *inputs*
.. parsed-literal::
*replace* args = vec1 vec2
vec1 = reduced value from this input vector will be replaced
vec2 = replace it with vec1[N] where N is index of max/min value from vec2
*inputs* arg = peratom or local
peratom = all inputs are per-atom quantities (default)
local = all inputs are local quantities
Examples
""""""""
@ -60,38 +63,44 @@ Description
"""""""""""
Define a calculation that "reduces" one or more vector inputs into
scalar values, one per listed input. The inputs can be per-atom or
local quantities; they cannot be global quantities. Atom attributes
are per-atom quantities, :doc:`computes <compute>` and :doc:`fixes <fix>`
may generate any of the three kinds of quantities, and :doc:`atom-style variables <variable>` generate per-atom quantities. See the
:doc:`variable <variable>` command and its special functions which can
perform the same operations as the compute reduce command on global
vectors.
scalar values, one per listed input. For the compute reduce command,
the inputs can be either per-atom or local quantities and must all be
of the same kind (per-atom or local); see discussion of the optional
*inputs* keyword below. The compute reduce/region command can only be
used with per-atom inputs.
Atom attributes are per-atom quantities, :doc:`computes <compute>` and
:doc:`fixes <fix>` can generate either per-atom or local quantities,
and :doc:`atom-style variables <variable>` generate per-atom
quantities. See the :doc:`variable <variable>` command and its
special functions which can perform the same reduction operations as
the compute reduce command on global vectors.
The reduction operation is specified by the *mode* setting. The *sum*
option adds the values in the vector into a global total. The *min*
or *max* options find the minimum or maximum value across all vector
values. The *minabs* or *maxabs* options find the minimum or maximum
value across all absolute vector values. The *ave* setting adds the
vector values into a global total, then divides by the number of values
in the vector. The *sumsq* option sums the square of the values in the
vector into a global total. The *avesq* setting does the same as *sumsq*,
then divides the sum of squares by the number of values. The last two options
can be useful for calculating the variance of some quantity (e.g., variance =
sumsq :math:`-` ave\ :math:`^2`). The *sumabs* option sums the absolute
values in the vector into a global total. The *aveabs* setting does the same
as *sumabs*, then divides the sum of absolute values by the number of
vector values into a global total, then divides by the number of
values in the vector. The *sumsq* option sums the square of the
values in the vector into a global total. The *avesq* setting does
the same as *sumsq*, then divides the sum of squares by the number of
values. The last two options can be useful for calculating the
variance of some quantity (e.g., variance = sumsq :math:`-` ave\
:math:`^2`). The *sumabs* option sums the absolute values in the
vector into a global total. The *aveabs* setting does the same as
*sumabs*, then divides the sum of absolute values by the number of
values.
Each listed input is operated on independently. For per-atom inputs,
the group specified with this command means only atoms within the
group contribute to the result. For per-atom inputs, if the compute
reduce/region command is used, the atoms must also currently be within
the region. Note that an input that produces per-atom quantities may
define its own group which affects the quantities it returns. For
example, if a compute is used as an input which generates a per-atom
vector, it will generate values of 0.0 for atoms that are not in the
group specified for that compute.
group contribute to the result. Likewise for per-atom inputs, if the
compute reduce/region command is used, the atoms must also currently
be within the region. Note that an input that produces per-atom
quantities may define its own group which affects the quantities it
returns. For example, if a compute is used as an input which
generates a per-atom vector, it will generate values of 0.0 for atoms
that are not in the group specified for that compute.
Each listed input can be an atom attribute (position, velocity, force
component) or can be the result of a :doc:`compute <compute>` or
@ -123,52 +132,54 @@ array with six columns:
----------
The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*, *fy*, and
*fz*) are self-explanatory. Note that other atom attributes can be used as
inputs to this fix by using the
:doc:`compute property/atom <compute_property_atom>` command and then specifying
an input value from that compute.
The atom attribute values (*x*, *y*, *z*, *vx*, *vy*, *vz*, *fx*,
*fy*, and *fz*) are self-explanatory. Note that other atom attributes
can be used as inputs to this fix by using the :doc:`compute
property/atom <compute_property_atom>` command and then specifying an
input value from that compute.
If a value begins with "c\_", a compute ID must follow which has been
previously defined in the input script. Computes can generate
per-atom or local quantities. See the individual
:doc:`compute <compute>` page for details. If no bracketed integer
is appended, the vector calculated by the compute is used. If a
bracketed integer is appended, the Ith column of the array calculated
by the compute is used. Users can also write code for their own
compute styles and :doc:`add them to LAMMPS <Modify>`. See the
discussion above for how :math:`I` can be specified with a wildcard asterisk
to effectively specify multiple values.
previously defined in the input script. Valid computes can generate
per-atom or local quantities. See the individual :doc:`compute
<compute>` page for details. If no bracketed integer is appended, the
vector calculated by the compute is used. If a bracketed integer is
appended, the Ith column of the array calculated by the compute is
used. Users can also write code for their own compute styles and
:doc:`add them to LAMMPS <Modify>`. See the discussion above for how
:math:`I` can be specified with a wildcard asterisk to effectively
specify multiple values.
If a value begins with "f\_", a fix ID must follow which has been
previously defined in the input script. Fixes can generate per-atom
or local quantities. See the individual :doc:`fix <fix>` page for
details. Note that some fixes only produce their values on certain
timesteps, which must be compatible with when compute reduce
previously defined in the input script. Valid fixes can generate
per-atom or local quantities. See the individual :doc:`fix <fix>`
page for details. Note that some fixes only produce their values on
certain timesteps, which must be compatible with when compute reduce
references the values, else an error results. If no bracketed integer
is appended, the vector calculated by the fix is used. If a bracketed
integer is appended, the Ith column of the array calculated by the fix
is used. Users can also write code for their own fix style and
:doc:`add them to LAMMPS <Modify>`. See the discussion above for how
:math:`I` can be specified with a wildcard asterisk to effectively specify
multiple values.
:math:`I` can be specified with a wildcard asterisk to effectively
specify multiple values.
If a value begins with "v\_", a variable name must follow which has
been previously defined in the input script. It must be an
:doc:`atom-style variable <variable>`. Atom-style variables can
reference thermodynamic keywords and various per-atom attributes, or
invoke other computes, fixes, or variables when they are evaluated, so
this is a very general means of generating per-atom quantities to reduce.
this is a very general means of generating per-atom quantities to
reduce.
----------
If the *replace* keyword is used, two indices *vec1* and *vec2* are
specified, where each index ranges from 1 to the number of input values.
The replace keyword can only be used if the *mode* is *min* or *max*\ .
It works as follows. A min/max is computed as usual on the *vec2*
input vector. The index :math:`N` of that value within *vec2* is also stored.
Then, instead of performing a min/max on the *vec1* input vector, the
stored index is used to select the :math:`N`\ th element of the *vec1* vector.
specified, where each index ranges from 1 to the number of input
values. The replace keyword can only be used if the *mode* is *min*
or *max*\ . It works as follows. A min/max is computed as usual on
the *vec2* input vector. The index :math:`N` of that value within
*vec2* is also stored. Then, instead of performing a min/max on the
*vec1* input vector, the stored index is used to select the :math:`N`\
th element of the *vec1* vector.
Thus, for example, if you wish to use this compute to find the bond
with maximum stretch, you can do it as follows:
@ -190,6 +201,16 @@ information in this context, the *replace* keywords will extract the
atom IDs for the two atoms in the bond of maximum stretch. These atom
IDs and the bond stretch will be printed with thermodynamic output.
.. versionadded:: TBD
The *inputs* keyword allows selection of whether all the inputs are
per-atom or local quantities. As noted above, all the inputs must be
the same kind (per-atom or local). Per-atom is the default setting.
If a compute or fix is specified as an input, it must produce per-atom
or local data to match this setting. If it produces both, e.g. for
the :doc:`compute voronoi/atom <compute_voronoi_atom>` command, then
this keyword selects between them.
----------
If a single input is specified this compute produces a global scalar
@ -197,38 +218,41 @@ value. If multiple inputs are specified, this compute produces a
global vector of values, the length of which is equal to the number of
inputs specified.
As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the value(s)
produced by this compute are all "extensive", meaning their value
scales linearly with the number of atoms involved. If normalized
values are desired, this compute can be accessed by the
As discussed below, for the *sum*, *sumabs*, and *sumsq* modes, the
value(s) produced by this compute are all "extensive", meaning their
value scales linearly with the number of atoms involved. If
normalized values are desired, this compute can be accessed by the
:doc:`thermo_style custom <thermo_style>` command with
:doc:`thermo_modify norm yes <thermo_modify>` set as an option.
Or it can be accessed by a
:doc:`variable <variable>` that divides by the appropriate atom count.
:doc:`thermo_modify norm yes <thermo_modify>` set as an option. Or it
can be accessed by a :doc:`variable <variable>` that divides by the
appropriate atom count.
----------
Output info
"""""""""""
This compute calculates a global scalar if a single input value is specified
or a global vector of length :math:`N`, where :math:`N` is the number of
inputs, and which can be accessed by indices 1 to :math:`N`. These values can
be used by any command that uses global scalar or vector values from a
compute as input. See the :doc:`Howto output <Howto_output>` doc page
for an overview of LAMMPS output options.
This compute calculates a global scalar if a single input value is
specified or a global vector of length :math:`N`, where :math:`N` is
the number of inputs, and which can be accessed by indices 1 to
:math:`N`. These values can be used by any command that uses global
scalar or vector values from a compute as input. See the :doc:`Howto
output <Howto_output>` doc page for an overview of LAMMPS output
options.
All the scalar or vector values calculated by this compute are
"intensive", except when the *sum*, *sumabs*, or *sumsq* modes are used on
per-atom or local vectors, in which case the calculated values are
"extensive".
The scalar or vector values will be in whatever :doc:`units <units>` the
quantities being reduced are in.
The scalar or vector values will be in whatever :doc:`units <units>`
the quantities being reduced are in.
Restrictions
""""""""""""
none
As noted above, the compute reduce/region command can only be used
with per-atom inputs.
Related commands
""""""""""""""""
@ -238,4 +262,4 @@ Related commands
Default
"""""""
none
The default value for the *inputs* keyword is peratom.

View File

@ -13,7 +13,7 @@ Syntax
* ID, group-ID are documented in :doc:`compute <compute>` command
* voronoi/atom = style name of this compute command
* zero or more keyword/value pairs may be appended
* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors* or *peratom*
* keyword = *only_group* or *occupation* or *surface* or *radius* or *edge_histo* or *edge_threshold* or *face_threshold* or *neighbors*
.. parsed-literal::
@ -31,7 +31,6 @@ Syntax
*face_threshold* arg = minarea
minarea = minimum area for a face to be counted
*neighbors* value = *yes* or *no* = store list of all neighbors or no
*peratom* value = *yes* or *no* = per-atom quantities accessible or no
Examples
""""""""
@ -53,14 +52,12 @@ atoms in the simulation box. The tessellation is calculated using all
atoms in the simulation, but non-zero values are only stored for atoms
in the group.
By default two per-atom quantities are calculated by this compute.
The first is the volume of the Voronoi cell around each atom. Any
point in an atom's Voronoi cell is closer to that atom than any other.
The second is the number of faces of the Voronoi cell. This is
equal to the number of nearest neighbors of the central atom,
plus any exterior faces (see note below). If the *peratom* keyword
is set to "no", the per-atom quantities are still calculated,
but they are not accessible.
Two per-atom quantities are calculated by this compute. The first is
the volume of the Voronoi cell around each atom. Any point in an
atom's Voronoi cell is closer to that atom than any other. The second
is the number of faces of the Voronoi cell. This is equal to the
number of nearest neighbors of the central atom, plus any exterior
faces (see note below).
----------
@ -97,13 +94,13 @@ present in atom_style sphere for granular models.
The *edge_histo* keyword activates the compilation of a histogram of
number of edges on the faces of the Voronoi cells in the compute
group. The argument *maxedge* of the this keyword is the largest number
of edges on a single Voronoi cell face expected to occur in the
sample. This keyword adds the generation of a global vector with
*maxedge*\ +1 entries. The last entry in the vector contains the number of
faces with more than *maxedge* edges. Since the polygon with the
smallest amount of edges is a triangle, entries 1 and 2 of the vector
will always be zero.
group. The argument *maxedge* of this keyword is the largest
number of edges on a single Voronoi cell face expected to occur in the
sample. This keyword generates output of a global vector by this
compute with *maxedge*\ +1 entries. The last entry in the vector
contains the number of faces with more than *maxedge* edges. Since the
polygon with the smallest number of edges is a triangle, entries 1 and
2 of the vector will always be zero.
The *edge_threshold* and *face_threshold* keywords allow the
suppression of edges below a given minimum length and faces below a
@ -127,8 +124,8 @@ to locate vacancies (the coordinates are given by the atom coordinates
at the time step when the compute was first invoked), while column two
data can be used to identify interstitial atoms.
If the *neighbors* value is set to yes, then this compute creates a
local array with 3 columns. There is one row for each face of each
If the *neighbors* value is set to yes, then this compute also creates
a local array with 3 columns. There is one row for each face of each
Voronoi cell. The 3 columns are the atom ID of the atom that owns the
cell, the atom ID of the atom in the neighboring cell (or zero if the
face is external), and the area of the face. The array can be
@ -143,8 +140,8 @@ containing all the Voronoi neighbors in a system:
compute 6 all voronoi/atom neighbors yes
dump d2 all local 1 dump.neighbors index c_6[1] c_6[2] c_6[3]
If the *face_threshold* keyword is used, then only faces
with areas greater than the threshold are stored.
If the *face_threshold* keyword is used, then only faces with areas
greater than the threshold are stored.
----------
@ -158,48 +155,52 @@ Voro++ software in the src/VORONOI/README file.
.. note::
The calculation of Voronoi volumes is performed by each processor for
the atoms it owns, and includes the effect of ghost atoms stored by
the processor. This assumes that the Voronoi cells of owned atoms
are not affected by atoms beyond the ghost atom cut-off distance.
This is usually a good assumption for liquid and solid systems, but
may lead to underestimation of Voronoi volumes in low density
systems. By default, the set of ghost atoms stored by each processor
is determined by the cutoff used for :doc:`pair_style <pair_style>`
interactions. The cutoff can be set explicitly via the
:doc:`comm_modify cutoff <comm_modify>` command. The Voronoi cells
for atoms adjacent to empty regions will extend into those regions up
to the communication cutoff in :math:`x`, :math:`y`, or :math:`z`.
In that situation, an exterior face is created at the cutoff distance
normal to the :math:`x`, :math:`y`, or :math:`z` direction. For
triclinic systems, the exterior face is parallel to the corresponding
reciprocal lattice vector.
The calculation of Voronoi volumes is performed by each processor
for the atoms it owns, and includes the effect of ghost atoms
stored by the processor. This assumes that the Voronoi cells of
owned atoms are not affected by atoms beyond the ghost atom cut-off
distance. This is usually a good assumption for liquid and solid
systems, but may lead to underestimation of Voronoi volumes in low
density systems. By default, the set of ghost atoms stored by each
processor is determined by the cutoff used for :doc:`pair_style
<pair_style>` interactions. The cutoff can be set explicitly via
the :doc:`comm_modify cutoff <comm_modify>` command. The Voronoi
cells for atoms adjacent to empty regions will extend into those
regions up to the communication cutoff in :math:`x`, :math:`y`, or
:math:`z`. In that situation, an exterior face is created at the
cutoff distance normal to the :math:`x`, :math:`y`, or :math:`z`
direction. For triclinic systems, the exterior face is parallel to
the corresponding reciprocal lattice vector.
.. note::
The Voro++ package performs its calculation in 3d. This will
still work for a 2d LAMMPS simulation, provided all the atoms have the
same :math:`z`-coordinate. The Voronoi cell of each atom will be a columnar
polyhedron with constant cross-sectional area along the :math:`z`-direction
and two exterior faces at the top and bottom of the simulation box. If
the atoms do not all have the same :math:`z`-coordinate, then the columnar
cells will be accordingly distorted. The cross-sectional area of each
Voronoi cell can be obtained by dividing its volume by the :math:`z` extent
of the simulation box. Note that you define the :math:`z` extent of the
simulation box for 2d simulations when using the
:doc:`create_box <create_box>` or :doc:`read_data <read_data>` commands.
The Voro++ package performs its calculation in 3d. This will still
work for a 2d LAMMPS simulation, provided all the atoms have the
same :math:`z`-coordinate. The Voronoi cell of each atom will be a
columnar polyhedron with constant cross-sectional area along the
:math:`z`-direction and two exterior faces at the top and bottom of
the simulation box. If the atoms do not all have the same
:math:`z`-coordinate, then the columnar cells will be accordingly
distorted. The cross-sectional area of each Voronoi cell can be
obtained by dividing its volume by the :math:`z` extent of the
simulation box. Note that you define the :math:`z` extent of the
simulation box for 2d simulations when using the :doc:`create_box
<create_box>` or :doc:`read_data <read_data>` commands.
Output info
"""""""""""
By default, this compute calculates a per-atom array with two
columns. In regular dynamic tessellation mode the first column is the
Voronoi volume, the second is the neighbor count, as described above
(read above for the output data in case the *occupation* keyword is
specified). These values can be accessed by any command that uses
per-atom values from a compute as input. See the :doc:`Howto output <Howto_output>` page for an overview of LAMMPS output
options. If the *peratom* keyword is set to "no", the per-atom array
is still created, but it is not accessible.
.. deprecated:: TBD
The *peratom* keyword was removed as it is no longer required.
This compute calculates a per-atom array with two columns. In regular
dynamic tessellation mode the first column is the Voronoi volume, the
second is the neighbor count, as described above (read above for the
output data in case the *occupation* keyword is specified). These
values can be accessed by any command that uses per-atom values from a
compute as input. See the :doc:`Howto output <Howto_output>` page for
an overview of LAMMPS output options.
If the *edge_histo* keyword is used, then this compute generates a
global vector of length *maxedge*\ +1, containing a histogram of the
@ -209,17 +210,6 @@ If the *neighbors* value is set to *yes*, then this compute calculates a
local array with three columns. There is one row for each face of each
Voronoi cell.
.. note::
Some LAMMPS commands such as the :doc:`compute reduce <compute_reduce>`
command can accept either a per-atom or local quantity. If this compute
produces both quantities, the command
may access the per-atom quantity, even if you want to access the local
quantity. This effect can be eliminated by using the *peratom*
keyword to turn off the production of the per-atom quantities. For
the default value *yes* both quantities are produced. For the value
*no*, only the local array is produced.
The Voronoi cell volume will be in distance :doc:`units <units>` cubed.
The Voronoi face area will be in distance :doc:`units <units>` squared.
@ -227,7 +217,8 @@ Restrictions
""""""""""""
This compute is part of the VORONOI package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
It also requires you have a copy of the Voro++ library built and
installed on your system. See instructions on obtaining and
@ -241,5 +232,4 @@ Related commands
Default
"""""""
*neighbors* no, *peratom* yes
The default for the neighbors keyword is no.

View File

@ -77,35 +77,44 @@ for individual fixes for info on which ones can be restarted.
----------
Some fixes calculate one or more of four styles of quantities: global,
per-atom, local, or per-grid, which can be used by other commands or
output as described below. A global quantity is one or more
system-wide values, e.g. the energy of a wall interacting with
particles. A per-atom quantity is one or more values per atom,
e.g. the displacement vector for each atom since time 0. Per-atom
values are set to 0.0 for atoms not in the specified fix group. Local
quantities are calculated by each processor based on the atoms it
owns, but there may be zero or more per atoms. Per-grid quantities
are calculated on a regular 2d or 3d grid which overlays a 2d or 3d
simulation domain. The grid points and the data they store are
distributed across processors; each processor owns the grid points
which fall within its subdomain.
Some fixes calculate and store any of four *styles* of quantities:
global, per-atom, local, or per-grid.
Note that a single fix typically produces either global or per-atom or
local or per-grid values (or none at all). It does not produce both
global and per-atom. It can produce local or per-grid values in
tandem with global or per-atom values. The fix doc page will explain
the details.
A global quantity is one or more system-wide values, e.g. the energy
of a wall interacting with particles. A per-atom quantity is one or
more values per atom, e.g. the original coordinates of each atom at
time 0. Per-atom values are set to 0.0 for atoms not in the specified
fix group. Local quantities are calculated by each processor based on
the atoms it owns, but there may be zero or more per atom, e.g. values
for each bond. Per-grid quantities are calculated on a regular 2d or
3d grid which overlays a 2d or 3d simulation domain. The grid points
and the data they store are distributed across processors; each
processor owns the grid points which fall within its subdomain.
Global, per-atom, local, and per-grid quantities come in three kinds:
a single scalar value, a vector of values, or a 2d array of values.
The doc page for each fix describes the style and kind of values it
produces, e.g. a per-atom vector. Some fixes produce more than one
kind of a single style, e.g. a global scalar and a global vector.
As a general rule of thumb, fixes that produce per-atom quantities
have the word "atom" at the end of their style, e.g. *ave/atom*\ .
Fixes that produce local quantities have the word "local" at the end
of their style, e.g. *store/local*\ . Fixes that produce per-grid
quantities have the word "grid" at the end of their style,
e.g. *ave/grid*\ .
When a fix quantity is accessed, as in many of the output commands
discussed below, it can be referenced via the following bracket
notation, where ID is the ID of the fix:
Global, per-atom, local, and per-grid quantities can also be of three
*kinds*: a single scalar value (global only), a vector of values, or a
2d array of values. For per-atom, local, and per-grid quantities, a
"vector" means a single value for each atom, each local entity
(e.g. bond), or grid cell. Likewise an "array", means multiple values
for each atom, each local entity, or each grid cell.
Note that a single fix can produce any combination of global,
per-atom, local, or per-grid values. Likewise it can produce any
combination of scalar, vector, or array output for each style. The
exception is that for per-atom, local, and per-grid output, either a
vector or array can be produced, but not both. The doc page for each
fix explains the values it produces, if any.
When a fix output is accessed by another input script command it is
referenced via the following bracket notation, where ID is the ID of
the fix:
+-------------+--------------------------------------------+
| f_ID | entire scalar, vector, or array |
@ -116,19 +125,23 @@ notation, where ID is the ID of the fix:
+-------------+--------------------------------------------+
In other words, using one bracket reduces the dimension of the
quantity once (vector :math:`\to` scalar, array :math:`\to` vector). Using two
brackets reduces the dimension twice (array :math:`\to` scalar). Thus, a
command that uses scalar fix values as input can also process elements of a
vector or array.
quantity once (vector :math:`\to` scalar, array :math:`\to` vector).
Using two brackets reduces the dimension twice (array :math:`\to`
scalar). Thus, for example, a command that uses global scalar fix
values as input can also process elements of a vector or array.
Depending on the command, this can either be done directly using the
syntax in the table, or by first defining a :doc:`variable <variable>`
of the appropriate style to store the quantity, then using the
variable as an input to the command.
Note that commands and :doc:`variables <variable>` that use fix
quantities typically do not allow for all kinds (e.g., a command may
require a vector of values, not a scalar), and even if they do, the context
in which they are called can be used to resolve which output is being
requested. This means there is no
ambiguity about referring to a fix quantity as f_ID even if it
produces, for example, both a scalar and vector. The doc pages for
various commands explain the details.
Note that commands and :doc:`variables <variable>` which take fix
outputs as input typically do not allow for all styles and kinds of
data (e.g., a command may require global but not per-atom values, or
it may require a vector of values, not a scalar). This means there is
typically no ambiguity about referring to a fix output as f_ID even if
it produces, for example, both a scalar and vector. The doc pages for
various commands explain the details, including how any ambiguities
are resolved.
----------
@ -333,6 +346,7 @@ accelerated styles exist.
* :doc:`pour <fix_pour>` - pour new atoms/molecules into a granular simulation domain
* :doc:`precession/spin <fix_precession_spin>` - apply a precession torque to each magnetic spin
* :doc:`press/berendsen <fix_press_berendsen>` - pressure control by Berendsen barostat
* :doc:`press/langevin <fix_press_langevin>` - pressure control by Langevin barostat
* :doc:`print <fix_print>` - print text and variables during a simulation
* :doc:`propel/self <fix_propel_self>` - model self-propelled particles
* :doc:`property/atom <fix_property_atom>` - add customized per-atom values

View File

@ -79,9 +79,10 @@ Description
Use one or more values as inputs every few timesteps to create a
single histogram. The histogram can then be averaged over longer
timescales. The resulting histogram can be used by other :doc:`output commands <Howto_output>`, and can also be written to a file. The
fix ave/histo/weight command has identical syntax to fix ave/histo,
except that exactly two values must be specified. See details below.
timescales. The resulting histogram can be used by other :doc:`output
commands <Howto_output>`, and can also be written to a file. The fix
ave/histo/weight command has identical syntax to fix ave/histo, except
that exactly two values must be specified. See details below.
The group specified with this command is ignored for global and local
input values. For per-atom input values, only atoms in the group
@ -96,14 +97,18 @@ different ways; see the discussion of the *beyond* keyword below.
Each input value can be an atom attribute (position, velocity, force
component) or can be the result of a :doc:`compute <compute>` or
:doc:`fix <fix>` or the evaluation of an equal-style or vector-style or
atom-style :doc:`variable <variable>`. The set of input values can be
either all global, all per-atom, or all local quantities. Inputs of
different kinds (e.g. global and per-atom) cannot be mixed. Atom
attributes are per-atom vector values. See the page for
individual "compute" and "fix" commands to see what kinds of
quantities they generate. See the optional *kind* keyword below for
how to force the fix ave/histo command to disambiguate if necessary.
:doc:`fix <fix>` or the evaluation of an equal-style or vector-style
or atom-style :doc:`variable <variable>`. The set of input values can
be either all global, all per-atom, or all local quantities. Inputs
of different kinds (e.g. global and per-atom) cannot be mixed. Atom
attributes are per-atom vector values. See the page for individual
"compute" and "fix" commands to see what kinds of quantities they
generate.
Note that a compute or fix can produce multiple kinds of data (global,
per-atom, local). If LAMMPS cannot unambiguously determine which kind
of data to use, the optional *kind* keyword discussed below can force
the desired disambiguation.
Note that the output of this command is a single histogram for all
input values combined together, not one histogram per input value.
@ -258,13 +263,14 @@ keyword is set to *vector*, then all input values must be global or
per-atom or local vectors, or columns of global or per-atom or local
arrays.
The *kind* keyword only needs to be set if a compute or fix produces
more than one kind of output (global, per-atom, local). If this is
not the case, then LAMMPS will determine what kind of input is
provided and whether all the input arguments are consistent. If a
compute or fix produces more than one kind of output, the *kind*
keyword should be used to specify which output will be used. The
remaining input arguments must still be consistent.
The *kind* keyword only needs to be used if any of the specified input
computes or fixes produce more than one kind of output (global,
per-atom, local). If not, LAMMPS will determine the kind of data all
the inputs produce and verify it is all the same kind; if it is not, an
error will be triggered. If a compute or fix produces more than one
kind of output, the *kind* keyword should be used to specify which
output will be used. The other input arguments must still be
consistent.
The *beyond* keyword determines how input values that fall outside the
*lo* to *hi* bounds are treated. Values such that *lo* :math:`\le` value

View File

@ -1,4 +1,5 @@
.. index:: fix efield
.. index:: fix efield/kk
.. index:: fix efield/tip4p
fix efield command
@ -210,6 +211,12 @@ the iteration count during the minimization.
system (the quantity being minimized), you MUST enable the
:doc:`fix_modify <fix_modify>` *energy* option for this fix.
----------
.. include:: accel_styles.rst
----------
Restrictions
""""""""""""

View File

@ -0,0 +1,301 @@
.. index:: fix press/langevin
fix press/langevin command
===========================
Syntax
""""""
.. parsed-literal::
fix ID group-ID press/langevin keyword value ...
* ID, group-ID are documented in :doc:`fix <fix>` command
* press/langevin = style name of this fix command
.. parsed-literal::
one or more keyword value pairs may be appended
keyword = *iso* or *aniso* or *tri* or *x* or *y* or *z* or *xy* or *xz* or *yz* or *couple* or *dilate* or *modulus* or *temp* or *flip*
*iso* or *aniso* or *tri* values = Pstart Pstop Pdamp
Pstart,Pstop = scalar external pressure at start/end of run (pressure units)
Pdamp = pressure damping parameter (time units)
*x* or *y* or *z* or *xy* or *xz* or *yz* values = Pstart Pstop Pdamp
Pstart,Pstop = external stress tensor component at start/end of run (pressure units)
Pdamp = pressure damping parameter (time units)
*flip* value = *yes* or *no* = allow or disallow box flips when it becomes highly skewed
*couple* = *none* or *xyz* or *xy* or *yz* or *xz*
*friction* value = Friction coefficient for the barostat (time units)
*temp* values = Tstart, Tstop, seed
Tstart, Tstop = target temperature used for the barostat at start/end of run
seed = seed of the random number generator
*dilate* value = *all* or *partial*
Examples
""""""""
.. code-block:: LAMMPS
fix 1 all press/langevin iso 0.0 0.0 1000.0 temp 300 300 487374
fix 2 all press/langevin aniso 0.0 0.0 1000.0 temp 100 300 238 dilate partial
Description
"""""""""""
Adjust the pressure of the system by using a Langevin stochastic barostat
:ref:`(Gronbech) <Gronbech>`, which rescales the system volume and
(optionally) the atoms coordinates within the simulation box every
timestep.
The Langevin barostat couples each direction *L* to a pseudo-particle that obeys
a Langevin equation of the form:
.. math::
f_P = & \frac{N k_B T_{target}}{V} + \frac{1}{V d}\sum_{i=1}^{N} \vec r_i \cdot \vec f_i - P_{target} \\
Q\ddot{L} + \alpha{}\dot{L} = & f_P + \beta(t)\\
   L^{n+1} = & L^{n} + bdt\dot{L}^{n} + \frac{bdt^{2}}{2Q}f^{n}_{P} \\
   \dot{L}^{n+1} = & a\dot{L}^{n} + \frac{dt}{2Q}\left(a f^{n}_{P} + f^{n+1}_{P}\right) + \frac{b}{Q}\beta^{n+1} \\
a = & \frac{1-\frac{\alpha{}dt}{2Q}}{1+\frac{\alpha{}dt}{2Q}} \\
b = & \frac{1}{1+\frac{\alpha{}dt}{2Q}} \\
\left< \beta(t)\beta(t') \right> = & 2\alpha k_B Tdt
Where :math:`dt` is the timestep, :math:`\dot{L}` and :math:`\ddot{L}` the first
and second derivatives of the coupled direction with respect to time,
:math:`\alpha` is a friction coefficient, :math:`\beta` is a random Gaussian
variable and :math:`Q` the effective mass of the coupled pseudoparticle. The
first two terms on the right-hand side of the first equation are the virial
expression of the canonical pressure. It is to be noted that the temperature
used to compute the pressure is not based on the atom velocities but rather on
the canonical
target temperature directly. This temperature is specified using the *temp*
keyword parameter and should be close to the expected target temperature of the
system.
Regardless of what atoms are in the fix group, a global pressure is
computed for all atoms. Similarly, when the size of the simulation
box is changed, all atoms are re-scaled to new positions, unless the
keyword *dilate* is specified with a value of *partial*, in which case
only the atoms in the fix group are re-scaled. The latter can be
useful for leaving the coordinates of atoms in a solid substrate
unchanged and controlling the pressure of a surrounding fluid.
.. note::
Unlike the :doc:`fix npt <fix_nh>` or :doc:`fix nph <fix_nh>` commands which
perform Nose-Hoover barostatting AND time integration, this fix does NOT
perform time integration of the atoms but only of the barostat coupled
coordinate. It then only modifies the box size and atom coordinates to
effect barostatting. Thus you must use a separate time integration fix,
like :doc:`fix nve <fix_nve>` or :doc:`fix nvt <fix_nh>` to actually update
the positions and velocities of atoms. This fix can be used in conjunction
with thermostatting fixes to control the temperature, such as :doc:`fix nvt
<fix_nh>` or :doc:`fix langevin <fix_langevin>` or :doc:`fix temp/berendsen
<fix_temp_berendsen>`.
See the :doc:`Howto barostat <Howto_barostat>` page for a
discussion of different ways to perform barostatting.
----------
The barostat is specified using one or more of the *iso*, *aniso*, *tri* *x*,
*y*, *z*, *xy*, *xz*, *yz*, and *couple* keywords. These keywords give you the
ability to specify the 3 diagonal components of an external stress tensor, and
to couple various of these components together so that the dimensions they
represent are varied together during a constant-pressure simulation.
The target pressures for each of the 6 diagonal components of the stress tensor
can be specified independently via the *x*, *y*, *z*, keywords, which
correspond to the 3 simulation box dimensions, and the *xy*, *xz* and *yz*
keywords which corresponds to the 3 simulation box tilt factors. For each
component, the external pressure or tensor component at each timestep is a
ramped value during the run from *Pstart* to *Pstop*\ . If a target pressure is
specified for a component, then the corresponding box dimension will change
during a simulation. For example, if the *y* keyword is used, the y-box length
will change. A box dimension will not change if that component is not
specified, although you have the option to change that dimension via the
:doc:`fix deform <fix_deform>` command.
The *Pdamp* parameter can be seen in the same way as a Nose-Hoover parameter as
it is used to compute the mass of the fictitious particle. Without friction,
the barostat can be compared to a single particle Nose-Hoover barostat and
should follow a similar decay in time. The mass of the barostat is
linked to *Pdamp* by the relation
:math:`Q=(N_{at}+1)\cdot{}k_BT_{target}\cdot{}P_{damp}^2`. Note that *Pdamp*
should be expressed in time units.
.. note::
As with the Berendsen barostat, a Langevin barostat will not work well for
arbitrary values of *Pdamp*\ . If *Pdamp* is too small, the pressure and
volume can fluctuate wildly; if it is too large, the pressure will take a
very long time to equilibrate. A good choice for many models is a *Pdamp*
of around 1000 timesteps. However, note that *Pdamp* is specified in time
units, and that timesteps are NOT the same as time units for most
:doc:`units <units>` settings.
----------
The *temp* keyword sets the temperature to use in the equation of motion of the
barostat. This value is used to compute the value of the force :math:`f_P` in
the equation of motion. It is important to note that this value is not the
instantaneous temperature but a target temperature that ramps from *Tstart* to
*Tstop*. Also the required argument *seed* sets the seed for the random
number generator used in the generation of the random forces.
----------
The *couple* keyword allows two or three of the diagonal components of
the pressure tensor to be "coupled" together. The value specified
with the keyword determines which are coupled. For example, *xz*
means the *Pxx* and *Pzz* components of the stress tensor are coupled.
*Xyz* means all 3 diagonal components are coupled. Coupling means two
things: the instantaneous stress will be computed as an average of the
corresponding diagonal components, and the coupled box dimensions will
be changed together in lockstep, meaning coupled dimensions will be
dilated or contracted by the same percentage every timestep. The
*Pstart*, *Pstop*, *Pdamp* parameters for any coupled dimensions must
be identical. *Couple xyz* can be used for a 2d simulation; the *z*
dimension is simply ignored.
----------
The *iso*, *aniso* and *tri* keywords are simply shortcuts that are
equivalent to specifying several other keywords together.
The keyword *iso* means couple all 3 diagonal components together when
pressure is computed (hydrostatic pressure), and dilate/contract the
dimensions together. Using "iso Pstart Pstop Pdamp" is the same as
specifying these 4 keywords:
.. parsed-literal::
x Pstart Pstop Pdamp
y Pstart Pstop Pdamp
z Pstart Pstop Pdamp
couple xyz
The keyword *aniso* means *x*, *y*, and *z* dimensions are controlled
independently using the *Pxx*, *Pyy*, and *Pzz* components of the
stress tensor as the driving forces, and the specified scalar external
pressure. Using "aniso Pstart Pstop Pdamp" is the same as specifying
these 4 keywords:
.. parsed-literal::
x Pstart Pstop Pdamp
y Pstart Pstop Pdamp
z Pstart Pstop Pdamp
couple none
The keyword *tri* is the same as *aniso* but also adds the control on the
shear pressure coupled with the tilt factors.
.. parsed-literal::
x Pstart Pstop Pdamp
y Pstart Pstop Pdamp
z Pstart Pstop Pdamp
xy Pstart Pstop Pdamp
xz Pstart Pstop Pdamp
yz Pstart Pstop Pdamp
couple none
----------
The *flip* keyword allows the tilt factors for a triclinic box to
exceed half the distance of the parallel box length, as discussed
below. If the *flip* value is set to *yes*, the bound is enforced by
flipping the box when it is exceeded. If the *flip* value is set to
*no*, the tilt will continue to change without flipping. Note that if
applied stress induces large deformations (e.g. in a liquid), this
means the box shape can tilt dramatically and LAMMPS will run less
efficiently, due to the large volume of communication needed to
acquire ghost atoms around a processor's irregular-shaped subdomain.
For extreme values of tilt, LAMMPS may also lose atoms and generate an
error.
----------
The *friction* keyword sets the friction parameter :math:`\alpha` in the
equations of motion of the barostat. For each barostat direction, the value of
:math:`\alpha` depends on both *Pdamp* and *friction*. The value given as a
parameter is the Langevin characteristic time
:math:`\tau_{L}=\frac{Q}{\alpha}` in time units. The Langevin time can be understood as a
decorrelation time for the pressure. A long Langevin time value will make the
barostat act as an underdamped oscillator while a short value will make it
act as an overdamped oscillator. The ideal configuration would be to find
the critical parameter of the barostat. Empirically this is observed to
occur for :math:`\tau_{L}\approx{}P_{damp}`. For this reason, if the *friction*
keyword is not used, the default value *Pdamp* is used for each barostat direction.
----------
This fix computes pressure each timestep. To do
this, the fix creates its own computes of style "pressure",
as if this command had been issued:
.. code-block:: LAMMPS
compute fix-ID_press group-ID pressure NULL virial
The kinetic contribution to the pressure is taken as the ensemble value
:math:`\frac{Nk_BT}{V}` and computed by the fix itself.
See the :doc:`compute pressure <compute_pressure>` command for details. Note
that the IDs of the new compute is the fix-ID + underscore + "press" and the
group for the new computes is the same as the fix group.
Note that this is NOT the compute used by thermodynamic output (see the
:doc:`thermo_style <thermo_style>` command) with ID = *thermo_press*. This
means you can change the attributes of this fix's pressure via the
:doc:`compute_modify <compute_modify>` command or print this pressure
during thermodynamic output via the :doc:`thermo_style custom
<thermo_style>` command using the appropriate compute-ID. It also means that
changing attributes of *thermo_temp* or *thermo_press* will have no effect on
this fix.
Restart, fix_modify, output, run start/stop, minimize info
"""""""""""""""""""""""""""""""""""""""""""""""""""""""""""
No information about this fix is written to :doc:`binary restart files <restart>`.
The :doc:`fix_modify <fix_modify>` *press* option is
supported by this fix. You can use it to assign a
:doc:`compute <compute>` you have defined to this fix which will be used
in its pressure calculations.
No global or per-atom quantities are stored by this fix for access by
various :doc:`output commands <Howto_output>`.
This fix can ramp its target pressure and temperature over multiple runs, using
the *start* and *stop* keywords of the :doc:`run <run>` command. See the
:doc:`run <run>` command for details of how to do this. It is recommended that
the ramped temperature is the same as the effective temperature of the
thermostatted system. That is, if the system's temperature is ramped by other
commands, it is recommended to do the same with this pressure control.
This fix is not invoked during :doc:`energy minimization <minimize>`.
Restrictions
""""""""""""
Any dimension being adjusted by this fix must be periodic.
Related commands
""""""""""""""""
:doc:`fix press/berendsen <fix_press_berendsen>`,
:doc:`fix nve <fix_nve>`, :doc:`fix nph <fix_nh>`, :doc:`fix npt <fix_nh>`, :doc:`fix langevin <fix_langevin>`,
:doc:`fix_modify <fix_modify>`
Default
"""""""
The keyword defaults are *dilate* = all, *flip* = yes, and *friction* = *Pdamp*.
----------
.. _Gronbech:
**(Gronbech)** Gronbech-Jensen, Farago, J Chem Phys, 141, 194108 (2014).

View File

@ -843,7 +843,7 @@ stress/atom <compute_stress_atom>` commands. The former can be
accessed by :doc:`thermodynamic output <thermo_style>`. The default
setting for this fix is :doc:`fix_modify virial yes <fix_modify>`.
All of the *rigid* styles (not the *rigid/small* styles) compute a
All of the *rigid* styles (but not the *rigid/small* styles) compute a
global array of values which can be accessed by various :doc:`output
commands <Howto_output>`. Similar information about the bodies
defined by the *rigid/small* styles can be accessed via the
@ -887,7 +887,8 @@ Restrictions
""""""""""""
These fixes are all part of the RIGID package. It is only enabled if
LAMMPS was built with that package. See the :doc:`Build package <Build_package>` page for more info.
LAMMPS was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
Assigning a temperature via the :doc:`velocity create <velocity>`
command to a system with :doc:`rigid bodies <fix_rigid>` may not have

View File

@ -1,4 +1,5 @@
.. index:: fix spring/self
.. index:: fix spring/self/kk
fix spring/self command
=======================
@ -80,6 +81,12 @@ invoked by the :doc:`minimize <minimize>` command.
you MUST enable the :doc:`fix_modify <fix_modify>` *energy* option for
this fix.
----------
.. include:: accel_styles.rst
----------
Restrictions
""""""""""""
none

View File

@ -61,24 +61,30 @@ Description
Treat a group of particles as stochastic rotation dynamics (SRD)
particles that serve as a background solvent when interacting with big
(colloidal) particles in groupbig-ID. The SRD formalism is described
in :ref:`(Hecht) <Hecht>`. The key idea behind using SRD particles as a
cheap coarse-grained solvent is that SRD particles do not interact
with each other, but only with the solute particles, which in LAMMPS
can be spheroids, ellipsoids, or line segments, or triangles, or rigid
bodies containing multiple spheroids or ellipsoids or line segments
or triangles. The collision and rotation properties of the model
imbue the SRD particles with fluid-like properties, including an
effective viscosity. Thus simulations with large solute particles can
be run more quickly, to measure solute properties like diffusivity
and viscosity in a background fluid. The usual LAMMPS fixes for such
simulations, such as :doc:`fix deform <fix_deform>`, :doc:`fix viscosity <fix_viscosity>`, and :doc:`fix nvt/sllod <fix_nvt_sllod>`,
can be used in conjunction with the SRD model.
in :ref:`(Hecht) <Hecht>`. The same methodology is also called
multi-particle collision dynamics (MPCD) in the literature.
For more details on how the SRD model is implemented in LAMMPS, :ref:`this paper <Petersen1>` describes the implementation and usage of pure SRD
fluids. :ref:`This paper <Lechman>`, which is nearly complete, describes
the implementation and usage of mixture systems (solute particles in
an SRD fluid). See the examples/srd directory for sample input
scripts using SRD particles in both settings.
The key idea behind using SRD particles as a cheap coarse-grained
solvent is that SRD particles do not interact with each other, but
only with the solute particles, which in LAMMPS can be spheroids,
ellipsoids, or line segments, or triangles, or rigid bodies containing
multiple spheroids or ellipsoids or line segments or triangles. The
collision and rotation properties of the model imbue the SRD particles
with fluid-like properties, including an effective viscosity. Thus
simulations with large solute particles can be run more quickly, to
measure solute properties like diffusivity and viscosity in a
background fluid. The usual LAMMPS fixes for such simulations, such
as :doc:`fix deform <fix_deform>`, :doc:`fix viscosity
<fix_viscosity>`, and :doc:`fix nvt/sllod <fix_nvt_sllod>`, can be
used in conjunction with the SRD model.
These 3 papers give more details on how the SRD model is implemented
in LAMMPS. :ref:`(Petersen) <Petersen1>` describes pure SRD fluid
systems. :ref:`(Bolintineanu1) <Bolintineanu1>` describes models
where pure SRD fluids interact with boundary walls.
:ref:`(Bolintineanu2) <Bolintineanu2>` describes mixture models where
large colloidal particles are solvated by an SRD fluid. See the
``examples/srd`` directory for sample input scripts.
This fix does two things:
@ -357,28 +363,28 @@ These are the 12 quantities. All are values for the current timestep,
except for quantity 5 and the last three, each of which are
cumulative quantities since the beginning of the run.
* (1) # of SRD/big collision checks performed
* (2) # of SRDs which had a collision
* (3) # of SRD/big collisions (including multiple bounces)
* (4) # of SRD particles inside a big particle
* (5) # of SRD particles whose velocity was rescaled to be < Vmax
* (6) # of bins for collision searching
* (7) # of bins for SRD velocity rotation
* (8) # of bins in which SRD temperature was computed
* (9) SRD temperature
* (10) # of SRD particles which have undergone max # of bounces
* (11) max # of bounces any SRD particle has had in a single step
* (12) # of reneighborings due to SRD particles moving too far
(1) # of SRD/big collision checks performed
(2) # of SRDs which had a collision
(3) # of SRD/big collisions (including multiple bounces)
(4) # of SRD particles inside a big particle
(5) # of SRD particles whose velocity was rescaled to be < Vmax
(6) # of bins for collision searching
(7) # of bins for SRD velocity rotation
(8) # of bins in which SRD temperature was computed
(9) SRD temperature
(10) # of SRD particles which have undergone max # of bounces
(11) max # of bounces any SRD particle has had in a single step
(12) # of reneighborings due to SRD particles moving too far
No parameter of this fix can be used with the *start/stop* keywords of
the :doc:`run <run>` command. This fix is not invoked during :doc:`energy minimization <minimize>`.
the :doc:`run <run>` command. This fix is not invoked during
:doc:`energy minimization <minimize>`.
Restrictions
""""""""""""
This command can only be used if LAMMPS was built with the SRD
package. See the :doc:`Build package <Build_package>` doc
page for more info.
This command can only be used if LAMMPS was built with the SRD package.
See the :doc:`Build package <Build_package>` doc page for more info.
Related commands
""""""""""""""""
@ -404,6 +410,12 @@ no, and rescale = yes.
**(Petersen)** Petersen, Lechman, Plimpton, Grest, in 't Veld, Schunk, J
Chem Phys, 132, 174106 (2010).
.. _Lechman:
.. _Bolintineanu1:
**(Lechman)** Lechman, et al, in preparation (2010).
**(Bolintineanu1)**
Bolintineanu, Lechman, Plimpton, Grest, Phys Rev E, 86, 066703 (2012).
.. _Bolintineanu2:
**(Bolintineanu2)** Bolintineanu, Grest, Lechman, Pierce, Plimpton,
Schunk, Comp Particle Mechanics, 1, 321-356 (2014).

View File

@ -22,12 +22,12 @@ Examples
.. code-block:: LAMMPS
pair_style hybrid/overlay ilp/tmd 16.0 1
pair_coeff * * ilp/tmd TMD.ILP Mo S S
pair_coeff * * ilp/tmd MoS2.ILP Mo S S
pair_style hybrid/overlay sw/mod sw/mod ilp/tmd 16.0
pair_coeff * * sw/mod 1 tmd.sw.mod Mo S S NULL NULL NULL
pair_coeff * * sw/mod 2 tmd.sw.mod NULL NULL NULL Mo S S
pair_coeff * * ilp/tmd TMD.ILP Mo S S Mo S S
pair_coeff * * ilp/tmd MoS2.ILP Mo S S Mo S S
Description
"""""""""""
@ -69,7 +69,7 @@ calculating the normals.
each atom `i`, its six nearest neighboring atoms belonging to the same
sub-layer are chosen to define the normal vector `{\bf n}_i`.
The parameter file (e.g. TMD.ILP), is intended for use with *metal*
The parameter file (e.g. MoS2.ILP), is intended for use with *metal*
:doc:`units <units>`, with energies in meV. Two additional parameters,
*S*, and *rcut* are included in the parameter file. *S* is designed to
facilitate scaling of energies. *rcut* is designed to build the neighbor
@ -77,7 +77,7 @@ list for calculating the normals for each atom pair.
.. note::
The parameters presented in the parameter file (e.g. TMD.ILP),
The parameters presented in the parameter file (e.g. MoS2.ILP),
are fitted with taper function by setting the cutoff equal to 16.0
Angstrom. Care should be taken when using a different cutoff or taper function.
These parameters provide a good description in both short- and long-range
@ -133,10 +133,10 @@ if LAMMPS was built with that package. See the :doc:`Build package
This pair style requires the newton setting to be *on* for pair
interactions.
The TMD.ILP potential file provided with LAMMPS (see the potentials
The MoS2.ILP potential file provided with LAMMPS (see the potentials
directory) are parameterized for *metal* units. You can use this
potential with any LAMMPS units, but you would need to create your own
custom TMD.ILP potential file with coefficients listed in the appropriate
custom MoS2.ILP potential file with coefficients listed in the appropriate
units, if your simulation does not use *metal* units.
Related commands

View File

@ -43,22 +43,22 @@ Examples
Description
"""""""""""
Style *reaxff* computes the ReaxFF potential of van Duin, Goddard and
co-workers. ReaxFF uses distance-dependent bond-order functions to
Pair style *reaxff* computes the ReaxFF potential of van Duin, Goddard
and co-workers. ReaxFF uses distance-dependent bond-order functions to
represent the contributions of chemical bonding to the potential
energy. There is more than one version of ReaxFF. The version
energy. There is more than one version of ReaxFF. The version
implemented in LAMMPS uses the functional forms documented in the
supplemental information of the following paper:
:ref:`(Chenoweth et al., 2008) <Chenoweth_20082>`. The version integrated
into LAMMPS matches the version of ReaxFF From Summer 2010. For more
technical details about the pair reaxff implementation of ReaxFF, see
the :ref:`(Aktulga) <Aktulga>` paper. The *reaxff* style was initially
implemented as a stand-alone C code and is now converted to C++ and
integrated into LAMMPS as a package.
:ref:`(Chenoweth et al., 2008) <Chenoweth_20082>` and matches the
version of the reference ReaxFF implementation from Summer 2010. For
more technical details about the implementation of ReaxFF in pair style
*reaxff*, see the :ref:`(Aktulga) <Aktulga>` paper. The *reaxff* style
was initially implemented as a stand-alone C code and is now converted
to C++ and integrated into LAMMPS as a package.
The *reaxff/kk* style is a Kokkos version of the ReaxFF potential that
is derived from the *reaxff* style. The Kokkos version can run on GPUs
and can also use OpenMP multithreading. For more information about the
is derived from the *reaxff* style. The Kokkos version can run on GPUs
and can also use OpenMP multithreading. For more information about the
Kokkos package, see :doc:`Packages details <Packages_details>` and
:doc:`Speed kokkos <Speed_kokkos>` doc pages. One important
consideration when using the *reaxff/kk* style is the choice of either

View File

@ -1,10 +1,11 @@
.. index:: pair_style snap
.. index:: pair_style snap/intel
.. index:: pair_style snap/kk
pair_style snap command
=======================
Accelerator Variants: *snap/kk*
Accelerator Variants: *snap/intel*, *snap/kk*
Syntax
""""""
@ -260,6 +261,14 @@ This style is part of the ML-SNAP package. It is only enabled if LAMMPS
was built with that package. See the :doc:`Build package
<Build_package>` page for more info.
The *snap/intel* accelerator variant will *only* be available if LAMMPS
is built with Intel *compilers* and for CPUs with AVX-512 support.
While the INTEL package in general allows multiple floating point
precision modes to be selected, *snap/intel* will currently always use
full double precision regardless of the precision mode selected.
Additionally, the *intel* variant of snap will **NOT** use multiple
threads with OpenMP.
Related commands
""""""""""""""""

View File

@ -1,11 +1,12 @@
.. index:: pair_style yukawa/colloid
.. index:: pair_style yukawa/colloid/gpu
.. index:: pair_style yukawa/colloid/kk
.. index:: pair_style yukawa/colloid/omp
pair_style yukawa/colloid command
=================================
Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/omp*
Accelerator Variants: *yukawa/colloid/gpu*, *yukawa/colloid/kk*, *yukawa/colloid/omp*
Syntax
""""""
@ -131,6 +132,12 @@ per-type polydispersity is allowed. This means all particles of the
same type must have the same diameter. Each type can have a different
diameter.
----------
.. include:: accel_styles.rst
----------
Related commands
""""""""""""""""

View File

@ -385,19 +385,20 @@ creates a global vector with 6 values.
The *c_ID* and *c_ID[I]* and *c_ID[I][J]* keywords allow global values
calculated by a compute to be output. As discussed on the
:doc:`compute <compute>` doc page, computes can calculate global,
per-atom, or local values. Only global values can be referenced by
this command. However, per-atom compute values for an individual atom
can be referenced in a :doc:`variable <variable>` and the variable
referenced by thermo_style custom, as discussed below. See the
discussion above for how the I in *c_ID[I]* can be specified with a
wildcard asterisk to effectively specify multiple values from a global
compute vector.
per-atom, local, and per-grid values. Only global values can be
referenced by this command. However, per-atom compute values for an
individual atom can be referenced in a :doc:`equal-style variable
<variable>` and the variable referenced by thermo_style custom, as
discussed below. See the discussion above for how the I in *c_ID[I]*
can be specified with a wildcard asterisk to effectively specify
multiple values from a global compute vector.
The ID in the keyword should be replaced by the actual ID of a compute
that has been defined elsewhere in the input script. See the
:doc:`compute <compute>` command for details. If the compute calculates
a global scalar, vector, or array, then the keyword formats with 0, 1,
or 2 brackets will reference a scalar value from the compute.
:doc:`compute <compute>` command for details. If the compute
calculates a global scalar, vector, or array, then the keyword formats
with 0, 1, or 2 brackets will reference a scalar value from the
compute.
Note that some computes calculate "intensive" global quantities like
temperature; others calculate "extensive" global quantities like
@ -410,13 +411,14 @@ norm <thermo_modify>` option being used.
The *f_ID* and *f_ID[I]* and *f_ID[I][J]* keywords allow global values
calculated by a fix to be output. As discussed on the :doc:`fix
<fix>` doc page, fixes can calculate global, per-atom, or local
values. Only global values can be referenced by this command.
However, per-atom fix values can be referenced for an individual atom
in a :doc:`variable <variable>` and the variable referenced by
thermo_style custom, as discussed below. See the discussion above for
how the I in *f_ID[I]* can be specified with a wildcard asterisk to
effectively specify multiple values from a global fix vector.
<fix>` doc page, fixes can calculate global, per-atom, local, and
per-grid values. Only global values can be referenced by this
command. However, per-atom fix values can be referenced for an
individual atom in a :doc:`equal-style variable <variable>` and the
variable referenced by thermo_style custom, as discussed below. See
the discussion above for how the I in *f_ID[I]* can be specified with
a wildcard asterisk to effectively specify multiple values from a
global fix vector.
The ID in the keyword should be replaced by the actual ID of a fix
that has been defined elsewhere in the input script. See the
@ -438,14 +440,15 @@ output. The name in the keyword should be replaced by the variable
name that has been defined elsewhere in the input script. Only
equal-style and vector-style variables can be referenced; the latter
requires a bracketed term to specify the Ith element of the vector
calculated by the variable. However, an atom-style variable can be
referenced for an individual atom by an equal-style variable and that
variable referenced. See the :doc:`variable <variable>` command for
details. Variables of style *equal* and *vector* and *atom* define a
formula which can reference per-atom properties or thermodynamic
keywords, or they can invoke other computes, fixes, or variables when
evaluated, so this is a very general means of creating thermodynamic
output.
calculated by the variable. However, an equal-style variable can use
an atom-style variable in its formula indexed by the ID of an
individual atom. This is a way to output a specific atom's per-atom
coordinates or other per-atom properties in thermo output. See the
:doc:`variable <variable>` command for details. Note that variables
of style *equal* and *vector* and *atom* define a formula which can
reference per-atom properties or thermodynamic keywords, or they can
invoke other computes, fixes, or variables when evaluated, so this is
a very general means of creating thermodynamic output.
Note that equal-style and vector-style variables are assumed to
produce "intensive" global quantities, which are thus printed as-is,

View File

@ -550,12 +550,11 @@ variables.
Most of the formula elements produce a scalar value. Some produce a
global or per-atom vector of values. Global vectors can be produced
by computes or fixes or by other vector-style variables. Per-atom
vectors are produced by atom vectors, compute references that
represent a per-atom vector, fix references that represent a per-atom
vector, and variables that are atom-style variables. Math functions
that operate on scalar values produce a scalar value; math function
that operate on global or per-atom vectors do so element-by-element
and produce a global or per-atom vector.
vectors are produced by atom vectors, computes or fixes which output a
per-atom vector or array, and variables that are atom-style variables.
Math functions that operate on scalar values produce a scalar value;
math function that operate on global or per-atom vectors do so
element-by-element and produce a global or per-atom vector.
A formula for equal-style variables cannot use any formula element
that produces a global or per-atom vector. A formula for a
@ -564,12 +563,13 @@ scalar value or a global vector value, but cannot use a formula
element that produces a per-atom vector. A formula for an atom-style
variable can use formula elements that produce either a scalar value
or a per-atom vector, but not one that produces a global vector.
Atom-style variables are evaluated by other commands that define a
:doc:`group <group>` on which they operate, e.g. a :doc:`dump <dump>` or
:doc:`compute <compute>` or :doc:`fix <fix>` command. When they invoke
the atom-style variable, only atoms in the group are included in the
formula evaluation. The variable evaluates to 0.0 for atoms not in
the group.
:doc:`group <group>` on which they operate, e.g. a :doc:`dump <dump>`
or :doc:`compute <compute>` or :doc:`fix <fix>` command. When they
invoke the atom-style variable, only atoms in the group are included
in the formula evaluation. The variable evaluates to 0.0 for atoms
not in the group.
----------
@ -1138,69 +1138,74 @@ only defined if an :doc:`atom_style <atom_style>` is being used that
defines molecule IDs.
Note that many other atom attributes can be used as inputs to a
variable by using the :doc:`compute property/atom <compute_property_atom>` command and then specifying
a quantity from that compute.
variable by using the :doc:`compute property/atom
<compute_property_atom>` command and then specifying a quantity from
that compute.
----------
Compute References
------------------
Compute references access quantities calculated by a
:doc:`compute <compute>`. The ID in the reference should be replaced by
the ID of a compute defined elsewhere in the input script. As
discussed in the page for the :doc:`compute <compute>` command,
computes can produce global, per-atom, or local values. Only global
and per-atom values can be used in a variable. Computes can also
produce a scalar, vector, or array.
Compute references access quantities calculated by a :doc:`compute
<compute>`. The ID in the reference should be replaced by the ID of a
compute defined elsewhere in the input script.
An equal-style variable can only use scalar values, which means a
global scalar, or an element of a global or per-atom vector or array.
A vector-style variable can use scalar values or a global vector of
values, or a column of a global array of values. Atom-style variables
can use global scalar values. They can also use per-atom vector
values, or a column of a per-atom array. See the doc pages for
individual computes to see what kind of values they produce.
As discussed on the page for the :doc:`compute <compute>` command,
computes can produce global, per-atom, local, and per-grid values.
Only global and per-atom values can be used in a variable. Computes
can also produce scalars (global only), vectors, and arrays. See the
doc pages for individual computes to see what different kinds of data
they produce.
Examples of different kinds of compute references are as follows.
There is typically no ambiguity (see exception below) as to what a
reference means, since computes only produce either global or per-atom
quantities, never both.
An equal-style variable can only use scalar values, either from global
or per-atom data. In the case of per-atom data, this would be a value
for a specific atom.
+-------------+-------------------------------------------------------------------------------------------------------+
| c_ID | global scalar, or per-atom vector |
+-------------+-------------------------------------------------------------------------------------------------------+
| c_ID[I] | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array |
+-------------+-------------------------------------------------------------------------------------------------------+
| c_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array |
+-------------+-------------------------------------------------------------------------------------------------------+
A vector-style variable can use scalar values (same as for equal-style
variables), or global vectors of values. The latter can also be a
column of a global array.
For I and J indices, integers can be specified or a variable name,
specified as v_name, where name is the name of the variable. The
rules for this syntax are the same as for the "Atom Values and
Vectors" discussion above.
Atom-style variables can use scalar values (same as for equal-style
variables), or per-atom vectors of values. The latter can also be a
column of a per-atom array.
One source of ambiguity for compute references is when a vector-style
variable refers to a compute that produces both a global scalar and a
global vector. Consider a compute with ID "foo" that does this,
referenced as follows by variable "a", where "myVec" is another
vector-style variable:
The various allowed compute references in the variable formulas for
equal-, vector-, and atom-style variables are listed in the following
table:
.. code-block:: LAMMPS
+--------+------------+------------------------------------------+
| equal | c_ID | global scalar |
| equal | c_ID[I] | element of global vector |
| equal | c_ID[I][J] | element of global array |
| equal | C_ID[I] | element of per-atom vector (I = atom ID) |
| equal | C_ID[I][J] | element of per-atom array (I = atom ID) |
+--------+------------+------------------------------------------+
| vector | c_ID | global vector |
| vector | c_ID[I] | column of global array |
+--------+------------+------------------------------------------+
| atom | c_ID | per-atom vector |
| atom | c_ID[I] | column of per-atom array |
+--------+------------+------------------------------------------+
variable a vector c_foo*v_myVec
Note that if an equal-style variable formula wishes to access per-atom
data from a compute, it must use capital "C" as the ID prefix and not
lower-case "c".
The reference "c_foo" could refer to either the global scalar or
global vector produced by compute "foo". In this case, "c_foo" will
always refer to the global scalar, and "C_foo" can be used to
reference the global vector. Similarly if the compute produces both a
global vector and global array, then "c_foo[I]" will always refer to
an element of the global vector, and "C_foo[I]" can be used to
reference the Ith column of the global array.
Also note that if a vector- or atom-style variable formula needs to
access a scalar value from a compute (i.e. the 5 kinds of values in
the first 5 lines of the table), it can not do so directly. Instead,
it can use a reference to an equal-style variable which stores the
scalar value from the compute.
Note that if a variable containing a compute is evaluated directly in
an input script (not during a run), then the values accessed by the
compute must be current. See the discussion below about "Variable
The I and J indices in these compute references can be integers or can
be a variable name, specified as v_name, where name is the name of the
variable. The rules for this syntax are the same as for indices in
the "Atom Values and Vectors" discussion above.
If a variable containing a compute is evaluated directly in an input
script (not during a run), then the values accessed by the compute
should be current. See the discussion below about "Variable
Accuracy".
----------
@ -1208,51 +1213,59 @@ Accuracy".
Fix References
--------------
Fix references access quantities calculated by a :doc:`fix <compute>`.
Fix references access quantities calculated by a :doc:`fix <fix>`.
The ID in the reference should be replaced by the ID of a fix defined
elsewhere in the input script. As discussed in the page for the
:doc:`fix <fix>` command, fixes can produce global, per-atom, or local
values. Only global and per-atom values can be used in a variable.
Fixes can also produce a scalar, vector, or array. An equal-style
variable can only use scalar values, which means a global scalar, or
an element of a global or per-atom vector or array. Atom-style
variables can use the same scalar values. They can also use per-atom
vector values. A vector value can be a per-atom vector itself, or a
column of an per-atom array. See the doc pages for individual fixes
to see what kind of values they produce.
elsewhere in the input script.
The different kinds of fix references are exactly the same as the
compute references listed in the above table, where "c\_" is replaced
by "f\_". Again, there is typically no ambiguity (see exception below)
as to what a reference means, since fixes only produce either global
or per-atom quantities, never both.
As discussed on the page for the :doc:`fix <fix>` command, fixes can
produce global, per-atom, local, and per-grid values. Only global and
per-atom values can be used in a variable. Fixes can also produce
scalars (global only), vectors, and arrays. See the doc pages for
individual fixes to see what different kinds of data they produce.
+-------------+-------------------------------------------------------------------------------------------------------+
| f_ID | global scalar, or per-atom vector |
+-------------+-------------------------------------------------------------------------------------------------------+
| f_ID[I] | Ith element of global vector, or atom I's value in per-atom vector, or Ith column from per-atom array |
+-------------+-------------------------------------------------------------------------------------------------------+
| f_ID[I][J] | I,J element of global array, or atom I's Jth value in per-atom array |
+-------------+-------------------------------------------------------------------------------------------------------+
An equal-style variable can only use scalar values, either from global
or per-atom data. In the case of per-atom data, this would be a value
for a specific atom.
For I and J indices, integers can be specified or a variable name,
specified as v_name, where name is the name of the variable. The
rules for this syntax are the same as for the "Atom Values and
Vectors" discussion above.
A vector-style variable can use scalar values (same as for equal-style
variables), or global vectors of values. The latter can also be a
column of a global array.
One source of ambiguity for fix references is the same ambiguity
discussed for compute references above. Namely when a vector-style
variable refers to a fix that produces both a global scalar and a
global vector. The solution is the same as for compute references.
For a fix with ID "foo", "f_foo" will always refer to the global
scalar, and "F_foo" can be used to reference the global vector. And
similarly for distinguishing between a fix's global vector versus
global array with "f_foo[I]" versus "F_foo[I]".
Atom-style variables can use scalar values (same as for equal-style
variables), or per-atom vectors of values. The latter can also be a
column of a per-atom array.
Note that if a variable containing a fix is evaluated directly in an
input script (not during a run), then the values accessed by the fix
should be current. See the discussion below about "Variable
Accuracy".
The allowed fix references in variable formulas for equal-, vector-,
and atom-style variables are listed in the following table:
+--------+------------+------------------------------------------+
| equal | f_ID | global scalar |
| equal | f_ID[I] | element of global vector |
| equal | f_ID[I][J] | element of global array |
| equal | F_ID[I] | element of per-atom vector (I = atom ID) |
| equal | F_ID[I][J] | element of per-atom array (I = atom ID) |
+--------+------------+------------------------------------------+
| vector | f_ID | global vector |
| vector | f_ID[I] | column of global array |
+--------+------------+------------------------------------------+
| atom | f_ID | per-atom vector |
| atom | f_ID[I] | column of per-atom array |
+--------+------------+------------------------------------------+
Note that if an equal-style variable formula wishes to access per-atom
data from a fix, it must use capital "F" as the ID prefix and not
lower-case "f".
Also note that if a vector- or atom-style variable formula needs to
access a scalar value from a fix (i.e. the 5 kinds of values in the
first 5 lines of the table), it can not do so directly. Instead, it
can use a reference to an equal-style variable which stores the scalar
value from the fix.
The I and J indices in these fix references can be integers or can be
a variable name, specified as v_name, where name is the name of the
variable. The rules for this syntax are the same as for indices in
the "Atom Values and Vectors" discussion above.
Note that some fixes only generate quantities on certain timesteps.
If a variable attempts to access the fix on non-allowed timesteps, an
@ -1260,6 +1273,10 @@ error is generated. For example, the :doc:`fix ave/time <fix_ave_time>`
command may only generate averaged quantities every 100 steps. See
the doc pages for individual fix commands for details.
If a variable containing a fix is evaluated directly in an input
script (not during a run), then the values accessed by the fix should
be current. See the discussion below about "Variable Accuracy".
----------
Variable References
@ -1294,26 +1311,32 @@ including other atom-style or atomfile-style variables. If it uses a
vector-style variable, a subscript must be used to access a single
value from the vector-style variable.
Examples of different kinds of variable references are as follows.
There is no ambiguity as to what a reference means, since variables
produce only a global scalar or global vector or per-atom vector.
The allowed variable references in variable formulas for equal-,
vector-, and atom-style variables are listed in the following table.
Note that there is no ambiguity as to what a reference means, since
referenced variables produce only a global scalar or global vector or
per-atom vector.
+------------+----------------------------------------------------------------------+
| v_name | global scalar from equal-style variable |
+------------+----------------------------------------------------------------------+
| v_name | global vector from vector-style variable |
+------------+----------------------------------------------------------------------+
| v_name | per-atom vector from atom-style or atomfile-style variable |
+------------+----------------------------------------------------------------------+
| v_name[I] | Ith element of a global vector from vector-style variable |
+------------+----------------------------------------------------------------------+
| v_name[I] | value of atom with ID = I from atom-style or atomfile-style variable |
+------------+----------------------------------------------------------------------+
+--------+-----------+-----------------------------------------------------------------------------------+
| equal | v_name | global scalar from an equal-style variable |
| equal | v_name[I] | element of global vector from a vector-style variable |
| equal | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| vector | v_name | global scalar from an equal-style variable |
| vector | v_name | global vector from a vector-style variable |
| vector | v_name[I] | element of global vector from a vector-style variable |
| vector | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
| atom | v_name | global scalar from an equal-style variable |
| atom | v_name | per-atom vector from an atom-style or atomfile-style variable |
| atom | v_name[I] | element of global vector from a vector-style variable |
| atom | v_name[I] | element of per-atom vector (I = atom ID) from an atom- or atomfile-style variable |
+--------+-----------+-----------------------------------------------------------------------------------+
For the I index, an integer can be specified or a variable name,
specified as v_name, where name is the name of the variable. The
rules for this syntax are the same as for the "Atom Values and
Vectors" discussion above.
rules for this syntax are the same as for indices in the "Atom Values
and Vectors" discussion above.
----------

View File

@ -2892,6 +2892,7 @@ pscrozi
pseudocode
Pseudocode
pseudodynamics
pseudoparticle
pseudopotential
psllod
pSp
@ -3755,6 +3756,7 @@ uncomment
uncommented
uncompress
uncompute
underdamped
underprediction
undump
uniaxial

View File

@ -65,7 +65,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*]
# fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
compute vbsum all reduce sum c_vb[*]
# fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
variable db_2_100 equal c_db[2][100]
variable db_2_100 equal C_db[2][100]
# test output: 1: total potential energy
# 2: xy component of stress tensor

View File

@ -65,7 +65,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*]
# fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
compute vbsum all reduce sum c_vb[*]
# fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
variable db_2_25 equal c_db[2][25]
variable db_2_25 equal C_db[2][25]
thermo 100

View File

@ -67,18 +67,18 @@ compute mygridlocal all sna/grid/local grid ${ngrid} ${ngrid} ${ngrid} &
# define output
variable B5atom equal c_b[2][5]
variable B5atom equal C_b[2][5]
variable B5grid equal c_mygrid[8][8]
variable rmse_global equal "sqrt( &
(c_mygrid[8][1] - x[2])^2 + &
(c_mygrid[8][2] - y[2])^2 + &
(c_mygrid[8][3] - z[2])^2 + &
(c_mygrid[8][4] - c_b[2][1])^2 + &
(c_mygrid[8][5] - c_b[2][2])^2 + &
(c_mygrid[8][6] - c_b[2][3])^2 + &
(c_mygrid[8][7] - c_b[2][4])^2 + &
(c_mygrid[8][8] - c_b[2][5])^2 &
(c_mygrid[8][4] - C_b[2][1])^2 + &
(c_mygrid[8][5] - C_b[2][2])^2 + &
(c_mygrid[8][6] - C_b[2][3])^2 + &
(c_mygrid[8][7] - C_b[2][4])^2 + &
(c_mygrid[8][8] - C_b[2][5])^2 &
)"
thermo_style custom step v_B5atom v_B5grid v_rmse_global

View File

@ -87,18 +87,18 @@ compute mygridlocal all sna/grid/local grid ${ngridx} ${ngridy} ${ngridz} &
# define output
variable B5atom equal c_b[7][5]
variable B5atom equal C_b[7][5]
variable B5grid equal c_mygrid[13][8]
# do not compare x,y,z because assignment of ids
# to atoms is not unique for different processor grids
variable rmse_global equal "sqrt( &
(c_mygrid[13][4] - c_b[7][1])^2 + &
(c_mygrid[13][5] - c_b[7][2])^2 + &
(c_mygrid[13][6] - c_b[7][3])^2 + &
(c_mygrid[13][7] - c_b[7][4])^2 + &
(c_mygrid[13][8] - c_b[7][5])^2 &
(c_mygrid[13][4] - C_b[7][1])^2 + &
(c_mygrid[13][5] - C_b[7][2])^2 + &
(c_mygrid[13][6] - C_b[7][3])^2 + &
(c_mygrid[13][7] - C_b[7][4])^2 + &
(c_mygrid[13][8] - C_b[7][5])^2 &
)"
thermo_style custom step v_B5atom v_B5grid v_rmse_global

View File

@ -70,7 +70,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*]
# fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
compute vbsum all reduce sum c_vb[*]
# fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
variable db_2_25 equal c_db[2][25]
variable db_2_25 equal C_db[2][25]
# set up compute snap generating global array

View File

@ -70,7 +70,7 @@ compute bsum2 snapgroup2 reduce sum c_b[*]
# fix bsum2 all ave/time 1 1 1 c_bsum2 file bsum2.dat mode vector
compute vbsum all reduce sum c_vb[*]
# fix vbsum all ave/time 1 1 1 c_vbsum file vbsum.dat mode vector
variable db_2_100 equal c_db[2][100]
variable db_2_100 equal C_db[2][100]
# set up compute snap generating global array

View File

@ -146,10 +146,10 @@ variable i2 equal 257
compute v1 all voronoi/atom occupation
compute r0 all reduce sum c_v1[1]
compute r1 all reduce sum c_v1[2]
variable d5a equal c_v1[${i1}][1]
variable d5b equal c_v1[${i2}][1]
variable d5c equal c_v1[${i1}][2]
variable d5d equal c_v1[${i2}][2]
variable d5a equal C_v1[${i1}][1]
variable d5b equal C_v1[${i2}][1]
variable d5c equal C_v1[${i1}][2]
variable d5d equal C_v1[${i2}][2]
thermo_style custom c_r0 c_r1 v_d5a v_d5b v_d5c v_d5d
run 0

View File

@ -63,11 +63,9 @@ undump dlocal
# TEST 2:
#
# This compute voronoi generates
# local and global quantities, but
# not per-atom quantities
# This compute voronoi generates peratom and local and global quantities
compute v2 all voronoi/atom neighbors yes edge_histo 6 peratom no
compute v2 all voronoi/atom neighbors yes edge_histo 6
# write voronoi local quantities to a file
@ -75,7 +73,7 @@ dump d2 all local 1 dump.neighbors2 index c_v2[1] c_v2[2] c_v2[3]
# sum up a voronoi local quantity
compute sumarea all reduce sum c_v2[3]
compute sumarea all reduce sum c_v2[3] inputs local
# output voronoi global quantities
@ -83,6 +81,3 @@ thermo_style custom c_sumarea c_v2[3] c_v2[4] c_v2[5] c_v2[6] c_v2[7]
thermo 1
run 0

View File

@ -18,11 +18,11 @@ from install_helpers import fullpath, geturl, checkmd5sum, getfallback
# settings
thisdir = fullpath('.')
version ='v.2023.01.3.fix'
version ='v.2023.10.04'
# known checksums for different PACE versions. used to validate the download.
checksums = { \
'v.2023.01.3.fix': '4f0b3b5b14456fe9a73b447de3765caa'
'v.2023.10.04': '70ff79f4e59af175e55d24f3243ad1ff'
}
parser = ArgumentParser(prog='Install.py', description="LAMMPS library build wrapper script")

View File

@ -1024,7 +1024,10 @@ void FixBocs::final_integrate()
if (pstat_flag) {
if (pstyle == ISO) pressure->compute_scalar();
else pressure->compute_vector();
else {
temperature->compute_vector();
pressure->compute_vector();
}
couple();
pressure->addstep(update->ntimestep+1);
}
@ -1961,6 +1964,7 @@ void FixBocs::nhc_press_integrate()
int ich,i,pdof;
double expfac,factor_etap,kecurrent;
double kt = boltz * t_target;
double lkt_press;
// Update masses, to preserve initial freq, if flag set
@ -2006,7 +2010,8 @@ void FixBocs::nhc_press_integrate()
}
}
double lkt_press = pdof * kt;
if (pstyle == ISO) lkt_press = kt;
else lkt_press = pdof * kt;
etap_dotdot[0] = (kecurrent - lkt_press)/etap_mass[0];
double ncfac = 1.0/nc_pchain;

View File

@ -64,6 +64,7 @@ fi
if (test $1 = "COLLOID") then
depend GPU
depend KOKKOS
depend OPENMP
fi
@ -185,6 +186,7 @@ fi
if (test $1 = "ML-SNAP") then
depend ML-IAP
depend KOKKOS
depend INTEL
fi
if (test $1 = "CG-SPICA") then

View File

@ -24,6 +24,8 @@
using namespace LAMMPS_NS;
static constexpr char special_chars[] = "{}[],&:*#?|-<>=!%@\\";
/* ---------------------------------------------------------------------- */
DumpYAML::DumpYAML(class LAMMPS *_lmp, int narg, char **args) :
DumpCustom(_lmp, narg, args), thermo(false)
@ -67,7 +69,12 @@ void DumpYAML::write_header(bigint ndump)
const auto &fields = th->get_fields();
thermo_data += "thermo:\n - keywords: [ ";
for (int i = 0; i < nfield; ++i) thermo_data += fmt::format("{}, ", keywords[i]);
for (int i = 0; i < nfield; ++i) {
if (keywords[i].find_first_of(special_chars) == std::string::npos)
thermo_data += fmt::format("{}, ", keywords[i]);
else
thermo_data += fmt::format("'{}', ", keywords[i]);
}
thermo_data += "]\n - data: [ ";
for (int i = 0; i < nfield; ++i) {
@ -107,7 +114,12 @@ void DumpYAML::write_header(bigint ndump)
if (domain->triclinic) fmt::print(fp, " - [ {}, {}, {} ]\n", boxxy, boxxz, boxyz);
fmt::print(fp, "keywords: [ ");
for (const auto &item : utils::split_words(columns)) fmt::print(fp, "{}, ", item);
for (const auto &item : utils::split_words(columns)) {
if (item.find_first_of(special_chars) == std::string::npos)
fmt::print(fp, "{}, ", item);
else
fmt::print(fp, "'{}', ", item);
}
fputs(" ]\ndata:\n", fp);
} else // reset so that the remainder of the output is not multi-proc
filewriter = 0;

View File

@ -0,0 +1,70 @@
# Toy demonstration of SNAP "scale" parameter, using fix/adapt and hybrid/overlay
# Mixing linear and quadratic SNAP Ni potentials by Zuo et al. JCPA 2020
variable w index 10 # Warmup Timesteps
variable t index 100 # Main Run Timesteps
variable m index 1 # Main Run Timestep Multiplier
variable n index 0 # Use NUMA Mapping for Multi-Node
# box replication factors: the simulation cell is (20*x) x (20*y) x (20*z) FCC unit cells
variable x index 4
variable y index 2
variable z index 2
# total main-run steps = main timesteps * multiplier
variable rr equal floor($t*$m)
# LMP_ROOT environment variable must point at the LAMMPS source tree
# so the bundled potential files under examples/snap can be found
variable root getenv LMP_ROOT
if "$n > 0" then "processors * * * grid numa"
# mixing parameter: weight of potential 1 (linear SNAP); potential 2 gets 1-lambda
variable lambda equal 0.2
# Initialize simulation
# FCC lattice constant in Angstroms (metal units)
variable a equal 3.52
units metal
# generate the box and atom positions using a FCC lattice
variable nx equal 20*$x
variable ny equal 20*$y
variable nz equal 20*$z
boundary p p p
lattice fcc $a
region box block 0 ${nx} 0 ${ny} 0 ${nz}
create_box 1 box
create_atoms 1 box
# NOTE(review): 34 amu is not the mass of Ni (58.693) -- presumably an
# intentional choice for this toy benchmark; confirm before reusing
mass 1 34.
# choose bundled SNAP Ni potential from Zuo et al. JCPA 2020
# two snap sub-styles are overlaid: 1 = linear coefficients, 2 = quadratic
pair_style hybrid/overlay snap snap
pair_coeff * * snap 1 &
${root}/examples/snap/Ni_Zuo_JPCA2020.snapcoeff &
${root}/examples/snap/Ni_Zuo_JPCA2020.snapparam Ni
pair_coeff * * snap 2 &
${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapcoeff &
${root}/examples/snap/Ni_Zuo_JPCA2020.quadratic.snapparam Ni
# scale according to mixing parameter
# fix adapt rescales each sub-style's energy/force every step:
# total = lambda * linear + (1-lambda) * quadratic
variable l1 equal ${lambda}
variable l2 equal 1.0-${lambda}
fix scale1 all adapt 1 pair snap:1 scale * * v_l1
fix scale2 all adapt 1 pair snap:2 scale * * v_l2
# Setup output
thermo 1
# normalize thermo output per atom
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify every 1 delay 0 check yes
# Run MD
# seed velocities at 300 K; "loop geom" makes the result independent of processor count
velocity all create 300.0 4928459 loop geom
fix 1 all nve
# optional warmup run before the timed main run
if "$w > 0" then "run $w"
run ${rr}

View File

@ -35,7 +35,7 @@ export I_MPI_PIN_DOMAIN=core
# End settings for your system
#########################################################################
export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff"
export WORKLOADS="lj rhodo lc sw water eam airebo dpd tersoff snap"
export LMP_ARGS="-pk intel 0 -sf intel -screen none -v d 1"
export RLMP_ARGS="-pk intel 0 lrt yes -sf intel -screen none -v d 1"

View File

@ -20,6 +20,7 @@
#include "fix_intel.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "neighbor.h"
@ -470,6 +471,7 @@ void FixIntel::pair_init_check(const bool cdmessage)
int need_tag = 0;
if (atom->molecular != Atom::ATOMIC || three_body_neighbor()) need_tag = 1;
if (domain->triclinic && force->newton_pair) need_tag = 1;
// Clear buffers used for pair style
char kmode[80];

View File

@ -46,13 +46,38 @@ namespace ip_simd {
typedef __mmask16 SIMD_mask;
inline bool any(const SIMD_mask &m) { return m != 0; }
struct SIMD_int {
__m512i v;
SIMD_int() {}
SIMD_int(const __m512i in) : v(in) {}
inline int & operator[](const int i) { return ((int *)&(v))[i]; }
inline const int & operator[](const int i) const
{ return ((int *)&(v))[i]; }
operator __m512i() const { return v;}
};
struct SIMD256_int {
__m256i v;
SIMD256_int() {}
SIMD256_int(const __m256i in) : v(in) {}
SIMD256_int(const int in) : v(_mm256_set1_epi32(in)) {}
inline int & operator[](const int i) { return ((int *)&(v))[i]; }
inline const int & operator[](const int i) const
{ return ((int *)&(v))[i]; }
#ifdef __INTEL_LLVM_COMPILER
inline SIMD256_int operator&=(const int i)
{ v=_mm256_and_epi32(v, _mm256_set1_epi32(i)); return *this; };
#else
inline SIMD256_int operator&=(const int i)
{ v=_mm256_and_si256(v, _mm256_set1_epi32(i)); return *this; };
#endif
inline SIMD256_int operator+=(const int i)
{ v=_mm256_add_epi32(v, _mm256_set1_epi32(i)); return *this; };
operator __m256i() const { return v;}
};
struct SIMD_float {
__m512 v;
SIMD_float() {}
@ -64,7 +89,24 @@ namespace ip_simd {
__m512d v;
SIMD_double() {}
SIMD_double(const __m512d in) : v(in) {}
SIMD_double(const double in) { v=_mm512_set1_pd(in); }
inline double & operator[](const int i) { return ((double *)&(v))[i]; }
inline const double & operator[](const int i) const
{ return ((double *)&(v))[i]; }
operator __m512d() const { return v;}
SIMD_double & operator=(const double i)
{ _mm512_set1_pd(i); return *this; }
SIMD_double &operator=(const SIMD_double &i)
{ v = i.v; return *this; }
SIMD_double operator-() { return _mm512_xor_pd(v, _mm512_set1_pd(-0.0)); }
SIMD_double & operator+=(const SIMD_double & two)
{ v = _mm512_add_pd(v, two.v); return *this; }
SIMD_double & operator-=(const SIMD_double & two)
{ v = _mm512_sub_pd(v, two.v); return *this; }
SIMD_double & operator*=(const SIMD_double & two)
{ v = _mm512_mul_pd(v, two.v); return *this; }
};
template<class flt_t>
@ -99,6 +141,12 @@ namespace ip_simd {
// ------- Set Operations
inline SIMD256_int SIMD256_set(const int l0, const int l1, const int l2,
const int l3, const int l4, const int l5,
const int l6, const int l7) {
return _mm256_setr_epi32(l0,l1,l2,l3,l4,l5,l6,l7);
}
inline SIMD_int SIMD_set(const int l0, const int l1, const int l2,
const int l3, const int l4, const int l5,
const int l6, const int l7, const int l8,
@ -109,6 +157,10 @@ namespace ip_simd {
l8,l9,l10,l11,l12,l13,l14,l15);
}
inline SIMD256_int SIMD256_set(const int l) {
return _mm256_set1_epi32(l);
}
inline SIMD_int SIMD_set(const int l) {
return _mm512_set1_epi32(l);
}
@ -121,6 +173,10 @@ namespace ip_simd {
return _mm512_set1_pd(l);
}
inline SIMD256_int SIMD256_count() {
return SIMD256_set(0,1,2,3,4,5,6,7);
}
inline SIMD_int SIMD_zero_masked(const SIMD_mask &m, const SIMD_int &one) {
return _mm512_maskz_mov_epi32(m, one);
}
@ -147,6 +203,10 @@ namespace ip_simd {
// -------- Load Operations
inline SIMD256_int SIMD_load(const SIMD256_int *p) {
return _mm256_load_epi32((int *)p);
}
inline SIMD_int SIMD_load(const int *p) {
return _mm512_load_epi32(p);
}
@ -159,6 +219,10 @@ namespace ip_simd {
return _mm512_load_pd(p);
}
inline SIMD_double SIMD_load(const SIMD_double *p) {
return _mm512_load_pd((double *)p);
}
inline SIMD_int SIMD_loadz(const SIMD_mask &m, const int *p) {
return _mm512_maskz_load_epi32(m, p);
}
@ -171,6 +235,10 @@ namespace ip_simd {
return _mm512_maskz_load_pd(m, p);
}
inline SIMD256_int SIMD_gather(const int *p, const SIMD256_int &i) {
return _mm256_i32gather_epi32(p, i, _MM_SCALE_4);
}
inline SIMD_int SIMD_gather(const int *p, const SIMD_int &i) {
return _mm512_i32gather_epi32(i, p, _MM_SCALE_4);
}
@ -179,6 +247,10 @@ namespace ip_simd {
return _mm512_i32gather_ps(i, p, _MM_SCALE_4);
}
inline SIMD_double SIMD_gather(const double *p, const SIMD256_int &i) {
return _mm512_i32gather_pd(i, p, _MM_SCALE_8);
}
inline SIMD_double SIMD_gather(const double *p, const SIMD_int &i) {
return _mm512_i32gather_pd(_mm512_castsi512_si256(i), p, _MM_SCALE_8);
}
@ -201,6 +273,12 @@ namespace ip_simd {
_mm512_castsi512_si256(i), p, _MM_SCALE_8);
}
inline SIMD_double SIMD_gather(const SIMD_mask &m, const double *p,
const SIMD256_int &i) {
return _mm512_mask_i32gather_pd(_mm512_undefined_pd(), m,
i, p, _MM_SCALE_8);
}
template <typename T>
inline SIMD_int SIMD_gatherz_offset(const SIMD_mask &m, const int *p,
const SIMD_int &i) {
@ -252,6 +330,15 @@ namespace ip_simd {
return _mm512_store_pd(p,one);
}
inline void SIMD_store(SIMD_double *p, const SIMD_double &one) {
return _mm512_store_pd((double *)p,one);
}
inline void SIMD_scatter(const SIMD_mask &m, int *p,
const SIMD256_int &i, const SIMD256_int &vec) {
_mm256_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
}
inline void SIMD_scatter(const SIMD_mask &m, int *p,
const SIMD_int &i, const SIMD_int &vec) {
_mm512_mask_i32scatter_epi32(p, m, i, vec, _MM_SCALE_4);
@ -268,8 +355,22 @@ namespace ip_simd {
_MM_SCALE_8);
}
inline void SIMD_scatter(const SIMD_mask &m, double *p,
const SIMD256_int &i, const SIMD_double &vec) {
_mm512_mask_i32scatter_pd(p, m, i, vec, _MM_SCALE_8);
}
inline void SIMD_scatter(double *p,
const SIMD256_int &i, const SIMD_double &vec) {
_mm512_i32scatter_pd(p, i, vec, _MM_SCALE_8);
}
// ------- Arithmetic Operations
inline SIMD256_int operator+(const SIMD256_int &one, const SIMD256_int &two) {
return _mm256_add_epi32(one,two);
}
inline SIMD_int operator+(const SIMD_int &one, const SIMD_int &two) {
return _mm512_add_epi32(one,two);
}
@ -286,6 +387,10 @@ namespace ip_simd {
return _mm512_add_epi32(one,SIMD_set(two));
}
inline SIMD256_int operator+(const SIMD256_int &one, const int two) {
return _mm256_add_epi32(one,SIMD256_set(two));
}
inline SIMD_float operator+(const SIMD_float &one, const float two) {
return _mm512_add_ps(one,SIMD_set(two));
}
@ -299,6 +404,11 @@ namespace ip_simd {
return _mm512_mask_add_epi32(one,m,one,SIMD_set(two));
}
inline SIMD256_int SIMD_add(const SIMD_mask &m,
const SIMD256_int &one, const int two) {
return _mm256_mask_add_epi32(one,m,one,SIMD256_set(two));
}
inline SIMD_float SIMD_add(const SIMD_mask &m,
const SIMD_float &one, const float two) {
return _mm512_mask_add_ps(one,m,one,SIMD_set(two));
@ -309,6 +419,11 @@ namespace ip_simd {
return _mm512_mask_add_pd(one,m,one,SIMD_set(two));
}
inline SIMD_double SIMD_add(const SIMD_mask &m,
const SIMD_double &one, const SIMD_double &two) {
return _mm512_mask_add_pd(one,m,one,two);
}
inline SIMD_int SIMD_add(const SIMD_int &s, const SIMD_mask &m,
const SIMD_int &one, const SIMD_int &two) {
return _mm512_mask_add_epi32(s,m,one,two);
@ -387,6 +502,10 @@ namespace ip_simd {
return _mm512_mul_pd(one,two);
}
inline SIMD256_int operator*(const SIMD256_int &one, const int two) {
return _mm256_mullo_epi32(one,SIMD256_set(two));
}
inline SIMD_int operator*(const SIMD_int &one, const int two) {
return _mm512_mullo_epi32(one,SIMD_set(two));
}
@ -417,6 +536,12 @@ namespace ip_simd {
return _mm512_fmadd_pd(one,two,three);
}
inline SIMD_double SIMD_fma(const SIMD_mask m, const SIMD_double &one,
const SIMD_double &two,
const SIMD_double &three) {
return _mm512_mask3_fmadd_pd(one,two,three,m);
}
inline SIMD_float SIMD_fms(const SIMD_float &one, const SIMD_float &two,
const SIMD_float &three) {
return _mm512_fmsub_ps(one,two,three);
@ -493,6 +618,10 @@ namespace ip_simd {
return _mm512_pow_pd(one, two);
}
inline SIMD_double SIMD_pow(const SIMD_double &one, const double two) {
return _mm512_pow_pd(one, SIMD_set(two));
}
inline SIMD_float SIMD_exp(const SIMD_float &one) {
return _mm512_exp_ps(one);
}
@ -501,6 +630,18 @@ namespace ip_simd {
return _mm512_exp_pd(one);
}
inline SIMD_double SIMD_cos(const SIMD_double &one) {
return _mm512_cos_pd(one);
}
inline SIMD_double SIMD_sin(const SIMD_double &one) {
return _mm512_sin_pd(one);
}
inline SIMD_double SIMD_tan(const SIMD_double &one) {
return _mm512_tan_pd(one);
}
// ------- Comparison operations
inline SIMD_mask SIMD_lt(SIMD_mask m, const SIMD_int &one,
@ -533,6 +674,14 @@ namespace ip_simd {
return _mm512_mask_cmplt_pd_mask(m, SIMD_set(one), two);
}
inline SIMD_mask operator<(const SIMD256_int &one, const SIMD256_int &two) {
return _mm256_cmplt_epi32_mask(one,two);
}
inline SIMD_mask operator<(const int one, const SIMD256_int &two) {
return _mm256_cmplt_epi32_mask(SIMD256_set(one),two);
}
inline SIMD_mask operator<(const SIMD_int &one, const SIMD_int &two) {
return _mm512_cmplt_epi32_mask(one,two);
}
@ -577,6 +726,10 @@ namespace ip_simd {
return _mm512_cmple_ps_mask(SIMD_set(one), two);
}
inline SIMD_mask operator<=(const SIMD_double &one, const SIMD_double &two) {
return _mm512_cmple_pd_mask(one, two);
}
inline SIMD_mask operator<=(const double one, const SIMD_double &two) {
return _mm512_cmple_pd_mask(SIMD_set(one), two);
}
@ -593,6 +746,14 @@ namespace ip_simd {
return _mm512_cmplt_pd_mask(two,one);
}
inline SIMD_mask operator>(const SIMD_double &one, const double two) {
return _mm512_cmplt_pd_mask(SIMD_set(two),one);
}
inline SIMD_mask operator==(const SIMD256_int &one, const int two) {
return _mm256_cmpeq_epi32_mask(one,_mm256_set1_epi32(two));
}
inline SIMD_mask operator==(const SIMD_int &one, const SIMD_int &two) {
return _mm512_cmpeq_epi32_mask(one,two);
}

View File

@ -20,7 +20,9 @@
#include "atom.h"
#include "comm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "modify.h"
#include "my_page.h"
#include "neigh_list.h"
@ -56,6 +58,9 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list,
const int * _noalias const numneigh_full = list->listfull->numneigh;
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
const double delta = 0.01 * force->angstrom;
const int triclinic = domain->triclinic;
#if defined(_OPENMP)
#pragma omp parallel
#endif
@ -82,25 +87,50 @@ void NPairHalffullNewtonIntel::build_t(NeighList *list,
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
if (i > j) addme = 0;
} else {
if (x[j].z < ztmp) addme = 0;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) addme = 0;
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
if (!triclinic) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
if (i > j) addme = 0;
} else {
if (x[j].z < ztmp) addme = 0;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) addme = 0;
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
}
}
if (addme)
neighptr[n++] = joriginal;
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
if (i > j) addme = 0;
} else {
if (fabs(x[j].z-ztmp) > delta) {
if (x[j].z < ztmp) addme = 0;
} else if (fabs(x[j].y-ytmp) > delta) {
if (x[j].y < ytmp) addme = 0;
} else {
if (x[j].x < xtmp) addme = 0;
}
}
if (addme)
neighptr[n++] = joriginal;
}
if (addme)
neighptr[n++] = joriginal;
}
ilist[ii] = i;
@ -203,7 +233,7 @@ void NPairHalffullNewtonIntel::build_t3(NeighList *list, int *numhalf)
void NPairHalffullNewtonIntel::build(NeighList *list)
{
if (_fix->three_body_neighbor() == 0) {
if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
build_t(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)
@ -255,6 +285,8 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
const int ** _noalias const firstneigh_full = (const int ** const)list->listfull->firstneigh; // NOLINT
const flt_t cutsq_custom = cutoff_custom * cutoff_custom;
const double delta = 0.01 * force->angstrom;
const int triclinic = domain->triclinic;
#if defined(_OPENMP)
#pragma omp parallel
@ -282,35 +314,70 @@ void NPairHalffullNewtonTrimIntel::build_t(NeighList *list,
const int * _noalias const jlist = firstneigh_full[i];
const int jnum = numneigh_full[i];
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
if (i > j) addme = 0;
} else {
if (x[j].z < ztmp) addme = 0;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) addme = 0;
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
if (!triclinic) {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
if (i > j) addme = 0;
} else {
if (x[j].z < ztmp) addme = 0;
if (x[j].z == ztmp) {
if (x[j].y < ytmp) addme = 0;
if (x[j].y == ytmp && x[j].x < xtmp) addme = 0;
}
}
// trim to shorter cutoff
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutsq_custom) addme = 0;
if (addme)
neighptr[n++] = joriginal;
}
} else {
#if defined(LMP_SIMD_COMPILER)
#pragma vector aligned
#pragma ivdep
#endif
for (int jj = 0; jj < jnum; jj++) {
const int joriginal = jlist[jj];
const int j = joriginal & NEIGHMASK;
int addme = 1;
if (j < nlocal) {
if (i > j) addme = 0;
} else {
if (fabs(x[j].z-ztmp) > delta) {
if (x[j].z < ztmp) addme = 0;
} else if (fabs(x[j].y-ytmp) > delta) {
if (x[j].y < ytmp) addme = 0;
} else {
if (x[j].x < xtmp) addme = 0;
}
}
// trim to shorter cutoff
// trim to shorter cutoff
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
const flt_t delx = xtmp - x[j].x;
const flt_t dely = ytmp - x[j].y;
const flt_t delz = ztmp - x[j].z;
const flt_t rsq = delx * delx + dely * dely + delz * delz;
if (rsq > cutsq_custom) addme = 0;
if (rsq > cutsq_custom) addme = 0;
if (addme)
neighptr[n++] = joriginal;
if (addme)
neighptr[n++] = joriginal;
}
}
ilist[ii] = i;
@ -433,7 +500,7 @@ void NPairHalffullNewtonTrimIntel::build_t3(NeighList *list, int *numhalf,
void NPairHalffullNewtonTrimIntel::build(NeighList *list)
{
if (_fix->three_body_neighbor() == 0) {
if (_fix->three_body_neighbor() == 0 || domain->triclinic) {
if (_fix->precision() == FixIntel::PREC_MODE_MIXED)
build_t(list, _fix->get_mixed_buffers());
else if (_fix->precision() == FixIntel::PREC_MODE_DOUBLE)

View File

@ -204,6 +204,8 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
}
const int special_bound = sb;
const double delta = 0.01 * force->angstrom;
#ifdef _LMP_INTEL_OFFLOAD
const int * _noalias const binhead = this->binhead;
const int * _noalias const bins = this->bins;
@ -229,7 +231,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
in(ncache_stride,maxnbors,nthreads,maxspecial,nstencil,e_nall,offload) \
in(offload_end,separate_buffers,astart,aend,nlocal,molecular) \
in(ntypes,xperiodic,yperiodic,zperiodic,xprd_half,yprd_half,zprd_half) \
in(pack_width,special_bound) \
in(pack_width,special_bound,delta) \
out(overflow:length(5) alloc_if(0) free_if(0)) \
out(timer_compute:length(1) alloc_if(0) free_if(0)) \
signal(tag)
@ -331,7 +333,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
const flt_t ztmp = x[i].z;
const int itype = x[i].w;
tagint itag;
if (THREE) itag = tag[i];
if (THREE || (TRI && !FULL)) itag = tag[i];
const int ioffset = ntypes * itype;
const int ibin = atombin[i];
@ -365,7 +367,7 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
ty[u] = x[j].y;
tz[u] = x[j].z;
tjtype[u] = x[j].w;
if (THREE) ttag[u] = tag[j];
if (THREE || (TRI && !FULL)) ttag[u] = tag[j];
}
if (FULL == 0 && TRI != 1) {
@ -486,12 +488,32 @@ void NPairIntel::bin_newton(const int offload, NeighList *list,
// Triclinic
if (TRI) {
if (tz[u] < ztmp) addme = 0;
if (tz[u] == ztmp) {
if (ty[u] < ytmp) addme = 0;
if (ty[u] == ytmp) {
if (tx[u] < xtmp) addme = 0;
if (tx[u] == xtmp && j <= i) addme = 0;
if (FULL) {
if (tz[u] < ztmp) addme = 0;
if (tz[u] == ztmp) {
if (ty[u] < ytmp) addme = 0;
if (ty[u] == ytmp) {
if (tx[u] < xtmp) addme = 0;
if (tx[u] == xtmp && j <= i) addme = 0;
}
}
} else {
if (j <= i) addme = 0;
if (j >= nlocal) {
const tagint jtag = ttag[u];
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) addme = 0;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) addme = 0;
} else {
if (fabs(tz[u]-ztmp) > delta) {
if (tz[u] < ztmp) addme = 0;
} else if (fabs(ty[u]-ytmp) > delta) {
if (ty[u] < ytmp) addme = 0;
} else {
if (tx[u] < xtmp) addme = 0;
}
}
}
}
}

View File

@ -0,0 +1,779 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#include "pair_snap_intel.h"
#include "atom.h"
#include "comm.h"
#include "error.h"
#include "force.h"
#include "memory.h"
#include "modify.h"
#include "neigh_list.h"
#include "neighbor.h"
#include "sna_intel.h"
#include "tokenizer.h"
#include <cmath>
#include <cstring>
using namespace LAMMPS_NS;
using namespace ip_simd;
#define MAXLINE 1024
#define MAXWORD 3
/* ---------------------------------------------------------------------- */
PairSNAPIntel::PairSNAPIntel(LAMMPS *lmp) : Pair(lmp)
{
single_enable = 0;
restartinfo = 0;
one_coeff = 1;
manybody_flag = 1;
centroidstressflag = CENTROID_NOTAVAIL;
radelem = nullptr;
wjelem = nullptr;
coeffelem = nullptr;
sinnerelem = nullptr;
dinnerelem = nullptr;
beta = nullptr;
bispectrum = nullptr;
snaptr = nullptr;
}
/* ---------------------------------------------------------------------- */
PairSNAPIntel::~PairSNAPIntel()
{
if (copymode) return;
memory->destroy(radelem);
memory->destroy(wjelem);
memory->destroy(coeffelem);
memory->destroy(sinnerelem);
memory->destroy(dinnerelem);
memory->destroy(beta);
memory->destroy(bispectrum);
delete snaptr;
if (allocated) {
memory->destroy(setflag);
memory->destroy(cutsq);
memory->destroy(scale);
}
}
/* ----------------------------------------------------------------------
This version is a straightforward implementation
---------------------------------------------------------------------- */
void PairSNAPIntel::compute(int eflag, int vflag)
{
SNA_DVEC fij[3];
int *jlist,*numneigh,**firstneigh;
ev_init(eflag,vflag);
int tally_xyz = 0;
if (vflag_atom || (vflag && !vflag_fdotr)) tally_xyz = 1;
double **x = atom->x;
double *_x = atom->x[0];
double **f = atom->f;
int *type = atom->type;
int nlocal = atom->nlocal;
int newton_pair = force->newton_pair;
// compute dE_i/dB_i = beta_i for all i in list
numneigh = list->numneigh;
firstneigh = list->firstneigh;
SNA_DVEC sevdwl(0);
const int vw = snaptr->vector_width();
for (int ii = 0; ii < list->inum; ii+=vw) {
SNA_IVEC i, jnum;
int max_jnum = 0;
for (int l = 0; l < vw; l++) {
if (ii + l < list->inum) {
i[l] = list->ilist[ii + l];
jnum[l] = numneigh[i[l]];
} else {
i[l] = list->ilist[0];
jnum[l] = 0;
}
if (jnum[l] > max_jnum) max_jnum = jnum[l];
}
// ensure rij, inside, wj, and rcutij are of size jnum
snaptr->grow_rij(max_jnum);
SNA_IVEC zero_vec(0);
const SNA_DVEC xtmp = SIMD_gather(_x, i * 3);
const SNA_DVEC ytmp = SIMD_gather(_x, i * 3 + 1);
const SNA_DVEC ztmp = SIMD_gather(_x, i * 3 + 2);
const SNA_IVEC itype = SIMD_gather(type, i);
const SNA_IVEC ielem = SIMD_gather(map, itype);
const SNA_DVEC radi = SIMD_gather(radelem, ielem);
// rij[][3] = displacements between atom I and those neighbors
// inside = indices of neighbors of I within cutoff
// wj = weights for neighbors of I within cutoff
// rcutij = cutoffs for neighbors of I within cutoff
// note Rij sign convention => dU/dRij = dU/dRj = -dU/dRi
SNA_IVEC ninside(0);
for (int jj = 0; jj < max_jnum; jj++) {
SIMD_mask m(SIMD256_set(jj) < jnum);
SNA_IVEC j;
SV_for (int l = 0; l < vw; l++) {
jlist = firstneigh[i[l]];
if (jj < jnum[l]) j[l] = jlist[jj];
else j[l] = 0;
}
j &= NEIGHMASK;
const SNA_DVEC delx = SIMD_gather(m, _x, j * 3) - xtmp;
const SNA_DVEC dely = SIMD_gather(m, _x, j * 3 + 1) - ytmp;
const SNA_DVEC delz = SIMD_gather(m, _x, j * 3 + 2) - ztmp;
const SNA_IVEC jtype = SIMD_gather(type, j);
const SNA_DVEC rsq = delx*delx + dely*dely + delz*delz;
const SNA_DVEC vcut = SIMD_gather(m, cutsq[0],
itype * (atom->ntypes + 1) + jtype);
m &= rsq < vcut;
m &= rsq > SIMD_set(1e-20);
const SNA_IVEC jelem = SIMD_gather(map, jtype);
const SNA_IVEC ni3 = ninside * vw * 3 + SIMD256_count();
SIMD_scatter(m, (double *)(snaptr->rij[0]), ni3, delx);
SIMD_scatter(m, (double *)(snaptr->rij[0] + 1), ni3, dely);
SIMD_scatter(m, (double *)(snaptr->rij[0] + 2), ni3, delz);
const SNA_IVEC ni = ninside * vw + SIMD256_count();
SIMD_scatter(m, (int *)(snaptr->inside), ni, j);
SIMD_scatter(m, (double *)(snaptr->wj), ni,
SIMD_gather(m, wjelem, jelem));
SIMD_scatter(m, (double *)(snaptr->rcutij), ni,
(radi + SIMD_gather(m, radelem, jelem)) * rcutfac);
if (switchinnerflag) {
SIMD_scatter(m, (double *)(snaptr->sinnerij), ni,
(SIMD_gather(m, sinnerelem, ielem) +
SIMD_gather(m, sinnerelem, jelem)) * 0.5);
SIMD_scatter(m, (double *)(snaptr->dinnerij), ni,
(SIMD_gather(m, dinnerelem, ielem) +
SIMD_gather(m, dinnerelem, jelem)) * 0.5);
}
if (chemflag)
SIMD_scatter(m, (int *)(snaptr->element), ni, jelem);
ninside = SIMD_add(m, ninside, 1);
} // for jj
// compute Ui, Yi for atom I
if (chemflag)
snaptr->compute_ui(ninside, ielem, max_jnum);
else
snaptr->compute_ui(ninside, zero_vec, max_jnum);
// Compute bispectrum
if (quadraticflag || eflag) {
snaptr->compute_zi_or_yi<0>(beta);
if (chemflag)
snaptr->compute_bi(ielem);
else
snaptr->compute_bi(zero_vec);
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
SIMD_store(bispectrum + icoeff, SIMD_load(snaptr->blist + icoeff));
}
// Compute beta
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
SIMD_store(beta + icoeff, SIMD_gather(coeffelem[0],
ielem * ncoeffall + icoeff + 1));
if (quadraticflag) {
int k = ncoeff+1;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
SNA_DVEC beta_i = SIMD_load(beta + icoeff) +
SIMD_gather(coeffelem[0], ielem * ncoeffall + k) * bveci;
k++;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
const SNA_DVEC ci = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
beta_i = beta_i + ci * SIMD_load(bispectrum + jcoeff);
SIMD_store(beta + jcoeff, ci * bveci + SIMD_load(beta + jcoeff));
k++;
}
SIMD_store(beta + icoeff, beta_i);
}
}
// for neighbors of I within cutoff:
// compute Fij = dEi/dRj = -dEi/dRi
// add to Fi, subtract from Fj
// scaling is that for type I
if (quadraticflag || eflag)
snaptr->compute_yi_from_zi(beta);
else
snaptr->compute_zi_or_yi<1>(beta);
SNA_DVEC fi_x(0.0), fi_y(0.0), fi_z(0.0);
SNA_DVEC scalev = SIMD_gather(scale[0], itype * (atom->ntypes+1) + itype);
for (int jj = 0; jj < max_jnum; jj++) {
snaptr->compute_duidrj(jj, ninside);
if (chemflag && nelements > 1)
snaptr->compute_deidrj_e(jj, ninside, fij);
else
snaptr->compute_deidrj(jj, ninside, fij);
SNA_DVEC fijs_x = fij[0] * scalev;
SNA_DVEC fijs_y = fij[1] * scalev;
SNA_DVEC fijs_z = fij[2] * scalev;
fi_x += fijs_x;
fi_y += fijs_y;
fi_z += fijs_z;
for (int l = 0; l < vw; l++) {
if (jj < ninside[l]) {
int j = snaptr->inside[jj][l];
f[j][0] -= fijs_x[l];
f[j][1] -= fijs_y[l];
f[j][2] -= fijs_z[l];
if (tally_xyz)
ev_tally_xyz(i[l],j,nlocal,newton_pair,0.0,0.0,
fij[0][l],fij[1][l],fij[2][l],
-snaptr->rij[jj][0][l],-snaptr->rij[jj][1][l],
-snaptr->rij[jj][2][l]);
}
} // for l
} // for jj
SIMD_mask m((SIMD256_count() + ii) < list->inum);
SNA_DVEC fix = SIMD_gather(m, f[0], i * 3) + fi_x;
SIMD_scatter(m, f[0], i * 3, fix);
SNA_DVEC fiy = SIMD_gather(m, f[0], i * 3 + 1) + fi_y;
SIMD_scatter(m, f[0], i * 3 + 1, fiy);
SNA_DVEC fiz = SIMD_gather(m, f[0], i * 3 + 2) + fi_z;
SIMD_scatter(m, f[0], i * 3 + 2, fiz);
// tally energy contribution
if (eflag) {
SNA_DVEC evdwl = SIMD_gather(coeffelem[0], ielem * ncoeffall);
for (int icoeff = 0; icoeff < ncoeff; icoeff++)
evdwl += SIMD_gather(coeffelem[0], ielem * ncoeffall + icoeff +1) *
bispectrum[icoeff];
if (quadraticflag) {
int k = ncoeff+1;
for (int icoeff = 0; icoeff < ncoeff; icoeff++) {
SNA_DVEC bveci = SIMD_load(bispectrum + icoeff);
SNA_DVEC c = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
k++;
evdwl += c * 0.5 * bveci * bveci;
for (int jcoeff = icoeff+1; jcoeff < ncoeff; jcoeff++) {
SNA_DVEC bvecj = SIMD_load(bispectrum + jcoeff);
SNA_DVEC cj = SIMD_gather(coeffelem[0], ielem * ncoeffall + k);
k++;
evdwl += cj * bveci * bvecj;
}
}
}
sevdwl += scalev * evdwl;
if (eatom) {
SNA_DVEC ea = SIMD_gather(m, eatom, i) + scalev * evdwl;
SIMD_scatter(m, eatom, i, ea);
}
} // if (eflag)
} // for ii
if (eflag) eng_vdwl += SIMD_sum(sevdwl);
if (vflag_fdotr) virial_fdotr_compute();
}
/* ----------------------------------------------------------------------
allocate all arrays
------------------------------------------------------------------------- */
void PairSNAPIntel::allocate()
{
  allocated = 1;

  // per-type-pair tables are (ntypes+1) x (ntypes+1) since LAMMPS
  // type indices are 1-based
  const int ntp1 = atom->ntypes + 1;
  memory->create(setflag, ntp1, ntp1, "pair:setflag");
  memory->create(cutsq, ntp1, ntp1, "pair:cutsq");
  memory->create(scale, ntp1, ntp1, "pair:scale");

  // type -> element mapping, filled later by map_element2type()
  map = new int[ntp1];
}
/* ----------------------------------------------------------------------
global settings
------------------------------------------------------------------------- */
void PairSNAPIntel::settings(int narg, char ** /* arg */)
{
if (narg > 0)
error->all(FLERR,"Illegal pair_style command");
if ((comm->me == 0) && (comm->nthreads > 1))
error->warning(FLERR, "Pair style snap/intel does not use OpenMP threads");
}
/* ----------------------------------------------------------------------
set coeffs for one or more type pairs
------------------------------------------------------------------------- */
void PairSNAPIntel::coeff(int narg, char **arg)
{
if (!allocated) allocate();
if (narg != 4 + atom->ntypes) error->all(FLERR,"Incorrect args for pair coefficients");
map_element2type(narg-4,arg+4);
// read snapcoeff and snapparam files
read_files(arg[2],arg[3]);
if (!quadraticflag)
ncoeff = ncoeffall - 1;
else {
// ncoeffall should be (ncoeff+2)*(ncoeff+1)/2
// so, ncoeff = floor(sqrt(2*ncoeffall))-1
ncoeff = sqrt(2.0*ncoeffall)-1;
ncoeffq = (ncoeff*(ncoeff+1))/2;
int ntmp = 1+ncoeff+ncoeffq;
if (ntmp != ncoeffall) {
error->all(FLERR,"Incorrect SNAP coeff file");
}
}
snaptr = new SNAIntel(lmp, rfac0, twojmax,
rmin0, switchflag, bzeroflag,
chemflag, bnormflag, wselfallflag,
nelements, switchinnerflag);
if (ncoeff != snaptr->ncoeff) {
if (comm->me == 0)
printf("ncoeff = %d snancoeff = %d \n",ncoeff,snaptr->ncoeff);
error->all(FLERR,"Incorrect SNAP parameter file");
}
// Calculate maximum cutoff for all elements
rcutmax = 0.0;
for (int ielem = 0; ielem < nelements; ielem++)
rcutmax = MAX(2.0*radelem[ielem]*rcutfac,rcutmax);
// set default scaling
int n = atom->ntypes;
for (int ii = 0; ii < n+1; ii++)
for (int jj = 0; jj < n+1; jj++)
scale[ii][jj] = 1.0;
}
/* ----------------------------------------------------------------------
init specific to this pair style
------------------------------------------------------------------------- */
void PairSNAPIntel::init_style()
{
if (force->newton_pair == 0)
error->all(FLERR,"Pair style SNAP requires newton pair on");
// need a full neighbor list
neighbor->add_request(this, NeighConst::REQ_FULL);
snaptr->init();
fix = static_cast<FixIntel *>(modify->get_fix_by_id("package_intel"));
if (!fix) error->all(FLERR, "The 'package intel' command is required for /intel styles");
fix->pair_init_check();
memory->create(bispectrum,ncoeff,"PairSNAP:bispectrum");
memory->create(beta,ncoeff,"PairSNAP:beta");
}
/* ----------------------------------------------------------------------
init for one type pair i,j and corresponding j,i
------------------------------------------------------------------------- */
double PairSNAPIntel::init_one(int i, int j)
{
  // every type pair must have been set by pair_coeff
  if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");

  // keep the scale matrix symmetric
  scale[j][i] = scale[i][j];

  // pair cutoff = sum of the two mapped element radii times rcutfac
  const double cutoff = (radelem[map[i]] + radelem[map[j]]) * rcutfac;
  return cutoff;
}
/* ---------------------------------------------------------------------- */
void PairSNAPIntel::read_files(char *coefffilename, char *paramfilename)
{
// open SNAP coefficient file on proc 0
FILE *fpcoeff;
if (comm->me == 0) {
fpcoeff = utils::open_potential(coefffilename,lmp,nullptr);
if (fpcoeff == nullptr)
error->one(FLERR,"Cannot open SNAP coefficient file {}: ",
coefffilename, utils::getsyserror());
}
char line[MAXLINE],*ptr;
int eof = 0;
int nwords = 0;
while (nwords == 0) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof) break;
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
// strip comment, skip line if blank
nwords = utils::count_words(utils::trim_comment(line));
}
if (nwords != 2)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
// strip single and double quotes from words
int nelemtmp = 0;
try {
ValueTokenizer words(utils::trim_comment(line),"\"' \t\n\r\f");
nelemtmp = words.next_int();
ncoeffall = words.next_int();
} catch (TokenizerException &e) {
error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
}
// clean out old arrays and set up element lists
memory->destroy(radelem);
memory->destroy(wjelem);
memory->destroy(coeffelem);
memory->destroy(sinnerelem);
memory->destroy(dinnerelem);
memory->create(radelem,nelements,"pair:radelem");
memory->create(wjelem,nelements,"pair:wjelem");
memory->create(coeffelem,nelements,ncoeffall,"pair:coeffelem");
memory->create(sinnerelem,nelements,"pair:sinnerelem");
memory->create(dinnerelem,nelements,"pair:dinnerelem");
// initialize checklist for all required nelements
int *elementflags = new int[nelements];
for (int jelem = 0; jelem < nelements; jelem++)
elementflags[jelem] = 0;
// loop over nelemtmp blocks in the SNAP coefficient file
for (int ielem = 0; ielem < nelemtmp; ielem++) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
std::vector<std::string> words;
try {
words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
} catch (TokenizerException &) {
// ignore
}
if (words.size() != 3)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
int jelem;
for (jelem = 0; jelem < nelements; jelem++)
if (words[0] == elements[jelem]) break;
// if this element not needed, skip this block
if (jelem == nelements) {
if (comm->me == 0) {
for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
continue;
}
if (elementflags[jelem] == 1)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
else
elementflags[jelem] = 1;
radelem[jelem] = utils::numeric(FLERR,words[1],false,lmp);
wjelem[jelem] = utils::numeric(FLERR,words[2],false,lmp);
if (comm->me == 0)
utils::logmesg(lmp,"SNAP Element = {}, Radius {}, Weight {}\n",
elements[jelem], radelem[jelem], wjelem[jelem]);
for (int icoeff = 0; icoeff < ncoeffall; icoeff++) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpcoeff);
if (ptr == nullptr) {
eof = 1;
fclose(fpcoeff);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
try {
ValueTokenizer coeff(utils::trim_comment(line));
if (coeff.count() != 1)
error->all(FLERR,"Incorrect format in SNAP coefficient file");
coeffelem[jelem][icoeff] = coeff.next_double();
} catch (TokenizerException &e) {
error->all(FLERR,"Incorrect format in SNAP coefficient file: {}", e.what());
}
}
}
if (comm->me == 0) fclose(fpcoeff);
for (int jelem = 0; jelem < nelements; jelem++) {
if (elementflags[jelem] == 0)
error->all(FLERR,"Element {} not found in SNAP coefficient file", elements[jelem]);
}
delete[] elementflags;
// set flags for required keywords
rcutfacflag = 0;
twojmaxflag = 0;
// Set defaults for optional keywords
rfac0 = 0.99363;
rmin0 = 0.0;
switchflag = 1;
bzeroflag = 1;
quadraticflag = 0;
chemflag = 0;
bnormflag = 0;
wselfallflag = 0;
switchinnerflag = 0;
chunksize = 32768;
parallel_thresh = 8192;
// set local input checks
int sinnerflag = 0;
int dinnerflag = 0;
// open SNAP parameter file on proc 0
FILE *fpparam;
if (comm->me == 0) {
fpparam = utils::open_potential(paramfilename,lmp,nullptr);
if (fpparam == nullptr)
error->one(FLERR,"Cannot open SNAP parameter file {}: {}",
paramfilename, utils::getsyserror());
}
eof = 0;
while (true) {
if (comm->me == 0) {
ptr = fgets(line,MAXLINE,fpparam);
if (ptr == nullptr) {
eof = 1;
fclose(fpparam);
}
}
MPI_Bcast(&eof,1,MPI_INT,0,world);
if (eof) break;
MPI_Bcast(line,MAXLINE,MPI_CHAR,0,world);
// words = ptrs to all words in line
// strip single and double quotes from words
std::vector<std::string> words;
try {
words = Tokenizer(utils::trim_comment(line),"\"' \t\n\r\f").as_vector();
} catch (TokenizerException &) {
// ignore
}
if (words.size() == 0) continue;
if (words.size() < 2)
error->all(FLERR,"Incorrect format in SNAP parameter file");
auto keywd = words[0];
auto keyval = words[1];
// check for keywords with more than one value per element
if (keywd == "sinner" || keywd == "dinner") {
if ((int)words.size() != nelements+1)
error->all(FLERR,"Incorrect SNAP parameter file");
// innerlogstr collects all values of sinner or dinner for log output below
std::string innerlogstr;
int iword = 1;
if (keywd == "sinner") {
for (int ielem = 0; ielem < nelements; ielem++) {
keyval = words[iword];
sinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
iword++;
innerlogstr += keyval + " ";
}
sinnerflag = 1;
} else if (keywd == "dinner") {
for (int ielem = 0; ielem < nelements; ielem++) {
keyval = words[iword];
dinnerelem[ielem] = utils::numeric(FLERR,keyval,false,lmp);
iword++;
innerlogstr += keyval + " ";
}
dinnerflag = 1;
}
if (comm->me == 0)
utils::logmesg(lmp,"SNAP keyword {} {} ... \n", keywd, innerlogstr);
} else {
// all other keywords take one value
if (nwords != 2)
error->all(FLERR,"Incorrect SNAP parameter file");
if (comm->me == 0)
utils::logmesg(lmp,"SNAP keyword {} {}\n",keywd,keyval);
if (keywd == "rcutfac") {
rcutfac = utils::numeric(FLERR,keyval,false,lmp);
rcutfacflag = 1;
} else if (keywd == "twojmax") {
twojmax = utils::inumeric(FLERR,keyval,false,lmp);
twojmaxflag = 1;
} else if (keywd == "rfac0")
rfac0 = utils::numeric(FLERR,keyval,false,lmp);
else if (keywd == "rmin0")
rmin0 = utils::numeric(FLERR,keyval,false,lmp);
else if (keywd == "switchflag")
switchflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "bzeroflag")
bzeroflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "quadraticflag")
quadraticflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "chemflag")
chemflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "bnormflag")
bnormflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "wselfallflag")
wselfallflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "switchinnerflag")
switchinnerflag = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "chunksize")
chunksize = utils::inumeric(FLERR,keyval,false,lmp);
else if (keywd == "parallelthresh")
parallel_thresh = utils::inumeric(FLERR,keyval,false,lmp);
else
error->all(FLERR,"Unknown parameter '{}' in SNAP parameter file", keywd);
}
}
if (rcutfacflag == 0 || twojmaxflag == 0)
error->all(FLERR,"Incorrect SNAP parameter file");
if (chemflag && nelemtmp != nelements)
error->all(FLERR,"Incorrect SNAP parameter file");
if (switchinnerflag && !(sinnerflag && dinnerflag))
error->all(FLERR,"Incorrect SNAP parameter file");
if (!switchinnerflag && (sinnerflag || dinnerflag))
error->all(FLERR,"Incorrect SNAP parameter file");
}
/* ----------------------------------------------------------------------
memory usage
------------------------------------------------------------------------- */
double PairSNAPIntel::memory_usage()
{
  // base-class bookkeeping plus every array allocated by this style
  // and by the SNA evaluator
  double bytes = Pair::memory_usage();

  const double n = atom->ntypes + 1;
  bytes += n * n * sizeof(int);                   // setflag
  bytes += n * n * sizeof(double);                // cutsq
  bytes += n * n * sizeof(double);                // scale
  bytes += n * sizeof(int);                       // map
  bytes += (double) ncoeff * sizeof(SNA_DVEC);    // bispectrum
  bytes += (double) ncoeff * sizeof(SNA_DVEC);    // beta
  bytes += snaptr->memory_usage();                // SNA object

  return bytes;
}
/* ---------------------------------------------------------------------- */
void *PairSNAPIntel::extract(const char *str, int &dim)
{
  // "scale" is the per-type-pair factor used for thermodynamic integration
  dim = 2;    // scale is a 2d (type x type) array
  return (strcmp(str, "scale") == 0) ? (void *) scale : nullptr;
}
#endif
#endif

View File

@ -0,0 +1,83 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#ifdef PAIR_CLASS
// clang-format off
PairStyle(snap/intel,PairSNAPIntel);
// clang-format on
#else
#ifndef LMP_PAIR_SNAP_INTEL_H
#define LMP_PAIR_SNAP_INTEL_H
#include "fix_intel.h"
#include "pair.h"
namespace ip_simd { class SIMD_double; class SIMD_int; };
#define SNA_DVEC ip_simd::SIMD_double
#define SNA_IVEC ip_simd::SIMD256_int
namespace LAMMPS_NS {
// AVX-512 implementation of the SNAP machine-learning potential.
// Mirrors the interface of the base PairSNAP style; the heavy lifting is
// delegated to the SNAIntel evaluator.
class PairSNAPIntel : public Pair {
 public:
  PairSNAPIntel(class LAMMPS *);
  ~PairSNAPIntel() override;
  void compute(int, int) override;
  void settings(int, char **) override;
  void coeff(int, char **) override;
  void init_style() override;
  double init_one(int, int) override;
  double memory_usage() override;
  void *extract(const char *, int &) override;

  double rcutfac, quadraticflag;    // declared public to workaround gcc 4.9
  int ncoeff;                       // compiler bug, manifest in KOKKOS package

 protected:
  FixIntel *fix;                 // "package intel" fix, required by init_style()
  int ncoeffq, ncoeffall;        // quadratic term count; total per-element coeffs
  class SNAIntel *snaptr;        // SIMD bispectrum evaluator
  virtual void allocate();
  void read_files(char *, char *);
  inline int equal(double *x, double *y);
  inline double dist2(double *x, double *y);

  double rcutmax;                // max cutoff for all elements
  double *radelem;               // element radii
  double *wjelem;                // elements weights
  double **coeffelem;            // element bispectrum coefficients
  SNA_DVEC *beta;                // betas for all atoms in list
  SNA_DVEC *bispectrum;          // bispectrum components for all atoms in list
  double **scale;                // for thermodynamic integration
  int twojmax, switchflag, bzeroflag, bnormflag;
  int chemflag, wselfallflag;
  int switchinnerflag;           // inner cutoff switch
  double *sinnerelem;            // element inner cutoff midpoint
  double *dinnerelem;            // element inner cutoff half-width
  int chunksize, parallel_thresh;
  double rfac0, rmin0, wj1, wj2;
  int rcutfacflag, twojmaxflag;  // flags for required parameters
};
} // namespace LAMMPS_NS
#endif
#endif
#endif
#endif

1505
src/INTEL/sna_intel.cpp Normal file

File diff suppressed because it is too large Load Diff

187
src/INTEL/sna_intel.h Normal file
View File

@ -0,0 +1,187 @@
/* -*- c++ -*- -------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing authors: W. Michael Brown, Intel
------------------------------------------------------------------------- */
#ifndef LMP_SNA_INTEL_H
#define LMP_SNA_INTEL_H
#if defined(__AVX512F__)
#if defined(__INTEL_COMPILER) || defined(__INTEL_LLVM_COMPILER)
#include "pointers.h"
#include "intel_buffers.h"
#include "intel_simd.h"
#define SVW 8
#if defined(LMP_SIMD_COMPILER)
#if defined(USE_OMP_SIMD)
#define SV_for _Pragma("omp simd") _Pragma("vector aligned") for
#else
#define SV_for _Pragma("simd assert") _Pragma("vector aligned") for
#endif
#else
#define SV_for for
#endif
namespace LAMMPS_NS {
// per-entry index bounds for one z-list element (used via SNAIntel::idxz)
struct SNA_ZINDICES {
  int j1, j2, j, ma1min, ma2max, mb1min;
  int mb2max, na, nb, jju;    // jju: offset into the u-list for this entry
};

// (j1,j2,j) index triple for one bispectrum component (used via SNAIntel::idxb)
struct SNA_BINDICES {
  int j1, j2, j;
};
#define SNA_DVEC ip_simd::SIMD_double
#define SNA_IVEC ip_simd::SIMD256_int
// SIMD (AVX-512) evaluator for SNAP bispectrum components and their
// derivatives; each SNA_DVEC/SNA_IVEC lane holds data for one atom.
class SNAIntel : protected Pointers {
 public:
  SNAIntel(LAMMPS *, double, int, double, int, int, int, int, int, int, int);

  SNAIntel(LAMMPS *lmp) : Pointers(lmp){};
  ~SNAIntel() override;
  void build_indexlist();
  void init();
  double memory_usage();

  int ncoeff;

  // number of SIMD lanes processed together
  inline int vector_width() const { return SVW; }

  // functions for bispectrum coefficients

  void compute_ui(const SNA_IVEC &, const SNA_IVEC &, const int max_jnum);
  template <int> void compute_zi_or_yi(const SNA_DVEC *);
  void compute_yi_from_zi(const SNA_DVEC *);
  void compute_yterm(int, int, int, const double *);
  void compute_bi(const SNA_IVEC &);

  // functions for derivatives

  void compute_duidrj(const int, const SNA_IVEC &);
  void compute_deidrj_e(const int, const SNA_IVEC &, SNA_DVEC *);
  void compute_deidrj(const int, const SNA_IVEC &, SNA_DVEC *);
  double compute_sfac(double, double, double, double);
  SNA_DVEC compute_sfac(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
                        const SNA_DVEC &);
  inline SNA_DVEC compute_sfac_dsfac(const SNA_DVEC &, const SNA_DVEC &,
                                     const SNA_DVEC &, const SNA_DVEC &,
                                     SNA_DVEC &);

  // public bispectrum data

  int twojmax;
  SNA_DVEC *blist;
  double **dblist;

  // short neighbor list data

  void grow_rij(int);
  int nmax;              // allocated size of short lists

  SNA_DVEC **rij;        // short rij list
  SNA_IVEC *inside;      // short neighbor list
  SNA_DVEC *wj;          // short weight list
  SNA_DVEC *rcutij;      // short cutoff list

  // only allocated for switch_inner_flag=1

  SNA_DVEC *sinnerij;    // short inner cutoff midpoint list
  SNA_DVEC *dinnerij;    // short inner half-width list

  // only allocated for chem_flag=1

  SNA_IVEC *element;     // short element list [0,nelements)

 private:
  double rmin0, rfac0;

  // data for bispectrum coefficients

  SNA_ZINDICES *idxz;
  SNA_BINDICES *idxb;

  double **rootpqarray;
  double *cglist;                       // Clebsch-Gordan coefficients
  int ***idxcg_block;

  SNA_DVEC *ulisttot_r, *ulisttot_i;
  SNA_DVEC **ulist_r_ij, **ulist_i_ij;  // short u list
  int *idxu_block;

  SNA_DVEC *zlist_r, *zlist_i;
  int ***idxz_block;

  int ***idxb_block;

  SNA_DVEC **dulist_r, **dulist_i;
  SNA_DVEC *ylist_r, *ylist_i;
  int idxcg_max, idxu_max, idxz_max, idxb_max;

  void create_twojmax_arrays();
  void destroy_twojmax_arrays();
  void init_clebsch_gordan();
  void print_clebsch_gordan();
  void init_rootpqarray();
  void zero_uarraytot(const SNA_IVEC &);
  void add_uarraytot(const SNA_DVEC &, const int, const SNA_IVEC &);
  void compute_uarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
                      const SNA_DVEC &, const SNA_DVEC &, const int,
                      const SNA_IVEC &);
  double deltacg(int, int, int);
  void compute_ncoeff();
  void compute_duarray(const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
                       const SNA_DVEC &, const SNA_DVEC &, const SNA_DVEC &,
                       const SNA_DVEC &, const SNA_DVEC &, int,
                       const SNA_IVEC &);
  inline double choose_beta(const int, const int, const int,
                            const int, const int, const int, int &);

  // Sets the style for the switching function
  // 0 = none
  // 1 = cosine
  int switch_flag;

  // Sets the style for the inner switching function
  // 0 = none
  // 1 = cosine
  int switch_inner_flag;

  // Self-weight
  double wself;

  int bzero_flag;       // 1 if bzero subtracted from barray
  double *bzero;        // array of B values for isolated atoms
  int bnorm_flag;       // 1 if barray divided by j+1
  int chem_flag;        // 1 for multi-element bispectrum components
  int wselfall_flag;    // 1 for adding wself to all element labelings
  int nelements;        // number of elements
  int ndoubles;         // number of multi-element pairs
  int ntriples;         // number of multi-element triplets
};
} // namespace LAMMPS_NS
#endif
#endif
#endif

View File

@ -129,6 +129,8 @@ action fix_dt_reset_kokkos.cpp
action fix_dt_reset_kokkos.h
action fix_enforce2d_kokkos.cpp
action fix_enforce2d_kokkos.h
action fix_efield_kokkos.cpp
action fix_efield_kokkos.h
action fix_eos_table_rx_kokkos.cpp fix_eos_table_rx.cpp
action fix_eos_table_rx_kokkos.h fix_eos_table_rx.h
action fix_freeze_kokkos.cpp fix_freeze.cpp
@ -173,6 +175,8 @@ action fix_shake_kokkos.cpp fix_shake.cpp
action fix_shake_kokkos.h fix_shake.h
action fix_shardlow_kokkos.cpp fix_shardlow.cpp
action fix_shardlow_kokkos.h fix_shardlow.h
action fix_spring_self_kokkos.cpp
action fix_spring_self_kokkos.h
action fix_viscous_kokkos.cpp
action fix_viscous_kokkos.h
action fix_wall_gran_kokkos.cpp fix_wall_gran.cpp
@ -363,6 +367,8 @@ action pair_vashishta_kokkos.cpp pair_vashishta.cpp
action pair_vashishta_kokkos.h pair_vashishta.h
action pair_yukawa_kokkos.cpp
action pair_yukawa_kokkos.h
action pair_yukawa_colloid_kokkos.cpp pair_yukawa_colloid.cpp
action pair_yukawa_colloid_kokkos.h pair_yukawa_colloid.h
action pair_zbl_kokkos.cpp
action pair_zbl_kokkos.h
action pppm_kokkos.cpp pppm.cpp

View File

@ -44,6 +44,9 @@ AtomKokkos::AtomKokkos(LAMMPS *lmp) : Atom(lmp)
h_tag_min = Kokkos::subview(h_tag_min_max,0);
h_tag_max = Kokkos::subview(h_tag_min_max,1);
nprop_atom = 0;
fix_prop_atom = nullptr;
}
/* ---------------------------------------------------------------------- */
@ -112,6 +115,7 @@ AtomKokkos::~AtomKokkos()
memoryKK->destroy_kokkos(k_dvector, dvector);
dvector = nullptr;
delete [] fix_prop_atom;
}
/* ---------------------------------------------------------------------- */
@ -125,11 +129,37 @@ void AtomKokkos::init()
/* ---------------------------------------------------------------------- */
void AtomKokkos::update_property_atom()
{
nprop_atom = 0;
std::vector<Fix *> prop_atom_fixes;
for (auto &ifix : modify->get_fix_by_style("^property/atom")) {
if (!ifix->kokkosable)
error->all(FLERR, "KOKKOS package requires a Kokkos-enabled version of fix property/atom");
++nprop_atom;
prop_atom_fixes.push_back(ifix);
}
delete[] fix_prop_atom;
fix_prop_atom = new FixPropertyAtomKokkos *[nprop_atom];
int n = 0;
for (auto &ifix : prop_atom_fixes)
fix_prop_atom[n++] = dynamic_cast<FixPropertyAtomKokkos *>(ifix);
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
{
if (space == Device && lmp->kokkos->auto_sync) avecKK->modified(Host, mask);
if (space == Device && lmp->kokkos->auto_sync) {
avecKK->modified(Host, mask);
for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(Host, mask);
}
avecKK->sync(space, mask);
for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(space, mask);
}
/* ---------------------------------------------------------------------- */
@ -137,13 +167,20 @@ void AtomKokkos::sync(const ExecutionSpace space, unsigned int mask)
void AtomKokkos::modified(const ExecutionSpace space, unsigned int mask)
{
avecKK->modified(space, mask);
for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->modified(space, mask);
if (space == Device && lmp->kokkos->auto_sync) avecKK->sync(Host, mask);
if (space == Device && lmp->kokkos->auto_sync) {
avecKK->sync(Host, mask);
for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync(Host, mask);
}
}
/* ---------------------------------------------------------------------- */
void AtomKokkos::sync_overlapping_device(const ExecutionSpace space, unsigned int mask)
{
avecKK->sync_overlapping_device(space, mask);
for (int n = 0; n < nprop_atom; n++) fix_prop_atom[n]->sync_overlapping_device(space, mask);
}
/* ---------------------------------------------------------------------- */
@ -375,7 +412,7 @@ AtomVec *AtomKokkos::new_avec(const std::string &style, int trysuffix, int &sfla
int hybrid_substyle_flag = (avec != nullptr);
AtomVec *avec = Atom::new_avec(style, trysuffix, sflag);
if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a kokkos enabled atom_style");
if (!avec->kokkosable) error->all(FLERR, "KOKKOS package requires a Kokkos-enabled atom_style");
if (!hybrid_substyle_flag)
avecKK = dynamic_cast<AtomVecKokkos*>(avec);

View File

@ -14,6 +14,7 @@
#include "atom.h" // IWYU pragma: export
#include "kokkos_type.h"
#include "fix_property_atom_kokkos.h"
#include <Kokkos_Sort.hpp>
@ -25,6 +26,8 @@ namespace LAMMPS_NS {
class AtomKokkos : public Atom {
public:
bool sort_classic;
int nprop_atom;
FixPropertyAtomKokkos** fix_prop_atom;
DAT::tdual_tagint_1d k_tag;
DAT::tdual_int_1d k_type, k_mask;
@ -144,6 +147,7 @@ class AtomKokkos : public Atom {
}
void init() override;
void update_property_atom();
void allocate_type_arrays() override;
void sync(const ExecutionSpace space, unsigned int mask);
void modified(const ExecutionSpace space, unsigned int mask);

View File

@ -963,7 +963,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPDeviceType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPDeviceType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPDeviceType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPDeviceType>();
} else {
if (mask & X_MASK) atomKK->k_x.sync<LMPHostType>();
if (mask & V_MASK) atomKK->k_v.sync<LMPHostType>();
@ -980,7 +979,6 @@ void AtomVecDPDKokkos::sync(ExecutionSpace space, unsigned int mask)
if (mask & UCG_MASK) atomKK->k_uCG.sync<LMPHostType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.sync<LMPHostType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.sync<LMPHostType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.sync<LMPHostType>();
}
}
@ -1019,8 +1017,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
} else {
if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_x_array>(atomKK->k_x,space);
@ -1052,8 +1048,6 @@ void AtomVecDPDKokkos::sync_overlapping_device(ExecutionSpace space, unsigned in
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_uCGnew,space);
if ((mask & DUCHEM_MASK) && atomKK->k_duChem.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_efloat_1d>(atomKK->k_duChem,space);
if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
}
}
@ -1077,7 +1071,6 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPDeviceType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPDeviceType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPDeviceType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPDeviceType>();
} else {
if (mask & X_MASK) atomKK->k_x.modify<LMPHostType>();
if (mask & V_MASK) atomKK->k_v.modify<LMPHostType>();
@ -1094,6 +1087,5 @@ void AtomVecDPDKokkos::modified(ExecutionSpace space, unsigned int mask)
if (mask & UCG_MASK) atomKK->k_uCG.modify<LMPHostType>();
if (mask & UCGNEW_MASK) atomKK->k_uCGnew.modify<LMPHostType>();
if (mask & DUCHEM_MASK) atomKK->k_duChem.modify<LMPHostType>();
if (mask & DVECTOR_MASK) atomKK->k_dvector.modify<LMPHostType>();
}
}

View File

@ -139,6 +139,8 @@ class AtomVecKokkos : virtual public AtomVec {
DAT::tdual_int_1d k_count;
public:
#ifdef LMP_KOKKOS_GPU
template<class ViewType>
Kokkos::View<typename ViewType::data_type,

View File

@ -113,7 +113,7 @@ void FixDtResetKokkos<DeviceType>::end_of_step()
update->dt = dt;
update->dt_default = 0;
if (force->pair) force->pair->reset_dt();
for (int i = 0; i < modify->nfix; i++) modify->fix[i]->reset_dt();
for (auto &ifix : modify->get_fix_list()) ifix->reset_dt();
output->reset_dt();
}

View File

@ -0,0 +1,316 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Trung Nguyen (U Chicago)
------------------------------------------------------------------------- */
#include "fix_efield_kokkos.h"
#include "atom_kokkos.h"
#include "update.h"
#include "modify.h"
#include "domain_kokkos.h"
#include "region.h"
#include "input.h"
#include "variable.h"
#include "memory_kokkos.h"
#include "error.h"
#include "atom_masks.h"
#include "kokkos_base.h"
#include <cstring>
using namespace LAMMPS_NS;
using namespace FixConst;
enum{NONE,CONSTANT,EQUAL,ATOM};
/* ---------------------------------------------------------------------- */
// Kokkos-accelerated variant of fix efield: mirrors FixEfield but keeps the
// per-atom field array in a DualView so it can be used on the device.
template<class DeviceType>
FixEfieldKokkos<DeviceType>::FixEfieldKokkos(LAMMPS *lmp, int narg, char **arg) :
FixEfield(lmp, narg, arg)
{
kokkosable = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
// masks stay empty: this fix performs its own explicit sync/modified calls
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
// replace the base class's plain allocation of efield (maxatom x 4)
// with a Kokkos DualView-backed one
memory->destroy(efield);
memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
d_efield = k_efield.view<DeviceType>();
}
/* ---------------------------------------------------------------------- */
// Release the DualView-backed efield array; shallow copies made for
// device capture (copymode) must not free shared storage.
template<class DeviceType>
FixEfieldKokkos<DeviceType>::~FixEfieldKokkos()
{
  if (!copymode) {
    memoryKK->destroy_kokkos(k_efield,efield);
    efield = nullptr;
  }
}
/* ---------------------------------------------------------------------- */
// Run base-class setup, then reject run styles this Kokkos port
// does not support yet.
template<class DeviceType>
void FixEfieldKokkos<DeviceType>::init()
{
  FixEfield::init();

  const bool respa_active = utils::strmatch(update->integrate_style,"^respa");
  if (respa_active)
    error->all(FLERR,"Cannot (yet) use respa with Kokkos");
}
/* ---------------------------------------------------------------------- */
// Apply force qE to each charged atom in the group. Handles both the
// constant-field case and equal-/atom-style variable fields; also
// accumulates fsum[0..3] = {-energy, Fx, Fy, Fz} for thermo output.
template<class DeviceType>
void FixEfieldKokkos<DeviceType>::post_force(int /*vflag*/)
{
atomKK->sync(execution_space, X_MASK | F_MASK | Q_MASK | IMAGE_MASK | MASK_MASK);
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
q = atomKK->k_q.view<DeviceType>();
image = atomKK->k_image.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
int nlocal = atom->nlocal;
// update region if necessary
if (region) {
// only block-style regions have a Kokkos match_all implementation here
if (!utils::strmatch(region->style, "^block"))
error->all(FLERR,"Cannot (yet) use {}-style region with fix efield/kk",region->style);
region->prematch();
DAT::tdual_int_1d k_match = DAT::tdual_int_1d("efield:k_match",nlocal);
KokkosBase* regionKKBase = dynamic_cast<KokkosBase*>(region);
regionKKBase->match_all_kokkos(groupbit,k_match);
k_match.template sync<DeviceType>();
d_match = k_match.template view<DeviceType>();
}
// reallocate sforce array if necessary
if (varflag == ATOM && atom->nmax > maxatom) {
maxatom = atom->nmax;
memoryKK->destroy_kokkos(k_efield,efield);
memoryKK->create_kokkos(k_efield,efield,maxatom,4,"efield:efield");
d_efield = k_efield.view<DeviceType>();
}
fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0;
double_4 fsum_kk;
force_flag = 0;
if (varflag == CONSTANT) {
copymode = 1;
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
//Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldConstant>(0,nlocal),*this,fsum_kk);
{
// local variables for lambda capture
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
auto l_ex = ex;
auto l_ey = ey;
auto l_ez = ez;
auto l_x = x;
auto l_q = q;
auto l_f = f;
auto l_mask = mask;
auto l_image = image;
auto l_groupbit = groupbit;
// NOTE(review): d_match is prepared above but not consulted in this
// lambda, so region-excluded atoms still receive the constant field
// here (unlike the TagFixEfieldConstant operator) -- confirm intended.
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);
x_i[1] = l_x(i,1);
x_i[2] = l_x(i,2);
// unwrapped coordinates are needed for the potential-energy term
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
auto qtmp = l_q(i);
auto fx = qtmp * l_ex;
auto fy = qtmp * l_ey;
auto fz = qtmp * l_ez;
l_f(i,0) += fx;
l_f(i,1) += fy;
l_f(i,2) += fz;
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
}
},fsum_kk);
}
copymode = 0;
// variable force, wrap with clear/add
} else {
atomKK->sync(Host,ALL_MASK); // this can be removed when variable class is ported to Kokkos
modify->clearstep_compute();
// evaluate equal-style scalars or fill per-atom columns of efield on host
if (xstyle == EQUAL) ex = input->variable->compute_equal(xvar);
else if (xstyle == ATOM)
input->variable->compute_atom(xvar,igroup,&efield[0][0],4,0);
if (ystyle == EQUAL) ey = input->variable->compute_equal(yvar);
else if (ystyle == ATOM)
input->variable->compute_atom(yvar,igroup,&efield[0][1],4,0);
if (zstyle == EQUAL) ez = input->variable->compute_equal(zvar);
else if (zstyle == ATOM)
input->variable->compute_atom(zvar,igroup,&efield[0][2],4,0);
modify->addstep_compute(update->ntimestep + 1);
if (varflag == ATOM) { // this can be removed when variable class is ported to Kokkos
k_efield.modify<LMPHostType>();
k_efield.sync<DeviceType>();
}
copymode = 1;
// It would be more concise to use the operators below, but there is still an issue with unwrap (TODO below)
//Kokkos::parallel_reduce(Kokkos::RangePolicy<DeviceType, TagFixEfieldNonConstant>(0,nlocal),*this,fsum_kk);
{
// local variables for lambda capture
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
auto l_ex = ex;
auto l_ey = ey;
auto l_ez = ez;
auto l_d_efield = d_efield;
auto l_x = x;
auto l_q = q;
auto l_f = f;
auto l_mask = mask;
auto l_image = image;
auto l_groupbit = groupbit;
auto l_xstyle = xstyle;
auto l_ystyle = ystyle;
auto l_zstyle = zstyle;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double_4& fsum_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);
x_i[1] = l_x(i,1);
x_i[2] = l_x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
auto qtmp = l_q(i);
auto fx = qtmp * l_ex;
auto fy = qtmp * l_ey;
auto fz = qtmp * l_ez;
// per-dimension: atom-style field (scaled by charge) or equal/constant value
if (l_xstyle == ATOM) l_f(i,0) += qtmp * l_d_efield(i,0);
else if (l_xstyle) l_f(i,0) += fx;
if (l_ystyle == ATOM) l_f(i,1) += qtmp * l_d_efield(i,1);
else if (l_ystyle) l_f(i,1) += fy;
if (l_zstyle == ATOM) l_f(i,2) += qtmp * l_d_efield(i,2);
else if (l_zstyle) l_f(i,2) += fz;
// NOTE(review): fsum uses fx/fy/fz even when ATOM style applied a
// different force component -- confirm against base FixEfield.
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
}
},fsum_kk);
}
copymode = 0;
}
atomKK->modified(execution_space, F_MASK);
// publish reduction result to the base-class accumulator
fsum[0] = fsum_kk.d0;
fsum[1] = fsum_kk.d1;
fsum[2] = fsum_kk.d2;
fsum[3] = fsum_kk.d3;
}
// Reduction functor for the constant-field case. Currently unused: the
// parallel_reduce invoking it is commented out in post_force() because of
// the unwrap crash noted below; the equivalent lambda is used instead.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldConstant, const int &i, double_4& fsum_kk) const {
if (mask[i] & groupbit) {
// skip atoms outside the optional region
if (region && !d_match[i]) return;
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
Few<double,3> x_i;
x_i[0] = x(i,0);
x_i[1] = x(i,1);
x_i[2] = x(i,2);
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
const F_FLOAT qtmp = q(i);
// force = charge times constant field components
const F_FLOAT fx = qtmp * ex;
const F_FLOAT fy = qtmp * ey;
const F_FLOAT fz = qtmp * ez;
f(i,0) += fx;
f(i,1) += fy;
f(i,2) += fz;
// TODO: access to unwrap below crashes
fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
fsum_kk.d1 += fx;
fsum_kk.d2 += fy;
fsum_kk.d3 += fz;
}
}
// Reduction functor for the variable-field (equal/atom style) case.
// Currently unused (see commented-out parallel_reduce in post_force),
// but kept consistent with the lambda actually executed there.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEfieldKokkos<DeviceType>::operator()(TagFixEfieldNonConstant, const int &i, double_4& fsum_kk) const {
  auto prd = Few<double,3>(domain->prd);
  auto h = Few<double,6>(domain->h);
  auto triclinic = domain->triclinic;
  if (mask[i] & groupbit) {
    // skip atoms outside the optional region
    if (region && !d_match[i]) return;
    Few<double,3> x_i;
    x_i[0] = x(i,0);
    x_i[1] = x(i,1);
    x_i[2] = x(i,2);
    auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,image(i));
    const F_FLOAT qtmp = q[i];
    const F_FLOAT fx = qtmp * ex;
    const F_FLOAT fy = qtmp * ey;
    const F_FLOAT fz = qtmp * ez;
    // BUGFIX: atom-style field values must be scaled by the atom's charge
    // (force = q*E), matching the lambda in post_force(); previously the
    // raw d_efield values were added to the force unscaled.
    if (xstyle == ATOM) f(i,0) += qtmp * d_efield(i,0);
    else if (xstyle) f(i,0) += fx;
    if (ystyle == ATOM) f(i,1) += qtmp * d_efield(i,1);
    else if (ystyle) f(i,1) += fy;
    if (zstyle == ATOM) f(i,2) += qtmp * d_efield(i,2);
    else if (zstyle) f(i,2) += fz;
    // TODO: access to unwrap below crashes
    fsum_kk.d0 -= fx * unwrap[0] + fy * unwrap[1] + fz * unwrap[2];
    fsum_kk.d1 += fx;
    fsum_kk.d2 += fy;
    fsum_kk.d3 += fz;
  }
}
namespace LAMMPS_NS {
template class FixEfieldKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU
template class FixEfieldKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,86 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
// clang-format off
FixStyle(efield/kk,FixEfieldKokkos<LMPDeviceType>);
FixStyle(efield/kk/device,FixEfieldKokkos<LMPDeviceType>);
FixStyle(efield/kk/host,FixEfieldKokkos<LMPHostType>);
// clang-format on
#else
// clang-format off
#ifndef LMP_FIX_EFIELD_KOKKOS_H
#define LMP_FIX_EFIELD_KOKKOS_H
#include "fix_efield.h"
#include "kokkos_type.h"
namespace LAMMPS_NS {
// Four-component double accumulator used as the reduction value type for
// fix efield/kk: d0 = -energy, d1..d3 = total force components.
struct e_double_4 {
  double d0, d1, d2, d3;

  // identity element of the sum reduction: all components zero
  KOKKOS_INLINE_FUNCTION
  e_double_4() : d0(0.0), d1(0.0), d2(0.0), d3(0.0) {}

  // component-wise accumulation, as required by Kokkos reducers
  KOKKOS_INLINE_FUNCTION
  e_double_4& operator+=(const e_double_4 &rhs) {
    d0 += rhs.d0;
    d1 += rhs.d1;
    d2 += rhs.d2;
    d3 += rhs.d3;
    return *this;
  }
};
typedef e_double_4 double_4;
struct TagFixEfieldConstant{};
struct TagFixEfieldNonConstant{};
// Kokkos port of fix efield; overrides only init/post_force and carries
// device views of the per-atom data post_force needs.
template<class DeviceType>
class FixEfieldKokkos : public FixEfield {
public:
typedef DeviceType device_type;
// reduction value type used by the parallel_reduce in post_force
typedef double_4 value_type;
typedef ArrayTypes<DeviceType> AT;
FixEfieldKokkos(class LAMMPS *, int, char **);
~FixEfieldKokkos() override;
void init() override;
void post_force(int) override;
// reduction functors (currently bypassed by lambdas in post_force)
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEfieldConstant, const int&, double_4&) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixEfieldNonConstant, const int&, double_4&) const;
private:
// DualView-backed replacement for FixEfield::efield (maxatom x 4)
DAT::tdual_ffloat_2d k_efield;
typename AT::t_ffloat_2d_randomread d_efield;
// per-atom region-match flags (1 = inside region), set in post_force
typename AT::t_int_1d d_match;
// device views of atom data, cached in post_force
typename AT::t_x_array_randomread x;
typename AT::t_float_1d_randomread q;
typename AT::t_f_array f;
typename AT::t_imageint_1d_randomread image;
typename AT::t_int_1d_randomread mask;
};
}
#endif
#endif

View File

@ -30,7 +30,46 @@ FixPropertyAtomKokkos::FixPropertyAtomKokkos(LAMMPS *lmp, int narg, char **arg)
FixPropertyAtom(lmp, narg, arg)
{
atomKK = (AtomKokkos *) atom;
grow_arrays(atom->nmax);
kokkosable = 1;
dvector_flag = 0;
for (int nv = 0; nv < nvalue; nv++)
if (styles[nv] == DVEC) dvector_flag = 1;
}
/* ---------------------------------------------------------------------- */
void FixPropertyAtomKokkos::post_constructor()
{
// let AtomKokkos register the newly created per-atom properties
// before the base class finishes its setup
atomKK->update_property_atom();
FixPropertyAtom::post_constructor();
}
/* ---------------------------------------------------------------------- */
FixPropertyAtomKokkos::~FixPropertyAtomKokkos()
{
// deallocate per-atom vectors in Atom class
// set ptrs to a null pointer, so they no longer exist for Atom class
// (uses the Kokkos-aware destroy so the DualViews are released too)
for (int nv = 0; nv < nvalue; nv++) {
if (styles[nv] == MOLECULE) {
atom->molecule_flag = 0;
memoryKK->destroy_kokkos(atomKK->k_molecule,atom->molecule);
atom->molecule = nullptr;
} else if (styles[nv] == CHARGE) {
atom->q_flag = 0;
memoryKK->destroy_kokkos(atomKK->k_q,atom->q);
atom->q = nullptr;
} else if (styles[nv] == RMASS) {
atom->rmass_flag = 0;
memoryKK->destroy_kokkos(atomKK->k_rmass,atom->rmass);
atom->rmass = nullptr;
}
}
// tell AtomKokkos the set of registered properties changed
atomKK->update_property_atom();
}
/* ----------------------------------------------------------------------
@ -44,17 +83,17 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
{
for (int nv = 0; nv < nvalue; nv++) {
if (styles[nv] == MOLECULE) {
memory->grow(atom->molecule,nmax,"atom:molecule");
size_t nbytes = (nmax-nmax_old) * sizeof(tagint);
memset(&atom->molecule[nmax_old],0,nbytes);
atomKK->sync(Device,MOLECULE_MASK);
memoryKK->grow_kokkos(atomKK->k_molecule,atom->molecule,nmax,"atom:molecule");
atomKK->modified(Device,MOLECULE_MASK);
} else if (styles[nv] == CHARGE) {
memory->grow(atom->q,nmax,"atom:q");
size_t nbytes = (nmax-nmax_old) * sizeof(double);
memset(&atom->q[nmax_old],0,nbytes);
atomKK->sync(Device,Q_MASK);
memoryKK->grow_kokkos(atomKK->k_q,atom->q,nmax,"atom:q");
atomKK->modified(Device,Q_MASK);
} else if (styles[nv] == RMASS) {
memory->grow(atom->rmass,nmax,"atom:rmass");
size_t nbytes = (nmax-nmax_old) * sizeof(double);
memset(&atom->rmass[nmax_old],0,nbytes);
atomKK->sync(Device,RMASS_MASK);
memoryKK->grow_kokkos(atomKK->k_rmass,atom->rmass,nmax,"atom:rmass");
atomKK->modified(Device,RMASS_MASK);
} else if (styles[nv] == TEMPERATURE) {
memory->grow(atom->temperature, nmax, "atom:temperature");
size_t nbytes = (nmax - nmax_old) * sizeof(double);
@ -69,7 +108,7 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
memset(&atom->ivector[index[nv]][nmax_old],0,nbytes);
} else if (styles[nv] == DVEC) {
atomKK->sync(Device,DVECTOR_MASK);
memoryKK->grow_kokkos(atomKK->k_dvector,atomKK->dvector,atomKK->k_dvector.extent(0),nmax,
memoryKK->grow_kokkos(atomKK->k_dvector,atom->dvector,atomKK->k_dvector.extent(0),nmax,
"atom:dvector");
atomKK->modified(Device,DVECTOR_MASK);
} else if (styles[nv] == IARRAY) {
@ -84,3 +123,62 @@ void FixPropertyAtomKokkos::grow_arrays(int nmax)
}
nmax_old = nmax;
}
/* ---------------------------------------------------------------------- */
// Bring each per-atom DualView managed by this fix up to date in the
// requested execution space; only properties this fix created (flag set)
// and that the caller asked for (mask bit set) are touched.
void FixPropertyAtomKokkos::sync(ExecutionSpace space, unsigned int mask)
{
  const bool to_device = (space == Device);

  if (molecule_flag && (mask & MOLECULE_MASK)) {
    if (to_device) atomKK->k_molecule.sync<LMPDeviceType>();
    else atomKK->k_molecule.sync<LMPHostType>();
  }
  if (q_flag && (mask & Q_MASK)) {
    if (to_device) atomKK->k_q.sync<LMPDeviceType>();
    else atomKK->k_q.sync<LMPHostType>();
  }
  if (rmass_flag && (mask & RMASS_MASK)) {
    if (to_device) atomKK->k_rmass.sync<LMPDeviceType>();
    else atomKK->k_rmass.sync<LMPHostType>();
  }
  if (dvector_flag && (mask & DVECTOR_MASK)) {
    if (to_device) atomKK->k_dvector.sync<LMPDeviceType>();
    else atomKK->k_dvector.sync<LMPHostType>();
  }
}
/* ---------------------------------------------------------------------- */
// Like sync(), but uses asynchronous copies so transfers can overlap with
// other work. Note: unlike sync()/modified(), this checks only the mask
// and need_sync(), not the per-property flags.
void FixPropertyAtomKokkos::sync_overlapping_device(ExecutionSpace space, unsigned int mask)
{
if (space == Device) {
if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPDeviceType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPDeviceType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPDeviceType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
} else {
if ((mask & MOLECULE_MASK) && atomKK->k_molecule.need_sync<LMPHostType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_tagint_1d>(atomKK->k_molecule,space);
if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
if ((mask & RMASS_MASK) && atomKK->k_rmass.need_sync<LMPHostType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_float_1d>(atomKK->k_rmass,space);
if ((mask & DVECTOR_MASK) && atomKK->k_dvector.need_sync<LMPHostType>())
atomKK->avecKK->perform_async_copy<DAT::tdual_float_2d>(atomKK->k_dvector,space);
}
}
/* ---------------------------------------------------------------------- */
// Mark each requested per-atom DualView as modified in the given
// execution space, restricted to properties this fix manages.
void FixPropertyAtomKokkos::modified(ExecutionSpace space, unsigned int mask)
{
  const bool on_device = (space == Device);

  if (molecule_flag && (mask & MOLECULE_MASK)) {
    if (on_device) atomKK->k_molecule.modify<LMPDeviceType>();
    else atomKK->k_molecule.modify<LMPHostType>();
  }
  if (q_flag && (mask & Q_MASK)) {
    if (on_device) atomKK->k_q.modify<LMPDeviceType>();
    else atomKK->k_q.modify<LMPHostType>();
  }
  if (rmass_flag && (mask & RMASS_MASK)) {
    if (on_device) atomKK->k_rmass.modify<LMPDeviceType>();
    else atomKK->k_rmass.modify<LMPHostType>();
  }
  if (dvector_flag && (mask & DVECTOR_MASK)) {
    if (on_device) atomKK->k_dvector.modify<LMPDeviceType>();
    else atomKK->k_dvector.modify<LMPHostType>();
  }
}

View File

@ -22,14 +22,23 @@ FixStyle(property/atom/kk,FixPropertyAtomKokkos);
#define LMP_FIX_PROPERTY_ATOM_KOKKOS_H
#include "fix_property_atom.h"
#include "atom_vec_kokkos.h"
namespace LAMMPS_NS {
// Kokkos-aware fix property/atom: grows per-atom property arrays through
// DualViews and exposes sync/modified hooks for them.
class FixPropertyAtomKokkos : public FixPropertyAtom {
public:
FixPropertyAtomKokkos(class LAMMPS *, int, char **);
void post_constructor() override;
~FixPropertyAtomKokkos() override;
void grow_arrays(int) override;
// host/device coherence management for the properties this fix owns
void sync(ExecutionSpace space, unsigned int mask);
void modified(ExecutionSpace space, unsigned int mask);
void sync_overlapping_device(ExecutionSpace space, unsigned int mask);
private:
// 1 if any requested style is DVEC (custom double vector), else 0
int dvector_flag;
};
}

View File

@ -0,0 +1,332 @@
// clang-format off
/* ----------------------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
/* ----------------------------------------------------------------------
Contributing author: Trung Nguyen (U Chicago)
------------------------------------------------------------------------- */
#include "fix_spring_self_kokkos.h"
#include "atom_kokkos.h"
#include "update.h"
#include "modify.h"
#include "domain_kokkos.h"
#include "region.h"
#include "input.h"
#include "variable.h"
#include "memory_kokkos.h"
#include "error.h"
#include "atom_masks.h"
#include "kokkos_base.h"
#include <cstring>
using namespace LAMMPS_NS;
using namespace FixConst;
/* ---------------------------------------------------------------------- */
// Kokkos port of fix spring/self. The base-class constructor allocates and
// fills xoriginal on the host; this constructor migrates that data into a
// DualView (k_xoriginal) and frees the plain host array.
template<class DeviceType>
FixSpringSelfKokkos<DeviceType>::FixSpringSelfKokkos(LAMMPS *lmp, int narg, char **arg) :
FixSpringSelf(lmp, narg, arg)
{
kokkosable = 1;
exchange_comm_device = 1;
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = EMPTY_MASK;
datamask_modify = EMPTY_MASK;
// stash the base-class allocation, then let grow_arrays() re-create
// xoriginal as a DualView-backed array
xoriginal_tmp = xoriginal;
xoriginal = nullptr;
int nmax = atom->nmax;
grow_arrays(nmax);
// copy the original (unwrapped) coordinates of owned atoms into the host view
for (int i = 0; i < atom->nlocal; i++) {
k_xoriginal.h_view(i,0) = xoriginal_tmp[i][0];
k_xoriginal.h_view(i,1) = xoriginal_tmp[i][1];
k_xoriginal.h_view(i,2) = xoriginal_tmp[i][2];
}
k_xoriginal.modify_host();
// scalar used to return the packed-buffer size from the device scan
d_count = typename AT::t_int_scalar("spring/self:count");
h_count = Kokkos::create_mirror_view(d_count);
memory->destroy(xoriginal_tmp);
}
/* ---------------------------------------------------------------------- */
// Free the DualView-backed xoriginal array; shallow device copies
// (copymode) share storage and must not release it.
template<class DeviceType>
FixSpringSelfKokkos<DeviceType>::~FixSpringSelfKokkos()
{
  if (!copymode) {
    memoryKK->destroy_kokkos(k_xoriginal,xoriginal);
    xoriginal = nullptr;
  }
}
/* ---------------------------------------------------------------------- */
// Base-class initialization plus a guard against run styles this
// Kokkos port does not support yet.
template<class DeviceType>
void FixSpringSelfKokkos<DeviceType>::init()
{
  FixSpringSelf::init();

  const bool using_respa = utils::strmatch(update->integrate_style,"^respa");
  if (using_respa)
    error->all(FLERR,"Cannot (yet) use respa with Kokkos");
}
/* ---------------------------------------------------------------------- */
// Apply restoring force F = -k*(x_unwrapped - xoriginal) to each atom in
// the group, per enabled dimension, and accumulate the spring energy.
template<class DeviceType>
void FixSpringSelfKokkos<DeviceType>::post_force(int /*vflag*/)
{
atomKK->sync(execution_space, X_MASK | F_MASK | IMAGE_MASK | MASK_MASK);
x = atomKK->k_x.view<DeviceType>();
f = atomKK->k_f.view<DeviceType>();
image = atomKK->k_image.view<DeviceType>();
mask = atomKK->k_mask.view<DeviceType>();
int nlocal = atom->nlocal;
double espring_kk;
// NOTE(review): unconditional modify<LMPHostType>() marks the host copy
// dirty on every call before syncing to the device -- confirm this is
// intended rather than a plain sync.
k_xoriginal.modify<LMPHostType>();
k_xoriginal.sync<DeviceType>();
copymode = 1;
{
// local variables for lambda capture
auto prd = Few<double,3>(domain->prd);
auto h = Few<double,6>(domain->h);
auto triclinic = domain->triclinic;
auto l_k = k;
auto l_xoriginal = d_xoriginal;
auto l_x = x;
auto l_f = f;
auto l_mask = mask;
auto l_image = image;
auto l_groupbit = groupbit;
auto l_xflag = xflag;
auto l_yflag = yflag;
auto l_zflag = zflag;
Kokkos::parallel_reduce(nlocal, LAMMPS_LAMBDA(const int& i, double& espring_kk) {
if (l_mask[i] & l_groupbit) {
Few<double,3> x_i;
x_i[0] = l_x(i,0);
x_i[1] = l_x(i,1);
x_i[2] = l_x(i,2);
// displacement is measured in unwrapped coordinates
auto unwrap = DomainKokkos::unmap(prd,h,triclinic,x_i,l_image(i));
auto dx = unwrap[0] - l_xoriginal(i, 0);
auto dy = unwrap[1] - l_xoriginal(i, 1);
auto dz = unwrap[2] - l_xoriginal(i, 2);
// zero out disabled dimensions
if (!l_xflag) dx = 0.0;
if (!l_yflag) dy = 0.0;
if (!l_zflag) dz = 0.0;
l_f(i,0) -= l_k*dx;
l_f(i,1) -= l_k*dy;
l_f(i,2) -= l_k*dz;
espring_kk += l_k * (dx*dx + dy*dy + dz*dz);
}
},espring_kk);
}
copymode = 0;
atomKK->modified(execution_space, F_MASK);
// E = (1/2) k dr^2, summed over atoms
espring = 0.5*espring_kk;
}
/* ----------------------------------------------------------------------
allocate local atom-based arrays
------------------------------------------------------------------------- */
// Grow the DualView-backed xoriginal array to nmax rows and refresh the
// cached device view (grow may reallocate).
template<class DeviceType>
void FixSpringSelfKokkos<DeviceType>::grow_arrays(int nmax)
{
memoryKK->grow_kokkos(k_xoriginal,xoriginal,nmax,"spring/self:xoriginal");
d_xoriginal = k_xoriginal.view<DeviceType>();
}
/* ----------------------------------------------------------------------
copy values within local atom-based arrays
------------------------------------------------------------------------- */
// Delegate atom-copy to the host-side base class implementation,
// bracketed by sync/modify so the DualView stays coherent.
template<class DeviceType>
void FixSpringSelfKokkos<DeviceType>::copy_arrays(int i, int j, int delflag)
{
k_xoriginal.sync_host();
FixSpringSelf::copy_arrays(i,j,delflag);
k_xoriginal.modify_host();
}
/* ---------------------------------------------------------------------- */
// Scan-functor body: pack xoriginal of departing atom `mysend` into d_buf.
// Buffer layout: slot [mysend] holds the offset of that atom's payload;
// payloads start at nsend. Also backfills the vacated slot from d_copylist.
// NOTE(review): stores happen on both scan passes (no `final` guard) --
// confirm this matches the exclusive-scan packing convention used by the
// caller in comm_kokkos.
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixSpringSelfKokkos<DeviceType>::pack_exchange_item(const int &mysend, int &offset, const bool &final) const
{
const int i = d_exchange_sendlist(mysend);
d_buf[mysend] = nsend + offset;
int m = nsend + offset;
d_buf[m++] = d_xoriginal(i,0);
d_buf[m++] = d_xoriginal(i,1);
d_buf[m++] = d_xoriginal(i,2);
// last item records the total packed size for the host
if (mysend == nsend-1) d_count() = m;
offset = m - nsend;
// compact: move data of a kept atom into the vacated slot
const int j = d_copylist(mysend);
if (j > -1) {
d_xoriginal(i,0) = d_xoriginal(j,0);
d_xoriginal(i,1) = d_xoriginal(j,1);
d_xoriginal(i,2) = d_xoriginal(j,2);
}
}
/* ---------------------------------------------------------------------- */
// Device-side exchange packing: run the scan functor over all departing
// atoms and return the number of buffer entries written.
template<class DeviceType>
int FixSpringSelfKokkos<DeviceType>::pack_exchange_kokkos(
const int &nsend, DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d k_exchange_sendlist, DAT::tdual_int_1d k_copylist,
ExecutionSpace space)
{
k_buf.sync<DeviceType>();
k_copylist.sync<DeviceType>();
k_exchange_sendlist.sync<DeviceType>();
// flatten the 2d buffer into an unmanaged 1d view
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_copylist = k_copylist.view<DeviceType>();
d_exchange_sendlist = k_exchange_sendlist.view<DeviceType>();
this->nsend = nsend;
k_xoriginal.template sync<DeviceType>();
Kokkos::deep_copy(d_count,0);
copymode = 1;
FixSpringSelfKokkosPackExchangeFunctor<DeviceType> pack_exchange_functor(this);
Kokkos::parallel_scan(nsend,pack_exchange_functor);
copymode = 0;
k_buf.modify<DeviceType>();
// make the buffer available where the communication layer wants it
if (space == Host) k_buf.sync<LMPHostType>();
else k_buf.sync<LMPDeviceType>();
k_xoriginal.template modify<DeviceType>();
// d_count was set by the last scan item; fetch it to the host
Kokkos::deep_copy(h_count,d_count);
return h_count();
}
/* ---------------------------------------------------------------------- */
// Unpack xoriginal coordinates for received atom i. d_buf[i] holds the
// offset of that atom's payload; d_indices(i) is its local slot (-1 = skip).
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixSpringSelfKokkos<DeviceType>::operator()(TagFixSpringSelfUnpackExchange, const int &i) const
{
  int index = d_indices(i);

  if (index > -1) {
    int m = d_buf[i];
    // BUGFIX: xoriginal stores double-precision coordinates; the previous
    // static_cast<tagint> (copied from a tag-unpacking pattern) truncated
    // them to integers on unpack, corrupting the spring anchor points.
    d_xoriginal(index,0) = d_buf[m++];
    d_xoriginal(index,1) = d_buf[m++];
    d_xoriginal(index,2) = d_buf[m++];
  }
}
/* ---------------------------------------------------------------------- */
// Device-side exchange unpacking: scatter received xoriginal payloads into
// the slots given by k_indices using the TagFixSpringSelfUnpackExchange kernel.
template <class DeviceType>
void FixSpringSelfKokkos<DeviceType>::unpack_exchange_kokkos(
DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d &k_indices, int nrecv,
ExecutionSpace /*space*/)
{
k_buf.sync<DeviceType>();
k_indices.sync<DeviceType>();
// flatten the 2d buffer into an unmanaged 1d view
d_buf = typename ArrayTypes<DeviceType>::t_xfloat_1d_um(
k_buf.template view<DeviceType>().data(),
k_buf.extent(0)*k_buf.extent(1));
d_indices = k_indices.view<DeviceType>();
k_xoriginal.template sync<DeviceType>();
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixSpringSelfUnpackExchange>(0,nrecv),*this);
copymode = 0;
k_xoriginal.template modify<DeviceType>();
}
/* ----------------------------------------------------------------------
pack values in local atom-based arrays for exchange with another proc
------------------------------------------------------------------------- */
// Host-path exchange packing: delegate to the base class, keeping the
// host copy of k_xoriginal current before and marked modified after.
template<class DeviceType>
int FixSpringSelfKokkos<DeviceType>::pack_exchange(int i, double *buf)
{
  k_xoriginal.sync_host();
  const int n = FixSpringSelf::pack_exchange(i,buf);
  k_xoriginal.modify_host();
  return n;
}
/* ----------------------------------------------------------------------
unpack values in local atom-based arrays from exchange with another proc
------------------------------------------------------------------------- */
// Host-path exchange unpacking: delegate to the base class, keeping the
// host copy of k_xoriginal current before and marked modified after.
template<class DeviceType>
int FixSpringSelfKokkos<DeviceType>::unpack_exchange(int nlocal, double *buf)
{
  k_xoriginal.sync_host();
  const int n = FixSpringSelf::unpack_exchange(nlocal,buf);
  k_xoriginal.modify_host();
  return n;
}
namespace LAMMPS_NS {
template class FixSpringSelfKokkos<LMPDeviceType>;
#ifdef LMP_KOKKOS_GPU
template class FixSpringSelfKokkos<LMPHostType>;
#endif
}

View File

@ -0,0 +1,108 @@
/* -*- c++ -*- ----------------------------------------------------------
LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
https://www.lammps.org/, Sandia National Laboratories
LAMMPS development team: developers@lammps.org
Copyright (2003) Sandia Corporation. Under the terms of Contract
DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
certain rights in this software. This software is distributed under
the GNU General Public License.
See the README file in the top-level LAMMPS directory.
------------------------------------------------------------------------- */
#ifdef FIX_CLASS
// clang-format off
FixStyle(spring/self/kk,FixSpringSelfKokkos<LMPDeviceType>);
FixStyle(spring/self/kk/device,FixSpringSelfKokkos<LMPDeviceType>);
FixStyle(spring/self/kk/host,FixSpringSelfKokkos<LMPHostType>);
// clang-format on
#else
// clang-format off
#ifndef LMP_FIX_SPRING_SELF_KOKKOS_H
#define LMP_FIX_SPRING_SELF_KOKKOS_H
#include "fix_spring_self.h"
#include "kokkos_type.h"
#include "kokkos_base.h"
namespace LAMMPS_NS {
// dispatch tag for the exchange-unpack kernel
struct TagFixSpringSelfUnpackExchange{};
// Kokkos port of fix spring/self; KokkosBase supplies the device-side
// exchange-communication interface.
template<class DeviceType>
class FixSpringSelfKokkos : public FixSpringSelf, public KokkosBase {
public:
typedef DeviceType device_type;
// reduction value type of the energy parallel_reduce in post_force
typedef double value_type;
typedef ArrayTypes<DeviceType> AT;
FixSpringSelfKokkos(class LAMMPS *, int, char **);
~FixSpringSelfKokkos() override;
void init() override;
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void post_force(int) override;
// scan-functor body used by pack_exchange_kokkos
KOKKOS_INLINE_FUNCTION
void pack_exchange_item(const int&, int &, const bool &) const;
KOKKOS_INLINE_FUNCTION
void operator()(TagFixSpringSelfUnpackExchange, const int&) const;
// device-path exchange communication (KokkosBase overrides)
int pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &buf,
DAT::tdual_int_1d k_sendlist,
DAT::tdual_int_1d k_copylist,
ExecutionSpace space) override;
void unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,
DAT::tdual_int_1d &indices,int nrecv,
ExecutionSpace space) override;
// host-path exchange communication (base Fix overrides)
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;
protected:
// DualView-backed storage of the per-atom anchor coordinates
DAT::tdual_x_array k_xoriginal;
typename AT::t_x_array d_xoriginal;
// device views of atom data, cached in post_force
typename AT::t_x_array_randomread x;
typename AT::t_f_array f;
typename AT::t_imageint_1d_randomread image;
typename AT::t_int_1d_randomread mask;
// state shared with the exchange pack/unpack kernels
int nsend;
typename AT::t_int_2d d_sendlist;
typename AT::t_xfloat_1d_um d_buf;
typename AT::t_int_1d d_exchange_sendlist;
typename AT::t_int_1d d_copylist;
typename AT::t_int_1d d_indices;
// packed-buffer size written by the scan kernel, mirrored on the host
typename AT::t_int_scalar d_count;
HAT::t_int_scalar h_count;
double **xoriginal_tmp; // original coords of atoms
};
// Thin scan functor that forwards to FixSpringSelfKokkos::pack_exchange_item.
// Holds a copy of the fix (shallow-copied views) so it is safe to capture
// by value in Kokkos::parallel_scan.
template <class DeviceType>
struct FixSpringSelfKokkosPackExchangeFunctor {
typedef DeviceType device_type;
// scan accumulator: running offset into the payload section of the buffer
typedef int value_type;
FixSpringSelfKokkos<DeviceType> c;
FixSpringSelfKokkosPackExchangeFunctor(FixSpringSelfKokkos<DeviceType>* c_ptr):c(*c_ptr) {};
KOKKOS_INLINE_FUNCTION
void operator()(const int &i, int &offset, const bool &final) const {
c.pack_exchange_item(i, offset, final);
}
};
}
#endif
#endif

View File

@ -137,13 +137,13 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
int set_flag = 0;
char *str;
if ((str = getenv("SLURM_LOCALID"))) {
if (str = getenv("SLURM_LOCALID")) {
int local_rank = atoi(str);
device = local_rank % ngpus;
if (device >= skip_gpu) device++;
set_flag = 1;
}
if ((str = getenv("MPT_LRANK"))) {
if (str = getenv("FLUX_TASK_LOCAL_ID")) {
if (ngpus > 0) {
int local_rank = atoi(str);
device = local_rank % ngpus;
@ -151,7 +151,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
set_flag = 1;
}
}
if ((str = getenv("MV2_COMM_WORLD_LOCAL_RANK"))) {
if (str = getenv("MPT_LRANK")) {
if (ngpus > 0) {
int local_rank = atoi(str);
device = local_rank % ngpus;
@ -159,7 +159,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
set_flag = 1;
}
}
if ((str = getenv("OMPI_COMM_WORLD_LOCAL_RANK"))) {
if (str = getenv("MV2_COMM_WORLD_LOCAL_RANK")) {
if (ngpus > 0) {
int local_rank = atoi(str);
device = local_rank % ngpus;
@ -167,7 +167,15 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
set_flag = 1;
}
}
if ((str = getenv("PMI_LOCAL_RANK"))) {
if (str = getenv("OMPI_COMM_WORLD_LOCAL_RANK")) {
if (ngpus > 0) {
int local_rank = atoi(str);
device = local_rank % ngpus;
if (device >= skip_gpu) device++;
set_flag = 1;
}
}
if (str = getenv("PMI_LOCAL_RANK")) {
if (ngpus > 0) {
int local_rank = atoi(str);
device = local_rank % ngpus;

View File

@ -41,11 +41,6 @@ class KokkosBase {
int, int *) {return 0;};
virtual void unpack_forward_comm_fix_kokkos(int, int, DAT::tdual_xfloat_1d &) {}
// Region
virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
// Fix
virtual int pack_exchange_kokkos(const int & /*nsend*/, DAT::tdual_xfloat_2d & /*k_buf*/,
DAT::tdual_int_1d /*k_sendlist*/,
DAT::tdual_int_1d /*k_copylist*/,
@ -54,6 +49,9 @@ class KokkosBase {
DAT::tdual_int_1d & /*indices*/, int /*nrecv*/,
ExecutionSpace /*space*/) {}
// Region
virtual void match_all_kokkos(int, DAT::tdual_int_1d) {}
using KeyViewType = DAT::t_x_array;
using BinOp = BinOp3DLAMMPS<KeyViewType>;
virtual void

View File

@ -59,6 +59,9 @@ void MinKokkos::init()
{
Min::init();
if (!fix_minimize->kokkosable)
error->all(FLERR,"KOKKOS package requires fix minimize/kk");
fix_minimize_kk = (FixMinimizeKokkos*) fix_minimize;
}

View File

@ -362,6 +362,17 @@ void ModifyKokkos::pre_reverse(int eflag, int vflag)
void ModifyKokkos::post_force(int vflag)
{
for (int i = 0; i < n_post_force_group; i++) {
atomKK->sync(fix[list_post_force_group[i]]->execution_space,
fix[list_post_force_group[i]]->datamask_read);
int prev_auto_sync = lmp->kokkos->auto_sync;
if (!fix[list_post_force_group[i]]->kokkosable) lmp->kokkos->auto_sync = 1;
fix[list_post_force_group[i]]->post_force(vflag);
lmp->kokkos->auto_sync = prev_auto_sync;
atomKK->modified(fix[list_post_force_group[i]]->execution_space,
fix[list_post_force_group[i]]->datamask_modify);
}
for (int i = 0; i < n_post_force; i++) {
atomKK->sync(fix[list_post_force[i]]->execution_space,
fix[list_post_force[i]]->datamask_read);

View File

@ -112,9 +112,8 @@ void NeighBondKokkos<DeviceType>::init_topology_kk() {
int i,m;
int bond_off = 0;
int angle_off = 0;
for (i = 0; i < modify->nfix; i++)
if ((strcmp(modify->fix[i]->style,"shake") == 0)
|| (strcmp(modify->fix[i]->style,"rattle") == 0))
for (const auto &ifix : modify->get_fix_list())
if (utils::strmatch(ifix->style,"^shake") || utils::strmatch(ifix->style,"^rattle"))
bond_off = angle_off = 1;
if (force->bond && force->bond_match("quartic")) bond_off = 1;

View File

@ -308,7 +308,8 @@ void NeighborKokkos::build_kokkos(int topoflag)
for (i = 0; i < npair_perpetual; i++) {
m = plist[i];
if (!lists[m]->kokkos) atomKK->sync(Host,ALL_MASK);
if (!lists[m]->copy) lists[m]->grow(nlocal,nall);
if (!lists[m]->copy || lists[m]->trim || lists[m]->kk2cpu)
lists[m]->grow(nlocal,nall);
neigh_pair[m]->build_setup();
neigh_pair[m]->build(lists[m]);
}

View File

@ -18,6 +18,7 @@
#include "atom_masks.h"
#include "atom_vec.h"
#include "domain.h"
#include "force.h"
#include "neigh_list_kokkos.h"
#include <cmath>
@ -26,8 +27,8 @@ using namespace LAMMPS_NS;
/* ---------------------------------------------------------------------- */
template<class DeviceType, int NEWTON, int TRIM>
NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
template<class DeviceType, int NEWTON, int TRI, int TRIM>
NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) : NPair(lmp) {
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
}
@ -41,13 +42,14 @@ NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::NPairHalffullKokkos(LAMMPS *lmp) :
if ghost, also store neighbors of ghost atoms & set inum,gnum correctly
------------------------------------------------------------------------- */
template<class DeviceType, int NEWTON, int TRIM>
void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
template<class DeviceType, int NEWTON, int TRI, int TRIM>
void NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::build(NeighList *list)
{
if (NEWTON || TRIM) {
x = atomKK->k_x.view<DeviceType>();
atomKK->sync(execution_space,X_MASK);
}
nlocal = atom->nlocal;
cutsq_custom = cutoff_custom*cutoff_custom;
@ -66,6 +68,8 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
d_numneigh = k_list->d_numneigh;
d_neighbors = k_list->d_neighbors;
delta = 0.01 * force->angstrom;
// loop over parent full list
copymode = 1;
@ -78,9 +82,9 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::build(NeighList *list)
k_list->k_ilist.template modify<DeviceType>();
}
template<class DeviceType, int NEWTON, int TRIM>
template<class DeviceType, int NEWTON, int TRI, int TRIM>
KOKKOS_INLINE_FUNCTION
void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCompute, const int &ii) const {
void NPairHalffullKokkos<DeviceType,NEWTON,TRI,TRIM>::operator()(TagNPairHalffullCompute, const int &ii) const {
int n = 0;
const int i = d_ilist_full(ii);
@ -92,6 +96,11 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
}
// loop over full neighbor list
// use i < j < nlocal to eliminate half the local/local interactions
// for triclinic, must use delta to eliminate half the local/ghost interactions
// cannot use I/J exact coord comparision as for orthog
// b/c transforming orthog -> lambda -> orthog for ghost atoms
// with an added PBC offset can shift all 3 coords by epsilon
const int jnum = d_numneigh_full(i);
const AtomNeighbors neighbors_i = AtomNeighbors(&d_neighbors(i,0),d_numneigh(i),
@ -103,6 +112,14 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
if (NEWTON) {
if (j < nlocal) {
if (i > j) continue;
} else if (TRI) {
if (fabs(x(j,2)-ztmp) > delta) {
if (x(j,2) < ztmp) continue;
} else if (fabs(x(j,1)-ytmp) > delta) {
if (x(j,1) < ytmp) continue;
} else {
if (x(j,0) < xtmp) continue;
}
} else {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
@ -141,14 +158,18 @@ void NPairHalffullKokkos<DeviceType,NEWTON,TRIM>::operator()(TagNPairHalffullCom
}
namespace LAMMPS_NS {
template class NPairHalffullKokkos<LMPDeviceType,0,0>;
template class NPairHalffullKokkos<LMPDeviceType,0,1>;
template class NPairHalffullKokkos<LMPDeviceType,1,0>;
template class NPairHalffullKokkos<LMPDeviceType,1,1>;
template class NPairHalffullKokkos<LMPDeviceType,0,0,0>;
template class NPairHalffullKokkos<LMPDeviceType,0,0,1>;
template class NPairHalffullKokkos<LMPDeviceType,1,0,0>;
template class NPairHalffullKokkos<LMPDeviceType,1,0,1>;
template class NPairHalffullKokkos<LMPDeviceType,1,1,0>;
template class NPairHalffullKokkos<LMPDeviceType,1,1,1>;
#ifdef LMP_KOKKOS_GPU
template class NPairHalffullKokkos<LMPHostType,0,0>;
template class NPairHalffullKokkos<LMPHostType,0,1>;
template class NPairHalffullKokkos<LMPHostType,1,0>;
template class NPairHalffullKokkos<LMPHostType,1,1>;
template class NPairHalffullKokkos<LMPHostType,0,0,0>;
template class NPairHalffullKokkos<LMPHostType,0,0,1>;
template class NPairHalffullKokkos<LMPHostType,1,0,0>;
template class NPairHalffullKokkos<LMPHostType,1,0,1>;
template class NPairHalffullKokkos<LMPHostType,1,1,0>;
template class NPairHalffullKokkos<LMPHostType,1,1,1>;
#endif
}

View File

@ -16,53 +16,79 @@
// Trim off
// Newton
// Newton, no triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
NPairStyle(halffull/newton/kk/device,
NPairKokkosHalffullNewtonDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
NP_ORTHO | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
NPairStyle(halffull/newton/kk/host,
NPairKokkosHalffullNewtonHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
NP_ORTHO | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonDevice;
typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
NPairStyle(halffull/newton/skip/kk/device,
NPairKokkosHalffullNewtonDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
NP_ORTHO | NP_SKIP | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
NPairStyle(halffull/newton/skip/kk/host,
NPairKokkosHalffullNewtonHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_SKIP | NP_KOKKOS_HOST);
// Newton, triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
NPairStyle(halffull/newton/tri/kk/device,
NPairKokkosHalffullNewtonTriDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
NPairStyle(halffull/newton/tri/kk/host,
NPairKokkosHalffullNewtonTriHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
NPairStyle(halffull/newton/tri/skip/kk/device,
NPairKokkosHalffullNewtonTriDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
NPairStyle(halffull/newton/tri/skip/kk/host,
NPairKokkosHalffullNewtonTriHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_HOST);
// Newtoff
// Newtoff (can be triclinic but template param always set to 0)
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
NPairStyle(halffull/newtoff/kk/device,
NPairKokkosHalffullNewtoffDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
NPairStyle(halffull/newtoff/kk/host,
NPairKokkosHalffullNewtoffHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffDevice;
typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
NPairStyle(halffull/newtoff/skip/kk/device,
NPairKokkosHalffullNewtoffDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
NPairStyle(halffull/newtoff/skip/kk/host,
NPairKokkosHalffullNewtoffHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
@ -70,166 +96,244 @@ NPairStyle(halffull/newtoff/skip/kk/host,
//************ Ghost **************
// Newton
// Newton, no triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
NPairStyle(halffull/newton/ghost/kk/device,
NPairKokkosHalffullNewtonGhostDevice,
NPairKokkosHalffullNewtonDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
NP_ORTHO | NP_GHOST | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
NPairStyle(halffull/newton/ghost/kk/host,
NPairKokkosHalffullNewtonHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
NP_ORTHO | NP_GHOST | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,0> NPairKokkosHalffullNewtonGhostDevice;
typedef NPairHalffullKokkos<LMPDeviceType,1,0,0> NPairKokkosHalffullNewtonDevice;
NPairStyle(halffull/newton/skip/ghost/kk/device,
NPairKokkosHalffullNewtonGhostDevice,
NPairKokkosHalffullNewtonDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,0> NPairKokkosHalffullNewtonHost;
typedef NPairHalffullKokkos<LMPHostType,1,0,0> NPairKokkosHalffullNewtonHost;
NPairStyle(halffull/newton/skip/ghost/kk/host,
NPairKokkosHalffullNewtonHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
// Newton, triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
NPairStyle(halffull/newton/tri/ghost/kk/device,
NPairKokkosHalffullNewtonTriDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
NPairStyle(halffull/newton/tri/ghost/kk/host,
NPairKokkosHalffullNewtonTriHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,1,0> NPairKokkosHalffullNewtonTriDevice;
NPairStyle(halffull/newton/tri/skip/ghost/kk/device,
NPairKokkosHalffullNewtonTriDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1,0> NPairKokkosHalffullNewtonTriHost;
NPairStyle(halffull/newton/tri/skip/ghost/kk/host,
NPairKokkosHalffullNewtonTriHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
// Newtoff
// Newtoff (can be triclinic but template param always set to 0)
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
NPairStyle(halffull/newtoff/ghost/kk/device,
NPairKokkosHalffullNewtoffGhostDevice,
NPairKokkosHalffullNewtoffDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
NPairStyle(halffull/newtoff/ghost/kk/host,
NPairKokkosHalffullNewtoffHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,0,0> NPairKokkosHalffullNewtoffGhostDevice;
typedef NPairHalffullKokkos<LMPDeviceType,0,0,0> NPairKokkosHalffullNewtoffDevice;
NPairStyle(halffull/newtoff/skip/ghost/kk/device,
NPairKokkosHalffullNewtoffGhostDevice,
NPairKokkosHalffullNewtoffDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,0> NPairKokkosHalffullNewtoffHost;
typedef NPairHalffullKokkos<LMPHostType,0,0,0> NPairKokkosHalffullNewtoffHost;
NPairStyle(halffull/newtoff/skip/ghost/kk/host,
NPairKokkosHalffullNewtoffHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_KOKKOS_HOST);
//************ Trim **************
// Newton
// Newton, no triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
NPairStyle(halffull/newton/trim/kk/device,
NPairKokkosHalffullNewtonTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
NP_ORTHO | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
NPairStyle(halffull/newton/trim/kk/host,
NPairKokkosHalffullNewtonTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRIM | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
NPairStyle(halffull/newton/trim/skip/kk/device,
NPairKokkosHalffullNewtonTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
NPairStyle(halffull/newton/trim/skip/kk/host,
NPairKokkosHalffullNewtonTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
// Newton, triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
NPairStyle(halffull/newton/tri/trim/kk/device,
NPairKokkosHalffullNewtonTriTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
NPairStyle(halffull/newton/tri/trim/kk/host,
NPairKokkosHalffullNewtonTriTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonTrimDevice;
NPairStyle(halffull/newton/skip/trim/kk/device,
typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
NPairStyle(halffull/newton/tri/trim/skip/kk/device,
NPairKokkosHalffullNewtonTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
NPairStyle(halffull/newton/skip/trim/kk/host,
NPairKokkosHalffullNewtonTrimHost,
typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
NPairStyle(halffull/newton/tri/trim/skip/kk/host,
NPairKokkosHalffullNewtonTriTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
// Newtoff
// Newtoff (can be triclinic but template param always set to 0)
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
NPairStyle(halffull/newtoff/trim/kk/device,
NPairKokkosHalffullNewtoffTrimDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
NPairStyle(halffull/newtoff/trim/kk/host,
NPairKokkosHalffullNewtoffTrimHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_TRIM | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffTrimDevice;
NPairStyle(halffull/newtoff/skip/trim/kk/device,
typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
NPairStyle(halffull/newtoff/trim/skip/kk/device,
NPairKokkosHalffullNewtoffTrimDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
NPairStyle(halffull/newtoff/skip/trim/kk/host,
typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
NPairStyle(halffull/newtoff/trim/skip/kk/host,
NPairKokkosHalffullNewtoffTrimHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
//************ Ghost **************
// Newton
// Newton, no triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
NPairStyle(halffull/newton/ghost/trim/kk/device,
NPairKokkosHalffullNewtonGhostTrimDevice,
typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
NPairStyle(halffull/newton/tri/trim/ghost/kk/device,
NPairKokkosHalffullNewtonTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
NPairStyle(halffull/newton/trim/ghost/kk/host,
NPairKokkosHalffullNewtonTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,0,1> NPairKokkosHalffullNewtonTrimDevice;
NPairStyle(halffull/newton/trim/skip/ghost/kk/device,
NPairKokkosHalffullNewtonTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,0,1> NPairKokkosHalffullNewtonTrimHost;
NPairStyle(halffull/newton/trim/skip/ghost/kk/host,
NPairKokkosHalffullNewtonTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
// Newton, triclinic
typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
NPairStyle(halffull/newton/tri/trim/ghost/kk/device,
NPairKokkosHalffullNewtonTriTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
NPairStyle(halffull/newton/ghost/trim/kk/host,
NPairKokkosHalffullNewtonTrimHost,
typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
NPairStyle(halffull/newton/tri/trim/ghost/kk/host,
NPairKokkosHalffullNewtonTriTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,1,1> NPairKokkosHalffullNewtonGhostTrimDevice;
NPairStyle(halffull/newton/skip/ghost/trim/kk/device,
NPairKokkosHalffullNewtonGhostTrimDevice,
typedef NPairHalffullKokkos<LMPDeviceType,1,1,1> NPairKokkosHalffullNewtonTriTrimDevice;
NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/device,
NPairKokkosHalffullNewtonTriTrimDevice,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,1,1> NPairKokkosHalffullNewtonTrimHost;
NPairStyle(halffull/newton/skip/ghost/trim/kk/host,
NPairKokkosHalffullNewtonTrimHost,
typedef NPairHalffullKokkos<LMPHostType,1,1,1> NPairKokkosHalffullNewtonTriTrimHost;
NPairStyle(halffull/newton/tri/trim/skip/ghost/kk/host,
NPairKokkosHalffullNewtonTriTrimHost,
NP_HALF_FULL | NP_NEWTON | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
// Newtoff
// Newtoff (can be triclinic but template param always set to 0)
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
NPairStyle(halffull/newtoff/ghost/trim/kk/device,
NPairKokkosHalffullNewtoffGhostTrimDevice,
typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
NPairStyle(halffull/newtoff/trim/ghost/kk/device,
NPairKokkosHalffullNewtoffTrimDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
NPairStyle(halffull/newtoff/ghost/trim/kk/host,
typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
NPairStyle(halffull/newtoff/trim/ghost/kk/host,
NPairKokkosHalffullNewtoffTrimHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_TRIM | NP_KOKKOS_HOST);
typedef NPairHalffullKokkos<LMPDeviceType,0,1> NPairKokkosHalffullNewtoffGhostTrimDevice;
NPairStyle(halffull/newtoff/skip/ghost/trim/kk/device,
NPairKokkosHalffullNewtoffGhostTrimDevice,
typedef NPairHalffullKokkos<LMPDeviceType,0,0,1> NPairKokkosHalffullNewtoffTrimDevice;
NPairStyle(halffull/newtoff/trim/skip/ghost/kk/device,
NPairKokkosHalffullNewtoffTrimDevice,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_DEVICE);
typedef NPairHalffullKokkos<LMPHostType,0,1> NPairKokkosHalffullNewtoffTrimHost;
NPairStyle(halffull/newtoff/skip/ghost/trim/kk/host,
typedef NPairHalffullKokkos<LMPHostType,0,0,1> NPairKokkosHalffullNewtoffTrimHost;
NPairStyle(halffull/newtoff/trim/skip/ghost/kk/host,
NPairKokkosHalffullNewtoffTrimHost,
NP_HALF_FULL | NP_NEWTOFF | NP_HALF | NP_NSQ | NP_BIN | NP_MULTI |
NP_ORTHO | NP_TRI | NP_GHOST | NP_SKIP | NP_TRIM | NP_KOKKOS_HOST);
// clang-format on
#else
@ -244,7 +348,7 @@ namespace LAMMPS_NS {
struct TagNPairHalffullCompute{};
template<class DeviceType, int NEWTON, int TRIM>
template<class DeviceType, int NEWTON, int TRI, int TRIM>
class NPairHalffullKokkos : public NPair {
public:
typedef DeviceType device_type;
@ -257,8 +361,8 @@ class NPairHalffullKokkos : public NPair {
void operator()(TagNPairHalffullCompute, const int&) const;
private:
int nlocal;
double cutsq_custom;
int nlocal,triclinic;
double cutsq_custom,delta;
typename AT::t_x_array_randomread x;

View File

@ -155,6 +155,8 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
list->grow(nall);
const double delta = 0.01 * force->angstrom;
NeighborKokkosExecute<DeviceType>
data(*list,
k_cutneighsq.view<DeviceType>(),
@ -176,7 +178,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
atomKK->molecular,
nbinx,nbiny,nbinz,mbinx,mbiny,mbinz,mbinxlo,mbinylo,mbinzlo,
bininvx,bininvy,bininvz,
exclude, nex_type,
delta, exclude, nex_type,
k_ex1_type.view<DeviceType>(),
k_ex2_type.view<DeviceType>(),
k_ex_type.view<DeviceType>(),
@ -217,6 +219,8 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
atomKK->sync(Device,X_MASK|RADIUS_MASK|TYPE_MASK);
}
if (HALF && NEWTON && TRI) atomKK->sync(Device,TAG_MASK);
data.special_flag[0] = special_flag[0];
data.special_flag[1] = special_flag[1];
data.special_flag[2] = special_flag[2];
@ -261,7 +265,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
//#endif
} else {
if (SIZE) {
NPairKokkosBuildFunctorSize<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctorSize<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 7 * sizeof(X_FLOAT) * factor);
#ifdef LMP_KOKKOS_GPU
if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
int team_size = atoms_per_bin*factor;
@ -279,7 +283,7 @@ void NPairKokkos<DeviceType,HALF,NEWTON,GHOST,TRI,SIZE>::build(NeighList *list_)
Kokkos::parallel_for(nall, f);
#endif
} else {
NPairKokkosBuildFunctor<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 5 * sizeof(X_FLOAT) * factor);
NPairKokkosBuildFunctor<DeviceType,HALF,NEWTON,TRI> f(data,atoms_per_bin * 6 * sizeof(X_FLOAT) * factor);
#ifdef LMP_KOKKOS_GPU
if (ExecutionSpaceFromDevice<DeviceType>::space == Device) {
int team_size = atoms_per_bin*factor;
@ -414,6 +418,8 @@ void NeighborKokkosExecute<DeviceType>::
const X_FLOAT ytmp = x(i, 1);
const X_FLOAT ztmp = x(i, 2);
const int itype = type(i);
tagint itag;
if (HalfNeigh && Newton && Tri) itag = tag(i);
const int ibin = c_atom2bin(i);
@ -484,13 +490,29 @@ void NeighborKokkosExecute<DeviceType>::
if (HalfNeigh && !Newton && j <= i) continue;
if (!HalfNeigh && j == i) continue;
// for triclinic, bin stencil is full in all 3 dims
// must use itag/jtag to eliminate half the I/J interactions
// cannot use I/J exact coord comparision
// b/c transforming orthog -> lambda -> orthog for ghost atoms
// with an added PBC offset can shift all 3 coords by epsilon
if (HalfNeigh && Newton && Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
if (x(j,1) == ytmp) {
if (x(j,0) < xtmp) continue;
if (x(j,0) == xtmp && j <= i) continue;
if (j <= i) continue;
if (j >= nlocal) {
const tagint jtag = tag(j);
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (fabs(x(j,2)-ztmp) > delta) {
if (x(j,2) < ztmp) continue;
} else if (fabs(x(j,1)-ytmp) > delta) {
if (x(j,1) < ytmp) continue;
} else {
if (x(j,0) < xtmp) continue;
}
}
}
}
@ -568,8 +590,9 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
size_t sharedsize) const
{
auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
/* loop over atoms in i's bin,
*/
// loop over atoms in i's bin
const int atoms_per_bin = c_bins.extent(1);
const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin;
const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
@ -579,15 +602,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
if (ibin >= mbins) return;
X_FLOAT* other_x = sharedmem + 5*atoms_per_bin*MY_BIN;
int* other_id = (int*) &other_x[4 * atoms_per_bin];
X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN;
int* other_id = (int*) &other_x[5 * atoms_per_bin];
int bincount_current = c_bincount[ibin];
for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
int n = 0;
@ -595,6 +617,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
X_FLOAT ytmp;
X_FLOAT ztmp;
int itype;
tagint itag;
const int index = (i >= 0 && i < nlocal) ? i : 0;
const AtomNeighbors neighbors_i = neigh_transpose ?
neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
@ -608,6 +631,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
other_x[MY_II + atoms_per_bin] = ytmp;
other_x[MY_II + 2 * atoms_per_bin] = ztmp;
other_x[MY_II + 3 * atoms_per_bin] = itype;
if (HalfNeigh && Newton && Tri) {
itag = tag(i);
other_x[MY_II + 4 * atoms_per_bin] = itag;
}
}
other_id[MY_II] = i;
@ -695,6 +722,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
other_x[MY_II + atoms_per_bin] = x(j, 1);
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
other_x[MY_II + 3 * atoms_per_bin] = type(j);
if (HalfNeigh && Newton && Tri)
other_x[MY_II + 4 * atoms_per_bin] = tag(j);
}
other_id[MY_II] = j;
@ -708,13 +737,29 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGPU(typename Kokkos::TeamPolic
if (HalfNeigh && !Newton && j <= i) continue;
if (!HalfNeigh && j == i) continue;
// for triclinic, bin stencil is full in all 3 dims
// must use itag/jtag to eliminate half the I/J interactions
// cannot use I/J exact coord comparision
// b/c transforming orthog -> lambda -> orthog for ghost atoms
// with an added PBC offset can shift all 3 coords by epsilon
if (HalfNeigh && Newton && Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
if (x(j,1) == ytmp) {
if (x(j,0) < xtmp) continue;
if (x(j,0) == xtmp && j <= i) continue;
if (j <= i) continue;
if (j >= nlocal) {
const tagint jtag = other_x[m + 4 * atoms_per_bin];
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (fabs(x(j,2)-ztmp) > delta) {
if (x(j,2) < ztmp) continue;
} else if (fabs(x(j,1)-ytmp) > delta) {
if (x(j,1) < ytmp) continue;
} else {
if (x(j,0) < xtmp) continue;
}
}
}
}
@ -905,6 +950,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemGhostGPU(typename Kokkos::Team
size_t sharedsize) const
{
auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
// loop over atoms in i's bin
const int atoms_per_bin = c_bins.extent(1);
@ -1084,6 +1130,8 @@ void NeighborKokkosExecute<DeviceType>::
const X_FLOAT ztmp = x(i, 2);
const X_FLOAT radi = radius(i);
const int itype = type(i);
tagint itag;
if (HalfNeigh && Newton && Tri) itag = tag(i);
const int ibin = c_atom2bin(i);
@ -1167,13 +1215,29 @@ void NeighborKokkosExecute<DeviceType>::
if (HalfNeigh && !Newton && j <= i) continue;
if (!HalfNeigh && j == i) continue;
// for triclinic, bin stencil is full in all 3 dims
// must use itag/jtag to eliminate half the I/J interactions
// cannot use I/J exact coord comparision
// b/c transforming orthog -> lambda -> orthog for ghost atoms
// with an added PBC offset can shift all 3 coords by epsilon
if (HalfNeigh && Newton && Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
if (x(j,1) == ytmp) {
if (x(j,0) < xtmp) continue;
if (x(j,0) == xtmp && j <= i) continue;
if (j <= i) continue;
if (j >= nlocal) {
const tagint jtag = tag(j);
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (fabs(x(j,2)-ztmp) > delta) {
if (x(j,2) < ztmp) continue;
} else if (fabs(x(j,1)-ytmp) > delta) {
if (x(j,1) < ytmp) continue;
} else {
if (x(j,0) < xtmp) continue;
}
}
}
}
@ -1245,8 +1309,9 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
size_t sharedsize) const
{
auto* sharedmem = static_cast<X_FLOAT *>(dev.team_shmem().get_shmem(sharedsize));
/* loop over atoms in i's bin,
*/
// loop over atoms in i's bin
const int atoms_per_bin = c_bins.extent(1);
const int BINS_PER_TEAM = dev.team_size()/atoms_per_bin <1?1:dev.team_size()/atoms_per_bin;
const int TEAMS_PER_BIN = atoms_per_bin/dev.team_size()<1?1:atoms_per_bin/dev.team_size();
@ -1256,15 +1321,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
if (ibin >= mbins) return;
X_FLOAT* other_x = sharedmem + 6*atoms_per_bin*MY_BIN;
int* other_id = (int*) &other_x[5 * atoms_per_bin];
X_FLOAT* other_x = sharedmem + 7*atoms_per_bin*MY_BIN;
int* other_id = (int*) &other_x[6 * atoms_per_bin];
int bincount_current = c_bincount[ibin];
for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
int n = 0;
@ -1273,6 +1337,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
X_FLOAT ztmp;
X_FLOAT radi;
int itype;
tagint itag;
const int index = (i >= 0 && i < nlocal) ? i : 0;
const AtomNeighbors neighbors_i = neigh_transpose ?
neigh_list.get_neighbors_transpose(index) : neigh_list.get_neighbors(index);
@ -1289,6 +1354,10 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
other_x[MY_II + 2 * atoms_per_bin] = ztmp;
other_x[MY_II + 3 * atoms_per_bin] = itype;
other_x[MY_II + 4 * atoms_per_bin] = radi;
if (HalfNeigh && Newton && Tri) {
itag = tag(i);
other_x[MY_II + 5 * atoms_per_bin] = itag;
}
}
other_id[MY_II] = i;
#if defined(KOKKOS_ENABLE_CUDA) || defined(KOKKOS_ENABLE_HIP)
@ -1381,6 +1450,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
other_x[MY_II + 3 * atoms_per_bin] = type(j);
other_x[MY_II + 4 * atoms_per_bin] = radius(j);
if (HalfNeigh && Newton && Tri)
other_x[MY_II + 5 * atoms_per_bin] = tag(j);
}
other_id[MY_II] = j;
@ -1394,13 +1465,29 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeGPU(typename Kokkos::TeamP
if (HalfNeigh && !Newton && j <= i) continue;
if (!HalfNeigh && j == i) continue;
// for triclinic, bin stencil is full in all 3 dims
// must use itag/jtag to eliminate half the I/J interactions
// cannot use I/J exact coord comparision
// b/c transforming orthog -> lambda -> orthog for ghost atoms
// with an added PBC offset can shift all 3 coords by epsilon
if (HalfNeigh && Newton && Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
if (x(j,1) == ytmp) {
if (x(j,0) < xtmp) continue;
if (x(j,0) == xtmp && j <= i) continue;
if (j <= i) continue;
if (j >= nlocal) {
const tagint jtag = other_x[m + 5 * atoms_per_bin];
if (itag > jtag) {
if ((itag+jtag) % 2 == 0) continue;
} else if (itag < jtag) {
if ((itag+jtag) % 2 == 1) continue;
} else {
if (fabs(x(j,2)-ztmp) > delta) {
if (x(j,2) < ztmp) continue;
} else if (fabs(x(j,1)-ytmp) > delta) {
if (x(j,1) < ytmp) continue;
} else {
if (x(j,0) < xtmp) continue;
}
}
}
}

View File

@ -189,6 +189,8 @@ class NeighborKokkosExecute
public:
NeighListKokkos<DeviceType> neigh_list;
const double delta;
// data from Neighbor class
const typename AT::t_xfloat_2d_randomread cutneighsq;
@ -282,7 +284,7 @@ class NeighborKokkosExecute
const int & _mbinx,const int & _mbiny,const int & _mbinz,
const int & _mbinxlo,const int & _mbinylo,const int & _mbinzlo,
const X_FLOAT &_bininvx,const X_FLOAT &_bininvy,const X_FLOAT &_bininvz,
const int & _exclude,const int & _nex_type,
const double &_delta,const int & _exclude,const int & _nex_type,
const typename AT::t_int_1d_const & _ex1_type,
const typename AT::t_int_1d_const & _ex2_type,
const typename AT::t_int_2d_const & _ex_type,
@ -301,7 +303,7 @@ class NeighborKokkosExecute
const typename ArrayTypes<LMPHostType>::t_int_scalar _h_resize,
const typename AT::t_int_scalar _new_maxneighs,
const typename ArrayTypes<LMPHostType>::t_int_scalar _h_new_maxneighs):
neigh_list(_neigh_list), cutneighsq(_cutneighsq),exclude(_exclude),
neigh_list(_neigh_list), cutneighsq(_cutneighsq),delta(_delta),exclude(_exclude),
nex_type(_nex_type),ex1_type(_ex1_type),ex2_type(_ex2_type),
ex_type(_ex_type),nex_group(_nex_group),
ex1_bit(_ex1_bit),ex2_bit(_ex2_bit),

View File

@ -62,8 +62,8 @@ void NPairTrimKokkos<DeviceType>::trim_to_kokkos(NeighList *list)
d_ilist_copy = k_list_copy->d_ilist;
d_numneigh_copy = k_list_copy->d_numneigh;
d_neighbors_copy = k_list_copy->d_neighbors;
int inum_copy = list->listcopy->inum;
if (list->ghost) inum_copy += list->listcopy->gnum;
int inum_trim = list->listcopy->inum;
if (list->ghost) inum_trim += list->listcopy->gnum;
NeighListKokkos<DeviceType>* k_list = static_cast<NeighListKokkos<DeviceType>*>(list);
k_list->maxneighs = k_list_copy->maxneighs; // simple, but could be made more memory efficient
@ -75,7 +75,7 @@ void NPairTrimKokkos<DeviceType>::trim_to_kokkos(NeighList *list)
// loop over parent list and trim
copymode = 1;
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNPairTrim>(0,inum_copy),*this);
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagNPairTrim>(0,inum_trim),*this);
copymode = 0;
list->inum = k_list_copy->inum;
@ -132,8 +132,8 @@ void NPairTrimKokkos<DeviceType>::trim_to_cpu(NeighList *list)
int inum = listcopy->inum;
int gnum = listcopy->gnum;
int inum_all = inum;
if (list->ghost) inum_all += gnum;
int inum_trim = inum;
if (list->ghost) inum_trim += gnum;
auto h_ilist = listcopy_kk->k_ilist.h_view;
auto h_numneigh = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_numneigh);
auto h_neighbors = Kokkos::create_mirror_view_and_copy(LMPHostType(),listcopy_kk->d_neighbors);
@ -151,7 +151,7 @@ void NPairTrimKokkos<DeviceType>::trim_to_cpu(NeighList *list)
MyPage<int> *ipage = list->ipage;
ipage->reset();
for (int ii = 0; ii < inum_all; ii++) {
for (int ii = 0; ii < inum_trim; ii++) {
int n = 0;
neighptr = ipage->vget();

View File

@ -112,15 +112,18 @@ class PairBuckCoulCutKokkos : public PairBuckCoulCut {
void allocate() override;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true,0>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,true,1>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALF,true>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALFTHREAD,true>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false,0>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,FULL,false,1>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALF,false>;
friend struct PairComputeFunctor<PairBuckCoulCutKokkos,HALFTHREAD,false>;
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALF,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALFTHREAD,void>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,0>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,FULL,1>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALF>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulCutKokkos,HALFTHREAD>(PairBuckCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckCoulCutKokkos,void>(PairBuckCoulCutKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairBuckCoulCutKokkos>(PairBuckCoulCutKokkos*);

View File

@ -115,27 +115,33 @@ class PairBuckCoulLongKokkos : public PairBuckCoulLong {
void allocate() override;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<1> >(PairBuckCoulLongKokkos*,
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<1>>(PairBuckCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<0> >(PairBuckCoulLongKokkos*,
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairBuckCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairBuckCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckCoulLongKokkos,CoulLongTable<0>>(PairBuckCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairBuckCoulLongKokkos>(PairBuckCoulLongKokkos*);

View File

@ -91,16 +91,19 @@ class PairBuckKokkos : public PairBuck {
int nlocal,nall,eflag,vflag;
void allocate() override;
friend struct PairComputeFunctor<PairBuckKokkos,FULL,true>;
friend struct PairComputeFunctor<PairBuckKokkos,FULL,true,0>;
friend struct PairComputeFunctor<PairBuckKokkos,FULL,true,1>;
friend struct PairComputeFunctor<PairBuckKokkos,HALF,true>;
friend struct PairComputeFunctor<PairBuckKokkos,HALFTHREAD,true>;
friend struct PairComputeFunctor<PairBuckKokkos,FULL,false>;
friend struct PairComputeFunctor<PairBuckKokkos,FULL,false,0>;
friend struct PairComputeFunctor<PairBuckKokkos,FULL,false,1>;
friend struct PairComputeFunctor<PairBuckKokkos,HALF,false>;
friend struct PairComputeFunctor<PairBuckKokkos,HALFTHREAD,false>;
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckKokkos,void>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,0>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,FULL,1>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALF>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairBuckKokkos,HALFTHREAD>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairBuckKokkos>(PairBuckKokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairBuckKokkos>(PairBuckKokkos*);
};

View File

@ -112,15 +112,18 @@ class PairCoulCutKokkos : public PairCoulCut {
double qqrd2e;
void allocate() override;
friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true>;
friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true,0>;
friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,true,1>;
friend struct PairComputeFunctor<PairCoulCutKokkos,HALF,true>;
friend struct PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,true>;
friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false>;
friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false,0>;
friend struct PairComputeFunctor<PairCoulCutKokkos,FULL,false,1>;
friend struct PairComputeFunctor<PairCoulCutKokkos,HALF,false>;
friend struct PairComputeFunctor<PairCoulCutKokkos,HALFTHREAD,false>;
friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALF,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALFTHREAD,void>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,0>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,FULL,1>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALF>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulCutKokkos,HALFTHREAD>(PairCoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairCoulCutKokkos,void>(PairCoulCutKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairCoulCutKokkos>(PairCoulCutKokkos*);

View File

@ -112,15 +112,18 @@ class PairCoulDebyeKokkos : public PairCoulDebye {
double qqrd2e;
void allocate() override;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true,0>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,true,1>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALF,true>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALFTHREAD,true>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false,0>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,FULL,false,1>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALF,false>;
friend struct PairComputeFunctor<PairCoulDebyeKokkos,HALFTHREAD,false>;
friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALF,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALFTHREAD,void>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,0>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,FULL,1>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALF>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulDebyeKokkos,HALFTHREAD>(PairCoulDebyeKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairCoulDebyeKokkos,void>(PairCoulDebyeKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairCoulDebyeKokkos>(PairCoulDebyeKokkos*);

View File

@ -114,27 +114,33 @@ class PairCoulLongKokkos : public PairCoulLong {
void allocate() override;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<1> >(PairCoulLongKokkos*,
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<1>>(PairCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<0> >(PairCoulLongKokkos*,
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairCoulLongKokkos,CoulLongTable<0>>(PairCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairCoulLongKokkos>(PairCoulLongKokkos*);

View File

@ -1477,7 +1477,7 @@ void PairEAMAlloyKokkos<DeviceType>::file2array_alloy()
template<typename DeviceType>
template<class TAG>
struct PairEAMAlloyKokkos<DeviceType>::policyInstance {
KOKKOS_INLINE_FUNCTION
static auto get(int inum) {
auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
return policy;
@ -1488,7 +1488,7 @@ struct PairEAMAlloyKokkos<DeviceType>::policyInstance {
template<>
template<class TAG>
struct PairEAMAlloyKokkos<Kokkos::Experimental::HIP>::policyInstance {
KOKKOS_INLINE_FUNCTION
static auto get(int inum) {
static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
"Breaking assumption of spline dim for KernelAB and KernelC scratch caching");

View File

@ -1487,7 +1487,7 @@ void PairEAMFSKokkos<DeviceType>::file2array_fs()
template<typename DeviceType>
template<class TAG>
struct PairEAMFSKokkos<DeviceType>::policyInstance {
KOKKOS_INLINE_FUNCTION
static auto get(int inum) {
auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
return policy;
@ -1498,7 +1498,7 @@ struct PairEAMFSKokkos<DeviceType>::policyInstance {
template<>
template<class TAG>
struct PairEAMFSKokkos<Kokkos::Experimental::HIP>::policyInstance {
KOKKOS_INLINE_FUNCTION
static auto get(int inum) {
static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
"Breaking assumption of spline dim for KernelAB and KernelC scratch caching");

View File

@ -1162,7 +1162,7 @@ void PairEAMKokkos<DeviceType>::ev_tally(EV_FLOAT &ev, const int &i, const int &
template<typename DeviceType>
template<class TAG>
struct PairEAMKokkos<DeviceType>::policyInstance {
KOKKOS_INLINE_FUNCTION
static auto get(int inum) {
auto policy = Kokkos::RangePolicy<DeviceType, TAG>(0,inum);
return policy;
@ -1173,7 +1173,7 @@ struct PairEAMKokkos<DeviceType>::policyInstance {
template<>
template<class TAG>
struct PairEAMKokkos<Kokkos::Experimental::HIP>::policyInstance {
KOKKOS_INLINE_FUNCTION
static auto get(int inum) {
static_assert(t_ffloat_2d_n7::static_extent(2) == 7,
"Breaking assumption of spline dim for KernelAB and KernelC scratch caching");

View File

@ -50,7 +50,7 @@ struct DoCoul<1> {
//Specialisation for Neighborlist types Half, HalfThread, Full
template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, class Specialisation = void>
template <class PairStyle, int NEIGHFLAG, bool STACKPARAMS, int ZEROFLAG = 0, class Specialisation = void>
struct PairComputeFunctor {
typedef typename PairStyle::device_type device_type ;
typedef ArrayTypes<device_type> AT;
@ -137,7 +137,7 @@ struct PairComputeFunctor {
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
if (NEIGHFLAG == FULL) {
if (NEIGHFLAG == FULL && ZEROFLAG) {
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
@ -211,7 +211,7 @@ struct PairComputeFunctor {
F_FLOAT fytmp = 0.0;
F_FLOAT fztmp = 0.0;
if (NEIGHFLAG == FULL) {
if (NEIGHFLAG == FULL && ZEROFLAG) {
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
@ -292,11 +292,13 @@ struct PairComputeFunctor {
const X_FLOAT ztmp = c.x(i,2);
const int itype = c.type(i);
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
if (ZEROFLAG) {
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
}
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
@ -355,11 +357,13 @@ struct PairComputeFunctor {
const int itype = c.type(i);
const F_FLOAT qtmp = c.q(i);
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
if (ZEROFLAG) {
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
}
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
@ -423,11 +427,13 @@ struct PairComputeFunctor {
const X_FLOAT ztmp = c.x(i,2);
const int itype = c.type(i);
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
if (ZEROFLAG) {
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
}
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
@ -525,11 +531,13 @@ struct PairComputeFunctor {
const int itype = c.type(i);
const F_FLOAT qtmp = c.q(i);
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
if (ZEROFLAG) {
Kokkos::single(Kokkos::PerThread(team), [&] (){
f(i,0) = 0.0;
f(i,1) = 0.0;
f(i,2) = 0.0;
});
}
const AtomNeighborsConst neighbors_i = list.get_neighbors_const(i);
const int jnum = list.d_numneigh[i];
@ -740,7 +748,7 @@ struct PairComputeFunctor {
// By having the enable_if with a ! and without it, exactly one of the functions
// pair_compute_neighlist will match - either the dummy version
// or the real one further below.
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<!((NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0), NeighListKokkos<typename PairStyle::device_type>*> list) {
EV_FLOAT ev;
(void) fpair;
@ -770,7 +778,7 @@ int GetTeamSize(FunctorStyle& KOKKOS_GPU_ARG(functor), int KOKKOS_GPU_ARG(inum),
}
// Submit ParallelFor for NEIGHFLAG=HALF,HALFTHREAD,FULL
template<class PairStyle, unsigned NEIGHFLAG, class Specialisation>
template<class PairStyle, unsigned NEIGHFLAG, int ZEROFLAG = 0, class Specialisation = void>
EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&PairStyle::EnabledNeighFlags) != 0, NeighListKokkos<typename PairStyle::device_type>*> list) {
EV_FLOAT ev;
@ -784,13 +792,13 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
int atoms_per_team = 32;
if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
else Kokkos::parallel_for(policy,ff);
} else {
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
atoms_per_team = GetTeamSize<typename PairStyle::device_type>(ff, list->inum, (fpair->eflag || fpair->vflag), atoms_per_team, vector_length);
Kokkos::TeamPolicy<typename PairStyle::device_type,Kokkos::IndexType<int> > policy(list->inum,atoms_per_team,vector_length);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(policy,ff,ev);
@ -798,12 +806,12 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
}
} else {
if (fpair->atom->ntypes > MAX_TYPES_STACKPARAMS) {
PairComputeFunctor<PairStyle,NEIGHFLAG,false,Specialisation > ff(fpair,list);
PairComputeFunctor<PairStyle,NEIGHFLAG,false,ZEROFLAG,Specialisation > ff(fpair,list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
ff.contribute();
} else {
PairComputeFunctor<PairStyle,NEIGHFLAG,true,Specialisation > ff(fpair,list);
PairComputeFunctor<PairStyle,NEIGHFLAG,true,ZEROFLAG,Specialisation > ff(fpair,list);
if (fpair->eflag || fpair->vflag) Kokkos::parallel_reduce(list->inum,ff,ev);
else Kokkos::parallel_for(list->inum,ff);
ff.contribute();
@ -812,16 +820,21 @@ EV_FLOAT pair_compute_neighlist (PairStyle* fpair, std::enable_if_t<(NEIGHFLAG&P
return ev;
}
template<class PairStyle, class Specialisation>
template<class PairStyle, class Specialisation = void>
EV_FLOAT pair_compute (PairStyle* fpair, NeighListKokkos<typename PairStyle::device_type>* list) {
EV_FLOAT ev;
if (fpair->neighflag == FULL) {
fpair->fuse_force_clear_flag = 1;
ev = pair_compute_neighlist<PairStyle,FULL,Specialisation> (fpair,list);
if (utils::strmatch(fpair->lmp->force->pair_style,"^hybrid/overlay")) {
fpair->fuse_force_clear_flag = 0;
ev = pair_compute_neighlist<PairStyle,FULL,0,Specialisation> (fpair,list);
} else {
fpair->fuse_force_clear_flag = 1;
ev = pair_compute_neighlist<PairStyle,FULL,1,Specialisation> (fpair,list);
}
} else if (fpair->neighflag == HALFTHREAD) {
ev = pair_compute_neighlist<PairStyle,HALFTHREAD,Specialisation> (fpair,list);
ev = pair_compute_neighlist<PairStyle,HALFTHREAD,0,Specialisation> (fpair,list);
} else if (fpair->neighflag == HALF) {
ev = pair_compute_neighlist<PairStyle,HALF,Specialisation> (fpair,list);
ev = pair_compute_neighlist<PairStyle,HALF,0,Specialisation> (fpair,list);
}
return ev;
}

View File

@ -110,27 +110,33 @@ class PairLJCharmmCoulCharmmImplicitKokkos : public PairLJCharmmCoulCharmmImplic
void allocate() override;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,CoulLongTable<1> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<1> >(PairLJCharmmCoulCharmmImplicitKokkos*,
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<1>>(PairLJCharmmCoulCharmmImplicitKokkos*,
NeighListKokkos<DeviceType>*);
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,CoulLongTable<0> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<0> >(PairLJCharmmCoulCharmmImplicitKokkos*,
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,true,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,FULL,false,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALF,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmImplicitKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmImplicitKokkos,CoulLongTable<0>>(PairLJCharmmCoulCharmmImplicitKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCharmmCoulCharmmImplicitKokkos>(PairLJCharmmCoulCharmmImplicitKokkos*);

View File

@ -108,27 +108,33 @@ class PairLJCharmmCoulCharmmKokkos : public PairLJCharmmCoulCharmm {
void allocate() override;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,CoulLongTable<1> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<1> >(PairLJCharmmCoulCharmmKokkos*,
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<1>>(PairLJCharmmCoulCharmmKokkos*,
NeighListKokkos<DeviceType>*);
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,CoulLongTable<0> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<0> >(PairLJCharmmCoulCharmmKokkos*,
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,true,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,FULL,false,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALF,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulCharmmKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulCharmmKokkos,CoulLongTable<0>>(PairLJCharmmCoulCharmmKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCharmmCoulCharmmKokkos>(PairLJCharmmCoulCharmmKokkos*);

View File

@ -106,27 +106,33 @@ class PairLJCharmmCoulLongKokkos : public PairLJCharmmCoulLong {
void allocate() override;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<1> >(PairLJCharmmCoulLongKokkos*,
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<1>>(PairLJCharmmCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<0> >(PairLJCharmmCoulLongKokkos*,
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJCharmmCoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJCharmmCoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJCharmmCoulLongKokkos,CoulLongTable<0>>(PairLJCharmmCoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJCharmmCoulLongKokkos>(PairLJCharmmCoulLongKokkos*);

View File

@ -104,15 +104,18 @@ class PairLJClass2CoulCutKokkos : public PairLJClass2CoulCut {
double qqrd2e;
void allocate() override;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true,0>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,true,1>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALF,true>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALFTHREAD,true>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false,0>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,FULL,false,1>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALF,false>;
friend struct PairComputeFunctor<PairLJClass2CoulCutKokkos,HALFTHREAD,false>;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALF,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALFTHREAD,void>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,0>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,FULL,1>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALF>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulCutKokkos,HALFTHREAD>(PairLJClass2CoulCutKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2CoulCutKokkos,void>(PairLJClass2CoulCutKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJClass2CoulCutKokkos>(PairLJClass2CoulCutKokkos*);

View File

@ -107,27 +107,33 @@ class PairLJClass2CoulLongKokkos : public PairLJClass2CoulLong {
double qqrd2e;
void allocate() override;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,CoulLongTable<1> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,CoulLongTable<1> >;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<1> >(PairLJClass2CoulLongKokkos*,
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,1,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,0,CoulLongTable<1>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<1>>;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,1,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,0,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<1>>(PairLJClass2CoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,CoulLongTable<0> >;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,CoulLongTable<0> >;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<0> >(PairLJClass2CoulLongKokkos*,
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,true,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,true,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,FULL,false,1,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALF,false,0,CoulLongTable<0>>;
friend struct PairComputeFunctor<PairLJClass2CoulLongKokkos,HALFTHREAD,false,0,CoulLongTable<0>>;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,FULL,1,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALF,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2CoulLongKokkos,HALFTHREAD,0,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2CoulLongKokkos,CoulLongTable<0>>(PairLJClass2CoulLongKokkos*,
NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJClass2CoulLongKokkos>(PairLJClass2CoulLongKokkos*);

View File

@ -96,16 +96,19 @@ class PairLJClass2Kokkos : public PairLJClass2 {
int nlocal,nall,eflag,vflag;
void allocate() override;
friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true,0>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,true,1>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,HALF,true>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,true>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false,0>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,FULL,false,1>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,HALF,false>;
friend struct PairComputeFunctor<PairLJClass2Kokkos,HALFTHREAD,false>;
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2Kokkos,void>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,0>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,FULL,1>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALF>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute_neighlist<PairLJClass2Kokkos,HALFTHREAD>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend EV_FLOAT pair_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*,NeighListKokkos<DeviceType>*);
friend void pair_virial_fdotr_compute<PairLJClass2Kokkos>(PairLJClass2Kokkos*);
};

Some files were not shown because too many files have changed in this diff Show More