Merge branch 'master' into citation-refactor

This commit is contained in:
Axel Kohlmeyer
2021-01-08 18:32:30 -05:00
546 changed files with 9286 additions and 7269 deletions

View File

@ -25,7 +25,7 @@ set(LAMMPS_POTENTIALS_DIR ${LAMMPS_DIR}/potentials)
find_package(Git)
# by default, install into $HOME/.local (not /usr/local), so that no root access (and sudo!!) is needed
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "default install path" FORCE )
endif()
@ -33,7 +33,7 @@ endif()
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules)
# make sure LIBRARY_PATH is set if environment variable is set
if (DEFINED ENV{LIBRARY_PATH})
if(DEFINED ENV{LIBRARY_PATH})
list(APPEND CMAKE_LIBRARY_PATH "$ENV{LIBRARY_PATH}")
message(STATUS "Appending $ENV{LIBRARY_PATH} to CMAKE_LIBRARY_PATH: ${CMAKE_LIBRARY_PATH}")
endif()
@ -373,7 +373,7 @@ else()
set(CUDA_REQUEST_PIC)
endif()
foreach(PKG_WITH_INCL KSPACE PYTHON VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
foreach(PKG_WITH_INCL KSPACE PYTHON MLIAP VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
USER-QUIP USER-SCAFACOS USER-SMD USER-VTK KIM LATTE MESSAGE MSCG COMPRESS)
if(PKG_${PKG_WITH_INCL})
include(Packages/${PKG_WITH_INCL})
@ -580,7 +580,7 @@ add_dependencies(lammps gitversion)
############################################
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
list (FIND LANGUAGES "Fortran" _index)
if (${_index} GREATER -1)
if(${_index} GREATER -1)
target_link_libraries(lammps PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
endif()
set(LAMMPS_CXX_HEADERS angle.h atom.h bond.h citeme.h comm.h compute.h dihedral.h domain.h error.h fix.h force.h group.h improper.h
@ -737,14 +737,14 @@ if(OPTIONS)
endif()
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
list (FIND LANGUAGES "Fortran" _index)
if (${_index} GREATER -1)
if(${_index} GREATER -1)
message(STATUS "Fortran Compiler: ${CMAKE_Fortran_COMPILER}
Type: ${CMAKE_Fortran_COMPILER_ID}
Version: ${CMAKE_Fortran_COMPILER_VERSION}
Fortran Flags:${CMAKE_Fortran_FLAGS} ${CMAKE_Fortran_FLAGS_${BTYPE}}")
endif()
list (FIND LANGUAGES "C" _index)
if (${_index} GREATER -1)
if(${_index} GREATER -1)
message(STATUS "C compiler: ${CMAKE_C_COMPILER}
Type: ${CMAKE_C_COMPILER_ID}
Version: ${CMAKE_C_COMPILER_VERSION}

View File

@ -8,7 +8,7 @@ else()
find_package(Python3 COMPONENTS Interpreter QUIET)
endif()
if (Python3_EXECUTABLE)
if(Python3_EXECUTABLE)
if(Python3_VERSION VERSION_GREATER_EQUAL 3.5)
add_custom_target(
check-whitespace

View File

@ -0,0 +1,30 @@
# Find the Cythonize tool.
#
# This code sets the following variables:
#
# Cythonize_EXECUTABLE
#
# adapted from https://github.com/cmarshall108/cython-cmake-example/blob/master/cmake/FindCython.cmake
#=============================================================================
if(CMAKE_VERSION VERSION_LESS 3.12)
find_package(PythonInterp 3.6 QUIET) # Deprecated since version 3.12
if(PYTHONINTERP_FOUND)
set(Python3_EXECUTABLE ${PYTHON_EXECUTABLE})
endif()
else()
find_package(Python3 3.6 COMPONENTS Interpreter QUIET)
endif()
# Use the Cython executable that lives next to the Python executable
# if it is a local installation.
if(Python3_EXECUTABLE)
get_filename_component(_python_path ${Python3_EXECUTABLE} PATH)
find_program(Cythonize_EXECUTABLE
NAMES cythonize3 cythonize cythonize.bat
HINTS ${_python_path})
endif()
include(FindPackageHandleStandardArgs)
FIND_PACKAGE_HANDLE_STANDARD_ARGS(Cythonize REQUIRED_VARS Cythonize_EXECUTABLE)
mark_as_advanced(Cythonize_EXECUTABLE)

View File

@ -50,6 +50,7 @@ function(check_for_autogen_files source_dir)
file(GLOB SRC_AUTOGEN_FILES ${source_dir}/style_*.h)
file(GLOB SRC_AUTOGEN_PACKAGES ${source_dir}/packages_*.h)
list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h)
list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp)
foreach(_SRC ${SRC_AUTOGEN_FILES})
get_filename_component(FILENAME "${_SRC}" NAME)
if(EXISTS ${source_dir}/${FILENAME})

View File

@ -1,7 +1,7 @@
# Download and configure custom MPICH files for Windows
message(STATUS "Downloading and configuring MPICH-1.4.1 for Windows")
include(ExternalProject)
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
ExternalProject_Add(mpi4win_build
URL https://download.lammps.org/thirdparty/mpich2-win64-devel.tar.gz
URL_MD5 4939fdb59d13182fd5dd65211e469f14

View File

@ -0,0 +1,31 @@
# if PYTHON package is included we may also include Python support in MLIAP
set(MLIAP_ENABLE_PYTHON_DEFAULT OFF)
if(PKG_PYTHON)
find_package(Cythonize)
if(Cythonize_FOUND)
set(MLIAP_ENABLE_PYTHON_DEFAULT ON)
endif()
endif()
option(MLIAP_ENABLE_PYTHON "Build MLIAP package with Python support" ${MLIAP_ENABLE_PYTHON_DEFAULT})
if(MLIAP_ENABLE_PYTHON)
find_package(Cythonize REQUIRED)
if(NOT PKG_PYTHON)
message(FATAL_ERROR "Must enable PYTHON package for including Python support in MLIAP")
endif()
set(MLIAP_BINARY_DIR ${CMAKE_BINARY_DIR}/cython)
set(MLIAP_CYTHON_SRC ${LAMMPS_SOURCE_DIR}/MLIAP/mliap_model_python_couple.pyx)
get_filename_component(MLIAP_CYTHON_BASE ${MLIAP_CYTHON_SRC} NAME_WE)
file(MAKE_DIRECTORY ${MLIAP_BINARY_DIR})
add_custom_command(OUTPUT ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.h
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${MLIAP_CYTHON_SRC} ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx
COMMAND ${Cythonize_EXECUTABLE} -3 ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx
WORKING_DIRECTORY ${MLIAP_BINARY_DIR}
MAIN_DEPENDENCY ${MLIAP_CYTHON_SRC}
COMMENT "Generating C++ sources with cythonize...")
target_compile_definitions(lammps PRIVATE -DMLIAP_PYTHON)
target_sources(lammps PRIVATE ${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp)
target_include_directories(lammps PRIVATE ${MLIAP_BINARY_DIR})
endif()

View File

@ -37,6 +37,7 @@ This is the list of packages that may require additional steps.
* :ref:`KOKKOS <kokkos>`
* :ref:`LATTE <latte>`
* :ref:`MESSAGE <message>`
* :ref:`MLIAP <mliap>`
* :ref:`MSCG <mscg>`
* :ref:`OPT <opt>`
* :ref:`POEMS <poems>`
@ -770,6 +771,54 @@ be installed on your system.
----------
.. _mliap:
MLIAP package
---------------------------
Building the MLIAP package requires including the :ref:`SNAP <PKG-SNAP>`
package. There will be an error message if this requirement is not satisfied.
Using the *mliappy* model also requires enabling Python support, which
in turn requires the :ref:`PYTHON <PKG-PYTHON>`
package **and** requires that you have the `cython <https://cython.org>`_ software
installed and, with it, a working ``cythonize`` command. This feature requires
compiling LAMMPS with Python version 3.6 or later.
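As a quick way to verify these prerequisites before configuring the build,
you can run a short check in the Python interpreter you intend to use
(a minimal sketch, not part of the LAMMPS sources; it only assumes that a
standard `cython <https://cython.org>`_ installation provides the
``cythonize`` command):

.. code-block:: python

   # check the Python and Cython prerequisites for the *mliappy* model
   import shutil
   import sys

   assert sys.version_info >= (3, 6), "Python 3.6 or later is required"
   assert shutil.which("cythonize"), "no cythonize command found in PATH"
   import Cython
   print("Python", sys.version.split()[0], "with Cython", Cython.__version__)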
.. tabs::
.. tab:: CMake build
.. code-block:: bash
-D MLIAP_ENABLE_PYTHON=value # enable mliappy model (default is autodetect)
Without this setting, CMake will check whether it can find a
suitable Python version and the ``cythonize`` command and choose
the default accordingly. During the build procedure the provided
.pyx file(s) will be automatically translated to C++ code and compiled.
Please do **not** run ``cythonize`` manually in the ``src/MLIAP`` folder,
as that can lead to compilation errors if Python support is not enabled.
If you did so by accident, please remove the generated .cpp and .h files.
.. tab:: Traditional make
The build uses the ``lib/python/Makefile.mliap_python`` file in the
compile/link process to add a rule to update the files generated by
the ``cythonize`` command in case the corresponding .pyx file(s) were
modified. You may need to modify ``lib/python/Makefile.lammps``
if the LAMMPS build fails.
To force building MLIAP with Python support enabled,
you can add
``-DMLIAP_PYTHON`` to the ``LMP_INC`` variable in your machine makefile.
You may have to manually run the ``cythonize`` command on the .pyx file(s)
in the ``src`` folder, if this is not done automatically when
installing the MLIAP package. Please do **not** run ``cythonize``
in the ``src/MLIAP`` folder, as that can lead to compilation errors
if Python support is not enabled.
If you did so by accident, please remove the generated .cpp and .h files.
----------
.. _mscg:
MSCG package

View File

@ -1,5 +1,4 @@
Include packages in build
=========================
In LAMMPS, a package is a group of files that enable a specific set of

View File

@ -662,19 +662,31 @@ MLIAP package
**Contents:**
A general interface for machine-learning interatomic potentials.
A general interface for machine-learning interatomic potentials, including PyTorch.
**Install:**
To use this package, also the :ref:`SNAP package <PKG-SNAP>` needs to be installed.
To use this package, the :ref:`SNAP package <PKG-SNAP>` also needs
to be installed. To make the *mliappy* model available, the
:ref:`PYTHON package <PKG-PYTHON>` also needs to be installed, the version of
Python must be 3.6 or later, and the `cython <https://cython.org/>`_ software
must be installed.
**Author:** Aidan Thompson (Sandia).
**Author:** Aidan Thompson (Sandia), Nicholas Lubbers (LANL).
**Supporting info:**
* src/MLIAP: filenames -> commands
* src/MLIAP/README
* :doc:`pair_style mliap <pair_mliap>`
* examples/mliap
* :doc:`compute_style mliap <compute_mliap>`
* examples/mliap (see README)
When built with the *mliappy* model this package includes an extension for
coupling with Python models, including PyTorch. In this case, the Python
interpreter linked to LAMMPS will need the ``cython`` and ``numpy`` modules
installed. The provided examples build models with PyTorch, which
therefore also needs to be installed to run those examples.
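As a quick sanity check (a minimal sketch, not part of the package), you can
confirm that the Python interpreter linked to LAMMPS is able to import these
modules:

.. code-block:: python

   import importlib
   # cython and numpy are required for the *mliappy* model;
   # torch is only needed to run the bundled PyTorch examples
   for name in ("Cython", "numpy", "torch"):
       try:
           importlib.import_module(name)
           print(name, "found")
       except ImportError:
           print(name, "MISSING")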
----------

View File

@ -38,14 +38,14 @@ produce an executable compatible with a specific hardware.
:class: note
Kokkos with CUDA currently implicitly assumes that the MPI library is
CUDA-aware. This is not always the case, especially when using
GPU-aware. This is not always the case, especially when using
pre-compiled MPI libraries provided by a Linux distribution. This is
not a problem when using only a single GPU with a single MPI
rank. When running with multiple MPI ranks, you may see segmentation
faults without CUDA-aware MPI support. These can be avoided by adding
the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the
faults without GPU-aware MPI support. These can be avoided by adding
the flags :doc:`-pk kokkos gpu/aware off <Run_options>` to the
LAMMPS command line or by using the command :doc:`package kokkos
cuda/aware off <package>` in the input file.
gpu/aware off <package>` in the input file.
.. admonition:: AMD GPU support
:class: note
@ -242,8 +242,8 @@ case, also packing/unpacking communication buffers on the host may give
speedup (see the KOKKOS :doc:`package <package>` command). Using CUDA MPS
is recommended in this scenario.
Using a CUDA-aware MPI library is highly recommended. CUDA-aware MPI use can be
avoided by using :doc:`-pk kokkos cuda/aware no <package>`. As above for
Using a GPU-aware MPI library is highly recommended. GPU-aware MPI use can be
avoided by using :doc:`-pk kokkos gpu/aware off <package>`. As above for
multi-core CPUs (and no GPU), if N is the number of physical cores/node,
then the number of MPI tasks/node should not exceed N.

View File

@ -18,7 +18,7 @@ Syntax
.. parsed-literal::
*model* values = style
style = *linear* or *quadratic*
style = *linear* or *quadratic* or *mliappy*
*descriptor* values = style filename
style = *sna*
filename = name of file containing descriptor definitions
@ -56,13 +56,15 @@ and it is also straightforward to add new descriptor styles.
The compute *mliap* command must be followed by two keywords
*model* and *descriptor* in either order.
The *model* keyword is followed by a model style, currently limited to
either *linear* or *quadratic*.
The *model* keyword is followed by the model style (*linear*, *quadratic*, or *mliappy*).
The *mliappy* model is only available
if LAMMPS is built with Python support for the MLIAP package enabled,
which requires the PYTHON package.
The *descriptor* keyword is followed by a descriptor style, and additional arguments.
Currently the only descriptor style is *sna*, indicating the bispectrum component
descriptors used by the Spectral Neighbor Analysis Potential (SNAP) potentials of
:doc:`pair_style snap <pair_snap>`.
The compute currently supports just one descriptor style, but it
is straightforward to add new descriptor styles.
The SNAP descriptor style *sna* is the same as that used by :doc:`pair_style snap <pair_snap>`,
including the linear, quadratic, and chem variants.
A single additional argument specifies the descriptor filename
containing the parameters and setting used by the SNAP descriptor.
The descriptor filename usually ends in the *.mliap.descriptor* extension.
@ -162,9 +164,10 @@ potentials, see the examples in `FitSNAP <https://github.com/FitSNAP/FitSNAP>`_.
Restrictions
""""""""""""
This compute is part of the MLIAP package. It is only enabled if
LAMMPS was built with that package. In addition, building LAMMPS with the MLIAP package
This compute is part of the MLIAP package. It is only enabled if LAMMPS
was built with that package. In addition, building LAMMPS with the MLIAP package
requires building LAMMPS with the SNAP package.
The *mliappy* model requires building LAMMPS with the PYTHON package.
See the :doc:`Build package <Build_package>` doc page for more info.
Related commands

View File

@ -115,8 +115,8 @@ The optional keyword *chunksize* is only applicable when using the
the KOKKOS package and is ignored otherwise. This keyword controls
the number of atoms in each pass used to compute the bond-orientational
order parameters and is used to avoid running out of memory. For example
if there are 4000 atoms in the simulation and the *chunksize*
is set to 2000, the parameter calculation will be broken up
if there are 32768 atoms in the simulation and the *chunksize*
is set to 16384, the parameter calculation will be broken up
into two passes.
The value of :math:`Q_l` is set to zero for atoms not in the
@ -193,7 +193,7 @@ Default
The option defaults are *cutoff* = pair style cutoff, *nnn* = 12,
*degrees* = 5 4 6 8 10 12 i.e. :math:`Q_4`, :math:`Q_6`, :math:`Q_8`, :math:`Q_{10}`, and :math:`Q_{12}`,
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 2000
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 16384
----------

View File

@ -93,7 +93,7 @@ from a compute, fix, or variable, then see the :doc:`fix ave/chunk <fix_ave_chun
:doc:`fix ave/histo <fix_ave_histo>` commands. If you wish to convert a
per-atom quantity into a single global value, see the :doc:`compute reduce <compute_reduce>` command.
The input values must either be all scalars. What kinds of
The input values must be all scalars. What kinds of
correlations between input values are calculated is determined by the
*type* keyword as discussed below.

View File

@ -68,7 +68,7 @@ Syntax
*no_affinity* values = none
*kokkos* args = keyword value ...
zero or more keyword/value pairs may be appended
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* or *pair/only*
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *pair/comm/forward* or *fix/comm/forward* or *comm/reverse* or *gpu/aware* or *pair/only*
*neigh* value = *full* or *half*
full = full neighbor list
half = half neighbor list built in thread-safe manner
@ -84,16 +84,18 @@ Syntax
*binsize* value = size
size = bin size for neighbor list construction (distance units)
*comm* value = *no* or *host* or *device*
use value for comm/exchange and comm/forward and comm/reverse
use value for comm/exchange and comm/forward and pair/comm/forward and fix/comm/forward and comm/reverse
*comm/exchange* value = *no* or *host* or *device*
*comm/forward* value = *no* or *host* or *device*
*pair/comm/forward* value = *no* or *device*
*fix/comm/forward* value = *no* or *device*
*comm/reverse* value = *no* or *host* or *device*
no = perform communication pack/unpack in non-KOKKOS mode
host = perform pack/unpack on host (e.g. with OpenMP threading)
device = perform pack/unpack on device (e.g. on GPU)
*cuda/aware* = *off* or *on*
off = do not use CUDA-aware MPI
on = use CUDA-aware MPI (default)
*gpu/aware* = *off* or *on*
off = do not use GPU-aware MPI
on = use GPU-aware MPI (default)
*pair/only* = *off* or *on*
off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default)
on = use device acceleration only for pair styles (and host acceleration for others)
@ -498,7 +500,8 @@ because the GPU is faster at performing pairwise interactions, then this
rule of thumb may give too large a binsize and the default should be
overridden with a smaller value.
The *comm* and *comm/exchange* and *comm/forward* and *comm/reverse*
The *comm* and *comm/exchange* and *comm/forward* and *pair/comm/forward*
and *fix/comm/forward* and *comm/reverse*
keywords determine whether the host or device performs the packing and
unpacking of data when communicating per-atom data between processors.
"Exchange" communication happens only on timesteps that neighbor lists
@ -506,18 +509,22 @@ are rebuilt. The data is only for atoms that migrate to new processors.
"Forward" communication happens every timestep. "Reverse" communication
happens every timestep if the *newton* option is on. The data is for
atom coordinates and any other atom properties that need to be updated
for ghost atoms owned by each processor.
for ghost atoms owned by each processor. "Pair/comm" controls additional
communication in pair styles, such as pair_style EAM. "Fix/comm" controls
additional communication in fixes, such as fix SHAKE.
The *comm* keyword is simply a short-cut to set the same value for both
the *comm/exchange* and *comm/forward* and *comm/reverse* keywords.
The *comm* keyword is simply a short-cut to set the same value for all
the comm keywords.
The value options for all 3 keywords are *no* or *host* or *device*\ . A
The value options for the keywords are *no* or *host* or *device*\ . A
value of *no* means to use the standard non-KOKKOS method of
packing/unpacking data for the communication. A value of *host* means to
use the host, typically a multi-core CPU, and perform the
packing/unpacking in parallel with threads. A value of *device* means to
use the device, typically a GPU, to perform the packing/unpacking
operation.
operation. If a value of *host* is used for the *pair/comm/forward* or
*fix/comm/forward* keyword, it will automatically be changed to *no*
since these keywords don't support *host* mode.
The optimal choice for these keywords depends on the input script and
the hardware used. The *no* value is useful for verifying that the
@ -538,18 +545,18 @@ pack/unpack communicated data. When running small systems on a GPU,
performing the exchange pack/unpack on the host CPU can give speedup
since it reduces the number of CUDA kernel launches.
The *cuda/aware* keyword chooses whether CUDA-aware MPI will be used. When
The *gpu/aware* keyword chooses whether GPU-aware MPI will be used. When
this keyword is set to *on*\ , buffers in GPU memory are passed directly
through MPI send/receive calls. This reduces overhead of first copying
the data to the host CPU. However CUDA-aware MPI is not supported on all
the data to the host CPU. However GPU-aware MPI is not supported on all
systems, which can lead to segmentation faults and would require using a
value of *off*\ . If LAMMPS can safely detect that CUDA-aware MPI is not
value of *off*\ . If LAMMPS can safely detect that GPU-aware MPI is not
available (currently only possible with OpenMPI v2.0.0 or later), then
the *cuda/aware* keyword is automatically set to *off* by default. When
the *cuda/aware* keyword is set to *off* while any of the *comm*
the *gpu/aware* keyword is automatically set to *off* by default. When
the *gpu/aware* keyword is set to *off* while any of the *comm*
keywords are set to *device*\ , the value for these *comm* keywords will
be automatically changed to *no*\ . This setting has no effect if not
running on GPUs or if using only one MPI rank. CUDA-aware MPI is available
running on GPUs or if using only one MPI rank. GPU-aware MPI is available
for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the
"MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM
Spectrum MPI when the "-gpu" flag is used.
@ -558,7 +565,7 @@ The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied
when using an accelerator device. By default device acceleration is
always used for all available styles. With *pair/only* set to *on* the
suffix setting will choose device acceleration only for pair styles and
run all other force computations concurrently on the host CPU.
run all other force computations on the host CPU.
The *comm* flags will also automatically be changed to *no*\ . This can
result in better performance for certain configurations and system sizes.
@ -671,8 +678,8 @@ script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
For the KOKKOS package, the option defaults for GPUs are neigh = full,
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
value, comm = device, cuda/aware = on. When LAMMPS can safely detect
that CUDA-aware MPI is not available, the default value of cuda/aware
value, comm = device, gpu/aware = on. When LAMMPS can safely detect
that GPU-aware MPI is not available, the default value of gpu/aware
becomes "off". For CPUs or Xeon Phis, the option defaults are neigh =
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The
option neigh/thread = on when there are 16K atoms or less on an MPI

View File

@ -16,7 +16,7 @@ Syntax
.. parsed-literal::
*model* values = style filename
style = *linear* or *quadratic*
style = *linear* or *quadratic* or *mliappy*
filename = name of file containing model definitions
*descriptor* values = style filename
style = *sna*
@ -40,12 +40,15 @@ definitions of the interatomic potential functional form (*model*)
and the geometric quantities that characterize the atomic positions
(*descriptor*). By defining *model* and *descriptor* separately,
it is possible to use many different models with a given descriptor,
or many different descriptors with a given model. Currently, the pair_style
supports just two models, *linear* and *quadratic*,
and one descriptor, *sna*, the SNAP descriptor used by :doc:`pair_style snap <pair_snap>`, including the linear, quadratic,
and chem variants. Work is currently underway to extend
the interface to handle neural network energy models,
and it is also straightforward to add new descriptor styles.
or many different descriptors with a given model. The
pair style currently supports just one descriptor style, but it
is straightforward to add new descriptor styles.
The SNAP descriptor style *sna* is the same as that used by :doc:`pair_style snap <pair_snap>`,
including the linear, quadratic, and chem variants.
The available models are *linear*, *quadratic*, and *mliappy*.
The *mliappy* style can be used to couple Python models,
e.g. PyTorch neural network energy models, and requires building
LAMMPS with the PYTHON package (see below).
In order to train a model, it is useful to know the gradient or derivative
of energy, force, and stress w.r.t. model parameters. This information
can be accessed using the related :doc:`compute mliap <compute_mliap>` command.
@ -59,9 +62,8 @@ that specify the mapping of MLIAP
element names to LAMMPS atom types,
where N is the number of LAMMPS atom types.
The *model* keyword is followed by a model style, currently limited to
either *linear* or *quadratic*. In both cases,
this is followed by a single argument specifying the model filename containing the
The *model* keyword is followed by the model style. This is followed
by a single argument specifying the model filename containing the
parameters for a set of elements.
The model filename usually ends in the *.mliap.model* extension.
It may contain parameters for many elements. The only requirement is that it
@ -82,6 +84,16 @@ for the :doc:`pair_style snap <pair_snap>` coefficient file.
Specifically, the line containing the element weight and radius is omitted,
since these are handled by the *descriptor*.
Notes on *mliappy* models:
When the *model* keyword is *mliappy*, the filename should end in '.pt'
or '.pth' for PyTorch models, or be a pickle file. To load a model from
memory (i.e. an existing Python object), specify the filename as
"LATER", and then call `lammps.mliap.load_model(model)` from Python
before using the pair style. When using LAMMPS in library mode, you will need to call
`lammps.mliap.activate_mliappy(lmp)` on the active LAMMPS object
before the pair style is defined. This call locates and loads the MLIAP-specific
Python module that is built into LAMMPS.
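A minimal sketch of this in-memory loading workflow, adapted from the bundled
``examples/mliap/mliap_pytorch_Ta06A.py`` script (the descriptor and model
files named below come from that example and must be present in the working
directory):

.. code-block:: python

   import lammps
   import lammps.mliap

   lmp = lammps.lammps(cmdargs=["-echo", "both"])
   # must be called before the mliappy pair style is defined
   lammps.mliap.activate_mliappy(lmp)

   lmp.commands_string("""
   units metal
   lattice bcc 3.316
   region box block 0 2 0 2 0 2
   create_box 1 box
   create_atoms 1 box
   mass 1 180.88
   pair_style mliap model mliappy LATER descriptor sna Ta06A.mliap.descriptor
   pair_coeff * * Ta
   """)

   # connect the Python model object declared as "LATER", then run
   import torch
   model = torch.load("Ta06A.mliap.pytorch.model.pt")
   lammps.mliap.load_model(model)
   lmp.command("run 0")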
The *descriptor* keyword is followed by a descriptor style, and additional arguments.
Currently the only descriptor style is *sna*, indicating the bispectrum component
descriptors used by the Spectral Neighbor Analysis Potential (SNAP) potentials of
@ -138,11 +150,13 @@ This pair style can only be used via the *pair* keyword of the
Restrictions
""""""""""""
This style is part of the MLIAP package. It is only enabled if LAMMPS
This pair style is part of the MLIAP package. It is only enabled if LAMMPS
was built with that package. In addition, building LAMMPS with the MLIAP package
requires building LAMMPS with the SNAP package.
The *mliappy* model requires building LAMMPS with the PYTHON package.
See the :doc:`Build package <Build_package>` doc page for more info.
Related commands
""""""""""""""""

View File

@ -152,7 +152,7 @@ The default values for these keywords are
* *chemflag* = 0
* *bnormflag* = 0
* *wselfallflag* = 0
* *chunksize* = 2000
* *chunksize* = 4096
If *quadraticflag* is set to 1, then the SNAP energy expression includes additional quadratic terms
that have been shown to increase the overall accuracy of the potential without much increase
@ -189,8 +189,8 @@ pair style *snap* with the KOKKOS package and is ignored otherwise.
This keyword controls
the number of atoms in each pass used to compute the bispectrum
components and is used to avoid running out of memory. For example
if there are 4000 atoms in the simulation and the *chunksize*
is set to 2000, the bispectrum calculation will be broken up
if there are 8192 atoms in the simulation and the *chunksize*
is set to 4096, the bispectrum calculation will be broken up
into two passes.
Detailed definitions for all the other keywords

View File

@ -558,6 +558,7 @@ Cygwin
cylindrically
Cyrot
cyrstals
cython
Daivis
Dammak
dampflag
@ -1918,6 +1919,7 @@ mK
mkdir
mkv
mliap
mliappy
mlparks
Mniszewski
mnt
@ -2508,6 +2510,7 @@ Pstart
Pstop
pstyle
Ptarget
pth
pthread
pthreads
ptm
@ -2536,6 +2539,7 @@ pymodule
pymol
pypar
pythonic
pytorch
Pyy
pz
Pz

examples/mliap/README (new file, 103 lines)
View File

@ -0,0 +1,103 @@
This directory contains multiple examples of
machine-learning potentials defined using the
MLIAP package in LAMMPS. The input files
are described below.
in.mliap.snap.Ta06A
-------------------
Run linear SNAP, equivalent to examples/snap/in.snap.Ta06A
in.mliap.snap.WBe.PRB2019
-------------------------
Run linear SNAP, equivalent to examples/snap/in.snap.WBe.PRB2019
in.mliap.snap.quadratic
-----------------------
Run quadratic SNAP
in.mliap.snap.chem
------------------
Run EME-SNAP, equivalent to examples/snap/in.snap.InP.JCPA2020
in.mliap.snap.compute
---------------------
Generate the A matrix, the gradients (w.r.t. coefficients)
of total potential energy, forces, and stress tensor for
linear SNAP, equivalent to in.snap.compute
in.mliap.quadratic.compute
--------------------------
Generate the A matrix, the gradients (w.r.t. coefficients)
of total potential energy, forces, and stress tensor
for quadratic SNAP, equivalent to in.snap.compute.quadratic
in.mliap.pytorch.Ta06A
-----------------------
This reproduces the output of in.mliap.snap.Ta06A above,
but using the Python coupling to PyTorch.
This example can be run in two different ways:
1: Running a LAMMPS executable: in.mliap.pytorch.Ta06A
First run ``python convert_mliap_Ta06A.py``. It creates
a PyTorch energy model that replicates the
SNAP Ta06A potential and saves it in the file
"Ta06A.mliap.pytorch.model.pt".
You can then run the example as follows
`lmp -in in.mliap.pytorch.Ta06A -echo both`
The resultant log.lammps output should be identical to that generated
by in.mliap.snap.Ta06A.
If this fails, see the instructions for building the MLIAP package
with Python support enabled. Also, confirm that the
LAMMPS embedded Python interpreter is
working by running ../examples/in.python.
2: Running a Python script: mliap_pytorch_Ta06A.py
Before testing this, ensure that the previous method
(running a LAMMPS executable) works.
You can run the example in serial:
`python mliap_pytorch_Ta06A.py`
or in parallel:
`mpirun -np 4 python mliap_pytorch_Ta06A.py`
The resultant log.lammps output should be identical to that generated
by in.mliap.snap.Ta06A and in.mliap.pytorch.Ta06A.
Not all Python installations support this mode of operation.
It requires that the Python interpreter be initialized. If not,
the script will exit with an error message.
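A minimal sketch of such a compatibility check (it mirrors the test that
python/lammps/mliap/__init__.py performs when the module is imported):

  # load the Python shared library this interpreter was built against and
  # check that it refers to the running, already-initialized interpreter
  import ctypes
  import sysconfig
  library = sysconfig.get_config_vars("INSTSONAME")[0]
  pylib = ctypes.CDLL(library)
  print("compatible" if pylib.Py_IsInitialized() else "NOT compatible")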
in.mliap.pytorch.relu1hidden
----------------------------
This example demonstrates a simple neural network potential
using PyTorch and SNAP descriptors.
`lmp -in in.mliap.pytorch.relu1hidden -echo both`
It was trained on just the energy component (no forces) of
the data used in the original SNAP Ta06A potential for
tantalum (Thompson, Swiler, Trott, Foiles, Tucker,
J Comp Phys, 285, 316 (2015)). Because of the very small amount
of energy training data, it uses just 1 hidden layer with
a ReLU activation function. It is not expected to be
very accurate for forces.
NOTE: Unlike the previous example, this example uses
a pre-built PyTorch file `Ta06A.mliap.pytorch.model.pt`.
It is read using `torch.load`,
which implicitly uses the Python `pickle` module.
This is known to be insecure. It is possible to construct malicious
pickle data that will execute arbitrary code during unpickling. Never
load data that could have come from an untrusted source, or that
could have been tampered with. Only load data you trust.

View File

@ -0,0 +1,18 @@
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
# Definition of SNAP potential Ta_Cand06A
# Assumes 1 LAMMPS atom type
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 73
# Specify hybrid with SNAP, ZBL
pair_style hybrid/overlay &
zbl ${zblcutinner} ${zblcutouter} &
mliap model mliappy Ta06A.mliap.pytorch.model.pt &
descriptor sna Ta06A.mliap.descriptor
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff * * mliap Ta

View File

@ -0,0 +1,26 @@
import sys
import numpy as np
import torch
# torch.nn modules useful for defining an MLIAPPY model.
from lammps.mliap.pytorch import TorchWrapper, IgnoreElems
# Read coefficients
coeffs = np.genfromtxt("Ta06A.mliap.model",skip_header=6)
# Write coefficients to a pytorch linear model
bias = coeffs[0]
weights = coeffs[1:]
lin = torch.nn.Linear(weights.shape[0],1)
lin.to(torch.float64)
with torch.autograd.no_grad():
    lin.weight.set_(torch.from_numpy(weights).unsqueeze(0))
    lin.bias.set_(torch.as_tensor(bias,dtype=torch.float64).unsqueeze(0))
# Wrap the pytorch model for usage with mliappy coupling.
model = IgnoreElems(lin) # The linear module does not use the types.
n_descriptors = lin.weight.shape[1]
n_elements = 1
linked_model = TorchWrapper(model,n_descriptors=n_descriptors,n_elements=n_elements)
torch.save(linked_model,"Ta06A.mliap.pytorch.model.pt")

View File

@ -0,0 +1,53 @@
# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.316
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable ny equal ${nrep}
variable nz equal ${nrep}
boundary p p p
lattice bcc $a
region box block 0 ${nx} 0 ${ny} 0 ${nz}
create_box 1 box
create_atoms 1 box
mass 1 180.88
# choose potential
include Ta06A.mliap.pytorch
# Setup output
compute eatom all pe/atom
compute energy all reduce sum c_eatom
compute satom all stress/atom NULL
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
thermo_style custom step temp epair c_energy etotal press v_press
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459 loop geom
fix 1 all nve
run ${nsteps}

View File

@ -0,0 +1,53 @@
# Demonstrate MLIAP interface to linear SNAP potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.316
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable ny equal ${nrep}
variable nz equal ${nrep}
boundary p p p
lattice bcc $a
region box block 0 ${nx} 0 ${ny} 0 ${nz}
create_box 1 box
create_atoms 1 box
mass 1 180.88
# choose potential
include relu1hidden.mliap.pytorch
# Setup output
compute eatom all pe/atom
compute energy all reduce sum c_eatom
compute satom all stress/atom NULL
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
thermo_style custom step temp epair c_energy etotal press v_press
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459 loop geom
fix 1 all nve
run ${nsteps}

View File

@ -1,4 +1,4 @@
# Demonstrate MLIAP interface to kinear SNAP potential
# Demonstrate MLIAP interface to linear SNAP potential
# Initialize simulation

View File

@ -0,0 +1,157 @@
LAMMPS (30 Nov 2020)
using 48 OpenMP thread(s) per MPI task
# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.316
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable nx equal 4
variable ny equal ${nrep}
variable ny equal 4
variable nz equal ${nrep}
variable nz equal 4
boundary p p p
lattice bcc $a
lattice bcc 3.316
Lattice spacing in x,y,z = 3.3160000 3.3160000 3.3160000
region box block 0 ${nx} 0 ${ny} 0 ${nz}
region box block 0 4 0 ${ny} 0 ${nz}
region box block 0 4 0 4 0 ${nz}
region box block 0 4 0 4 0 4
create_box 1 box
Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (13.264000 13.264000 13.264000)
1 by 1 by 1 MPI processor grid
create_atoms 1 box
Created 128 atoms
create_atoms CPU = 0.002 seconds
mass 1 180.88
# choose potential
include Ta06A.mliap.pytorch
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
# Definition of SNAP potential Ta_Cand06A
# Assumes 1 LAMMPS atom type
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 73
# Specify hybrid with SNAP, ZBL
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
pair_style hybrid/overlay zbl 4 ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
pair_style hybrid/overlay zbl 4 4.8 mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
Loading python model complete.
Reading potential file Ta06A.mliap.descriptor with DATE: 2014-09-05
SNAP keyword rcutfac 4.67637
SNAP keyword twojmax 6
SNAP keyword nelems 1
SNAP keyword elems Ta
SNAP keyword radelems 0.5
SNAP keyword welems 1
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0
SNAP keyword bzeroflag 0
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff 1 1 zbl 73 ${zblz}
pair_coeff 1 1 zbl 73 73
pair_coeff * * mliap Ta
# Setup output
compute eatom all pe/atom
compute energy all reduce sum c_eatom
compute satom all stress/atom NULL
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
thermo_style custom step temp epair c_energy etotal press v_press
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459 loop geom
fix 1 all nve
run ${nsteps}
run 100
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 5.8
ghost atom cutoff = 5.8
binsize = 2.9, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair zbl, perpetual, half/full from (2)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
(2) pair mliap, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 159.8 | 159.8 | 159.8 Mbytes
Step Temp E_pair c_energy TotEng Press v_press
0 300 -11.85157 -11.85157 -11.813095 2717.1661 -2717.1661
10 296.01467 -11.851059 -11.851059 -11.813095 2697.4796 -2697.4796
20 284.53666 -11.849587 -11.849587 -11.813095 2289.1527 -2289.1527
30 266.51577 -11.847275 -11.847275 -11.813095 1851.7131 -1851.7131
40 243.05007 -11.844266 -11.844266 -11.813095 1570.684 -1570.684
50 215.51032 -11.840734 -11.840734 -11.813094 1468.1899 -1468.1899
60 185.48331 -11.836883 -11.836883 -11.813094 1524.8757 -1524.8757
70 154.6736 -11.832931 -11.832931 -11.813094 1698.3351 -1698.3351
80 124.79303 -11.829099 -11.829099 -11.813094 1947.0715 -1947.0715
90 97.448054 -11.825592 -11.825592 -11.813094 2231.9563 -2231.9563
100 74.035418 -11.822589 -11.822589 -11.813094 2515.8526 -2515.8526
Loop time of 2.00236 on 48 procs for 100 steps with 128 atoms
Performance: 2.157 ns/day, 11.124 hours/ns, 49.941 timesteps/s
288.8% CPU use with 1 MPI tasks x 48 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 1.9998 | 1.9998 | 1.9998 | 0.0 | 99.87
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.0011814 | 0.0011814 | 0.0011814 | 0.0 | 0.06
Output | 0.00059724 | 0.00059724 | 0.00059724 | 0.0 | 0.03
Modify | 0.00047352 | 0.00047352 | 0.00047352 | 0.0 | 0.02
Other | | 0.0003468 | | | 0.02
Nlocal: 128.000 ave 128 max 128 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 727.000 ave 727 max 727 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 3712.00 ave 3712 max 3712 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 7424.00 ave 7424 max 7424 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 7424
Ave neighs/atom = 58.000000
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:03

View File

@ -0,0 +1,157 @@
LAMMPS (30 Nov 2020)
using 48 OpenMP thread(s) per MPI task
# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.316
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable nx equal 4
variable ny equal ${nrep}
variable ny equal 4
variable nz equal ${nrep}
variable nz equal 4
boundary p p p
lattice bcc $a
lattice bcc 3.316
Lattice spacing in x,y,z = 3.3160000 3.3160000 3.3160000
region box block 0 ${nx} 0 ${ny} 0 ${nz}
region box block 0 4 0 ${ny} 0 ${nz}
region box block 0 4 0 4 0 ${nz}
region box block 0 4 0 4 0 4
create_box 1 box
Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (13.264000 13.264000 13.264000)
1 by 2 by 2 MPI processor grid
create_atoms 1 box
Created 128 atoms
create_atoms CPU = 0.002 seconds
mass 1 180.88
# choose potential
include Ta06A.mliap.pytorch
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
# Definition of SNAP potential Ta_Cand06A
# Assumes 1 LAMMPS atom type
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 73
# Specify hybrid with SNAP, ZBL
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
pair_style hybrid/overlay zbl 4 ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
pair_style hybrid/overlay zbl 4 4.8 mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
Loading python model complete.
Reading potential file Ta06A.mliap.descriptor with DATE: 2014-09-05
SNAP keyword rcutfac 4.67637
SNAP keyword twojmax 6
SNAP keyword nelems 1
SNAP keyword elems Ta
SNAP keyword radelems 0.5
SNAP keyword welems 1
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0
SNAP keyword bzeroflag 0
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff 1 1 zbl 73 ${zblz}
pair_coeff 1 1 zbl 73 73
pair_coeff * * mliap Ta
# Setup output
compute eatom all pe/atom
compute energy all reduce sum c_eatom
compute satom all stress/atom NULL
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
thermo_style custom step temp epair c_energy etotal press v_press
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459 loop geom
fix 1 all nve
run ${nsteps}
run 100
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 5.8
ghost atom cutoff = 5.8
binsize = 2.9, bins = 5 5 5
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair zbl, perpetual, half/full from (2)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
(2) pair mliap, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 159.7 | 159.7 | 159.7 Mbytes
Step Temp E_pair c_energy TotEng Press v_press
0 300 -11.85157 -11.85157 -11.813095 2717.1661 -2717.1661
10 296.01467 -11.851059 -11.851059 -11.813095 2697.4796 -2697.4796
20 284.53666 -11.849587 -11.849587 -11.813095 2289.1527 -2289.1527
30 266.51577 -11.847275 -11.847275 -11.813095 1851.7131 -1851.7131
40 243.05007 -11.844266 -11.844266 -11.813095 1570.684 -1570.684
50 215.51032 -11.840734 -11.840734 -11.813094 1468.1899 -1468.1899
60 185.48331 -11.836883 -11.836883 -11.813094 1524.8757 -1524.8757
70 154.6736 -11.832931 -11.832931 -11.813094 1698.3351 -1698.3351
80 124.79303 -11.829099 -11.829099 -11.813094 1947.0715 -1947.0715
90 97.448054 -11.825592 -11.825592 -11.813094 2231.9563 -2231.9563
100 74.035418 -11.822589 -11.822589 -11.813094 2515.8526 -2515.8526
Loop time of 0.562802 on 192 procs for 100 steps with 128 atoms
Performance: 7.676 ns/day, 3.127 hours/ns, 177.682 timesteps/s
99.7% CPU use with 4 MPI tasks x 48 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 0.53583 | 0.54622 | 0.55401 | 0.9 | 97.05
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.0071442 | 0.01491 | 0.025289 | 5.4 | 2.65
Output | 0.00092525 | 0.00095771 | 0.0010166 | 0.0 | 0.17
Modify | 0.00014479 | 0.00015043 | 0.00015893 | 0.0 | 0.03
Other | | 0.0005624 | | | 0.10
Nlocal: 32.0000 ave 32 max 32 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Nghost: 431.000 ave 431 max 431 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Neighs: 928.000 ave 928 max 928 min
Histogram: 4 0 0 0 0 0 0 0 0 0
FullNghs: 1856.00 ave 1856 max 1856 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Total # of neighbors = 7424
Ave neighs/atom = 58.000000
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:02

View File

@ -0,0 +1,104 @@
# Demonstrate how to load a model from the python side.
# This is essentially the same as in.mliap.pytorch.Ta06A
# except that python is the driving program, and lammps
# is in library mode.
before_loading =\
"""# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
# Initialize simulation
variable nsteps index 100
variable nrep equal 4
variable a equal 3.316
units metal
# generate the box and atom positions using a BCC lattice
variable nx equal ${nrep}
variable ny equal ${nrep}
variable nz equal ${nrep}
boundary p p p
lattice bcc $a
region box block 0 ${nx} 0 ${ny} 0 ${nz}
create_box 1 box
create_atoms 1 box
mass 1 180.88
# choose potential
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
# Definition of SNAP potential Ta_Cand06A
# Assumes 1 LAMMPS atom type
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 73
# Specify hybrid with SNAP, ZBL
pair_style hybrid/overlay &
zbl ${zblcutinner} ${zblcutouter} &
mliap model mliappy LATER &
descriptor sna Ta06A.mliap.descriptor
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff * * mliap Ta
"""
after_loading =\
"""
# Setup output
compute eatom all pe/atom
compute energy all reduce sum c_eatom
compute satom all stress/atom NULL
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
thermo_style custom step temp epair c_energy etotal press v_press
thermo 10
thermo_modify norm yes
# Set up NVE run
timestep 0.5e-3
neighbor 1.0 bin
neigh_modify once no every 1 delay 0 check yes
# Run MD
velocity all create 300.0 4928459 loop geom
fix 1 all nve
run ${nsteps}
"""
import lammps
lmp = lammps.lammps(cmdargs=['-echo','both'])
# Before defining the pair style, one must do the following:
import lammps.mliap
lammps.mliap.activate_mliappy(lmp)
# Otherwise, when running lammps in library mode,
# you will get an error:
# "ERROR: Loading MLIAPPY coupling module failure."
# Setup the simulation and declare an empty model
# by specifying model filename as "LATER"
lmp.commands_string(before_loading)
# Define the model however you like. In this example
# we load it from disk:
import torch
model = torch.load('Ta06A.mliap.pytorch.model.pt')
# Connect the PyTorch model to the mliap pair style.
lammps.mliap.load_model(model)
# run the simulation with the mliap pair style
lmp.commands_string(after_loading)

View File

@ -0,0 +1,18 @@
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
# Definition of SNAP potential Ta_Cand06A
# Assumes 1 LAMMPS atom type
variable zblcutinner equal 4
variable zblcutouter equal 4.8
variable zblz equal 73
# Specify hybrid with SNAP, ZBL
pair_style hybrid/overlay &
zbl ${zblcutinner} ${zblcutouter} &
mliap model mliappy relu1hidden.mliap.pytorch.model.pt &
descriptor sna Ta06A.mliap.descriptor
pair_coeff 1 1 zbl ${zblz} ${zblz}
pair_coeff * * mliap Ta

Binary file not shown.

View File

@ -0,0 +1,3 @@
../mliap_model_python_couple.cpp: ../mliap_model_python_couple.pyx
	cythonize -3 ../mliap_model_python_couple.pyx

View File

@ -98,19 +98,23 @@ os.chdir(os.path.dirname(args.package))
from distutils.core import setup
from distutils.sysconfig import get_python_lib
import site
tryuser=False
#Arguments common to global or user install -- everything but data_files
setup_kwargs= dict(name="lammps",
version=verstr,
author="Steve Plimpton",
author_email="sjplimp@sandia.gov",
url="https://lammps.sandia.gov",
description="LAMMPS Molecular Dynamics Python package",
license="GPL",
packages=["lammps","lammps.mliap"],
)
tryuser=False
try:
sys.argv = ["setup.py","install"] # as if had run "python setup.py install"
setup(name = "lammps",
version = verstr,
author = "Steve Plimpton",
author_email = "sjplimp@sandia.gov",
url = "https://lammps.sandia.gov",
description = "LAMMPS Molecular Dynamics Python package",
license = "GPL",
packages=['lammps'],
data_files = [(os.path.join(get_python_lib(), 'lammps'), [args.lib])])
setup_kwargs['data_files']=[(os.path.join(get_python_lib(), 'lammps'), [args.lib])]
setup(**setup_kwargs)
except:
tryuser=True
print ("Installation into global site-packages folder failed.\nTrying user folder %s now." % site.USER_SITE)
@ -118,14 +122,7 @@ except:
if tryuser:
try:
sys.argv = ["setup.py","install","--user"] # as if had run "python setup.py install --user"
setup(name = "lammps",
version = verstr,
author = "Steve Plimpton",
author_email = "sjplimp@sandia.gov",
url = "https://lammps.sandia.gov",
description = "LAMMPS Molecular Dynamics Python package",
license = "GPL",
packages=['lammps'],
data_files = [(os.path.join(site.USER_SITE, 'lammps'), [args.lib])])
setup_kwargs['data_files']=[(os.path.join(site.USER_SITE, 'lammps'), [args.lib])]
setup(**setup_kwargs)
except:
print("Installation into user site package folder failed.")

View File

@ -0,0 +1,13 @@
# Check compatibility of this build with the Python shared library.
# If this fails, lammps will segfault because its library will
# try to improperly start up a new interpreter.
import sysconfig
import ctypes
library = sysconfig.get_config_vars('INSTSONAME')[0]
pylib = ctypes.CDLL(library)
if not pylib.Py_IsInitialized():
    raise RuntimeError("This interpreter is not compatible with python-based mliap for LAMMPS.")
del sysconfig, ctypes, library, pylib
from .loader import load_model, activate_mliappy

View File

@ -0,0 +1,52 @@
# ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# -------------------------------------------------------------------------
# ----------------------------------------------------------------------
# Contributing author: Nicholas Lubbers (LANL)
# -------------------------------------------------------------------------
import sys
import importlib.util
import importlib.machinery
def activate_mliappy(lmp):
    try:
        # Begin Importlib magic to find the embedded python module
        # This is needed because the filename for liblammps does not
        # match the spec for normal python modules, wherein
        # file names match with PyInit function names.
        # Also, python normally doesn't look for extensions besides '.so'
        # We fix both of these problems by providing an explicit
        # path to the extension module 'mliap_model_python_couple' in
        # the liblammps shared library.
        path = lmp.lib._name
        loader = importlib.machinery.ExtensionFileLoader('mliap_model_python_couple', path)
        spec = importlib.util.spec_from_loader('mliap_model_python_couple', loader)
        module = importlib.util.module_from_spec(spec)
        sys.modules['mliap_model_python_couple'] = module
        spec.loader.exec_module(module)
        # End Importlib magic to find the embedded python module
    except Exception as ee:
        raise ImportError("Could not load MLIAP python coupling module.") from ee

def load_model(model):
    try:
        import mliap_model_python_couple
    except ImportError as ie:
        raise ImportError("MLIAP python module must be activated before loading\n"
                          "the pair style. Call lammps.mliap.activate_mliappy(lmp)."
                          ) from ie
    mliap_model_python_couple.load_from_python(model)

View File

@ -0,0 +1,65 @@
# ----------------------------------------------------------------------
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
# http://lammps.sandia.gov, Sandia National Laboratories
# Steve Plimpton, sjplimp@sandia.gov
#
# Copyright (2003) Sandia Corporation. Under the terms of Contract
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
# certain rights in this software. This software is distributed under
# the GNU General Public License.
#
# See the README file in the top-level LAMMPS directory.
# -------------------------------------------------------------------------
# ----------------------------------------------------------------------
# Contributing author: Nicholas Lubbers (LANL)
# -------------------------------------------------------------------------
import numpy as np
import torch
def calc_n_params(model):
    return sum(p.nelement() for p in model.parameters())

class TorchWrapper(torch.nn.Module):
    def __init__(self, model, n_descriptors, n_elements, n_params=None, device=None, dtype=torch.float64):
        super().__init__()

        self.model = model
        self.device = device
        self.dtype = dtype

        # Put model on device and convert to dtype
        self.to(self.dtype)
        self.to(self.device)

        if n_params is None:
            n_params = calc_n_params(model)

        self.n_params = n_params
        self.n_descriptors = n_descriptors
        self.n_elements = n_elements

    def forward(self, elems, bispectrum, beta, energy):
        bispectrum = torch.from_numpy(bispectrum).to(dtype=self.dtype, device=self.device).requires_grad_(True)
        elems = torch.from_numpy(elems).to(dtype=torch.long, device=self.device) - 1

        with torch.autograd.enable_grad():
            energy_nn = self.model(bispectrum, elems)
            if energy_nn.ndim > 1:
                energy_nn = energy_nn.flatten()
            beta_nn = torch.autograd.grad(energy_nn.sum(), bispectrum)[0]

        beta[:] = beta_nn.detach().cpu().numpy().astype(np.float64)
        energy[:] = energy_nn.detach().cpu().numpy().astype(np.float64)

class IgnoreElems(torch.nn.Module):
    def __init__(self, subnet):
        super().__init__()
        self.subnet = subnet

    def forward(self, bispectrum, elems):
        return self.subnet(bispectrum)

View File

@ -22,5 +22,5 @@ setup(
url = "https://lammps.sandia.gov",
description = "LAMMPS Molecular Dynamics Python package",
license = "GPL",
packages=["lammps"]
packages=["lammps","lammps.mliap"],
)

View File

@ -99,20 +99,20 @@ void PairBrownian::compute(int eflag, int vflag)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -494,7 +494,7 @@ void PairBrownian::init_style()
// are re-calculated at every step.
flagdeform = flagwall = 0;
for (int i = 0; i < modify->nfix; i++){
for (int i = 0; i < modify->nfix; i++) {
if (strcmp(modify->fix[i]->style,"deform") == 0)
flagdeform = 1;
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
@ -514,14 +514,14 @@ void PairBrownian::init_style()
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
else {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
// Since fix->wall->init happens after pair->init_style
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);

View File

@ -82,20 +82,20 @@ void PairBrownianPoly::compute(int eflag, int vflag)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (j = 0; j < 3; j++){
for (j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -355,7 +355,7 @@ void PairBrownianPoly::init_style()
// are re-calculated at every step.
flagdeform = flagwall = 0;
for (int i = 0; i < modify->nfix; i++){
for (int i = 0; i < modify->nfix; i++) {
if (strcmp(modify->fix[i]->style,"deform") == 0)
flagdeform = 1;
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
@ -375,14 +375,14 @@ void PairBrownianPoly::init_style()
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
else {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
// Since fix->wall->init happens after pair->init_style
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);

View File

@ -155,20 +155,20 @@ void PairLubricate::compute(int eflag, int vflag)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -559,7 +559,7 @@ void PairLubricate::init_style()
// are re-calculated at every step.
shearing = flagdeform = flagwall = 0;
for (int i = 0; i < modify->nfix; i++){
for (int i = 0; i < modify->nfix; i++) {
if (strcmp(modify->fix[i]->style,"deform") == 0) {
shearing = flagdeform = 1;
if (((FixDeform *) modify->fix[i])->remapflag != Domain::V_REMAP)
@ -584,15 +584,15 @@ void PairLubricate::init_style()
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
else {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
//Since fix->wall->init happens after pair->init_style
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);

View File

@ -158,8 +158,8 @@ void PairLubricateU::compute(int eflag, int vflag)
// store back the saved forces and torques in original arrays
for(i=0;i<nlocal+nghost;i++) {
for(j=0;j<3;j++) {
for (i=0;i<nlocal+nghost;i++) {
for (j=0;j<3;j++) {
f[i][j] = fl[i][j];
torque[i][j] = Tl[i][j];
}
@ -223,7 +223,7 @@ void PairLubricateU::stage_one()
// Find the right hand side= -ve of all forces/torques
// b = 6*Npart in overall size
for(ii = 0; ii < inum; ii++) {
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
for (j = 0; j < 3; j++) {
bcg[6*ii+j] = -f[i][j];
@ -407,7 +407,7 @@ void PairLubricateU::stage_two(double **x)
// Find the right hand side= -ve of all forces/torques
// b = 6*Npart in overall size
for(ii = 0; ii < inum; ii++) {
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
for (j = 0; j < 3; j++) {
bcg[6*ii+j] = -f[i][j];
@ -581,20 +581,20 @@ void PairLubricateU::compute_Fh(double **x)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -813,20 +813,20 @@ void PairLubricateU::compute_RU()
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -1013,7 +1013,7 @@ void PairLubricateU::compute_RU()
torque[i][1] -= vxmu2f*ty;
torque[i][2] -= vxmu2f*tz;
if(newton_pair || j < nlocal) {
if (newton_pair || j < nlocal) {
torque[j][0] -= vxmu2f*tx;
torque[j][1] -= vxmu2f*ty;
torque[j][2] -= vxmu2f*tz;
@ -1084,20 +1084,20 @@ void PairLubricateU::compute_RU(double **x)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -1284,7 +1284,7 @@ void PairLubricateU::compute_RU(double **x)
torque[i][1] -= vxmu2f*ty;
torque[i][2] -= vxmu2f*tz;
if(newton_pair || j < nlocal) {
if (newton_pair || j < nlocal) {
torque[j][0] -= vxmu2f*tx;
torque[j][1] -= vxmu2f*ty;
torque[j][2] -= vxmu2f*tz;
@ -1791,7 +1791,7 @@ void PairLubricateU::init_style()
// are re-calculated at every step.
flagdeform = flagwall = 0;
for (int i = 0; i < modify->nfix; i++){
for (int i = 0; i < modify->nfix; i++) {
if (strcmp(modify->fix[i]->style,"deform") == 0)
flagdeform = 1;
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
@ -1811,14 +1811,14 @@ void PairLubricateU::init_style()
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
else {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
//Since fix->wall->init happens after pair->init_style
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);

View File

@ -126,8 +126,8 @@ void PairLubricateUPoly::compute(int eflag, int vflag)
// Store back the saved forces and torques in original arrays
for(i=0;i<nlocal+nghost;i++) {
for(j=0;j<3;j++) {
for (i=0;i<nlocal+nghost;i++) {
for (j=0;j<3;j++) {
f[i][j] = fl[i][j];
torque[i][j] = Tl[i][j];
}
@ -172,7 +172,7 @@ void PairLubricateUPoly::iterate(double **x, int stage)
// Find the right hand side= -ve of all forces/torques
// b = 6*Npart in overall size
for(ii = 0; ii < inum; ii++) {
for (ii = 0; ii < inum; ii++) {
i = ilist[ii];
for (j = 0; j < 3; j++) {
bcg[6*ii+j] = -f[i][j];
@ -351,20 +351,20 @@ void PairLubricateUPoly::compute_Fh(double **x)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -626,20 +626,20 @@ void PairLubricateUPoly::compute_RU(double **x)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (j = 0; j < 3; j++){
for (j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -1155,10 +1155,10 @@ void PairLubricateUPoly::init_style()
// are re-calculated at every step.
flagdeform = flagwall = 0;
for (int i = 0; i < modify->nfix; i++){
for (int i = 0; i < modify->nfix; i++) {
if (strcmp(modify->fix[i]->style,"deform") == 0)
flagdeform = 1;
else if (strstr(modify->fix[i]->style,"wall") != nullptr){
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
if (flagwall)
error->all(FLERR,
"Cannot use multiple fix wall commands with "
@ -1176,14 +1176,14 @@ void PairLubricateUPoly::init_style()
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
else {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
//Since fix->wall->init happens after pair->init_style
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
@ -1214,7 +1214,7 @@ void PairLubricateUPoly::init_style()
if (!flagVF) vol_f = 0;
if (!comm->me) {
if(logfile)
if (logfile)
fprintf(logfile, "lubricateU: vol_f = %g, vol_p = %g, vol_T = %g\n",
vol_f,vol_P,vol_T);
if (screen)

View File

@ -137,20 +137,20 @@ void PairLubricatePoly::compute(int eflag, int vflag)
double dims[3], wallcoord;
if (flagVF) // Flag for volume fraction corrections
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
if (flagdeform && !flagwall)
for (j = 0; j < 3; j++)
dims[j] = domain->prd[j];
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
}
else wallcoord = wallfix->coord0[m];
@ -461,7 +461,7 @@ void PairLubricatePoly::init_style()
// are re-calculated at every step.
shearing = flagdeform = flagwall = 0;
for (int i = 0; i < modify->nfix; i++){
for (int i = 0; i < modify->nfix; i++) {
if (strcmp(modify->fix[i]->style,"deform") == 0) {
shearing = flagdeform = 1;
if (((FixDeform *) modify->fix[i])->remapflag != Domain::V_REMAP)
@ -478,9 +478,9 @@ void PairLubricatePoly::init_style()
if (wallfix->xflag) flagwall = 2; // Moving walls exist
}
if (strstr(modify->fix[i]->style,"wall") != nullptr){
if (strstr(modify->fix[i]->style,"wall") != nullptr) {
flagwall = 1; // Walls exist
if (((FixWall *) modify->fix[i])->xflag ) {
if (((FixWall *) modify->fix[i])->xflag) {
flagwall = 2; // Moving walls exist
wallfix = (FixWall *) modify->fix[i];
}
@ -492,14 +492,14 @@ void PairLubricatePoly::init_style()
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
else {
double wallhi[3], walllo[3];
for (int j = 0; j < 3; j++){
for (int j = 0; j < 3; j++) {
wallhi[j] = domain->prd[j];
walllo[j] = 0;
}
for (int m = 0; m < wallfix->nwall; m++){
for (int m = 0; m < wallfix->nwall; m++) {
int dim = wallfix->wallwhich[m] / 2;
int side = wallfix->wallwhich[m] % 2;
if (wallfix->xstyle[m] == VARIABLE){
if (wallfix->xstyle[m] == VARIABLE) {
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
//Since fix->wall->init happens after pair->init_style
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);

View File

@ -168,7 +168,7 @@ void DumpAtomGZ::write()
int DumpAtomGZ::modify_param(int narg, char **arg)
{
int consumed = DumpAtom::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
if (strcmp(arg[0],"compression_level") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
int min_level = Z_DEFAULT_COMPRESSION;

View File

@ -171,7 +171,7 @@ void DumpAtomZstd::write()
int DumpAtomZstd::modify_param(int narg, char **arg)
{
int consumed = DumpAtom::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
try {
if (strcmp(arg[0],"checksum") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");

View File

@ -176,7 +176,7 @@ void DumpCFGGZ::write()
int DumpCFGGZ::modify_param(int narg, char **arg)
{
int consumed = DumpCFG::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
if (strcmp(arg[0],"compression_level") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
int min_level = Z_DEFAULT_COMPRESSION;

View File

@ -173,7 +173,7 @@ void DumpCFGZstd::write()
int DumpCFGZstd::modify_param(int narg, char **arg)
{
int consumed = DumpCFG::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
try {
if (strcmp(arg[0],"checksum") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");

View File

@ -168,7 +168,7 @@ void DumpCustomGZ::write()
int DumpCustomGZ::modify_param(int narg, char **arg)
{
int consumed = DumpCustom::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
if (strcmp(arg[0],"compression_level") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
int min_level = Z_DEFAULT_COMPRESSION;

View File

@ -171,7 +171,7 @@ void DumpCustomZstd::write()
int DumpCustomZstd::modify_param(int narg, char **arg)
{
int consumed = DumpCustom::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
try {
if (strcmp(arg[0],"checksum") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");

View File

@ -183,7 +183,7 @@ void DumpLocalGZ::write()
int DumpLocalGZ::modify_param(int narg, char **arg)
{
int consumed = DumpLocal::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
if (strcmp(arg[0],"compression_level") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
int min_level = Z_DEFAULT_COMPRESSION;

View File

@ -171,7 +171,7 @@ void DumpLocalZstd::write()
int DumpLocalZstd::modify_param(int narg, char **arg)
{
int consumed = DumpLocal::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
try {
if (strcmp(arg[0],"checksum") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");

View File

@ -147,7 +147,7 @@ void DumpXYZGZ::write()
int DumpXYZGZ::modify_param(int narg, char **arg)
{
int consumed = DumpXYZ::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
if (strcmp(arg[0],"compression_level") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
int min_level = Z_DEFAULT_COMPRESSION;

View File

@ -145,7 +145,7 @@ void DumpXYZZstd::write()
int DumpXYZZstd::modify_param(int narg, char **arg)
{
int consumed = DumpXYZ::modify_param(narg, arg);
if(consumed == 0) {
if (consumed == 0) {
try {
if (strcmp(arg[0],"checksum") == 0) {
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");

View File

@ -48,7 +48,7 @@ ZstdFileWriter::~ZstdFileWriter()
void ZstdFileWriter::open(const std::string &path)
{
if(isopen()) return;
if (isopen()) return;
fp = fopen(path.c_str(), "wb");
@ -72,7 +72,7 @@ void ZstdFileWriter::open(const std::string &path)
size_t ZstdFileWriter::write(const void * buffer, size_t length)
{
if(!isopen()) return 0;
if (!isopen()) return 0;
ZSTD_inBuffer input = { buffer, length, 0 };
ZSTD_EndDirective mode = ZSTD_e_continue;
@ -81,7 +81,7 @@ size_t ZstdFileWriter::write(const void * buffer, size_t length)
ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
ZSTD_compressStream2(cctx, &output, &input, mode);
fwrite(out_buffer, sizeof(char), output.pos, fp);
} while(input.pos < input.size);
} while (input.pos < input.size);
return length;
}
@ -90,7 +90,7 @@ size_t ZstdFileWriter::write(const void * buffer, size_t length)
void ZstdFileWriter::flush()
{
if(!isopen()) return;
if (!isopen()) return;
size_t remaining;
ZSTD_inBuffer input = { nullptr, 0, 0 };
@ -100,7 +100,7 @@ void ZstdFileWriter::flush()
ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
fwrite(out_buffer, sizeof(char), output.pos, fp);
} while(remaining);
} while (remaining);
fflush(fp);
}
@ -109,7 +109,7 @@ void ZstdFileWriter::flush()
void ZstdFileWriter::close()
{
if(!isopen()) return;
if (!isopen()) return;
size_t remaining;
ZSTD_inBuffer input = { nullptr, 0, 0 };
@ -119,7 +119,7 @@ void ZstdFileWriter::close()
ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
fwrite(out_buffer, sizeof(char), output.pos, fp);
} while(remaining);
} while (remaining);
ZSTD_freeCCtx(cctx);
cctx = nullptr;
@ -144,7 +144,7 @@ void ZstdFileWriter::setCompressionLevel(int level)
const int min_level = ZSTD_minCLevel();
const int max_level = ZSTD_maxCLevel();
if(level < min_level || level > max_level)
if (level < min_level || level > max_level)
throw FileWriterException(fmt::format("Compression level must in the range of [{}, {}]", min_level, max_level));
compression_level = level;

View File

@ -232,7 +232,7 @@ double ComputeTempCS::compute_scalar()
double t = 0.0;
for (int i = 0; i < nlocal; i++){
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
vthermal[0] = v[i][0] - vint[i][0];
vthermal[1] = v[i][1] - vint[i][1];
@ -271,7 +271,7 @@ void ComputeTempCS::compute_vector()
double t[6];
for (int i = 0; i < 6; i++) t[i] = 0.0;
for (int i = 0; i < nlocal; i++){
for (int i = 0; i < nlocal; i++) {
if (mask[i] & groupbit) {
if (rmass) massone = rmass[i];
else massone = mass[type[i]];

View File

@ -106,6 +106,10 @@ if (test $1 = "PERI") then
depend USER-OMP
fi
if (test $1 = "PYTHON") then
depend MLIAP
fi
if (test $1 = "RIGID") then
depend KOKKOS
depend USER-OMP
@ -114,6 +118,7 @@ fi
if (test $1 = "SNAP") then
depend KOKKOS
depend MLIAP
fi
if (test $1 = "USER-CGSDK") then

View File

@ -370,7 +370,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
Setfl *file = setfl;
// read potential file
if(comm->me == 0) {
if (comm->me == 0) {
PotentialFileReader reader(PairEAM::lmp, filename,
"eam/alloy", unit_convert_flag);

View File

@ -370,7 +370,7 @@ void PairEAMFSGPU::read_file(char *filename)
Fs *file = fs;
// read potential file
if(comm->me == 0) {
if (comm->me == 0) {
PotentialFileReader reader(PairEAM::lmp, filename, "eam/fs",
unit_convert_flag);

View File

@ -141,7 +141,7 @@ void PairVashishtaGPU::compute(int eflag, int vflag)
void PairVashishtaGPU::allocate()
{
if(!allocated) {
if (!allocated) {
PairVashishta::allocate();
}
int n = atom->ntypes;
@ -260,7 +260,7 @@ void PairVashishtaGPU::init_style()
double PairVashishtaGPU::init_one(int i, int j)
{
if(!gpu_allocated) {
if (!gpu_allocated) {
allocate();
}
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");

View File

@ -1303,7 +1303,7 @@ void FixWallGran::granular(double rsq, double dx, double dy, double dz,
relrot2 = omega[1];
relrot3 = omega[2];
}
if (roll_model != ROLL_NONE){
if (roll_model != ROLL_NONE) {
// rolling velocity, see eq. 31 of Wang et al, Particuology v 23, p 49 (2015)
// This is different from the Marshall papers,

View File

@ -536,7 +536,7 @@ void PairGranular::compute(int eflag, int vflag)
}
if (roll_model[itype][jtype] != ROLL_NONE ||
twist_model[itype][jtype] != TWIST_NONE){
twist_model[itype][jtype] != TWIST_NONE) {
relrot1 = omega[i][0] - omega[j][0];
relrot2 = omega[i][1] - omega[j][1];
relrot3 = omega[i][2] - omega[j][2];

View File

@ -252,7 +252,7 @@ char *do_query(char *qfunction, char * model_name, int narg, char **arg,
}
} else {
query += fmt::format("&{}=[", key);
while (n != std::string::npos){
while (n != std::string::npos) {
std::string sval = val.substr(0, n);
if (utils::is_integer(sval) ||
utils::is_double(sval) ||

View File

@ -230,9 +230,9 @@ void AtomKokkos::sort()
reallocate memory to the pointer selected by the mask
------------------------------------------------------------------------- */
void AtomKokkos::grow(unsigned int mask){
void AtomKokkos::grow(unsigned int mask) {
if (mask & SPECIAL_MASK){
if (mask & SPECIAL_MASK) {
memoryKK->destroy_kokkos(k_special, special);
sync(Device, mask);
modified(Device, mask);

View File

@ -83,16 +83,16 @@ struct SortFunctor {
ViewType source;
Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type> dest;
IndexView index;
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind){
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind) {
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0));
}
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind){
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind) {
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1));
}
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind){
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind) {
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1),src.extent(2));
}
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind){
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind) {
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1),src.extent(2),src.extent(3));
}
KOKKOS_INLINE_FUNCTION
@ -100,18 +100,18 @@ struct SortFunctor {
dest(i) = source(index(i));
}
void operator()(const typename std::enable_if<ViewType::rank==2, int>::type& i) {
for(int j=0; j < (int)source.extent(1); j++)
for (int j=0; j < (int)source.extent(1); j++)
dest(i,j) = source(index(i),j);
}
void operator()(const typename std::enable_if<ViewType::rank==3, int>::type& i) {
for(int j=0; j < (int)source.extent(1); j++)
for(int k=0; k < (int)source.extent(2); k++)
for (int j=0; j < (int)source.extent(1); j++)
for (int k=0; k < (int)source.extent(2); k++)
dest(i,j,k) = source(index(i),j,k);
}
void operator()(const typename std::enable_if<ViewType::rank==4, int>::type& i) {
for(int j=0; j < (int)source.extent(1); j++)
for(int k=0; k < (int)source.extent(2); k++)
for(int l=0; l < (int)source.extent(3); l++)
for (int j=0; j < (int)source.extent(1); j++)
for (int k=0; k < (int)source.extent(2); k++)
for (int l=0; l < (int)source.extent(3); l++)
dest(i,j,k,l) = source(index(i),j,k,l);
}
};

View File

@ -281,10 +281,10 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -296,7 +296,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -310,8 +310,8 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
}
} else {
atomKK->sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -323,7 +323,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -396,11 +396,11 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
const int & iswap,
const int nfirst, const int &pbc_flag,
const int* const pbc) {
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
@ -414,7 +414,7 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,1>
f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
@ -431,8 +431,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
} else {
atomKK->sync(Device,X_MASK);
atomKK->modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
@ -446,7 +446,7 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,1>
f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
@ -491,8 +491,8 @@ struct AtomVecAngleKokkos_UnpackComm {
/* ---------------------------------------------------------------------- */
void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
const DAT::tdual_xfloat_2d &buf) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
@ -641,7 +641,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
if (n > 0)
atomKK->sync(Host,F_MASK);
int m = 0;
@ -658,7 +658,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0)
if (n > 0)
atomKK->modified(Host,F_MASK);
int m = 0;
@ -742,7 +742,7 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecAngleKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -756,7 +756,7 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecAngleKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -939,7 +939,7 @@ struct AtomVecAngleKokkos_UnpackBorder {
typename AT::t_tagint_1d &molecule,
const int& first):
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
_first(first){
_first(first) {
};
KOKKOS_INLINE_FUNCTION
@ -963,7 +963,7 @@ void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
if(space==Host) {
if (space==Host) {
struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
Kokkos::parallel_for(n,f);
@ -1129,7 +1129,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
@ -1178,7 +1178,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -1220,12 +1220,12 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_
X_FLOAT hi )
{
const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if(space == Host) {
if (space == Host) {
AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
@ -1333,7 +1333,7 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
@ -1386,7 +1386,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);

View File

@ -200,7 +200,7 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecAtomicKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
@ -214,7 +214,7 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecAtomicKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
@ -374,7 +374,7 @@ struct AtomVecAtomicKokkos_UnpackBorder {
typename ArrayTypes<DeviceType>::t_int_1d &type,
typename ArrayTypes<DeviceType>::t_int_1d &mask,
const int& first):
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first){
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first) {
};
KOKKOS_INLINE_FUNCTION
@ -396,7 +396,7 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
while (first+n >= nmax) grow(0);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
if(space==Host) {
if (space==Host) {
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
Kokkos::parallel_for(n,f);
} else {
@ -504,7 +504,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
const size_t elements = 11;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
@ -527,7 +527,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
_buf(mysend,10) = d_ubuf(_image[i]).d;
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -546,11 +546,11 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
{
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
int newsize = nsend*11/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if(space == Host) {
if (space == Host) {
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
return nsend*11;
@ -615,7 +615,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
const size_t elements = 11;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
@ -644,7 +644,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
/* ---------------------------------------------------------------------- */
int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/11,f);

View File

@ -250,7 +250,7 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecBondKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -264,7 +264,7 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecBondKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -447,7 +447,7 @@ struct AtomVecBondKokkos_UnpackBorder {
typename AT::t_tagint_1d &molecule,
const int& first):
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
_first(first){
_first(first) {
};
KOKKOS_INLINE_FUNCTION
@ -471,7 +471,7 @@ void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
if(space==Host) {
if (space==Host) {
struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
Kokkos::parallel_for(n,f);
@ -621,7 +621,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 to store buffer length
@ -661,7 +661,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -696,12 +696,12 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
X_FLOAT hi )
{
const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if(space == Host) {
if (space == Host) {
AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
@ -794,7 +794,7 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
_bond_type(atom->k_bond_type.view<DeviceType>()),
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
@ -840,7 +840,7 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);

View File

@ -267,7 +267,7 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecChargeKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
@ -281,7 +281,7 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecChargeKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
@ -463,7 +463,7 @@ struct AtomVecChargeKokkos_UnpackBorder {
typename ArrayTypes<DeviceType>::t_int_1d &mask,
typename ArrayTypes<DeviceType>::t_float_1d &q,
const int& first):
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first){
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) {
};
KOKKOS_INLINE_FUNCTION
@ -485,7 +485,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
if (first+n >= nmax) {
grow(first+n+100);
}
if(space==Host) {
if (space==Host) {
struct AtomVecChargeKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,first);
Kokkos::parallel_for(n,f);
@ -618,7 +618,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
const size_t elements = 12;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
buf.template view<DeviceType>().extent(1))/elements;
@ -643,7 +643,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
_buf(mysend,11) = _q[i];
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -667,11 +667,11 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat
ExecutionSpace space,int dim,
X_FLOAT lo,X_FLOAT hi )
{
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
int newsize = nsend*12/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if(space == Host) {
if (space == Host) {
AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
@ -740,7 +740,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
_image(atom->k_image.view<DeviceType>()),
_q(atom->k_q.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
const size_t elements = 12;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
@ -772,7 +772,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
ExecutionSpace space) {
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/12,f);
@ -1131,7 +1131,7 @@ void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
if ((mask & MOLECULE_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
} else {
if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
@ -1148,7 +1148,7 @@ void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
if ((mask & MOLECULE_MASK) && atomKK->k_q.need_sync<LMPHostType>())
if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
}
}

View File

@ -267,10 +267,10 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
buf,list,iswap,
@ -286,7 +286,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
buf,list,iswap,
@ -304,8 +304,8 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
}
} else {
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
buf,list,iswap,
@ -321,7 +321,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
buf,list,iswap,
@ -409,11 +409,11 @@ struct AtomVecDPDKokkos_PackCommSelf {
int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst, const int &pbc_flag, const int* const pbc) {
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
nfirst,list,iswap,
@ -429,7 +429,7 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
nfirst,list,iswap,
@ -448,8 +448,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
} else {
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
nfirst,list,iswap,
@ -465,7 +465,7 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
nfirst,list,iswap,
@ -526,8 +526,8 @@ struct AtomVecDPDKokkos_UnpackComm {
/* ---------------------------------------------------------------------- */
void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
const DAT::tdual_xfloat_2d &buf) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
struct AtomVecDPDKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,
@ -716,7 +716,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
if (n > 0)
atomKK->sync(Host,F_MASK);
int m = 0;
@ -733,7 +733,7 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0) {
if (n > 0) {
atomKK->sync(Host,F_MASK);
atomKK->modified(Host,F_MASK);
}
@ -831,7 +831,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecDPDKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,
@ -849,7 +849,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecDPDKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,
@ -1134,7 +1134,7 @@ void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first,
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
if(space==Host) {
if (space==Host) {
struct AtomVecDPDKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),
h_x,h_tag,h_type,h_mask,
h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew,
@ -1326,7 +1326,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
const size_t elements = 17;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
@ -1355,7 +1355,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
_buf(mysend,16) = _uCGnew[i];
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -1380,7 +1380,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
{
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
@ -1388,7 +1388,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
DVECTOR_MASK);
if(space == Host) {
if (space == Host) {
AtomVecDPDKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
} else {
@ -1469,7 +1469,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
_mask(atom->k_mask.view<DeviceType>()),
_image(atom->k_image.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
const size_t elements = 17;
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
@ -1504,7 +1504,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
/* ---------------------------------------------------------------------- */
int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecDPDKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/17,f);

View File

@ -381,7 +381,7 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecFullKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
@ -395,7 +395,7 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecFullKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
@ -586,7 +586,7 @@ struct AtomVecFullKokkos_UnpackBorder {
typename AT::t_tagint_1d &molecule,
const int& first):
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
_first(first){
_first(first) {
};
KOKKOS_INLINE_FUNCTION
@ -611,7 +611,7 @@ void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
if(space==Host) {
if (space==Host) {
struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
Kokkos::parallel_for(n,f);
@ -824,7 +824,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
@ -895,7 +895,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -955,12 +955,12 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
{
const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if(space == Host) {
if (space == Host) {
AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
@ -1106,7 +1106,7 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
@ -1178,7 +1178,7 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
ExecutionSpace space) {
const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);

View File

@ -112,10 +112,10 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -127,7 +127,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -141,8 +141,8 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
}
} else {
sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -154,7 +154,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -225,11 +225,11 @@ struct AtomVecKokkos_PackCommSelf {
int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst, const int &pbc_flag, const int* const pbc) {
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -241,7 +241,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -256,8 +256,8 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -269,7 +269,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -361,10 +361,10 @@ struct AtomVecKokkos_PackCommSelfFused {
int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan,
const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc,
const DAT::tdual_int_1d &g2l) {
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
@ -378,7 +378,7 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &l
} else {
sync(Device,X_MASK);
modified(Device,X_MASK);
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz);
@ -420,8 +420,8 @@ struct AtomVecKokkos_UnpackComm {
/* ---------------------------------------------------------------------- */
void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
const DAT::tdual_xfloat_2d &buf) {
if (commKK->forward_comm_on_host) {
sync(Host,X_MASK);
modified(Host,X_MASK);
struct AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
@ -530,7 +530,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
const int &pbc_flag,
const int* const pbc)
{
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
sync(Host,X_MASK|V_MASK);
if (pbc_flag) {
if (deform_vremap) {
@ -552,7 +552,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,0> f(
atomKK->k_x,atomKK->k_mask,
atomKK->k_v,
@ -571,7 +571,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
}
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommVel<LMPHostType,0,1,0> f(
atomKK->k_x,atomKK->k_mask,
atomKK->k_v,
@ -591,9 +591,9 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
}
} else {
sync(Device,X_MASK|V_MASK);
if(pbc_flag) {
if(deform_vremap) {
if(domain->triclinic) {
if (pbc_flag) {
if (deform_vremap) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,1> f(
atomKK->k_x,atomKK->k_mask,
atomKK->k_v,
@ -611,7 +611,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,0> f(
atomKK->k_x,atomKK->k_mask,
atomKK->k_v,
@ -630,7 +630,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
}
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,1,0> f(
atomKK->k_x,atomKK->k_mask,
atomKK->k_v,
@ -691,8 +691,8 @@ struct AtomVecKokkos_UnpackCommVel {
/* ---------------------------------------------------------------------- */
void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
const DAT::tdual_xfloat_2d &buf) {
if (commKK->forward_comm_on_host) {
sync(Host,X_MASK|V_MASK);
modified(Host,X_MASK|V_MASK);
struct AtomVecKokkos_UnpackCommVel<LMPHostType> f(atomKK->k_x,atomKK->k_v,buf,first);
@ -864,8 +864,8 @@ struct AtomVecKokkos_PackReverse {
/* ---------------------------------------------------------------------- */
int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first,
const DAT::tdual_ffloat_2d &buf ) {
if(commKK->reverse_comm_on_host) {
const DAT::tdual_ffloat_2d &buf) {
if (commKK->reverse_comm_on_host) {
sync(Host,F_MASK);
struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first);
Kokkos::parallel_for(n,f);
@ -911,7 +911,7 @@ struct AtomVecKokkos_UnPackReverseSelf {
int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
const int nfirst) {
if(commKK->reverse_comm_on_host) {
if (commKK->reverse_comm_on_host) {
sync(Host,F_MASK);
struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap);
Kokkos::parallel_for(n,f);
@ -966,7 +966,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
// Check whether to always run reverse communication on the host
// Choose correct reverse UnPackReverse kernel
if(commKK->reverse_comm_on_host) {
if (commKK->reverse_comm_on_host) {
struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap);
Kokkos::parallel_for(n,f);
modified(Host,F_MASK);
@ -981,7 +981,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
int AtomVecKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
if (n > 0)
sync(Host,F_MASK);
int m = 0;
@ -1007,7 +1007,7 @@ void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf)
h_f(j,2) += buf[m++];
}
if(n > 0)
if (n > 0)
modified(Host,F_MASK);
}
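Editorial note: the pack_comm_kokkos()/pack_comm_self() hunks above all share the dispatch shape the comments describe ("Check whether to always run forward communication on the host", "Choose correct forward PackComm kernel"): pick host or device execution, then branch on pbc_flag and domain->triclinic to select a functor specialization. A condensed sketch of that shape follows; PackFunctor, HostSpace and DeviceSpace are placeholders, not LAMMPS or Kokkos names.

// Sketch only: the specialization dispatch used by the pack_comm_kokkos() family.
#include <cstdio>

struct HostSpace {};
struct DeviceSpace {};

template<class Space, int PBC_FLAG, int TRICLINIC>
struct PackFunctor {
  void run(int n) const { std::printf("pack %d atoms (pbc=%d, triclinic=%d)\n", n, PBC_FLAG, TRICLINIC); }
};

template<class Space>
void pack_comm_dispatch(int n, bool pbc_flag, bool triclinic) {
  if (pbc_flag) {
    if (triclinic) PackFunctor<Space,1,1>().run(n);
    else           PackFunctor<Space,1,0>().run(n);
  } else {
    if (triclinic) PackFunctor<Space,0,1>().run(n);
    else           PackFunctor<Space,0,0>().run(n);
  }
}

int main() { pack_comm_dispatch<HostSpace>(8, true, false); }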

View File

@ -178,7 +178,7 @@ class AtomVecKokkos : public AtomVec {
}
mirror_type tmp_view((typename ViewType::value_type*)buffer, src.d_view.layout());
if(space == Device) {
if (space == Device) {
Kokkos::deep_copy(LMPHostType(),tmp_view,src.h_view),
Kokkos::deep_copy(LMPHostType(),src.d_view,tmp_view);
src.clear_sync_state();
@ -191,7 +191,7 @@ class AtomVecKokkos : public AtomVec {
#else
template<class ViewType>
void perform_async_copy(ViewType& src, unsigned int space) {
if(space == Device)
if (space == Device)
src.template sync<LMPDeviceType>();
else
src.template sync<LMPHostType>();

View File

@ -360,10 +360,10 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -375,7 +375,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -389,8 +389,8 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
}
} else {
atomKK->sync(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -402,7 +402,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,1>
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -476,11 +476,11 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
const int & iswap,
const int nfirst, const int &pbc_flag,
const int* const pbc) {
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -492,7 +492,7 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,0,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -507,8 +507,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
} else {
atomKK->sync(Device,X_MASK);
atomKK->modified(Device,X_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -520,7 +520,7 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,0,1>
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
domain->xy,domain->xz,domain->yz,pbc);
@ -563,8 +563,8 @@ struct AtomVecMolecularKokkos_UnpackComm {
/* ---------------------------------------------------------------------- */
void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
const DAT::tdual_xfloat_2d &buf) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK);
atomKK->modified(Host,X_MASK);
struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
@ -713,7 +713,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf)
int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
if (n > 0)
atomKK->sync(Host,F_MASK);
int m = 0;
@ -730,7 +730,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0)
if (n > 0)
atomKK->modified(Host,F_MASK);
int m = 0;
@ -814,7 +814,7 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecMolecularKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -828,7 +828,7 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecMolecularKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
@ -1011,7 +1011,7 @@ struct AtomVecMolecularKokkos_UnpackBorder {
typename AT::t_tagint_1d &molecule,
const int& first):
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
_first(first){
_first(first) {
};
KOKKOS_INLINE_FUNCTION
@ -1035,7 +1035,7 @@ void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
while (first+n >= nmax) grow(0);
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
if(space==Host) {
if (space==Host) {
struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
Kokkos::parallel_for(n,f);
@ -1240,7 +1240,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
_sendlist(sendlist.template view<DeviceType>()),
_copylist(copylist.template view<DeviceType>()),
_nlocal(nlocal),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
@ -1309,7 +1309,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
const int j = _copylist(mysend);
if(j>-1) {
if (j>-1) {
_xw(i,0) = _x(j,0);
_xw(i,1) = _x(j,1);
_xw(i,2) = _x(j,2);
@ -1368,12 +1368,12 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl
{
const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
k_buf.view<LMPHostType>().extent(1))/elements) {
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
if(space == Host) {
if (space == Host) {
AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
@ -1517,7 +1517,7 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
_lo(lo),_hi(hi){
_lo(lo),_hi(hi) {
elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
@ -1589,7 +1589,7 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
ExecutionSpace space) {
const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
f(atomKK,k_buf,k_count,dim,lo,hi);

View File

@ -274,10 +274,10 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
return AtomVecKokkos::pack_comm_kokkos(n,list,iswap,buf,pbc_flag,pbc);
// Check whether to always run forward communication on the host
// Choose correct forward PackComm kernel
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -295,7 +295,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackComm<LMPHostType,0,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -315,8 +315,8 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
}
} else {
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -334,7 +334,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -461,11 +461,11 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
const int &pbc_flag,
const int* const pbc)
{
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
if(pbc_flag) {
if(deform_vremap) {
if(domain->triclinic) {
if (pbc_flag) {
if (deform_vremap) {
if (domain->triclinic) {
if (radvary == 0) {
struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,1> f(
atomKK->k_x,atomKK->k_mask,
@ -507,7 +507,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
}
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
if (radvary == 0) {
struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,0> f(
atomKK->k_x,atomKK->k_mask,
@ -550,7 +550,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
}
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
if (radvary == 0) {
struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,0,1,0> f(
atomKK->k_x,atomKK->k_mask,
@ -594,9 +594,9 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
}
} else {
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
if(pbc_flag) {
if(deform_vremap) {
if(domain->triclinic) {
if (pbc_flag) {
if (deform_vremap) {
if (domain->triclinic) {
if (radvary == 0) {
struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,1> f(
atomKK->k_x,atomKK->k_mask,
@ -638,7 +638,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
}
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
if (radvary == 0) {
struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,0> f(
atomKK->k_x,atomKK->k_mask,
@ -681,7 +681,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
}
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
if (radvary == 0) {
struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,0,1,0> f(
atomKK->k_x,atomKK->k_mask,
@ -792,11 +792,11 @@ int AtomVecSphereKokkos::pack_comm_self(
// Fallback to AtomVecKokkos if radvary == 0
if (radvary == 0)
return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -814,7 +814,7 @@ int AtomVecSphereKokkos::pack_comm_self(
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -835,8 +835,8 @@ int AtomVecSphereKokkos::pack_comm_self(
} else {
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
if(pbc_flag) {
if(domain->triclinic) {
if (pbc_flag) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -854,7 +854,7 @@ int AtomVecSphereKokkos::pack_comm_self(
Kokkos::parallel_for(n,f);
}
} else {
if(domain->triclinic) {
if (domain->triclinic) {
struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,1> f(
atomKK->k_x,
atomKK->k_radius,atomKK->k_rmass,
@ -917,13 +917,13 @@ struct AtomVecSphereKokkos_UnpackComm {
void AtomVecSphereKokkos::unpack_comm_kokkos(
const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
const DAT::tdual_xfloat_2d &buf) {
// Fallback to AtomVecKokkos if radvary == 0
if (radvary == 0) {
AtomVecKokkos::unpack_comm_kokkos(n,first,buf);
return;
}
if(commKK->forward_comm_on_host) {
if (commKK->forward_comm_on_host) {
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
atomKK->k_x,
@ -994,8 +994,8 @@ struct AtomVecSphereKokkos_UnpackCommVel {
void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf ) {
if(commKK->forward_comm_on_host) {
const DAT::tdual_xfloat_2d &buf) {
if (commKK->forward_comm_on_host) {
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
if (radvary == 0) {
struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
@ -1352,7 +1352,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
{
if(n > 0)
if (n > 0)
atomKK->sync(Host,F_MASK|TORQUE_MASK);
int m = 0;
@ -1372,7 +1372,7 @@ int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
{
if(n > 0)
if (n > 0)
atomKK->sync(Host,TORQUE_MASK);
int m = 0;
@ -1389,7 +1389,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
{
if(n > 0) {
if (n > 0) {
atomKK->modified(Host,F_MASK|TORQUE_MASK);
}
@ -1409,7 +1409,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf)
{
if(n > 0) {
if (n > 0) {
atomKK->modified(Host,TORQUE_MASK);
}
@ -1502,7 +1502,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
dy = pbc[1];
dz = pbc[2];
}
if(space==Host) {
if (space==Host) {
AtomVecSphereKokkos_PackBorder<LMPHostType,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,
@ -1519,7 +1519,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
}
} else {
dx = dy = dz = 0;
if(space==Host) {
if (space==Host) {
AtomVecSphereKokkos_PackBorder<LMPHostType,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,
@ -1697,7 +1697,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
dz = pbc[2];
}
if (!deform_vremap) {
if(space==Host) {
if (space==Host) {
AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,
@ -1721,7 +1721,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
dvz = pbc[2]*h_rate[2];
if(space==Host) {
if (space==Host) {
AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,1> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,
@ -1742,7 +1742,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
}
}
} else {
if(space==Host) {
if (space==Host) {
AtomVecSphereKokkos_PackBorderVel<LMPHostType,0,0> f(
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
iswap,h_x,h_tag,h_type,h_mask,
@ -1926,7 +1926,7 @@ struct AtomVecSphereKokkos_UnpackBorder {
void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
while (first+n >= nmax) grow(0);
if(space==Host) {
if (space==Host) {
struct AtomVecSphereKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),
h_x,h_tag,h_type,h_mask,
h_radius,h_rmass,
@ -2034,7 +2034,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos(
const int &n, const int &first,
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
while (first+n >= nmax) grow(0);
if(space==Host) {
if (space==Host) {
struct AtomVecSphereKokkos_UnpackBorderVel<LMPHostType> f(buf.view<LMPHostType>(),
h_x,h_tag,h_type,h_mask,
h_radius,h_rmass,
@ -2212,7 +2212,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
DAT::tdual_int_1d k_copylist,
ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi)
{
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
}
@ -2220,7 +2220,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
OMEGA_MASK);
if(space == Host) {
if (space == Host) {
AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
Kokkos::parallel_for(nsend,f);
} else {
@ -2338,7 +2338,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
/* ---------------------------------------------------------------------- */
int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
if(space == Host) {
if (space == Host) {
k_count.h_view(0) = nlocal;
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
Kokkos::parallel_for(nrecv/16,f);

View File

@ -107,6 +107,8 @@ void CommKokkos::init()
atomKK = (AtomKokkos *) atom;
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
forward_comm_classic = lmp->kokkos->forward_comm_classic;
forward_pair_comm_classic = lmp->kokkos->forward_pair_comm_classic;
forward_fix_comm_classic = lmp->kokkos->forward_fix_comm_classic;
reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
@ -361,12 +363,12 @@ void CommKokkos::reverse_comm_device()
void CommKokkos::forward_comm_fix(Fix *fix, int size)
{
if (fix->execution_space == Device && fix->forward_comm_device) {
k_sendlist.sync<LMPDeviceType>();
forward_comm_fix_device<LMPDeviceType>(fix,size);
} else {
if (fix->execution_space == Host || !fix->forward_comm_device || forward_fix_comm_classic) {
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm_fix(fix,size);
} else {
k_sendlist.sync<LMPDeviceType>();
forward_comm_fix_device<LMPDeviceType>(fix);
}
}
@ -456,10 +458,10 @@ void CommKokkos::reverse_comm_compute(Compute *compute)
void CommKokkos::forward_comm_pair(Pair *pair)
{
if (pair->execution_space == Host) {
if (pair->execution_space == Host || forward_pair_comm_classic) {
k_sendlist.sync<LMPHostType>();
CommBrick::forward_comm_pair(pair);
} else if (pair->execution_space == Device) {
} else {
k_sendlist.sync<LMPDeviceType>();
forward_comm_pair_device<LMPDeviceType>(pair);
}
@ -571,10 +573,10 @@ void CommKokkos::reverse_comm_dump(Dump *dump)
void CommKokkos::exchange()
{
if(atom->nextra_grow + atom->nextra_border) {
if(!exchange_comm_classic) {
if (atom->nextra_grow + atom->nextra_border) {
if (!exchange_comm_classic) {
static int print = 1;
if(print && comm->me==0) {
if (print && comm->me==0) {
error->warning(FLERR,"Fixes cannot yet send exchange data in Kokkos communication, "
"switching to classic exchange/border communication");
}
@ -625,7 +627,7 @@ struct BuildExchangeListFunctor {
void operator() (int i) const {
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1);
if(mysend < (int)_sendlist.extent(0)) {
if (mysend < (int)_sendlist.extent(0)) {
_sendlist(mysend) = i;
_sendflag(i) = 1;
}
@ -713,7 +715,7 @@ void CommKokkos::exchange_device()
int sendpos = nlocal-1;
nlocal -= k_count.h_view();
for(int i = 0; i < k_count.h_view(); i++) {
for (int i = 0; i < k_count.h_view(); i++) {
if (k_exchange_sendlist.h_view(i)<nlocal) {
while (k_sendflag.h_view(sendpos)) sendpos--;
k_exchange_copylist.h_view(i) = sendpos;
@ -887,7 +889,7 @@ struct BuildBorderListFunctor {
if (my_store_pos+mysend < maxsendlist) {
mysend = my_store_pos;
for(int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()){
for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
if (x(i,dim) >= lo && x(i,dim) <= hi) {
sendlist(iswap,mysend++) = i;
}
@ -979,7 +981,7 @@ void CommKokkos::borders_device() {
k_sendlist.modify<DeviceType>();
if(k_total_send.h_view() >= maxsendlist[iswap]) {
if (k_total_send.h_view() >= maxsendlist[iswap]) {
grow_list(iswap,k_total_send.h_view());
k_total_send.h_view() = 0;
@ -1227,7 +1229,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
maxsend = static_cast<int> (BUFFACTOR * n);
int maxsend_border = (maxsend+BUFEXTRA+5)/atom->avec->size_border + 2;
if (flag) {
if(space == Device)
if (space == Device)
k_buf_send.modify<LMPDeviceType>();
else
k_buf_send.modify<LMPHostType>();
@ -1280,7 +1282,7 @@ void CommKokkos::grow_list(int /*iswap*/, int n)
memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
for(int i=0;i<maxswap;i++) {
for (int i=0;i<maxswap;i++) {
maxsendlist[i]=size; sendlist[i]=&k_sendlist.view<LMPHostType>()(i,0);
}
}
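Editorial note: the CommKokkos changes above add forward_pair_comm_classic and forward_fix_comm_classic switches and rewrite forward_comm_fix()/forward_comm_pair() so the host/classic fallback is taken when the pair or fix runs on the host, does not support device communication, or the corresponding classic switch is set; otherwise the Kokkos device path is used. A schematic of that decision (stand-in types, not the LAMMPS classes) follows.

// Sketch only: the dispatch decision in CommKokkos::forward_comm_pair() after the change.
#include <cstdio>

enum Space { Host, Device };
struct Pair { Space execution_space; };

void forward_comm_pair_sketch(const Pair &pair, bool forward_pair_comm_classic) {
  if (pair.execution_space == Host || forward_pair_comm_classic) {
    std::printf("sync sendlist on host, use classic CommBrick::forward_comm_pair()\n");
  } else {
    std::printf("sync sendlist on device, use forward_comm_pair_device<LMPDeviceType>()\n");
  }
}

int main() { forward_comm_pair_sketch({Device}, /*forward_pair_comm_classic=*/true); }

The fix variant adds the !fix->forward_comm_device test to the same first branch.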

View File

@ -25,6 +25,8 @@ class CommKokkos : public CommBrick {
bool exchange_comm_classic;
bool forward_comm_classic;
bool forward_pair_comm_classic;
bool forward_fix_comm_classic;
bool reverse_comm_classic;
bool exchange_comm_on_host;
bool forward_comm_on_host;

View File

@ -241,7 +241,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
int ncount = 0;
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum),
[&] (const int jj, int& count) {
Kokkos::single(Kokkos::PerThread(team), [&] (){
Kokkos::single(Kokkos::PerThread(team), [&] () {
int j = d_neighbors(i,jj);
j &= NEIGHMASK;
const F_FLOAT delx = x(j,0) - xtmp;
@ -341,17 +341,17 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
#define SWAP(view,i,j) do { \
tmp = view(i); view(i) = view(j); view(j) = tmp; \
} while(0)
} while (0)
#define ISWAP(view,i,j) do { \
itmp = view(i); view(i) = view(j); view(j) = itmp; \
} while(0)
} while (0)
#define SWAP3(view,i,j) do { \
tmp = view(i,0); view(i,0) = view(j,0); view(j,0) = tmp; \
tmp = view(i,1); view(i,1) = view(j,1); view(j,1) = tmp; \
tmp = view(i,2); view(i,2) = view(j,2); view(j,2) = tmp; \
} while(0)
} while (0)
/* ---------------------------------------------------------------------- */
@ -439,14 +439,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
const double r1 = d_rlist(ii,ineigh,1);
const double r2 = d_rlist(ii,ineigh,2);
const double rmag = sqrt(r0*r0 + r1*r1 + r2*r2);
if(rmag <= MY_EPSILON) {
if (rmag <= MY_EPSILON) {
return;
}
const double costheta = r2 / rmag;
SNAcomplex expphi = {r0,r1};
const double rxymag = sqrt(expphi.re*expphi.re+expphi.im*expphi.im);
if(rxymag <= MY_EPSILON) {
if (rxymag <= MY_EPSILON) {
expphi.re = 1.0;
expphi.im = 0.0;
} else {
@ -466,14 +466,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
const double polar_pf = polar_prefactor(l, 0, costheta);
Kokkos::atomic_add(&(d_qnm(ii,il,l).re), polar_pf);
SNAcomplex expphim = {expphi.re,expphi.im};
for(int m = 1; m <= +l; m++) {
for (int m = 1; m <= +l; m++) {
const double prefactor = polar_prefactor(l, m, costheta);
SNAcomplex ylm = {prefactor * expphim.re, prefactor * expphim.im};
//d_qnm(ii,il,m+l).re += ylm.re;
//d_qnm(ii,il,m+l).im += ylm.im;
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).re), ylm.re);
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).im), ylm.im);
if(m & 1) {
if (m & 1) {
//d_qnm(ii,il,-m+l).re -= ylm.re;
//d_qnm(ii,il,-m+l).im += ylm.im;
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), -ylm.re);
@ -508,7 +508,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
double facn = 1.0 / ncount;
for (int il = 0; il < nqlist; il++) {
int l = d_qlist[il];
for(int m = 0; m < 2*l+1; m++) {
for (int m = 0; m < 2*l+1; m++) {
d_qnm(ii,il,m).re *= facn;
d_qnm(ii,il,m).im *= facn;
}
@ -522,7 +522,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
int l = d_qlist[il];
double qnormfac = sqrt(MY_4PI/(2*l+1));
double qm_sum = 0.0;
for(int m = 0; m < 2*l+1; m++)
for (int m = 0; m < 2*l+1; m++)
qm_sum += d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im;
d_qnarray(i,jj++) = qnormfac * sqrt(qm_sum);
}
@ -534,8 +534,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
for (int il = 0; il < nqlist; il++) {
int l = d_qlist[il];
double wlsum = 0.0;
for(int m1 = 0; m1 < 2*l+1; m1++) {
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
for (int m1 = 0; m1 < 2*l+1; m1++) {
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
int m = m1 + m2 - l;
SNAcomplex qm1qm2;
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
@ -555,8 +555,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
for (int il = 0; il < nqlist; il++) {
int l = d_qlist[il];
double wlsum = 0.0;
for(int m1 = 0; m1 < 2*l+1; m1++) {
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
for (int m1 = 0; m1 < 2*l+1; m1++) {
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
const int m = m1 + m2 - l;
SNAcomplex qm1qm2;
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
@ -581,14 +581,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
const int il = iqlcomp;
const int l = qlcomp;
if (d_qnarray(i,il) < QEPSILON)
for(int m = 0; m < 2*l+1; m++) {
for (int m = 0; m < 2*l+1; m++) {
d_qnarray(i,jj++) = 0.0;
d_qnarray(i,jj++) = 0.0;
}
else {
const double qnormfac = sqrt(MY_4PI/(2*l+1));
const double qnfac = qnormfac/d_qnarray(i,il);
for(int m = 0; m < 2*l+1; m++) {
for (int m = 0; m < 2*l+1; m++) {
d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
}
@ -665,8 +665,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
idxcg_count = 0;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for(int m1 = 0; m1 < 2*l+1; m1++)
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
for (int m1 = 0; m1 < 2*l+1; m1++)
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
idxcg_count++;
}
idxcg_max = idxcg_count;
@ -676,9 +676,9 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
idxcg_count = 0;
for (int il = 0; il < nqlist; il++) {
int l = qlist[il];
for(int m1 = 0; m1 < 2*l+1; m1++) {
for (int m1 = 0; m1 < 2*l+1; m1++) {
aa2 = m1 - l;
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
bb2 = m2 - l;
m = aa2 + bb2 + l;
@ -727,7 +727,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::check_team_size_for(int inum, int
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
if(team_size*vector_length > team_size_max)
if (team_size*vector_length > team_size_max)
team_size = team_size_max/vector_length;
}
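Editorial note: calc_boop2() above normalizes the accumulated spherical-harmonic sums with qnormfac = sqrt(4*pi/(2l+1)) before taking the square root of the summed |q_lm|^2. A small, self-contained sketch of that normalization for a single l, with hypothetical accumulator values, is given below.

// Sketch only: Q_l = sqrt( 4*pi/(2l+1) * sum_m |q_lm|^2 ), as computed in calc_boop2().
#include <cmath>
#include <cstdio>
#include <vector>

struct Cplx { double re, im; };

double ql_norm(int l, const std::vector<Cplx> &qnm) {    // qnm holds the 2l+1 accumulators
  const double MY_4PI = 4.0*std::acos(-1.0);
  double qm_sum = 0.0;
  for (int m = 0; m < 2*l + 1; m++)
    qm_sum += qnm[m].re*qnm[m].re + qnm[m].im*qnm[m].im;
  return std::sqrt(MY_4PI/(2*l + 1)) * std::sqrt(qm_sum);
}

int main() {
  std::vector<Cplx> qnm(2*6 + 1, {0.1, 0.0});            // hypothetical l = 6 values
  std::printf("Q_6 = %g\n", ql_norm(6, qnm));
}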

View File

@ -34,7 +34,7 @@ namespace LAMMPS_NS {
t0 = t1 = t2 = t3 = t4 = t5 = 0.0;
}
KOKKOS_INLINE_FUNCTION
s_CTEMP& operator+=(const s_CTEMP &rhs){
s_CTEMP& operator+=(const s_CTEMP &rhs) {
t0 += rhs.t0;
t1 += rhs.t1;
t2 += rhs.t2;

View File

@ -235,7 +235,7 @@ struct DomainPBCFunctor {
x(_x.view<DeviceType>()), v(_v.view<DeviceType>()),
mask(_mask.view<DeviceType>()), image(_image.view<DeviceType>()),
deform_groupbit(_deform_groupbit),
xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic){
xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic) {
lo[0]=_lo[0]; lo[1]=_lo[1]; lo[2]=_lo[2];
hi[0]=_hi[0]; hi[1]=_hi[1]; hi[2]=_hi[2];
period[0]=_period[0]; period[1]=_period[1]; period[2]=_period[2];

View File

@ -127,7 +127,7 @@ void FFT3dKokkos<DeviceType>::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, in
in starting address of input data on this proc
out starting address of where output data for this proc
will be placed (can be same as in)
flag 1 for forward FFT, -1 for inverse FFT
flag 1 for forward FFT, -1 for backward FFT
plan plan returned by previous call to fft_3d_create_plan
------------------------------------------------------------------------- */
@ -215,22 +215,22 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
length = plan->length1;
#if defined(FFT_MKL)
if (flag == -1)
if (flag == 1)
DftiComputeForward(plan->handle_fast,d_data.data());
else
DftiComputeBackward(plan->handle_fast,d_data.data());
#elif defined(FFT_FFTW3)
if (flag == -1)
if (flag == 1)
FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
else
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
#else
typename FFT_AT::t_FFT_DATA_1d d_tmp =
typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
kiss_fft_functor<DeviceType> f;
if (flag == -1)
if (flag == 1)
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length);
else
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_backward,length);
@ -238,7 +238,6 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
d_data = d_tmp;
#endif
// 1st mid-remap to prepare for 2nd FFTs
// copy = loc for remap result
@ -260,20 +259,20 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
length = plan->length2;
#if defined(FFT_MKL)
if (flag == -1)
if (flag == 1)
DftiComputeForward(plan->handle_mid,d_data.data());
else
DftiComputeBackward(plan->handle_mid,d_data.data());
#elif defined(FFT_FFTW3)
if (flag == -1)
if (flag == 1)
FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
else
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
#else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == -1)
if (flag == 1)
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_forward,length);
else
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_backward,length);
@ -302,20 +301,20 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
length = plan->length3;
#if defined(FFT_MKL)
if (flag == -1)
if (flag == 1)
DftiComputeForward(plan->handle_slow,d_data.data());
else
DftiComputeBackward(plan->handle_slow,d_data.data());
#elif defined(FFT_FFTW3)
if (flag == -1)
if (flag == 1)
FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
else
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
if (flag == -1)
if (flag == 1)
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_forward,length);
else
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_backward,length);
@ -323,7 +322,6 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
d_data = d_tmp;
#endif
// post-remap to put data in output format if needed
// destination is always out
@ -338,7 +336,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
// scaling if required
if (flag == 1 && plan->scaled) {
if (flag == -1 && plan->scaled) {
FFT_SCALAR norm = plan->norm;
int num = plan->normnum;
@ -807,7 +805,7 @@ void FFT3dKokkos<DeviceType>::bifactor(int n, int *factor1, int *factor2)
Arguments:
in starting address of input data on this proc, all set to 0.0
nsize size of in
flag 1 for forward FFT, -1 for inverse FFT
flag 1 for forward FFT, -1 for backward FFT
plan plan returned by previous call to fft_3d_create_plan
------------------------------------------------------------------------- */
@ -861,9 +859,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
}
#elif defined(FFT_CUFFT)
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
#else
kiss_fft_functor<DeviceType> f;
typename FFT_AT::t_FFT_DATA_1d d_tmp =

View File

@ -72,6 +72,7 @@ struct fft_plan_3d_kokkos {
template<class DeviceType>
class FFT3dKokkos : protected Pointers {
public:
enum{FORWARD=1,BACKWARD=-1};
typedef DeviceType device_type;
typedef FFTArrayTypes<DeviceType> FFT_AT;
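Editorial note: the fft3d_kokkos changes above align the code with the documented flag convention (flag 1 = forward FFT, -1 = backward FFT, now also captured by the FORWARD/BACKWARD enum added just above), so the forward branches test flag == 1, the scaling test moves to the backward pass (flag == -1), and the cuFFT branch passes -flag, presumably to preserve that wrapper's existing sign mapping. A minimal illustration follows; do_forward(), do_backward() and cufft_exec_sketch() are hypothetical stand-ins, not the real MKL/FFTW/KISS or cufftExec calls.

// Sketch only: how the new flag convention selects the transform direction.
#include <cstdio>

enum { FORWARD = 1, BACKWARD = -1 };           // mirrors the enum in fft3d_kokkos.h

void do_forward()  { std::printf("forward 1d FFT set\n"); }
void do_backward() { std::printf("backward 1d FFT set\n"); }
void cufft_exec_sketch(int sign) { std::printf("cuFFT wrapper called with sign %d\n", sign); }

void fft_1d_set(int flag) {
  // In fft_3d_kokkos() these are compile-time alternatives (FFT_MKL / FFT_FFTW3 /
  // FFT_CUFFT / KISS FFT); they are shown side by side here only for illustration.
  if (flag == FORWARD) do_forward();
  else                 do_backward();
  cufft_exec_sketch(-flag);                    // the FFT_CUFFT branch passes -flag
}

int main() { fft_1d_set(FORWARD); }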

View File

@ -77,7 +77,7 @@ void FixEnforce2DKokkos<DeviceType>::post_force(int /*vflag*/)
if (atomKK->torque_flag) flag_mask |= 4;
copymode = 1;
switch( flag_mask ){
switch (flag_mask) {
case 0:{
FixEnforce2DKokkosPostForceFunctor<DeviceType,0,0,0> functor(this);
Kokkos::parallel_for(nlocal,functor);
@ -139,21 +139,21 @@ template <int omega_flag, int angmom_flag, int torque_flag>
KOKKOS_INLINE_FUNCTION
void FixEnforce2DKokkos<DeviceType>::post_force_item( int i ) const
{
if (mask[i] & groupbit){
if (mask[i] & groupbit) {
v(i,2) = 0.0;
f(i,2) = 0.0;
if(omega_flag){
if (omega_flag) {
omega(i,0) = 0.0;
omega(i,1) = 0.0;
}
if(angmom_flag){
if (angmom_flag) {
angmom(i,0) = 0.0;
angmom(i,1) = 0.0;
}
if(torque_flag){
if (torque_flag) {
torque(i,0) = 0.0;
torque(i,1) = 0.0;
}

View File

@ -193,7 +193,7 @@ KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXInit, const int &i) const {
double tmp;
if (mask[i] & groupbit) {
if(dpdTheta[i] <= 0.0)
if (dpdTheta[i] <= 0.0)
k_error_flag.template view<DeviceType>()() = 1;
energy_lookup(i,dpdTheta[i],tmp);
uCond[i] = 0.0;
@ -233,7 +233,7 @@ void FixEOStableRXKokkos<DeviceType>::post_integrate()
template<class DeviceType>
KOKKOS_INLINE_FUNCTION
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXTemperatureLookup2, const int &i) const {
if (mask[i] & groupbit){
if (mask[i] & groupbit) {
temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]);
if (dpdTheta[i] <= 0.0)
k_error_flag.template view<DeviceType>()() = 1;
@ -303,7 +303,7 @@ void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, doubl
nPG = 0;
if (rx_flag) {
for (int ispecies = 0; ispecies < nspecies; ispecies++ ) {
for (int ispecies = 0; ispecies < nspecies; ispecies++) {
nTotal += dvector(ispecies,id);
if (fabs(d_moleculeCorrCoeff[ispecies]) > tolerance) {
nPG++;
@ -314,7 +314,7 @@ void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, doubl
nTotal = 1.0;
}
for(int ispecies=0;ispecies<nspecies;ispecies++){
for (int ispecies=0;ispecies<nspecies;ispecies++) {
//Table *tb = &tables[ispecies];
//thetai = MAX(thetai,tb->lo);
thetai = MAX(thetai,d_table_const.lo(ispecies));
@ -364,7 +364,7 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
// Store the current thetai in t1
t1 = MAX(thetai,lo);
t1 = MIN(t1,hi);
if(t1==hi) delta = -delta;
if (t1==hi) delta = -delta;
// Compute u1 at thetai
energy_lookup(id,t1,u1);
@ -382,9 +382,9 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
f2 = u2 - ui;
// Apply the Secant Method
for(it=0; it<maxit; it++){
if(fabs(f2-f1) < MY_EPSILON){
if(std::isnan(f1) || std::isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
for (it=0; it<maxit; it++) {
if (fabs(f2-f1) < MY_EPSILON) {
if (std::isnan(f1) || std::isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
temp = t1;
temp = MAX(temp,lo);
temp = MIN(temp,hi);
@ -392,15 +392,15 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
break;
}
temp = t2 - f2*(t2-t1)/(f2-f1);
if(fabs(temp-t2) < tolerance) break;
if (fabs(temp-t2) < tolerance) break;
f1 = f2;
t1 = t2;
t2 = temp;
energy_lookup(id,t2,u2);
f2 = u2 - ui;
}
if(it==maxit){
if(std::isnan(f1) || std::isnan(f2) || std::isnan(ui) || std::isnan(thetai) || std::isnan(t1) || std::isnan(t2))
if (it==maxit) {
if (std::isnan(f1) || std::isnan(f2) || std::isnan(ui) || std::isnan(thetai) || std::isnan(t1) || std::isnan(t2))
k_error_flag.template view<DeviceType>()() = 2;
else
k_error_flag.template view<DeviceType>()() = 3;
@ -440,7 +440,7 @@ void FixEOStableRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, doub
m = 0;
last = first + n ;
for (ii = first; ii < last; ii++){
for (ii = first; ii < last; ii++) {
h_uChem[ii] = buf[m++];
h_uCG[ii] = buf[m++];
h_uCGnew[ii] = buf[m++];
@ -518,24 +518,24 @@ void FixEOStableRXKokkos<DeviceType>::create_kokkos_tables()
memoryKK->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi");
memoryKK->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");
if(tabstyle == LINEAR) {
if (tabstyle == LINEAR) {
memoryKK->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r");
memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
memoryKK->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
}
for(int i=0; i < ntables; i++) {
for (int i=0; i < ntables; i++) {
Table* tb = &tables[i];
h_table->lo[i] = tb->lo;
h_table->hi[i] = tb->hi;
h_table->invdelta[i] = tb->invdelta;
for(int j = 0; j<h_table->r.extent(1); j++)
for (int j = 0; j<h_table->r.extent(1); j++)
h_table->r(i,j) = tb->r[j];
for(int j = 0; j<h_table->e.extent(1); j++)
for (int j = 0; j<h_table->e.extent(1); j++)
h_table->e(i,j) = tb->e[j];
for(int j = 0; j<h_table->de.extent(1); j++)
for (int j = 0; j<h_table->de.extent(1); j++)
h_table->de(i,j) = tb->de[j];
}
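Editorial note: the temperature_lookup() hunks above implement a secant-method solve for the temperature whose tabulated internal energy matches the target ui, with a clamped starting point, a sign-flipped initial delta at the upper bound, and NaN/iteration-count error flags. A self-contained sketch of that iteration is below; energy(), the bounds and the tolerances are hypothetical stand-ins for energy_lookup() and the table limits.

// Sketch only: secant iteration mirroring temperature_lookup().
#include <cmath>
#include <cstdio>

double energy(double theta) { return 2.5*theta; }        // stand-in for energy_lookup()

double secant_theta(double ui, double theta0) {
  const double lo = 1.0e-3, hi = 1.0e3, tolerance = 1.0e-10, eps = 1.0e-15;
  const int maxit = 100;
  double delta = 0.001;
  double t1 = std::fmin(std::fmax(theta0, lo), hi);      // clamp the starting temperature
  if (t1 == hi) delta = -delta;                          // step inward from the upper bound
  double u1 = energy(t1);
  double t2 = t1 + delta, u2 = energy(t2);
  double f1 = u1 - ui, f2 = u2 - ui, temp = t1;
  for (int it = 0; it < maxit; it++) {
    if (std::fabs(f2 - f1) < eps)                        // degenerate slope: return clamped t1
      return std::fmin(std::fmax(t1, lo), hi);
    temp = t2 - f2*(t2 - t1)/(f2 - f1);                  // secant update
    if (std::fabs(temp - t2) < tolerance) break;
    f1 = f2; t1 = t2; t2 = temp;
    u2 = energy(t2); f2 = u2 - ui;
  }
  return temp;
}

int main() { std::printf("theta = %g\n", secant_theta(5.0, 1.0)); }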

View File

@ -61,7 +61,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;
k_ratio.template modify<LMPHostType>();
if(gjfflag){
if (gjfflag) {
grow_arrays(atomKK->nmax);
atom->add_callback(Atom::GROW);
// initialize franprev to zero
@ -76,7 +76,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
k_franprev.template modify<LMPHostType>();
k_lv.template modify<LMPHostType>();
}
if(zeroflag){
if (zeroflag) {
k_fsumall = tdual_double_1d_3n("langevin:fsumall");
h_fsumall = k_fsumall.template view<LMPHostType>();
d_fsumall = k_fsumall.template view<DeviceType>();
@ -97,8 +97,8 @@ FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()
memoryKK->destroy_kokkos(k_gfactor2,gfactor2);
memoryKK->destroy_kokkos(k_ratio,ratio);
memoryKK->destroy_kokkos(k_flangevin,flangevin);
if(gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
if(gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
if (gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
if (gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
memoryKK->destroy_kokkos(k_tforce,tforce);
}
@ -108,13 +108,13 @@ template<class DeviceType>
void FixLangevinKokkos<DeviceType>::init()
{
FixLangevin::init();
if(oflag)
if (oflag)
error->all(FLERR,"Fix langevin omega is not yet implemented with kokkos");
if(ascale)
if (ascale)
error->all(FLERR,"Fix langevin angmom is not yet implemented with kokkos");
if(gjfflag && tbiasflag)
if (gjfflag && tbiasflag)
error->all(FLERR,"Fix langevin gjf + tbias is not yet implemented with kokkos");
if(gjfflag && tbiasflag)
if (gjfflag && tbiasflag)
error->warning(FLERR,"Fix langevin gjf + kokkos is not implemented with random gaussians");
// prefactors are modified in the init
@ -182,8 +182,8 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
k_gfactor1.template sync<DeviceType>();
k_gfactor2.template sync<DeviceType>();
k_ratio.template sync<DeviceType>();
if(gjfflag) k_franprev.template sync<DeviceType>();
if(gjfflag) k_lv.template sync<DeviceType>();
if (gjfflag) k_franprev.template sync<DeviceType>();
if (gjfflag) k_lv.template sync<DeviceType>();
boltz = force->boltz;
dt = update->dt;
@ -217,7 +217,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
}
// account for bias velocity
if(tbiasflag == BIAS){
if (tbiasflag == BIAS) {
atomKK->sync(temperature->execution_space,temperature->datamask_read);
temperature->compute_scalar();
temperature->remove_bias_all(); // modifies velocities
@ -516,7 +516,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
}
if(tbiasflag == BIAS){
if (tbiasflag == BIAS) {
atomKK->sync(temperature->execution_space,temperature->datamask_read);
temperature->restore_bias_all(); // modifies velocities
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
@ -566,8 +566,8 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
if (mask[i] & groupbit) {
rand_type rand_gen = rand_pool.get_state();
if(Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
if(Tp_RMASS){
if (Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
if (Tp_RMASS) {
gamma1 = -rmass[i] / t_period / ftm2v;
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
gamma1 *= 1.0/d_ratio[type[i]];
@ -581,7 +581,7 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
if(Tp_BIAS){
if (Tp_BIAS) {
fdrag[0] = gamma1*v(i,0);
fdrag[1] = gamma1*v(i,1);
fdrag[2] = gamma1*v(i,2);
@ -625,7 +625,7 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
f(i,2) += fdrag[2] + fran[2];
if (Tp_TALLY) {
if (Tp_GJF){
if (Tp_GJF) {
fdrag[0] = gamma1*d_lv(i,0)/gjfsib/gjfsib;
fdrag[1] = gamma1*d_lv(i,1)/gjfsib/gjfsib;
fdrag[2] = gamma1*d_lv(i,2)/gjfsib/gjfsib;
@ -794,7 +794,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
if (gjfflag){
if (gjfflag) {
if (rmass.data()) {
FixLangevinKokkosEndOfStepFunctor<DeviceType,1> functor(this);
Kokkos::parallel_for(nlocal,functor);
@ -817,7 +817,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_item(int i) const {
tmp[0] = v(i,0);
tmp[1] = v(i,1);
tmp[2] = v(i,2);
if (!osflag){
if (!osflag) {
v(i,0) = d_lv(i,0);
v(i,1) = d_lv(i,1);
v(i,2) = d_lv(i,2);
@ -848,7 +848,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_rmass_item(int i) const
tmp[0] = v(i,0);
tmp[1] = v(i,1);
tmp[2] = v(i,2);
if (!osflag){
if (!osflag) {
v(i,0) = d_lv(i,0);
v(i,1) = d_lv(i,1);
v(i,2) = d_lv(i,2);
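Editorial note: post_force_item() above assembles the per-atom Langevin force as a friction term plus a uniform random kick, f += gamma1*v + gamma2*(uniform - 0.5), with gamma1/gamma2 built from the (r)mass, damping period and per-type ratio. A stripped-down sketch of just that force assembly is below; the prefactor values and rand01() are hypothetical, and the bias and GJF variants handled in the real functor are omitted.

// Sketch only: friction + uniform random kick, as in the post_force_item() hunks.
#include <cstdlib>
#include <cstdio>

double rand01() { return std::rand()/(RAND_MAX + 1.0); }  // stand-in for rand_gen.drand()

void langevin_force(const double v[3], double gamma1, double gamma2, double f[3]) {
  for (int k = 0; k < 3; k++) {
    const double fdrag = gamma1*v[k];                      // friction (drag) term
    const double fran  = gamma2*(rand01() - 0.5);          // random force, uniform variant
    f[k] += fdrag + fran;
  }
}

int main() {
  double v[3] = {1.0, 0.0, -0.5}, f[3] = {0.0, 0.0, 0.0};
  langevin_force(v, -0.1, 0.3, f);                         // hypothetical gamma1, gamma2
  std::printf("f = %g %g %g\n", f[0], f[1], f[2]);
}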

View File

@ -36,7 +36,7 @@ namespace LAMMPS_NS {
fx = fy = fz = 0.0;
}
KOKKOS_INLINE_FUNCTION
s_FSUM& operator+=(const s_FSUM &rhs){
s_FSUM& operator+=(const s_FSUM &rhs) {
fx += rhs.fx;
fy += rhs.fy;
fz += rhs.fz;
@ -175,7 +175,7 @@ namespace LAMMPS_NS {
FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
c(*c_ptr) {}
~FixLangevinKokkosPostForceFunctor(){c.cleanup_copy();}
~FixLangevinKokkosPostForceFunctor() {c.cleanup_copy();}
KOKKOS_INLINE_FUNCTION
void operator()(const int i) const {

View File

@ -43,11 +43,7 @@ FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg,
grow_arrays(atom->nmax);
d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
#else
h_resize = d_resize;
#endif
h_resize() = 1;
}
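Editorial note: the FixNeighHistoryKokkos constructor above drops the KOKKOS_USE_CUDA_UVM special case because Kokkos::create_mirror_view() already returns the device view itself when its memory is host-accessible (as with CUDA UVM) and only allocates a separate host copy otherwise. A minimal illustration with a hypothetical rank-0 view follows.

// Sketch only: create_mirror_view() aliases or allocates as appropriate.
#include <Kokkos_Core.hpp>

int main(int argc, char **argv) {
  Kokkos::initialize(argc, argv);
  {
    Kokkos::View<int> d_resize("resize");                   // device-resident scalar
    auto h_resize = Kokkos::create_mirror_view(d_resize);   // alias or host copy
    h_resize() = 1;
    Kokkos::deep_copy(d_resize, h_resize);                  // no-op when the two alias
  }
  Kokkos::finalize();
  return 0;
}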

View File

@ -51,8 +51,8 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) :
atomKK = (AtomKokkos *) atom;
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK;
datamask_modify = Q_MASK | X_MASK;
datamask_read = X_MASK | V_MASK | F_MASK | Q_MASK | MASK_MASK | TYPE_MASK | TAG_MASK;
datamask_modify = X_MASK;
nmax = m_cap = 0;
allocated_flag = 0;
@ -81,8 +81,7 @@ FixQEqReaxKokkos<DeviceType>::~FixQEqReaxKokkos()
template<class DeviceType>
void FixQEqReaxKokkos<DeviceType>::init()
{
atomKK->k_q.modify<LMPHostType>();
atomKK->k_q.sync<DeviceType>();
atomKK->sync(execution_space,Q_MASK);
FixQEqReax::init();
@ -139,8 +138,8 @@ void FixQEqReaxKokkos<DeviceType>::init_shielding_k()
k_shield = DAT::tdual_ffloat_2d("qeq/kk:shield",ntypes+1,ntypes+1);
d_shield = k_shield.template view<DeviceType>();
for( i = 1; i <= ntypes; ++i )
for( j = 1; j <= ntypes; ++j )
for (i = 1; i <= ntypes; ++i)
for (j = 1; j <= ntypes; ++j)
k_shield.h_view(i,j) = pow( gamma[i] * gamma[j], -1.5 );
k_shield.template modify<LMPHostType>();
@ -263,15 +262,15 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int /*vflag*/)
// comm->forward_comm_fix(this); //Dist_vector( s );
pack_flag = 2;
k_s.template sync<DeviceType>();
comm->forward_comm_fix(this);
k_s.template modify<DeviceType>();
comm->forward_comm_fix(this);
k_s.template sync<DeviceType>();
// comm->forward_comm_fix(this); //Dist_vector( t );
pack_flag = 3;
k_t.template sync<DeviceType>();
comm->forward_comm_fix(this);
k_t.template modify<DeviceType>();
comm->forward_comm_fix(this);
k_t.template sync<DeviceType>();
need_dup = lmp->kokkos->need_dup<DeviceType>();
@ -752,9 +751,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
if (neighflag != FULL) {
k_o.template modify<DeviceType>();
k_o.template sync<LMPHostType>();
comm->reverse_comm_fix(this); //Coll_vector( q );
k_o.template modify<LMPHostType>();
k_o.template sync<DeviceType>();
}
@ -781,9 +778,9 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
// comm->forward_comm_fix(this); //Dist_vector( d );
pack_flag = 1;
k_d.template sync<DeviceType>();
comm->forward_comm_fix(this);
k_d.template modify<DeviceType>();
comm->forward_comm_fix(this);
k_d.template sync<DeviceType>();
// sparse_matvec( &H, d, q );
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
@ -807,9 +804,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
if (neighflag != FULL) {
k_o.template modify<DeviceType>();
k_o.template sync<LMPHostType>();
comm->reverse_comm_fix(this); //Coll_vector( q );
k_o.template modify<LMPHostType>();
k_o.template sync<DeviceType>();
}
@ -888,9 +883,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
if (neighflag != FULL) {
k_o.template modify<DeviceType>();
k_o.template sync<LMPHostType>();
comm->reverse_comm_fix(this); //Coll_vector( q );
k_o.template modify<LMPHostType>();
k_o.template sync<DeviceType>();
}
@ -917,9 +910,9 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
// comm->forward_comm_fix(this); //Dist_vector( d );
pack_flag = 1;
k_d.template sync<DeviceType>();
comm->forward_comm_fix(this);
k_d.template modify<DeviceType>();
comm->forward_comm_fix(this);
k_d.template sync<DeviceType>();
// sparse_matvec( &H, d, q );
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
@ -943,9 +936,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
if (neighflag != FULL) {
k_o.template modify<DeviceType>();
k_o.template sync<LMPHostType>();
comm->reverse_comm_fix(this); //Coll_vector( q );
k_o.template modify<LMPHostType>();
k_o.template sync<DeviceType>();
}
@ -1017,13 +1008,11 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
// q[i] = s[i] - u * t[i];
FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this);
Kokkos::parallel_for(inum,calculateQ_functor);
atomKK->modified(execution_space,Q_MASK);
pack_flag = 4;
//comm->forward_comm_fix( this ); //Dist_vector( atom->q );
atomKK->k_q.sync<DeviceType>();
comm->forward_comm_fix(this);
atomKK->k_q.modify<DeviceType>();
}
/* ---------------------------------------------------------------------- */
@ -1053,7 +1042,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse13_item(int ii) const
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
F_FLOAT tmp = 0.0;
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
const int j = d_jlist(jj);
tmp += d_val(jj) * d_s[j];
a_o[j] += d_val(jj) * d_s[i];
@ -1106,7 +1095,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse23_item(int ii) const
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
F_FLOAT tmp = 0.0;
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
const int j = d_jlist(jj);
tmp += d_val(jj) * d_d[j];
a_o[j] += d_val(jj) * d_d[i];
@ -1166,7 +1155,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse33_item(int ii) const
const int i = d_ilist[ii];
if (mask[i] & groupbit) {
F_FLOAT tmp = 0.0;
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
const int j = d_jlist(jj);
tmp += d_val(jj) * d_t[j];
a_o[j] += d_val(jj) * d_t[i];
@ -1371,11 +1360,11 @@ void FixQEqReaxKokkos<DeviceType>::operator()(TagFixQEqReaxPackForwardComm, cons
if (pack_flag == 1)
d_buf[i] = d_d[j];
else if( pack_flag == 2 )
else if (pack_flag == 2)
d_buf[i] = d_s[j];
else if( pack_flag == 3 )
else if (pack_flag == 3)
d_buf[i] = d_t[j];
else if( pack_flag == 4 )
else if (pack_flag == 4)
d_buf[i] = q[j];
}
@ -1387,6 +1376,9 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm_fix_kokkos(int n, int fir
first = first_in;
d_buf = buf.view<DeviceType>();
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixQEqReaxUnpackForwardComm>(0,n),*this);
if (pack_flag == 4)
atomKK->modified(execution_space,Q_MASK); // needed for auto_sync
}
template<class DeviceType>
@ -1394,11 +1386,11 @@ KOKKOS_INLINE_FUNCTION
void FixQEqReaxKokkos<DeviceType>::operator()(TagFixQEqReaxUnpackForwardComm, const int &i) const {
if (pack_flag == 1)
d_d[i + first] = d_buf[i];
else if( pack_flag == 2)
else if (pack_flag == 2)
d_s[i + first] = d_buf[i];
else if( pack_flag == 3)
else if (pack_flag == 3)
d_t[i + first] = d_buf[i];
else if( pack_flag == 4)
else if (pack_flag == 4)
q[i + first] = d_buf[i];
}
@ -1411,14 +1403,19 @@ int FixQEqReaxKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *bu
{
int m;
if (pack_flag == 1)
for(m = 0; m < n; m++) buf[m] = h_d[list[m]];
else if( pack_flag == 2 )
for(m = 0; m < n; m++) buf[m] = h_s[list[m]];
else if( pack_flag == 3 )
for(m = 0; m < n; m++) buf[m] = h_t[list[m]];
else if( pack_flag == 4 )
for(m = 0; m < n; m++) buf[m] = atom->q[list[m]];
if (pack_flag == 1) {
k_d.sync_host();
for (m = 0; m < n; m++) buf[m] = h_d[list[m]];
} else if (pack_flag == 2) {
k_s.sync_host();
for (m = 0; m < n; m++) buf[m] = h_s[list[m]];
} else if (pack_flag == 3) {
k_t.sync_host();
for (m = 0; m < n; m++) buf[m] = h_t[list[m]];
} else if (pack_flag == 4) {
atomKK->sync(Host,Q_MASK);
for (m = 0; m < n; m++) buf[m] = atom->q[list[m]];
}
return n;
}
@ -1430,14 +1427,23 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double
{
int i, m;
if (pack_flag == 1)
for(m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m];
else if( pack_flag == 2)
for(m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m];
else if( pack_flag == 3)
for(m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m];
else if( pack_flag == 4)
for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
if (pack_flag == 1) {
k_d.sync_host();
for (m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m];
k_d.modify_host();
} else if (pack_flag == 2) {
k_s.sync_host();
for (m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m];
k_s.modify_host();
} else if (pack_flag == 3) {
k_t.sync_host();
for (m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m];
k_t.modify_host();
} else if (pack_flag == 4) {
atomKK->sync(Host,Q_MASK);
for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
atomKK->modified(Host,Q_MASK);
}
}
/* ---------------------------------------------------------------------- */
@ -1445,8 +1451,10 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double
template<class DeviceType>
int FixQEqReaxKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
{
k_o.sync_host();
int i, m;
for(m = 0, i = first; m < n; m++, i++) {
for (m = 0, i = first; m < n; m++, i++) {
buf[m] = h_o[i];
}
return n;
@ -1457,9 +1465,13 @@ int FixQEqReaxKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *bu
template<class DeviceType>
void FixQEqReaxKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
{
k_o.sync_host();
for(int m = 0; m < n; m++) {
h_o[list[m]] += buf[m];
}
k_o.modify_host();
}
/* ---------------------------------------------------------------------- */
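For reference, the convention these host fallbacks implement is simple: forward communication gathers the values of the owned atoms named in 'list' and writes them into the contiguous ghost range starting at 'first' on the receiving side, while reverse communication does the transpose and accumulates ghost contributions back into the listed owners. A small standalone sketch with plain arrays and illustrative names (the real buffers come from the Comm class):

// Sketch of the forward/reverse comm pack/unpack convention used above.
#include <cstdio>

static int pack_forward(int n, const int *list, double *buf, const double *q) {
  for (int m = 0; m < n; m++) buf[m] = q[list[m]];            // gather owned atoms
  return n;
}

static void unpack_forward(int n, int first, const double *buf, double *q) {
  for (int m = 0, i = first; m < n; m++, i++) q[i] = buf[m];  // fill ghost slots
}

static void unpack_reverse(int n, const int *list, const double *buf, double *o) {
  for (int m = 0; m < n; m++) o[list[m]] += buf[m];           // accumulate into owners
}

int main() {
  double q[5] = {1.0, 2.0, 3.0, 0.0, 0.0};  // 3 owned atoms + 2 ghost slots
  int list[2] = {0, 2};                     // owned atoms that get communicated
  double buf[2];
  unpack_forward(pack_forward(2, list, buf, q), 3, buf, q);   // ghosts start at 3
  std::printf("ghost q: %g %g\n", q[3], q[4]);

  double o[5] = {0.0, 0.0, 0.0, 0.5, 0.25}; // ghost partial sums to send back
  unpack_reverse(2, list, o + 3, o);        // add them into atoms 0 and 2
  std::printf("owned o: %g %g %g\n", o[0], o[1], o[2]);
  return 0;
}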

View File

@ -147,9 +147,9 @@ class FixQEqReaxKokkos : public FixQEqReax, public KokkosBase {
struct params_qeq{
KOKKOS_INLINE_FUNCTION
params_qeq(){chi=0;eta=0;gamma=0;};
params_qeq() {chi=0;eta=0;gamma=0;};
KOKKOS_INLINE_FUNCTION
params_qeq(int /*i*/){chi=0;eta=0;gamma=0;};
params_qeq(int /*i*/) {chi=0;eta=0;gamma=0;};
F_FLOAT chi, eta, gamma;
};

View File

@ -130,7 +130,7 @@ void FixRxKokkos<DeviceType>::init()
bool eos_flag = false;
for (int i = 0; i < modify->nfix; i++)
if (utils::strmatch(modify->fix[i]->style,"^eos/table/rx")) eos_flag = true;
if(!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified");
if (!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified");
if (update_kinetics_data)
create_kinetics_data();
@ -322,7 +322,7 @@ void FixRxKokkos<DeviceType>::k_rkf45_step (const int neq, const double h, Vecto
// 1)
k_rhs (0.0, y, f1, userData);
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
f1[k] *= h;
ytmp[k] = y[k] + c21 * f1[k];
}
@ -330,7 +330,7 @@ void FixRxKokkos<DeviceType>::k_rkf45_step (const int neq, const double h, Vecto
// 2)
k_rhs(0.0, ytmp, f2, userData);
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
f2[k] *= h;
ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k];
}
@ -417,7 +417,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
// compute ydot at t=t0
k_rhs (t, y, ydot, userData);
while(1)
while (1)
{
// Estimate y'' with finite-difference ...
@ -429,7 +429,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
// Compute WRMS norm of y''
double yddnrm = 0.0;
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
double ydd = (ydot1[k] - ydot[k]) / hg;
double wterr = ydd / (relTol * fabs( y[k] ) + absTol);
yddnrm += wterr * wterr;
@ -441,7 +441,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
//std::cout << "ydot " << ydot[neq-1] << std::endl;
// should we accept this?
if (hnew_is_ok || iter == max_iters){
if (hnew_is_ok || iter == max_iters) {
hnew = hg;
//if (iter == max_iters)
// fprintf(stderr, "ERROR_HIN_MAX_ITERS\n");
@ -455,11 +455,11 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
double hrat = hnew / hg;
// Accept this value ... the bias factor should bring it within range.
if ( (hrat > 0.5) && (hrat < 2.0) )
if ((hrat > 0.5) && (hrat < 2.0))
hnew_is_ok = true;
// If y'' is still bad after a few iterations, just accept h and give up.
if ( (iter > 1) && hrat > 2.0 ) {
if ((iter > 1) && hrat > 2.0) {
hnew = hg;
hnew_is_ok = true;
}
@ -510,7 +510,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
double t = 0.0;
if (h < h_min){
if (h < h_min) {
//fprintf(stderr,"hin not implemented yet\n");
//exit(-1);
nfe = k_rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, userData);
@ -530,7 +530,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
// Estimate the solution error.
// ... weighted 2-norm of the error.
double err2 = 0.0;
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol);
err2 += wterr * wterr;
}
@ -538,7 +538,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
double err = fmax( uround, sqrt( err2 / double(nspecies) ));
// Accept the solution?
if (err <= 1.0 || h <= h_min){
if (err <= 1.0 || h <= h_min) {
t += h;
nst++;
@ -571,7 +571,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
nit++;
nfe += 6;
if (maxIters && nit > maxIters){
if (maxIters && nit > maxIters) {
//fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
counter.nFails ++;
break;
@ -643,7 +643,7 @@ void FixRxKokkos<DeviceType>::rkf45_step (const int neq, const double h, double
// 1)
rhs (0.0, y, f1, v_param);
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
f1[k] *= h;
ytmp[k] = y[k] + c21 * f1[k];
}
@ -651,7 +651,7 @@ void FixRxKokkos<DeviceType>::rkf45_step (const int neq, const double h, double
// 2)
rhs(0.0, ytmp, f2, v_param);
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
f2[k] *= h;
ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k];
}
@ -736,7 +736,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
// compute ydot at t=t0
rhs (t, y, ydot, v_params);
while(1)
while (1)
{
// Estimate y'' with finite-difference ...
@ -748,7 +748,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
// Compute WRMS norm of y''
double yddnrm = 0.0;
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
double ydd = (ydot1[k] - ydot[k]) / hg;
double wterr = ydd / (relTol * fabs( y[k] ) + absTol);
yddnrm += wterr * wterr;
@ -760,7 +760,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
//std::cout << "ydot " << ydot[neq-1] << std::endl;
// should we accept this?
if (hnew_is_ok || iter == max_iters){
if (hnew_is_ok || iter == max_iters) {
hnew = hg;
if (iter == max_iters)
fprintf(stderr, "ERROR_HIN_MAX_ITERS\n");
@ -774,11 +774,11 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
double hrat = hnew / hg;
// Accept this value ... the bias factor should bring it within range.
if ( (hrat > 0.5) && (hrat < 2.0) )
if ((hrat > 0.5) && (hrat < 2.0))
hnew_is_ok = true;
// If y'' is still bad after a few iterations, just accept h and give up.
if ( (iter > 1) && hrat > 2.0 ) {
if ((iter > 1) && hrat > 2.0) {
hnew = hg;
hnew_is_ok = true;
}
@ -827,7 +827,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
double t = 0.0;
if (h < h_min){
if (h < h_min) {
//fprintf(stderr,"hin not implemented yet\n");
//exit(-1);
nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, v_param);
@ -836,7 +836,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
//printf("t= %e t_stop= %e h= %e\n", t, t_stop, h);
// Integrate until we reach the end time.
while (fabs(t - t_stop) > tround){
while (fabs(t - t_stop) > tround) {
double *yout = rwork;
double *eout = yout + neq;
@ -846,7 +846,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
// Estimate the solution error.
// ... weighted 2-norm of the error.
double err2 = 0.0;
for (int k = 0; k < neq; k++){
for (int k = 0; k < neq; k++) {
const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol);
err2 += wterr * wterr;
}
@ -854,7 +854,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
double err = fmax( uround, sqrt( err2 / double(nspecies) ));
// Accept the solution?
if (err <= 1.0 || h <= h_min){
if (err <= 1.0 || h <= h_min) {
t += h;
nst++;
@ -887,7 +887,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
nit++;
nfe += 6;
if (maxIters && nit > maxIters){
if (maxIters && nit > maxIters) {
//fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
counter.nFails ++;
break;
@ -928,14 +928,14 @@ int FixRxKokkos<DeviceType>::rhs_dense(double /*t*/, const double *y, double *dy
//const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
//const int nspecies = atom->nspecies_dpd;
for(int ispecies=0; ispecies<nspecies; ispecies++)
for (int ispecies=0; ispecies<nspecies; ispecies++)
dydt[ispecies] = 0.0;
// Construct the reaction rate laws
for(int jrxn=0; jrxn<nreactions; jrxn++){
for (int jrxn=0; jrxn<nreactions; jrxn++) {
double rxnRateLawForward = kFor[jrxn];
for(int ispecies=0; ispecies<nspecies; ispecies++){
for (int ispecies=0; ispecies<nspecies; ispecies++) {
const double concentration = y[ispecies]/VDPD;
rxnRateLawForward *= pow( concentration, d_kineticsData.stoichReactants(jrxn,ispecies) );
//rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
@ -944,8 +944,8 @@ int FixRxKokkos<DeviceType>::rhs_dense(double /*t*/, const double *y, double *dy
}
// Construct the reaction rates for each species
for(int ispecies=0; ispecies<nspecies; ispecies++)
for(int jrxn=0; jrxn<nreactions; jrxn++)
for (int ispecies=0; ispecies<nspecies; ispecies++)
for (int jrxn=0; jrxn<nreactions; jrxn++)
{
dydt[ispecies] += d_kineticsData.stoich(jrxn,ispecies) *VDPD*rxnRateLaw[jrxn];
//dydt[ispecies] += stoich[jrxn][ispecies]*VDPD*rxnRateLaw[jrxn];
@ -982,9 +982,9 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
for (int i = 0; i < nreactions; ++i)
{
double rxnRateLawForward;
if (isIntegral(i)){
if (isIntegral(i)) {
rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) );
for (int kk = 1; kk < maxReactants; ++kk){
for (int kk = 1; kk < maxReactants; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -992,7 +992,7 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
}
} else {
rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) );
for (int kk = 1; kk < maxReactants; ++kk){
for (int kk = 1; kk < maxReactants; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -1008,10 +1008,10 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
for (int k = 0; k < nspecies; ++k)
dydt[k] = 0.0;
for (int i = 0; i < nreactions; ++i){
for (int i = 0; i < nreactions; ++i) {
// Reactants ...
dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i];
for (int kk = 1; kk < maxReactants; ++kk){
for (int kk = 1; kk < maxReactants; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -1020,7 +1020,7 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
// Products ...
dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i];
for (int kk = maxReactants+1; kk < maxSpecies; ++kk){
for (int kk = maxReactants+1; kk < maxSpecies; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -1074,14 +1074,14 @@ int FixRxKokkos<DeviceType>::k_rhs_dense(double /*t*/, const VectorType& y, Vect
//const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
//const int nspecies = atom->nspecies_dpd;
for(int ispecies=0; ispecies<nspecies; ispecies++)
for (int ispecies=0; ispecies<nspecies; ispecies++)
dydt[ispecies] = 0.0;
// Construct the reaction rate laws
for(int jrxn=0; jrxn<nreactions; jrxn++){
for (int jrxn=0; jrxn<nreactions; jrxn++) {
double rxnRateLawForward = kFor[jrxn];
for(int ispecies=0; ispecies<nspecies; ispecies++){
for (int ispecies=0; ispecies<nspecies; ispecies++) {
const double concentration = y[ispecies]/VDPD;
rxnRateLawForward *= pow( concentration, d_kineticsData.stoichReactants(jrxn,ispecies) );
//rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
@ -1090,8 +1090,8 @@ int FixRxKokkos<DeviceType>::k_rhs_dense(double /*t*/, const VectorType& y, Vect
}
// Construct the reaction rates for each species
for(int ispecies=0; ispecies<nspecies; ispecies++)
for(int jrxn=0; jrxn<nreactions; jrxn++)
for (int ispecies=0; ispecies<nspecies; ispecies++)
for (int jrxn=0; jrxn<nreactions; jrxn++)
{
dydt[ispecies] += d_kineticsData.stoich(jrxn,ispecies) *VDPD*rxnRateLaw[jrxn];
//dydt[ispecies] += stoich[jrxn][ispecies]*VDPD*rxnRateLaw[jrxn];
@ -1129,9 +1129,9 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
for (int i = 0; i < nreactions; ++i)
{
double rxnRateLawForward;
if (isIntegral(i)){
if (isIntegral(i)) {
rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) );
for (int kk = 1; kk < maxReactants; ++kk){
for (int kk = 1; kk < maxReactants; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -1139,7 +1139,7 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
}
} else {
rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) );
for (int kk = 1; kk < maxReactants; ++kk){
for (int kk = 1; kk < maxReactants; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -1155,10 +1155,10 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
for (int k = 0; k < nspecies; ++k)
dydt[k] = 0.0;
for (int i = 0; i < nreactions; ++i){
for (int i = 0; i < nreactions; ++i) {
// Reactants ...
dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i];
for (int kk = 1; kk < maxReactants; ++kk){
for (int kk = 1; kk < maxReactants; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -1167,7 +1167,7 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
// Products ...
dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i];
for (int kk = maxReactants+1; kk < maxSpecies; ++kk){
for (int kk = maxReactants+1; kk < maxSpecies; ++kk) {
const int k = nuk(i,kk);
if (k == SparseKinetics_invalidIndex) break;
//if (k != SparseKinetics_invalidIndex)
@ -1686,7 +1686,7 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int /*vflag*/, const bool is
// getElapsedTime(timer_ODE, timer_stop), nlocal, TotalCounters.nFuncs, TotalCounters.nSteps);
// Warn the user if a failure was detected in the ODE solver.
if (TotalCounters.nFails > 0){
if (TotalCounters.nFails > 0) {
char sbuf[128];
sprintf(sbuf,"in FixRX::pre_force, ODE solver failed for %d atoms.", TotalCounters.nFails);
error->warning(FLERR, sbuf);
@ -1752,7 +1752,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
double min_per_proc[numCounters];
// Compute counters per dpd time-step.
for (int i = 0; i < numCounters; ++i){
for (int i = 0; i < numCounters; ++i) {
my_vals[i] = this->diagnosticCounter[i] / nTimes;
//printf("my sum[%d] = %f %d\n", i, my_vals[i], comm->me);
}
@ -1767,7 +1767,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
double avg_per_atom[numCounters], avg_per_proc[numCounters];
// Averages per-ODE and per-proc per time-step.
for (int i = 0; i < numCounters; ++i){
for (int i = 0; i < numCounters; ++i) {
avg_per_atom[i] = sums[i] / nODEs;
avg_per_proc[i] = sums[i] / comm->nprocs;
}
@ -1775,7 +1775,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
// Sum up the differences from each task.
double sum_sq[2*numCounters];
double my_sum_sq[2*numCounters];
for (int i = 0; i < numCounters; ++i){
for (int i = 0; i < numCounters; ++i) {
double diff_i = my_vals[i] - avg_per_proc[i];
my_sum_sq[i] = diff_i * diff_i;
}
@ -1835,7 +1835,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
TimerType timer_stop = getTimeStamp();
double time_local = getElapsedTime( timer_start, timer_stop );
if (comm->me == 0){
if (comm->me == 0) {
char smesg[128];
#define print_mesg(smesg) {\
@ -1849,7 +1849,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
print_mesg(smesg);
// only valid for single time-step!
if (diagnosticFrequency == 1){
if (diagnosticFrequency == 1) {
double rms_per_ODE[numCounters];
for (int i = 0; i < numCounters; ++i)
rms_per_ODE[i] = sqrt( sum_sq[i+numCounters] / nODEs );
@ -1867,7 +1867,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", avg_per_proc[StepSum], avg_per_proc[FuncSum], avg_per_proc[TimeSum], avg_per_proc[AtomSum]);
print_mesg(smesg);
if (comm->nprocs > 1){
if (comm->nprocs > 1) {
double rms_per_proc[numCounters];
for (int i = 0; i < numCounters; ++i)
rms_per_proc[i] = sqrt( sum_sq[i] / comm->nprocs );
@ -2206,7 +2206,7 @@ int FixRxKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, in
int m = 0;
for (int ii = 0; ii < n; ii++) {
const int jj = list[ii];
for(int ispecies = 0; ispecies < nspecies; ispecies++){
for (int ispecies = 0; ispecies < nspecies; ispecies++) {
buf[m++] = h_dvector(ispecies,jj);
buf[m++] = h_dvector(ispecies+nspecies,jj);
}
@ -2228,8 +2228,8 @@ void FixRxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
const int last = first + n ;
int m = 0;
for (int ii = first; ii < last; ii++){
for (int ispecies = 0; ispecies < nspecies; ispecies++){
for (int ii = first; ii < last; ii++) {
for (int ispecies = 0; ispecies < nspecies; ispecies++) {
h_dvector(ispecies,ii) = buf[m++];
h_dvector(ispecies+nspecies,ii) = buf[m++];
}

View File

@ -34,7 +34,7 @@ struct s_double_3 {
d0 = d1 = d2 = 0.0;
}
KOKKOS_INLINE_FUNCTION
s_double_3& operator+=(const s_double_3 &rhs){
s_double_3& operator+=(const s_double_3 &rhs) {
d0 += rhs.d0;
d1 += rhs.d1;
d2 += rhs.d2;

View File

@ -79,7 +79,7 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
// k_pairDPD = (PairDPDfdtKokkos *) force->pair_match("dpd/fdt",1);
k_pairDPDE = dynamic_cast<PairDPDfdtEnergyKokkos<DeviceType> *>(force->pair_match("dpd/fdt/energy",0));
// if(k_pairDPDE){
// if (k_pairDPDE) {
comm_forward = 3;
comm_reverse = 5;
// } else {
@ -88,19 +88,14 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
// }
if(/* k_pairDPD == nullptr &&*/ k_pairDPDE == nullptr)
if (/* k_pairDPD == nullptr &&*/ k_pairDPDE == nullptr)
error->all(FLERR,"Must use pair_style "/*"dpd/fdt/kk or "*/"dpd/fdt/energy/kk with fix shardlow/kk");
#ifdef DEBUG_SSA_PAIR_CT
d_counters = typename AT::t_int_2d("FixShardlowKokkos::d_counters", 2, 3);
d_hist = typename AT::t_int_1d("FixShardlowKokkos::d_hist", 32);
#ifndef KOKKOS_USE_CUDA_UVM
h_counters = Kokkos::create_mirror_view(d_counters);
h_hist = Kokkos::create_mirror_view(d_hist);
#else
h_counters = d_counters;
h_hist = d_hist;
#endif
#endif
}
@ -160,7 +155,7 @@ void FixShardlowKokkos<DeviceType>::init()
k_params.h_view(j,i) = k_params.h_view(i,j);
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
m_cutsq[j][i] = m_cutsq[i][j] = k_pairDPDE->k_cutsq.h_view(i,j);
}
@ -196,7 +191,7 @@ void FixShardlowKokkos<DeviceType>::pre_neighbor()
if (domain->triclinic)
error->all(FLERR,"Fix shardlow does not yet support triclinic geometries");
if(rcut >= bbx || rcut >= bby || rcut>= bbz )
if (rcut >= bbx || rcut >= bby || rcut >= bbz)
{
char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"};
char *msg = (char *) malloc(sizeof(fmt) + 4*15);
@ -231,7 +226,7 @@ void FixShardlowKokkos<DeviceType>::pre_neighbor()
massPerI = false;
masses = atomKK->k_mass.view<DeviceType>();
}
// if(k_pairDPDE){
// if (k_pairDPDE) {
dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
//} else {
@ -632,7 +627,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
int workItemCt = h_ssa_phaseLen[workPhase];
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
if (atom->ntypes > MAX_TYPES_STACKPARAMS)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<false> >(0,workItemCt),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<true> >(0,workItemCt),*this);
@ -649,7 +644,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
comm->forward_comm_fix(this);
atomKK->modified(Host,V_MASK);
if(k_pairDPDE){
if (k_pairDPDE) {
// Zero out the ghosts' uCond & uMech to be used as delta accumulators
// memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
// memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
@ -667,7 +662,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
// process neighbors in this AIR
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
if (atom->ntypes > MAX_TYPES_STACKPARAMS)
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<false> >(0,workItemCt),*this);
else
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<true> >(0,workItemCt),*this);
@ -759,7 +754,7 @@ int FixShardlowKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *b
buf[m++] = h_v(i, 0) - h_v_t0(i - nlocal, 0);
buf[m++] = h_v(i, 1) - h_v_t0(i - nlocal, 1);
buf[m++] = h_v(i, 2) - h_v_t0(i - nlocal, 2);
if(k_pairDPDE){
if (k_pairDPDE) {
buf[m++] = h_uCond(i); // for ghosts, this is an accumulated delta
buf[m++] = h_uMech(i); // for ghosts, this is an accumulated delta
}
@ -781,7 +776,7 @@ void FixShardlowKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double
h_v(j, 0) += buf[m++];
h_v(j, 1) += buf[m++];
h_v(j, 2) += buf[m++];
if(k_pairDPDE){
if (k_pairDPDE) {
h_uCond(j) += buf[m++]; // add in the accumulated delta
h_uMech(j) += buf[m++]; // add in the accumulated delta
}

View File

@ -62,9 +62,9 @@ class FixShardlowKokkos : public FixShardlow {
struct params_ssa {
KOKKOS_INLINE_FUNCTION
params_ssa(){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
params_ssa() {cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
KOKKOS_INLINE_FUNCTION
params_ssa(int /*i*/){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
params_ssa(int /*i*/) {cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
F_FLOAT cutinv,halfsigma,kappa,alpha;
};

View File

@ -39,7 +39,7 @@
GPU_AWARE_UNKNOWN
#elif defined(KOKKOS_ENABLE_CUDA)
// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0
// OpenMPI supports detecting GPU-aware MPI as of version 2.0.0
#if (OPEN_MPI)
#if (OMPI_MAJOR_VERSION >= 2)
@ -77,6 +77,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
exchange_comm_changed = 0;
forward_comm_changed = 0;
forward_pair_comm_changed = 0;
forward_fix_comm_changed = 0;
reverse_comm_changed = 0;
delete memory;
@ -147,7 +149,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
if (ngpus > 1 && !set_flag)
error->all(FLERR,"Could not determine local MPI rank for multiple "
"GPUs with Kokkos CUDA because MPI library not recognized");
"GPUs with Kokkos CUDA or HIP because MPI library not recognized");
} else if (strcmp(arg[iarg],"t") == 0 ||
strcmp(arg[iarg],"threads") == 0) {
@ -203,7 +205,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
neighflag = FULL;
neighflag_qeq = FULL;
newtonflag = 0;
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else {
if (nthreads > 1) {
@ -214,14 +219,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
neighflag_qeq = HALF;
}
newtonflag = 1;
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
forward_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
}
#ifdef LMP_KOKKOS_GPU
// check and warn about CUDA-aware MPI availability when using multiple MPI tasks
// change default only if we can safely detect that CUDA-aware MPI is not available
// check and warn about GPU-aware MPI availability when using multiple MPI tasks
// change default only if we can safely detect that GPU-aware MPI is not available
int nmpi = 0;
MPI_Comm_size(world,&nmpi);
@ -237,21 +245,21 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
gpu_aware_flag = 0;
char* str;
if ((str = getenv("OMPI_MCA_pml_pami_enable_cuda")))
if((strcmp(str,"1") == 0)) {
if ((strcmp(str,"1") == 0)) {
have_gpu_aware = 1;
gpu_aware_flag = 1;
}
if (!gpu_aware_flag)
if (me == 0)
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI");
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling GPU-aware MPI");
}
#endif
if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
if (me == 0)
error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, "
"use '-pk kokkos cuda/aware on' to override");
error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, "
"use '-pk kokkos gpu/aware on' to override");
gpu_aware_flag = 0;
} else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
// MVAPICH2
@ -264,17 +272,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
if (!gpu_aware_flag)
if (me == 0)
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI");
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling GPU-aware MPI");
// pure MPICH or some unsupported MPICH derivative
#elif defined(MPICH) && !defined(MVAPICH2_VERSION)
if (me == 0)
error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI");
error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
gpu_aware_flag = 0;
#else
if (me == 0)
error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available,"
error->warning(FLERR,"Kokkos with CUDA or HIP assumes GPU-aware MPI is available,"
" but cannot determine if this is the case\n try"
" '-pk kokkos cuda/aware off' if getting segmentation faults");
" '-pk kokkos gpu/aware off' if getting segmentation faults");
#endif
} // if (-1 == have_gpu_aware)
@ -340,12 +348,18 @@ void KokkosLMP::accelerator(int narg, char **arg)
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
forward_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else if (strcmp(arg[iarg+1],"host") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 1;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
} else if (strcmp(arg[iarg+1],"device") == 0) {
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
forward_pair_comm_classic = forward_fix_comm_classic = 0;
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
} else error->all(FLERR,"Illegal package kokkos command");
iarg += 2;
@ -373,9 +387,25 @@ void KokkosLMP::accelerator(int narg, char **arg)
} else error->all(FLERR,"Illegal package kokkos command");
forward_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"comm/pair/forward") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) forward_pair_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) forward_pair_comm_classic = 1;
else if (strcmp(arg[iarg+1],"device") == 0) forward_pair_comm_classic = 0;
else error->all(FLERR,"Illegal package kokkos command");
forward_pair_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) forward_fix_comm_classic = 1;
else if (strcmp(arg[iarg+1],"device") == 0) forward_fix_comm_classic = 0;
else error->all(FLERR,"Illegal package kokkos command");
forward_fix_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"comm/reverse") == 0) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
else if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
else if (strcmp(arg[iarg+1],"host") == 0) {
reverse_comm_classic = 0;
reverse_comm_on_host = 1;
@ -385,7 +415,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
} else error->all(FLERR,"Illegal package kokkos command");
reverse_comm_changed = 0;
iarg += 2;
} else if (strcmp(arg[iarg],"cuda/aware") == 0) {
} else if ((strcmp(arg[iarg],"gpu/aware") == 0)
|| (strcmp(arg[iarg],"cuda/aware") == 0)) {
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0;
else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
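The new comm/pair/forward and comm/fix/forward keywords above use the same value mapping as the existing comm options: 'no' and 'host' both keep the classic host-buffered path, and only 'device' enables device-resident buffers, which the code later forces back to classic when GPU-aware MPI (the gpu/aware on|off setting) is unavailable. A tiny sketch of that mapping, with illustrative names rather than the actual option parser:

// Sketch of the no/host/device -> classic-flag mapping used by the new keywords.
#include <cstring>
#include <cstdio>

static int parse_comm_mode(const char *value, int &classic_flag) {
  if (strcmp(value, "no") == 0)          classic_flag = 1;  // classic host path
  else if (strcmp(value, "host") == 0)   classic_flag = 1;  // also stays classic
  else if (strcmp(value, "device") == 0) classic_flag = 0;  // device-resident buffers
  else return 1;                                            // illegal value
  return 0;
}

int main() {
  int forward_fix_comm_classic = 1;
  if (parse_comm_mode("device", forward_fix_comm_classic) == 0)
    std::printf("forward_fix_comm_classic = %d\n", forward_fix_comm_classic);
  return 0;
}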
@ -425,7 +456,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
int nmpi = 0;
MPI_Comm_size(world,&nmpi);
// if "cuda/aware off" or "pair/only on", and "comm device", change to "comm no"
// if "gpu/aware off" or "pair/only on", and "comm device", change to "comm no"
if ((!gpu_aware_flag && nmpi > 1) || pair_only_flag) {
if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
@ -436,13 +467,21 @@ void KokkosLMP::accelerator(int narg, char **arg)
forward_comm_classic = 1;
forward_comm_changed = 1;
}
if (forward_pair_comm_classic == 0) {
forward_pair_comm_classic = 1;
forward_pair_comm_changed = 1;
}
if (forward_fix_comm_classic == 0) {
forward_fix_comm_classic = 1;
forward_fix_comm_changed = 1;
}
if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) {
reverse_comm_classic = 1;
reverse_comm_changed = 1;
}
}
// if "cuda/aware on" and "pair/only off", and comm flags were changed previously, change them back
// if "gpu/aware on" and "pair/only off", and comm flags were changed previously, change them back
if (gpu_aware_flag && !pair_only_flag) {
if (exchange_comm_changed) {
@ -453,6 +492,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
forward_comm_classic = 0;
forward_comm_changed = 0;
}
if (forward_pair_comm_changed) {
forward_pair_comm_classic = 0;
forward_pair_comm_changed = 0;
}
if (forward_fix_comm_changed) {
forward_fix_comm_classic = 0;
forward_fix_comm_changed = 0;
}
if (reverse_comm_changed) {
reverse_comm_classic = 0;
reverse_comm_changed = 0;
@ -490,25 +537,15 @@ int KokkosLMP::neigh_count(int m)
if (nk->lists[m]->execution_space == Host) {
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
} else if (nk->lists[m]->execution_space == Device) {
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
inum = nlistKK->inum;
#ifndef KOKKOS_USE_CUDA_UVM
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
#else
h_ilist = nlistKK->d_ilist;
h_numneigh = nlistKK->d_numneigh;
#endif
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
}
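The replacement above drops the KOKKOS_USE_CUDA_UVM special case: a host mirror is always requested and filled with an explicit deep_copy, which is cheap or skipped when the mirror aliases the device data. A standalone sketch of that pattern, with illustrative names:

// Sketch of the create_mirror_view + deep_copy pattern standardized above.
#include <Kokkos_Core.hpp>
#include <cstdio>

int main(int argc, char **argv) {
  Kokkos::initialize(argc, argv);
  {
    Kokkos::View<int *> d_ilist("d_ilist", 4);
    Kokkos::parallel_for("init", 4, KOKKOS_LAMBDA(const int i) { d_ilist(i) = 10 + i; });

    // create_mirror_view hands back d_ilist itself when its memory is already
    // host-accessible (e.g. UVM or a CPU backend) and a separate host
    // allocation otherwise; deep_copy then transfers the data only if needed.
    auto h_ilist = Kokkos::create_mirror_view(d_ilist);
    Kokkos::deep_copy(h_ilist, d_ilist);
    std::printf("h_ilist(0) = %d\n", h_ilist(0));
  }
  Kokkos::finalize();
  return 0;
}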

View File

@ -28,12 +28,16 @@ class KokkosLMP : protected Pointers {
int neighflag_qeq_set;
int exchange_comm_classic;
int forward_comm_classic;
int forward_pair_comm_classic;
int forward_fix_comm_classic;
int reverse_comm_classic;
int exchange_comm_on_host;
int forward_comm_on_host;
int reverse_comm_on_host;
int exchange_comm_changed;
int forward_comm_changed;
int forward_pair_comm_changed;
int forward_fix_comm_changed;
int reverse_comm_changed;
int nthreads,ngpus;
int numa;

View File

@ -1068,28 +1068,42 @@ void memset_kokkos (ViewType &view) {
struct params_lj_coul {
KOKKOS_INLINE_FUNCTION
params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
params_lj_coul() {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
KOKKOS_INLINE_FUNCTION
params_lj_coul(int /*i*/){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
params_lj_coul(int /*i*/) {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
};
// Pair SNAP
#define SNAP_KOKKOS_REAL double
#define SNAP_KOKKOS_HOST_VECLEN 1
#ifdef LMP_KOKKOS_GPU
#define SNAP_KOKKOS_DEVICE_VECLEN 32
#else
#define SNAP_KOKKOS_DEVICE_VECLEN 1
#endif
// intentional: SNAreal/complex gets reused beyond SNAP
typedef double SNAreal;
//typedef struct { SNAreal re, im; } SNAcomplex;
template <typename real>
struct alignas(2*sizeof(real)) SNAComplex
template <typename real_type_>
struct alignas(2*sizeof(real_type_)) SNAComplex
{
real re,im;
using real_type = real_type_;
using complex = SNAComplex<real_type>;
real_type re,im;
SNAComplex() = default;
KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
: re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
: re(re), im(static_cast<real>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
: re(re), im(static_cast<real_type>(0.)) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
: re(re), im(im) { ; }
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
@ -1117,27 +1131,24 @@ struct alignas(2*sizeof(real)) SNAComplex
return *this;
}
KOKKOS_INLINE_FUNCTION
static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
KOKKOS_INLINE_FUNCTION
static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
KOKKOS_INLINE_FUNCTION
const complex conj() { return complex(re, -im); }
};
template <typename real>
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
return SNAComplex<real>(r*self.re, r*self.im);
template <typename real_type>
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
return SNAComplex<real_type>(r*self.re, r*self.im);
}
typedef SNAComplex<SNAreal> SNAcomplex;
// Cayley-Klein pack
// Can guarantee it's aligned to 2 complex
struct alignas(32) CayleyKleinPack {
SNAcomplex a, b;
SNAcomplex da[3], db[3];
SNAreal sfac;
SNAreal dsfacu[3];
};
#if defined(KOKKOS_ENABLE_CXX11)
#undef ISFINITE
#define ISFINITE(x) std::isfinite(x)

View File

@ -46,11 +46,7 @@ template <typename TYPE, typename HTYPE>
const char *name)
{
data = TYPE(std::string(name),n1);
#ifndef KOKKOS_USE_CUDA_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
array = h_data.data();
return data;
}
@ -61,11 +57,7 @@ template <typename TYPE, typename HTYPE>
int n1, const char *name)
{
data = TYPE(std::string(name),n1);
#ifndef KOKKOS_USE_CUDA_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
return data;
}
@ -100,7 +92,7 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
template <typename TYPE>
TYPE destroy_kokkos(TYPE &data)
{
/*if(data.data()!=nullptr)
/*if (data.data()!=nullptr)
free(data.data());*/
data = TYPE();
return data;
@ -167,11 +159,7 @@ template <typename TYPE, typename HTYPE>
const char *name)
{
data = TYPE(std::string(name),n1,n2);
#ifndef KOKKOS_USE_CUDA_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
return data;
}
@ -185,7 +173,7 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
bigint n = 0;
for (int i = 0; i < n1; i++) {
if(n2==0)
if (n2==0)
array[i] = nullptr;
else
array[i] = &data.h_view(i,0);
@ -200,17 +188,13 @@ template <typename TYPE, typename HTYPE>
const char *name)
{
data = TYPE(std::string(name),n1,n2);
#ifndef KOKKOS_USE_CUDA_UVM
h_data = Kokkos::create_mirror_view(data);
#else
h_data = data;
#endif
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
array = (typename TYPE::value_type **) smalloc(nbytes,name);
bigint n = 0;
for (int i = 0; i < n1; i++) {
if(n2==0)
if (n2==0)
array[i] = nullptr;
else
array[i] = &h_data(i,0);
@ -234,7 +218,7 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
array = (typename TYPE::value_type**) srealloc(array,nbytes,name);
for (int i = 0; i < n1; i++)
if(n2==0)
if (n2==0)
array[i] = nullptr;
else
array[i] = &data.h_view(i,0);
@ -251,7 +235,7 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
array = (typename TYPE::value_type **) smalloc(nbytes,name);
for (int i = 0; i < n1; i++)
if(data.h_view.extent(1)==0)
if (data.h_view.extent(1)==0)
array[i] = nullptr;
else
array[i] = &data.h_view(i,0);
@ -271,7 +255,7 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
array = (typename TYPE::value_type **) smalloc(nbytes,name);
for (int i = 0; i < n1; i++)
if(data.h_view.extent(1)==0)
if (data.h_view.extent(1)==0)
array[i] = nullptr;
else
array[i] = &data.h_view(i,0);

View File

@ -25,7 +25,7 @@ namespace LAMMPS_NS {
d0 = d1 = 0.0;
}
KOKKOS_INLINE_FUNCTION
s_double2& operator+=(const s_double2 &rhs){
s_double2& operator+=(const s_double2 &rhs) {
d0 += rhs.d0;
d1 += rhs.d1;
return *this;

View File

@ -30,11 +30,7 @@ NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
atoms_per_bin = 16;
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
#else
h_resize = d_resize;
#endif
h_resize() = 1;
kokkos = 1;
@ -92,7 +88,7 @@ void NBinKokkos<DeviceType>::bin_atoms()
h_resize() = 1;
while(h_resize() > 0) {
while (h_resize() > 0) {
h_resize() = 0;
deep_copy(d_resize, h_resize);
@ -111,7 +107,7 @@ void NBinKokkos<DeviceType>::bin_atoms()
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
deep_copy(h_resize, d_resize);
if(h_resize()) {
if (h_resize()) {
atoms_per_bin += 16;
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
@ -135,7 +131,7 @@ void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
atom2bin(i) = ibin;
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
if(ac < (int)bins.extent(1)) {
if (ac < (int)bins.extent(1)) {
bins(ibin, ac) = i;
} else {
d_resize() = 1;

View File

@ -41,7 +41,6 @@ NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
d_lbinxhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinxhi");
d_lbinyhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinyhi");
d_lbinzhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinzhi");
#ifndef KOKKOS_USE_CUDA_UVM
h_resize = Kokkos::create_mirror_view(d_resize);
h_lbinxlo = Kokkos::create_mirror_view(d_lbinxlo);
h_lbinylo = Kokkos::create_mirror_view(d_lbinylo);
@ -49,15 +48,6 @@ NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
h_lbinxhi = Kokkos::create_mirror_view(d_lbinxhi);
h_lbinyhi = Kokkos::create_mirror_view(d_lbinyhi);
h_lbinzhi = Kokkos::create_mirror_view(d_lbinzhi);
#else
h_resize = d_resize;
h_lbinxlo = d_lbinxlo;
h_lbinylo = d_lbinylo;
h_lbinzlo = d_lbinzlo;
h_lbinxhi = d_lbinxhi;
h_lbinyhi = d_lbinyhi;
h_lbinzhi = d_lbinzhi;
#endif
h_resize() = 1;
k_gbincount = DAT::tdual_int_1d("NBinSSAKokkos::gbincount",8);
@ -156,7 +146,7 @@ void NBinSSAKokkos<DeviceType>::bin_atoms()
// actually bin the ghost atoms
{
if(ghosts_per_gbin > (int) gbins.extent(1)) {
if (ghosts_per_gbin > (int) gbins.extent(1)) {
k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin);
gbins = k_gbins.view<DeviceType>();
}
@ -293,7 +283,7 @@ void NBinSSAKokkos<DeviceType>::sortBin(
child = parent*2+1; /* Find the next child */
}
gbins(ibin, parent) = t; /* We save t in the heap */
} while(1);
} while (1);
}
namespace LAMMPS_NS {
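The do-while above is the sift-down step of the in-place heap sort that sortBin() uses to order the ghost atoms stored in a bin (the comments 'Find the next child' and 'We save t in the heap' mark it). A generic standalone version of the same sift-down idiom on a plain int array, rather than on gbins(ibin, k):

// Sketch of heap sift-down and the surrounding in-place heap sort.
#include <cstdio>

static void sift_down(int *a, int start, int end) {
  int parent = start;
  int t = a[parent];
  int child;
  while ((child = parent * 2 + 1) <= end) {                // find the next child
    if (child < end && a[child] < a[child + 1]) child++;   // pick the larger child
    if (t >= a[child]) break;                              // heap property restored
    a[parent] = a[child];                                  // move child up
    parent = child;
  }
  a[parent] = t;                                           // save t in the heap
}

int main() {
  int a[6] = {3, 9, 2, 7, 5, 1};
  // Build a max-heap, then repeatedly swap the root to the end and re-sift.
  for (int start = 6 / 2 - 1; start >= 0; --start) sift_down(a, start, 5);
  for (int end = 5; end > 0; --end) {
    int tmp = a[0]; a[0] = a[end]; a[end] = tmp;
    sift_down(a, 0, end - 1);
  }
  for (int i = 0; i < 6; ++i) std::printf("%d ", a[i]);    // prints 1 2 3 5 7 9
  std::printf("\n");
  return 0;
}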

View File

@ -108,20 +108,20 @@ class NBinSSAKokkos : public NBinStandard {
if (y >= subhi_[1]) iy = 1;
if (x < sublo_[0]) ix = -1;
if (x >= subhi_[0]) ix = 1;
if(iz < 0){
if (iz < 0) {
return -1;
} else if(iz == 0){
if( iy<0 ) return -1; // bottom left/middle/right
if( (iy==0) && (ix<0) ) return -1; // left atoms
if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms
if( (iy==0) && (ix>0) ) return 2; // Right atoms
if( (iy>0) && (ix==0) ) return 1; // Top-middle atoms
if( (iy>0) && (ix!=0) ) return 3; // Top-right and top-left atoms
} else if (iz == 0) {
if (iy<0) return -1; // bottom left/middle/right
if ((iy==0) && (ix<0) ) return -1; // left atoms
if ((iy==0) && (ix==0)) return 0; // Locally owned atoms
if ((iy==0) && (ix>0) ) return 2; // Right atoms
if ((iy>0) && (ix==0)) return 1; // Top-middle atoms
if ((iy>0) && (ix!=0)) return 3; // Top-right and top-left atoms
} else { // iz > 0
if((ix==0) && (iy==0)) return 4; // Back atoms
if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
if((ix!=0) && (iy!=0)) return 7; // Back corner atoms
if ((ix==0) && (iy==0)) return 4; // Back atoms
if ((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
if ((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
if ((ix!=0) && (iy!=0)) return 7; // Back corner atoms
}
return -2;
}

View File

@ -329,7 +329,7 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_type_grow_kokkos(){
void NeighborKokkos::modify_ex_type_grow_kokkos() {
memoryKK->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
k_ex1_type.modify<LMPHostType>();
memoryKK->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type");
@ -337,7 +337,7 @@ void NeighborKokkos::modify_ex_type_grow_kokkos(){
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_ex_group_grow_kokkos(){
void NeighborKokkos::modify_ex_group_grow_kokkos() {
memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
k_ex1_group.modify<LMPHostType>();
memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
@ -345,13 +345,13 @@ void NeighborKokkos::modify_ex_group_grow_kokkos(){
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_mol_group_grow_kokkos(){
void NeighborKokkos::modify_mol_group_grow_kokkos() {
memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");
k_ex_mol_group.modify<LMPHostType>();
}
/* ---------------------------------------------------------------------- */
void NeighborKokkos::modify_mol_intra_grow_kokkos(){
void NeighborKokkos::modify_mol_intra_grow_kokkos() {
memoryKK->grow_kokkos(k_ex_mol_intra,ex_mol_intra,maxex_mol,"neigh:ex_mol_intra");
k_ex_mol_intra.modify<LMPHostType>();
}

View File

@ -207,7 +207,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
data.special_flag[3] = special_flag[3];
data.h_resize()=1;
while(data.h_resize()) {
while (data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
@ -303,7 +303,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
}
Kokkos::deep_copy(h_scalars, d_scalars);
if(data.h_resize()) {
if (data.h_resize()) {
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename AT::t_neighbors_2d(Kokkos::NoInit("neighbors"), list->d_neighbors.extent(0), list->maxneighs);
data.neigh_list.d_neighbors = list->d_neighbors;
@ -410,24 +410,24 @@ void NeighborKokkosExecute<DeviceType>::
= d_stencil;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
for(int m = 0; m < c_bincount(ibin); m++) {
if (HalfNeigh)
for (int m = 0; m < c_bincount(ibin); m++) {
const int j = c_bins(ibin,m);
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
if ((j == i) || (HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (rsq <= cutneighsq(itype,jtype)) {
if (molecular != Atom::ATOMIC) {
if (!moltemplate)
which = find_special(i,j);
@ -436,38 +436,38 @@ void NeighborKokkosExecute<DeviceType>::
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (which == 0) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
} else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
} else if (minimum_image_check(delx,dely,delz)) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
}
for(int k = 0; k < nstencil; k++) {
for (int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(HalfNeigh && (ibin==jbin)) continue;
if (HalfNeigh && (ibin==jbin)) continue;
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
for (int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
const int jtype = type(j);
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(Tri) {
if (HalfNeigh && !Newton && (j < i)) continue;
if (!HalfNeigh && j==i) continue;
if (Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
@ -477,14 +477,14 @@ void NeighborKokkosExecute<DeviceType>::
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (rsq <= cutneighsq(itype,jtype)) {
if (molecular != Atom::ATOMIC) {
if (!moltemplate)
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
@ -493,19 +493,19 @@ void NeighborKokkosExecute<DeviceType>::
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (which == 0) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
} else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
} else if (minimum_image_check(delx,dely,delz)) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
@ -515,10 +515,10 @@ void NeighborKokkosExecute<DeviceType>::
neigh_list.d_numneigh(i) = n;
if(n > neigh_list.maxneighs) {
if (n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
}
neigh_list.d_ilist(i) = i;
@ -562,7 +562,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
if(ibin >= mbins) return;
if (ibin >= mbins) return;
X_FLOAT* other_x = sharedmem;
other_x = other_x + 5*atoms_per_bin*MY_BIN;
@ -570,7 +570,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
int bincount_current = c_bincount[ibin];
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
@ -583,7 +583,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
int itype;
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
if(i >= 0) {
if (i >= 0) {
xtmp = x(i, 0);
ytmp = x(i, 1);
ztmp = x(i, 2);
@ -596,23 +596,23 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
other_id[MY_II] = i;
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
if(test) return;
if (test) return;
if(i >= 0 && i < nlocal) {
if (i >= 0 && i < nlocal) {
#pragma unroll 4
for(int m = 0; m < bincount_current; m++) {
for (int m = 0; m < bincount_current; m++) {
int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if((j == i) ||
if ((j == i) ||
(HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(Tri) {
if (Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
@ -622,13 +622,13 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (rsq <= cutneighsq(itype,jtype)) {
if (molecular != Atom::ATOMIC) {
int which = 0;
if (!moltemplate)
@ -638,19 +638,19 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (which == 0) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
} else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
} else if (minimum_image_check(delx,dely,delz)) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
@ -661,15 +661,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
for(int k = 0; k < nstencil; k++) {
for (int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
if(ibin == jbin) continue;
if (ibin == jbin) continue;
bincount_current = c_bincount[jbin];
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
if(j >= 0) {
if (j >= 0) {
other_x[MY_II] = x(j, 0);
other_x[MY_II + atoms_per_bin] = x(j, 1);
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
@ -680,16 +680,16 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
__syncthreads();
if(i >= 0 && i < nlocal) {
if (i >= 0 && i < nlocal) {
#pragma unroll 8
for(int m = 0; m < bincount_current; m++) {
for (int m = 0; m < bincount_current; m++) {
const int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
//if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(Tri) {
if (HalfNeigh && !Newton && (j < i)) continue;
if (!HalfNeigh && j==i) continue;
if (Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
@ -699,14 +699,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (rsq <= cutneighsq(itype,jtype)) {
if (molecular != Atom::ATOMIC) {
int which = 0;
if (!moltemplate)
@ -716,19 +716,19 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (which == 0) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
} else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
} else if (minimum_image_check(delx,dely,delz)) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
@ -738,15 +738,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
__syncthreads();
}
if(i >= 0 && i < nlocal) {
if (i >= 0 && i < nlocal) {
neigh_list.d_numneigh(i) = n;
neigh_list.d_ilist(i) = i;
}
if(n > neigh_list.maxneighs) {
if (n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
}
}
}
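
The half-list skip test that recurs in the hunks above - drop j when it is the same atom or, with Newton on, when a ghost atom j sorts "below and to the left" of i - amounts to a lexicographic ordering on (z, y, x). The following standalone C++ sketch, with invented names and not part of this diff, shows that tie-breaking predicate in isolation:

    #include <cstdio>

    struct Pos { double x, y, z; };

    // True when ghost atom j should be skipped for a half list with Newton on:
    // the pair is kept only by the atom that sorts higher in (z, y, x) order,
    // matching the (x(j,2), x(j,1), x(j,0)) comparisons in the kernel above.
    bool skip_ghost(const Pos &i, const Pos &j) {
      if (j.z != i.z) return j.z < i.z;
      if (j.y != i.y) return j.y < i.y;
      return j.x < i.x;
    }

    int main() {
      Pos i{0.0, 0.0, 0.0};
      Pos j{-1.0, 0.0, 0.0};   // same z and y, smaller x -> skipped
      std::printf("skip j relative to i: %s\n", skip_ghost(i, j) ? "yes" : "no");
      return 0;
    }
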
@ -787,14 +787,14 @@ void NeighborKokkosExecute<DeviceType>::
const int ibin = c_atom2bin(i);
for (int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
for (int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
if (HalfNeigh && j <= i) continue;
else if (j == i) continue;
const int jtype = type[j];
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
@ -810,19 +810,19 @@ void NeighborKokkosExecute<DeviceType>::
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (which == 0) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
} else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
} else if (minimum_image_check(delx,dely,delz)) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
@ -843,14 +843,14 @@ void NeighborKokkosExecute<DeviceType>::
ybin2 < 0 || ybin2 >= mbiny ||
zbin2 < 0 || zbin2 >= mbinz) continue;
const int jbin = ibin + stencil[k];
for(int m = 0; m < c_bincount(jbin); m++) {
for (int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
if (HalfNeigh && j <= i) continue;
else if (j == i) continue;
const int jtype = type[j];
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j,0);
const X_FLOAT dely = ytmp - x(j,1);
@ -858,7 +858,7 @@ void NeighborKokkosExecute<DeviceType>::
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if (rsq <= cutneighsq(itype,jtype)) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
@ -867,10 +867,10 @@ void NeighborKokkosExecute<DeviceType>::
neigh_list.d_numneigh(i) = n;
if(n > neigh_list.maxneighs) {
if (n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
}
neigh_list.d_ilist(i) = i;
}
@ -902,18 +902,18 @@ void NeighborKokkosExecute<DeviceType>::
const int mask_history = 3 << SBBITS;
// loop over all bins in neighborhood (includes ibin)
if(HalfNeigh)
for(int m = 0; m < c_bincount(ibin); m++) {
if (HalfNeigh)
for (int m = 0; m < c_bincount(ibin); m++) {
const int j = c_bins(ibin,m);
const int jtype = type(j);
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
if ((j == i) || (HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
@ -922,29 +922,29 @@ void NeighborKokkosExecute<DeviceType>::
const X_FLOAT radsum = radi + radius(j);
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
if (rsq <= cutsq) {
if (n<neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
else n++;
}
}
for(int k = 0; k < nstencil; k++) {
for (int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
// get subview of jbin
if(HalfNeigh && (ibin==jbin)) continue;
if (HalfNeigh && (ibin==jbin)) continue;
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
for(int m = 0; m < c_bincount(jbin); m++) {
for (int m = 0; m < c_bincount(jbin); m++) {
const int j = c_bins(jbin,m);
const int jtype = type(j);
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(Tri) {
if (HalfNeigh && !Newton && (j < i)) continue;
if (!HalfNeigh && j==i) continue;
if (Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
@ -954,7 +954,7 @@ void NeighborKokkosExecute<DeviceType>::
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
@ -963,9 +963,9 @@ void NeighborKokkosExecute<DeviceType>::
const X_FLOAT radsum = radi + radius(j);
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
if (rsq <= cutsq) {
if (n<neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
else n++;
@ -975,10 +975,10 @@ void NeighborKokkosExecute<DeviceType>::
neigh_list.d_numneigh(i) = n;
if(n > neigh_list.maxneighs) {
if (n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
}
neigh_list.d_ilist(i) = i;
@ -1005,7 +1005,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
if(ibin >= mbins) return;
if (ibin >= mbins) return;
X_FLOAT* other_x = sharedmem;
other_x = other_x + 6*atoms_per_bin*MY_BIN;
@ -1013,7 +1013,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
int bincount_current = c_bincount[ibin];
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
/* if necessary, goto next page and add pages */
@ -1028,7 +1028,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
const int mask_history = 3 << SBBITS;
if(i >= 0) {
if (i >= 0) {
xtmp = x(i, 0);
ytmp = x(i, 1);
ztmp = x(i, 2);
@ -1043,23 +1043,23 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
other_id[MY_II] = i;
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
if(test) return;
if (test) return;
if(i >= 0 && i < nlocal) {
if (i >= 0 && i < nlocal) {
#pragma unroll 4
for(int m = 0; m < bincount_current; m++) {
for (int m = 0; m < bincount_current; m++) {
int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
if((j == i) ||
if ((j == i) ||
(HalfNeigh && !Newton && (j < i)) ||
(HalfNeigh && Newton &&
((j < i) ||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
) continue;
if(Tri) {
if (Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
@ -1069,7 +1069,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
@ -1077,8 +1077,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if (rsq <= cutsq) {
if (n<neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
@ -1090,15 +1090,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
= d_stencil;
for(int k = 0; k < nstencil; k++) {
for (int k = 0; k < nstencil; k++) {
const int jbin = ibin + stencil[k];
if(ibin == jbin) continue;
if (ibin == jbin) continue;
bincount_current = c_bincount[jbin];
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
if(j >= 0) {
if (j >= 0) {
other_x[MY_II] = x(j, 0);
other_x[MY_II + atoms_per_bin] = x(j, 1);
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
@ -1110,16 +1110,16 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
__syncthreads();
if(i >= 0 && i < nlocal) {
if (i >= 0 && i < nlocal) {
#pragma unroll 8
for(int m = 0; m < bincount_current; m++) {
for (int m = 0; m < bincount_current; m++) {
const int j = other_id[m];
const int jtype = other_x[m + 3 * atoms_per_bin];
if(HalfNeigh && (j < i)) continue;
if(HalfNeigh && !Newton && (j < i)) continue;
if(!HalfNeigh && j==i) continue;
if(Tri) {
if (HalfNeigh && (j < i)) continue;
if (HalfNeigh && !Newton && (j < i)) continue;
if (!HalfNeigh && j==i) continue;
if (Tri) {
if (x(j,2) < ztmp) continue;
if (x(j,2) == ztmp) {
if (x(j,1) < ytmp) continue;
@ -1129,7 +1129,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
}
}
}
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - other_x[m];
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
@ -1138,8 +1138,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
if(rsq <= cutsq) {
if(n<neigh_list.maxneighs) {
if (rsq <= cutsq) {
if (n<neigh_list.maxneighs) {
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
else neighbors_i(n++) = j;
}
@ -1150,15 +1150,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
__syncthreads();
}
if(i >= 0 && i < nlocal) {
if (i >= 0 && i < nlocal) {
neigh_list.d_numneigh(i) = n;
neigh_list.d_ilist(i) = i;
}
if(n > neigh_list.maxneighs) {
if (n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
}
}
}

View File

@ -452,7 +452,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
bool firstTry = true;
data.h_resize()=1;
while(data.h_resize()) {
while (data.h_resize()) {
data.h_new_maxneighs() = list->maxneighs;
data.h_resize() = 0;
@ -489,7 +489,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
deep_copy(data.h_resize, data.resize);
if(data.h_resize()) {
if (data.h_resize()) {
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
list->maxneighs = data.h_new_maxneighs() * 1.2;
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs);
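
The host-side hunks above show the retry pattern these neighbor builds rely on: the kernel never writes past the current capacity, it only raises a resize flag and records the capacity it would have needed, and the host then regrows the list with 20% headroom and reruns the build until nothing overflows. A self-contained illustration of that pattern (plain C++, invented names, no Kokkos - not the actual LAMMPS code) follows:

    #include <cstdio>
    #include <vector>

    // Pretend "build kernel": stores at most `capacity` neighbor indices and
    // reports how many it actually wanted (the analogue of new_maxneighs).
    int fake_build(std::vector<int> &neigh, int capacity, int demand) {
      neigh.clear();
      for (int j = 0; j < demand; ++j)
        if ((int)neigh.size() < capacity) neigh.push_back(j);
      return demand;
    }

    int main() {
      int maxneighs = 16;        // initial capacity, like list->maxneighs
      const int demand = 57;     // what the atoms in this toy example need
      std::vector<int> neighbors;
      bool resize = true;
      while (resize) {           // mirrors: while (data.h_resize()) { ... }
        resize = false;
        const int new_maxneighs = fake_build(neighbors, maxneighs, demand);
        if (new_maxneighs > maxneighs) {          // overflow -> grow and retry
          resize = true;
          maxneighs = (int)(new_maxneighs * 1.2); // same 20% headroom as above
        }
      }
      std::printf("final capacity %d, stored %zu neighbors\n",
                  maxneighs, neighbors.size());
      return 0;
    }
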
@ -571,13 +571,13 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
for (; jl < c_bincount(jbin); ++jl) {
const int j = c_bins(jbin, jl);
const int jtype = type(j);
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (rsq <= cutneighsq(itype,jtype)) {
if (molecular != Atom::ATOMIC) {
if (!moltemplate)
which = find_special(i,j);
@ -586,19 +586,19 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
/* onemols[imol]->nspecial[iatom], */
/* tag[j]-tagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (which == 0) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
} else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
} else if (minimum_image_check(delx,dely,delz)) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
@ -608,9 +608,9 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
if (n > 0) {
neigh_list.d_numneigh(inum) = n;
neigh_list.d_ilist(inum++) = i;
if(n > neigh_list.maxneighs) {
if (n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
if (n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
}
}
}
@ -699,13 +699,13 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
for (int jl = 0; jl < c_bincount(jbin); ++jl) {
const int j = c_bins(jbin, jl);
const int jtype = type(j);
if(exclude && exclusion(i,j,itype,jtype)) continue;
if (exclude && exclusion(i,j,itype,jtype)) continue;
const X_FLOAT delx = xtmp - x(j, 0);
const X_FLOAT dely = ytmp - x(j, 1);
const X_FLOAT delz = ztmp - x(j, 2);
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
if(rsq <= cutneighsq(itype,jtype)) {
if (rsq <= cutneighsq(itype,jtype)) {
if (molecular != Atom::ATOMIC) {
if (!moltemplate)
which = find_special(j,i);
@ -714,19 +714,19 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
/* onemols[jmol]->nspecial[jatom], */
/* tag[i]-jtagprev); */
/* else which = 0; */
if (which == 0){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (which == 0) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
} else if (minimum_image_check(delx,dely,delz)){
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
} else if (minimum_image_check(delx,dely,delz)) {
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
else if (which > 0) {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
else n++;
}
} else {
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
else n++;
}
}
@ -736,9 +736,9 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
if (n > 0) {
neigh_list.d_numneigh(gNdx) = n;
neigh_list.d_ilist(gNdx++) = i;
if(n > neigh_list.maxneighs) {
if (n > neigh_list.maxneighs) {
resize() = 1;
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
if (n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
}
}
}

Some files were not shown because too many files have changed in this diff.