Merge branch 'master' into citation-refactor
This commit is contained in:
@ -25,7 +25,7 @@ set(LAMMPS_POTENTIALS_DIR ${LAMMPS_DIR}/potentials)
|
||||
find_package(Git)
|
||||
|
||||
# by default, install into $HOME/.local (not /usr/local), so that no root access (and sudo!!) is needed
|
||||
if (CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
if(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT)
|
||||
set(CMAKE_INSTALL_PREFIX "$ENV{HOME}/.local" CACHE PATH "default install path" FORCE )
|
||||
endif()
|
||||
|
||||
@ -33,7 +33,7 @@ endif()
|
||||
set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/Modules)
|
||||
|
||||
# make sure LIBRARY_PATH is set if environment variable is set
|
||||
if (DEFINED ENV{LIBRARY_PATH})
|
||||
if(DEFINED ENV{LIBRARY_PATH})
|
||||
list(APPEND CMAKE_LIBRARY_PATH "$ENV{LIBRARY_PATH}")
|
||||
message(STATUS "Appending $ENV{LIBRARY_PATH} to CMAKE_LIBRARY_PATH: ${CMAKE_LIBRARY_PATH}")
|
||||
endif()
|
||||
@ -373,7 +373,7 @@ else()
|
||||
set(CUDA_REQUEST_PIC)
|
||||
endif()
|
||||
|
||||
foreach(PKG_WITH_INCL KSPACE PYTHON VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
|
||||
foreach(PKG_WITH_INCL KSPACE PYTHON MLIAP VORONOI USER-COLVARS USER-MOLFILE USER-NETCDF USER-PLUMED USER-QMMM
|
||||
USER-QUIP USER-SCAFACOS USER-SMD USER-VTK KIM LATTE MESSAGE MSCG COMPRESS)
|
||||
if(PKG_${PKG_WITH_INCL})
|
||||
include(Packages/${PKG_WITH_INCL})
|
||||
@ -580,7 +580,7 @@ add_dependencies(lammps gitversion)
|
||||
############################################
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
list (FIND LANGUAGES "Fortran" _index)
|
||||
if (${_index} GREATER -1)
|
||||
if(${_index} GREATER -1)
|
||||
target_link_libraries(lammps PRIVATE ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
|
||||
endif()
|
||||
set(LAMMPS_CXX_HEADERS angle.h atom.h bond.h citeme.h comm.h compute.h dihedral.h domain.h error.h fix.h force.h group.h improper.h
|
||||
@ -737,14 +737,14 @@ if(OPTIONS)
|
||||
endif()
|
||||
get_property(LANGUAGES GLOBAL PROPERTY ENABLED_LANGUAGES)
|
||||
list (FIND LANGUAGES "Fortran" _index)
|
||||
if (${_index} GREATER -1)
|
||||
if(${_index} GREATER -1)
|
||||
message(STATUS "Fortran Compiler: ${CMAKE_Fortran_COMPILER}
|
||||
Type: ${CMAKE_Fortran_COMPILER_ID}
|
||||
Version: ${CMAKE_Fortran_COMPILER_VERSION}
|
||||
Fortran Flags:${CMAKE_Fortran_FLAGS} ${CMAKE_Fortran_FLAGS_${BTYPE}}")
|
||||
endif()
|
||||
list (FIND LANGUAGES "C" _index)
|
||||
if (${_index} GREATER -1)
|
||||
if(${_index} GREATER -1)
|
||||
message(STATUS "C compiler: ${CMAKE_C_COMPILER}
|
||||
Type: ${CMAKE_C_COMPILER_ID}
|
||||
Version: ${CMAKE_C_COMPILER_VERSION}
|
||||
|
||||
@ -8,7 +8,7 @@ else()
|
||||
find_package(Python3 COMPONENTS Interpreter QUIET)
|
||||
endif()
|
||||
|
||||
if (Python3_EXECUTABLE)
|
||||
if(Python3_EXECUTABLE)
|
||||
if(Python3_VERSION VERSION_GREATER_EQUAL 3.5)
|
||||
add_custom_target(
|
||||
check-whitespace
|
||||
|
||||
30
cmake/Modules/FindCythonize.cmake
Normal file
30
cmake/Modules/FindCythonize.cmake
Normal file
@ -0,0 +1,30 @@
|
||||
# Find the Cythonize tool.
#
# This code sets the following variables:
#
#  Cythonize_EXECUTABLE  - cache entry with the location of the cythonize program
#  Cythonize_FOUND       - set by find_package_handle_standard_args() when
#                          Cythonize_EXECUTABLE was located
#
# adapted from https://github.com/cmarshall108/cython-cmake-example/blob/master/cmake/FindCython.cmake
#=============================================================================

# Locate a Python 3.6+ interpreter. The FindPython3 module only exists in
# CMake 3.12 and later, so older versions fall back to FindPythonInterp and
# copy its result into the variable name used by the rest of this module.
if(CMAKE_VERSION VERSION_LESS 3.12)
  find_package(PythonInterp 3.6 QUIET) # Deprecated since version 3.12
  if(PYTHONINTERP_FOUND)
    set(Python3_EXECUTABLE "${PYTHON_EXECUTABLE}")
  endif()
else()
  find_package(Python3 3.6 COMPONENTS Interpreter QUIET)
endif()

# Use the Cython executable that lives next to the Python executable
# if it is a local installation.
# No search is attempted when no Python interpreter was found.
if(Python3_EXECUTABLE)
  # DIRECTORY is the current spelling of the legacy PATH mode
  get_filename_component(_python_path "${Python3_EXECUTABLE}" DIRECTORY)
  find_program(Cythonize_EXECUTABLE
    NAMES cythonize3 cythonize cythonize.bat
    HINTS "${_python_path}")
endif()

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Cythonize REQUIRED_VARS Cythonize_EXECUTABLE)
mark_as_advanced(Cythonize_EXECUTABLE)
|
||||
@ -50,6 +50,7 @@ function(check_for_autogen_files source_dir)
|
||||
file(GLOB SRC_AUTOGEN_FILES ${source_dir}/style_*.h)
|
||||
file(GLOB SRC_AUTOGEN_PACKAGES ${source_dir}/packages_*.h)
|
||||
list(APPEND SRC_AUTOGEN_FILES ${SRC_AUTOGEN_PACKAGES} ${source_dir}/lmpinstalledpkgs.h ${source_dir}/lmpgitversion.h)
|
||||
# the packages_*.h files were already appended by the preceding list(APPEND);
# only add the cythonize-generated MLIAP sources here to avoid duplicate entries
list(APPEND SRC_AUTOGEN_FILES ${source_dir}/mliap_model_python_couple.h ${source_dir}/mliap_model_python_couple.cpp)
|
||||
foreach(_SRC ${SRC_AUTOGEN_FILES})
|
||||
get_filename_component(FILENAME "${_SRC}" NAME)
|
||||
if(EXISTS ${source_dir}/${FILENAME})
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
# Download and configure custom MPICH files for Windows
|
||||
message(STATUS "Downloading and configuring MPICH-1.4.1 for Windows")
|
||||
include(ExternalProject)
|
||||
if (CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
|
||||
if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64")
|
||||
ExternalProject_Add(mpi4win_build
|
||||
URL https://download.lammps.org/thirdparty/mpich2-win64-devel.tar.gz
|
||||
URL_MD5 4939fdb59d13182fd5dd65211e469f14
|
||||
|
||||
31
cmake/Modules/Packages/MLIAP.cmake
Normal file
31
cmake/Modules/Packages/MLIAP.cmake
Normal file
@ -0,0 +1,31 @@
|
||||
# if PYTHON package is included we may also include Python support in MLIAP
#
# The default of the MLIAP_ENABLE_PYTHON option is autodetected: it is ON only
# when the PYTHON package is enabled and the cythonize tool could be found.
set(MLIAP_ENABLE_PYTHON_DEFAULT OFF)
if(PKG_PYTHON)
  find_package(Cythonize)
  if(Cythonize_FOUND)
    set(MLIAP_ENABLE_PYTHON_DEFAULT ON)
  endif()
endif()

option(MLIAP_ENABLE_PYTHON "Build MLIAP package with Python support" ${MLIAP_ENABLE_PYTHON_DEFAULT})

if(MLIAP_ENABLE_PYTHON)
  # check the package prerequisite first, so that a missing PYTHON package is
  # reported as such and not as a failure to locate cythonize
  if(NOT PKG_PYTHON)
    message(FATAL_ERROR "Must enable PYTHON package for including Python support in MLIAP")
  endif()
  find_package(Cythonize REQUIRED)

  # the .pyx file is copied to and translated in the build tree, so that no
  # generated files are written into the source folder
  set(MLIAP_BINARY_DIR "${CMAKE_BINARY_DIR}/cython")
  set(MLIAP_CYTHON_SRC "${LAMMPS_SOURCE_DIR}/MLIAP/mliap_model_python_couple.pyx")
  get_filename_component(MLIAP_CYTHON_BASE "${MLIAP_CYTHON_SRC}" NAME_WE)
  file(MAKE_DIRECTORY "${MLIAP_BINARY_DIR}")

  # build rule: re-run cythonize whenever the .pyx source changes
  # VERBATIM makes the argument escaping independent of the platform shell
  add_custom_command(
    OUTPUT "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp" "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.h"
    COMMAND "${CMAKE_COMMAND}" -E copy_if_different "${MLIAP_CYTHON_SRC}" "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx"
    COMMAND "${Cythonize_EXECUTABLE}" -3 "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.pyx"
    WORKING_DIRECTORY "${MLIAP_BINARY_DIR}"
    MAIN_DEPENDENCY "${MLIAP_CYTHON_SRC}"
    COMMENT "Generating C++ sources with cythonize..."
    VERBATIM)

  # compile the generated source into the lammps target and make the generated
  # header visible to it; a leading -D is not needed for compile definitions
  target_compile_definitions(lammps PRIVATE MLIAP_PYTHON)
  target_sources(lammps PRIVATE "${MLIAP_BINARY_DIR}/${MLIAP_CYTHON_BASE}.cpp")
  target_include_directories(lammps PRIVATE "${MLIAP_BINARY_DIR}")
endif()
|
||||
@ -37,6 +37,7 @@ This is the list of packages that may require additional steps.
|
||||
* :ref:`KOKKOS <kokkos>`
|
||||
* :ref:`LATTE <latte>`
|
||||
* :ref:`MESSAGE <message>`
|
||||
* :ref:`MLIAP <mliap>`
|
||||
* :ref:`MSCG <mscg>`
|
||||
* :ref:`OPT <opt>`
|
||||
* :ref:`POEMS <poems>`
|
||||
@ -770,6 +771,54 @@ be installed on your system.
|
||||
|
||||
----------
|
||||
|
||||
.. _mliap:
|
||||
|
||||
MLIAP package
|
||||
---------------------------
|
||||
|
||||
Building the MLIAP package requires including the :ref:`SNAP <PKG-SNAP>`
|
||||
package. There will be an error message if this requirement is not satisfied.
|
||||
Using the *mliappy* model also requires enabling Python support, which
|
||||
in turn requires the :ref:`PYTHON <PKG-PYTHON>`
|
||||
package **and** requires you have the `cython <https://cython.org>`_ software
|
||||
installed and with it a working ``cythonize`` command. This feature requires
|
||||
compiling LAMMPS with Python version 3.6 or later.
|
||||
|
||||
.. tabs::
|
||||
|
||||
.. tab:: CMake build
|
||||
|
||||
.. code-block:: bash
|
||||
|
||||
-D MLIAP_ENABLE_PYTHON=value # enable mliappy model (default is autodetect)
|
||||
|
||||
Without this setting, CMake will check whether it can find a
|
||||
suitable Python version and the ``cythonize`` command and choose
|
||||
the default accordingly. During the build procedure the provided
|
||||
.pyx file(s) will be automatically translated to C++ code and compiled.
|
||||
Please do **not** run ``cythonize`` manually in the ``src/MLIAP`` folder,
|
||||
as that can lead to compilation errors if Python support is not enabled.
|
||||
If you did by accident, please remove the generated .cpp and .h files.
|
||||
|
||||
.. tab:: Traditional make
|
||||
|
||||
The build uses the ``lib/python/Makefile.mliap_python`` file in the
|
||||
compile/link process to add a rule to update the files generated by
|
||||
the ``cythonize`` command in case the corresponding .pyx file(s) were
|
||||
modified. You may need to modify ``lib/python/Makefile.lammps``
|
||||
if the LAMMPS build fails.
|
||||
To manually enforce building MLIAP with Python support enabled,
|
||||
you can add
|
||||
``-DMLIAP_PYTHON`` to the ``LMP_INC`` variable in your machine makefile.
|
||||
You may have to manually run the ``cythonize`` command on .pyx file(s)
|
||||
in the ``src`` folder, if this is not automatically done during
|
||||
installing the MLIAP package. Please do **not** run ``cythonize``
|
||||
in the ``src/MLIAP`` folder, as that can lead to compilation errors
|
||||
if Python support is not enabled.
|
||||
If you did by accident, please remove the generated .cpp and .h files.
|
||||
|
||||
----------
|
||||
|
||||
.. _mscg:
|
||||
|
||||
MSCG package
|
||||
|
||||
@ -1,5 +1,4 @@
|
||||
Include packages in build
|
||||
|
||||
=========================
|
||||
|
||||
In LAMMPS, a package is a group of files that enable a specific set of
|
||||
|
||||
@ -662,19 +662,31 @@ MLIAP package
|
||||
|
||||
**Contents:**
|
||||
|
||||
A general interface for machine-learning interatomic potentials.
|
||||
A general interface for machine-learning interatomic potentials, including PyTorch.
|
||||
|
||||
**Install:**
|
||||
|
||||
To use this package, also the :ref:`SNAP package <PKG-SNAP>` needs to be installed.
|
||||
To use this package, also the :ref:`SNAP package <PKG-SNAP>` needs
|
||||
to be installed. To make the *mliappy* model available, also the
|
||||
:ref:`PYTHON package <PKG-PYTHON>` needs to be installed, the version of
|
||||
Python must be 3.6 or later, and the `cython <https://cython.org/>`_ software
|
||||
must be installed.
|
||||
|
||||
**Author:** Aidan Thompson (Sandia).
|
||||
**Author:** Aidan Thompson (Sandia), Nicholas Lubbers (LANL).
|
||||
|
||||
**Supporting info:**
|
||||
|
||||
* src/MLIAP: filenames -> commands
|
||||
* src/MLIAP/README
|
||||
* :doc:`pair_style mliap <pair_mliap>`
|
||||
* examples/mliap
|
||||
* :doc:`compute_style mliap <compute_mliap>`
|
||||
* examples/mliap (see README)
|
||||
|
||||
When built with the *mliappy* model this package includes an extension for
|
||||
coupling with Python models, including PyTorch. In this case, the Python
|
||||
interpreter linked to LAMMPS will need the ``cython`` and ``numpy`` modules
|
||||
installed. The provided examples build models with PyTorch, which would
|
||||
therefore also need to be installed to run those examples.
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -38,14 +38,14 @@ produce an executable compatible with a specific hardware.
|
||||
:class: note
|
||||
|
||||
Kokkos with CUDA currently implicitly assumes that the MPI library is
|
||||
CUDA-aware. This is not always the case, especially when using
|
||||
GPU-aware. This is not always the case, especially when using
|
||||
pre-compiled MPI libraries provided by a Linux distribution. This is
|
||||
not a problem when using only a single GPU with a single MPI
|
||||
rank. When running with multiple MPI ranks, you may see segmentation
|
||||
faults without CUDA-aware MPI support. These can be avoided by adding
|
||||
the flags :doc:`-pk kokkos cuda/aware off <Run_options>` to the
|
||||
faults without GPU-aware MPI support. These can be avoided by adding
|
||||
the flags :doc:`-pk kokkos gpu/aware off <Run_options>` to the
|
||||
LAMMPS command line or by using the command :doc:`package kokkos
|
||||
cuda/aware off <package>` in the input file.
|
||||
gpu/aware off <package>` in the input file.
|
||||
|
||||
.. admonition:: AMD GPU support
|
||||
:class: note
|
||||
@ -242,8 +242,8 @@ case, also packing/unpacking communication buffers on the host may give
|
||||
speedup (see the KOKKOS :doc:`package <package>` command). Using CUDA MPS
|
||||
is recommended in this scenario.
|
||||
|
||||
Using a CUDA-aware MPI library is highly recommended. CUDA-aware MPI use can be
|
||||
avoided by using :doc:`-pk kokkos cuda/aware no <package>`. As above for
|
||||
Using a GPU-aware MPI library is highly recommended. GPU-aware MPI use can be
|
||||
avoided by using :doc:`-pk kokkos gpu/aware off <package>`. As above for
|
||||
multi-core CPUs (and no GPU), if N is the number of physical cores/node,
|
||||
then the number of MPI tasks/node should not exceed N.
|
||||
|
||||
|
||||
@ -18,7 +18,7 @@ Syntax
|
||||
.. parsed-literal::
|
||||
|
||||
*model* values = style
|
||||
style = *linear* or *quadratic*
|
||||
style = *linear* or *quadratic* or *mliappy*
|
||||
*descriptor* values = style filename
|
||||
style = *sna*
|
||||
filename = name of file containing descriptor definitions
|
||||
@ -56,13 +56,15 @@ and it is also straightforward to add new descriptor styles.
|
||||
The compute *mliap* command must be followed by two keywords
|
||||
*model* and *descriptor* in either order.
|
||||
|
||||
The *model* keyword is followed by a model style, currently limited to
|
||||
either *linear* or *quadratic*.
|
||||
The *model* keyword is followed by the model style (*linear*, *quadratic* or *mliappy*).
|
||||
The *mliappy* model is only available
|
||||
if LAMMPS is built with the MLIAP package and Python support enabled.
|
||||
|
||||
The *descriptor* keyword is followed by a descriptor style, and additional arguments.
|
||||
Currently the only descriptor style is *sna*, indicating the bispectrum component
|
||||
descriptors used by the Spectral Neighbor Analysis Potential (SNAP) potentials of
|
||||
:doc:`pair_style snap <pair_snap>`.
|
||||
The compute currently supports just one descriptor style, but it is
|
||||
straightforward to add new descriptor styles.
|
||||
The SNAP descriptor style *sna* is the same as that used by :doc:`pair_style snap <pair_snap>`,
|
||||
including the linear, quadratic, and chem variants.
|
||||
A single additional argument specifies the descriptor filename
|
||||
containing the parameters and setting used by the SNAP descriptor.
|
||||
The descriptor filename usually ends in the *.mliap.descriptor* extension.
|
||||
@ -162,9 +164,10 @@ potentials, see the examples in `FitSNAP <https://github.com/FitSNAP/FitSNAP>`_.
|
||||
Restrictions
|
||||
""""""""""""
|
||||
|
||||
This compute is part of the MLIAP package. It is only enabled if
|
||||
LAMMPS was built with that package. In addition, building LAMMPS with the MLIAP package
|
||||
This compute is part of the MLIAP package. It is only enabled if LAMMPS
|
||||
was built with that package. In addition, building LAMMPS with the MLIAP package
|
||||
requires building LAMMPS with the SNAP package.
|
||||
The *mliappy* model requires building LAMMPS with the PYTHON package.
|
||||
See the :doc:`Build package <Build_package>` doc page for more info.
|
||||
|
||||
Related commands
|
||||
|
||||
@ -115,8 +115,8 @@ The optional keyword *chunksize* is only applicable when using the
|
||||
the KOKKOS package and is ignored otherwise. This keyword controls
|
||||
the number of atoms in each pass used to compute the bond-orientational
|
||||
order parameters and is used to avoid running out of memory. For example
|
||||
if there are 4000 atoms in the simulation and the *chunksize*
|
||||
is set to 2000, the parameter calculation will be broken up
|
||||
if there are 32768 atoms in the simulation and the *chunksize*
|
||||
is set to 16384, the parameter calculation will be broken up
|
||||
into two passes.
|
||||
|
||||
The value of :math:`Q_l` is set to zero for atoms not in the
|
||||
@ -193,7 +193,7 @@ Default
|
||||
|
||||
The option defaults are *cutoff* = pair style cutoff, *nnn* = 12,
|
||||
*degrees* = 5 4 6 8 10 12 i.e. :math:`Q_4`, :math:`Q_6`, :math:`Q_8`, :math:`Q_{10}`, and :math:`Q_{12}`,
|
||||
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 2000
|
||||
*wl* = no, *wl/hat* = no, *components* off, and *chunksize* = 16384
|
||||
|
||||
----------
|
||||
|
||||
|
||||
@ -93,7 +93,7 @@ from a compute, fix, or variable, then see the :doc:`fix ave/chunk <fix_ave_chun
|
||||
:doc:`fix ave/histo <fix_ave_histo>` commands. If you wish to convert a
|
||||
per-atom quantity into a single global value, see the :doc:`compute reduce <compute_reduce>` command.
|
||||
|
||||
The input values must either be all scalars. What kinds of
|
||||
The input values must be all scalars. What kinds of
|
||||
correlations between input values are calculated is determined by the
|
||||
*type* keyword as discussed below.
|
||||
|
||||
|
||||
@ -68,7 +68,7 @@ Syntax
|
||||
*no_affinity* values = none
|
||||
*kokkos* args = keyword value ...
|
||||
zero or more keyword/value pairs may be appended
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *comm/reverse* or *cuda/aware* or *pair/only*
|
||||
keywords = *neigh* or *neigh/qeq* or *neigh/thread* or *newton* or *binsize* or *comm* or *comm/exchange* or *comm/forward* or *pair/comm/forward* or *fix/comm/forward* or *comm/reverse* or *gpu/aware* or *pair/only*
|
||||
*neigh* value = *full* or *half*
|
||||
full = full neighbor list
|
||||
half = half neighbor list built in thread-safe manner
|
||||
@ -84,16 +84,18 @@ Syntax
|
||||
*binsize* value = size
|
||||
size = bin size for neighbor list construction (distance units)
|
||||
*comm* value = *no* or *host* or *device*
|
||||
use value for comm/exchange and comm/forward and comm/reverse
|
||||
use value for comm/exchange and comm/forward and pair/comm/forward and fix/comm/forward and comm/reverse
|
||||
*comm/exchange* value = *no* or *host* or *device*
|
||||
*comm/forward* value = *no* or *host* or *device*
|
||||
*pair/comm/forward* value = *no* or *device*
|
||||
*fix/comm/forward* value = *no* or *device*
|
||||
*comm/reverse* value = *no* or *host* or *device*
|
||||
no = perform communication pack/unpack in non-KOKKOS mode
|
||||
host = perform pack/unpack on host (e.g. with OpenMP threading)
|
||||
device = perform pack/unpack on device (e.g. on GPU)
|
||||
*cuda/aware* = *off* or *on*
|
||||
off = do not use CUDA-aware MPI
|
||||
on = use CUDA-aware MPI (default)
|
||||
*gpu/aware* = *off* or *on*
|
||||
off = do not use GPU-aware MPI
|
||||
on = use GPU-aware MPI (default)
|
||||
*pair/only* = *off* or *on*
|
||||
off = use device acceleration (e.g. GPU) for all available styles in the KOKKOS package (default)
|
||||
on = use device acceleration only for pair styles (and host acceleration for others)
|
||||
@ -498,7 +500,8 @@ because the GPU is faster at performing pairwise interactions, then this
|
||||
rule of thumb may give too large a binsize and the default should be
|
||||
overridden with a smaller value.
|
||||
|
||||
The *comm* and *comm/exchange* and *comm/forward* and *comm/reverse*
|
||||
The *comm* and *comm/exchange* and *comm/forward* and *pair/comm/forward*
|
||||
and *fix/comm/forward* and *comm/reverse*
|
||||
keywords determine whether the host or device performs the packing and
|
||||
unpacking of data when communicating per-atom data between processors.
|
||||
"Exchange" communication happens only on timesteps that neighbor lists
|
||||
@ -506,18 +509,22 @@ are rebuilt. The data is only for atoms that migrate to new processors.
|
||||
"Forward" communication happens every timestep. "Reverse" communication
|
||||
happens every timestep if the *newton* option is on. The data is for
|
||||
atom coordinates and any other atom properties that need to be updated
|
||||
for ghost atoms owned by each processor.
|
||||
for ghost atoms owned by each processor. "Pair/comm" controls additional
|
||||
communication in pair styles, such as pair_style EAM. "Fix/comm" controls
|
||||
additional communication in fixes, such as fix SHAKE.
|
||||
|
||||
The *comm* keyword is simply a short-cut to set the same value for both
|
||||
the *comm/exchange* and *comm/forward* and *comm/reverse* keywords.
|
||||
The *comm* keyword is simply a short-cut to set the same value for all
|
||||
the comm keywords.
|
||||
|
||||
The value options for all 3 keywords are *no* or *host* or *device*\ . A
|
||||
The value options for the keywords are *no* or *host* or *device*\ . A
|
||||
value of *no* means to use the standard non-KOKKOS method of
|
||||
packing/unpacking data for the communication. A value of *host* means to
|
||||
use the host, typically a multi-core CPU, and perform the
|
||||
packing/unpacking in parallel with threads. A value of *device* means to
|
||||
use the device, typically a GPU, to perform the packing/unpacking
|
||||
operation.
|
||||
operation. If a value of *host* is used for the *pair/comm/forward* or
|
||||
*fix/comm/forward* keyword, it will automatically be changed to *no*
|
||||
since these keywords don't support *host* mode.
|
||||
|
||||
The optimal choice for these keywords depends on the input script and
|
||||
the hardware used. The *no* value is useful for verifying that the
|
||||
@ -538,18 +545,18 @@ pack/unpack communicated data. When running small systems on a GPU,
|
||||
performing the exchange pack/unpack on the host CPU can give speedup
|
||||
since it reduces the number of CUDA kernel launches.
|
||||
|
||||
The *cuda/aware* keyword chooses whether CUDA-aware MPI will be used. When
|
||||
The *gpu/aware* keyword chooses whether GPU-aware MPI will be used. When
|
||||
this keyword is set to *on*\ , buffers in GPU memory are passed directly
|
||||
through MPI send/receive calls. This reduces overhead of first copying
|
||||
the data to the host CPU. However CUDA-aware MPI is not supported on all
|
||||
the data to the host CPU. However GPU-aware MPI is not supported on all
|
||||
systems, which can lead to segmentation faults and would require using a
|
||||
value of *off*\ . If LAMMPS can safely detect that CUDA-aware MPI is not
|
||||
value of *off*\ . If LAMMPS can safely detect that GPU-aware MPI is not
|
||||
available (currently only possible with OpenMPI v2.0.0 or later), then
|
||||
the *cuda/aware* keyword is automatically set to *off* by default. When
|
||||
the *cuda/aware* keyword is set to *off* while any of the *comm*
|
||||
the *gpu/aware* keyword is automatically set to *off* by default. When
|
||||
the *gpu/aware* keyword is set to *off* while any of the *comm*
|
||||
keywords are set to *device*\ , the value for these *comm* keywords will
|
||||
be automatically changed to *no*\ . This setting has no effect if not
|
||||
running on GPUs or if using only one MPI rank. CUDA-aware MPI is available
|
||||
running on GPUs or if using only one MPI rank. GPU-aware MPI is available
|
||||
for OpenMPI 1.8 (or later versions), Mvapich2 1.9 (or later) when the
|
||||
"MV2_USE_CUDA" environment variable is set to "1", CrayMPI, and IBM
|
||||
Spectrum MPI when the "-gpu" flag is used.
|
||||
@ -558,7 +565,7 @@ The *pair/only* keyword can change how the KOKKOS suffix "kk" is applied
|
||||
when using an accelerator device. By default device acceleration is
|
||||
always used for all available styles. With *pair/only* set to *on* the
|
||||
suffix setting will choose device acceleration only for pair styles and
|
||||
run all other force computations concurrently on the host CPU.
|
||||
run all other force computations on the host CPU.
|
||||
The *comm* flags will also automatically be changed to *no*\ . This can
|
||||
result in better performance for certain configurations and system sizes.
|
||||
|
||||
@ -671,8 +678,8 @@ script or via the "-pk intel" :doc:`command-line switch <Run_options>`.
|
||||
|
||||
For the KOKKOS package, the option defaults for GPUs are neigh = full,
|
||||
neigh/qeq = full, newton = off, binsize for GPUs = 2x LAMMPS default
|
||||
value, comm = device, cuda/aware = on. When LAMMPS can safely detect
|
||||
that CUDA-aware MPI is not available, the default value of cuda/aware
|
||||
value, comm = device, gpu/aware = on. When LAMMPS can safely detect
|
||||
that GPU-aware MPI is not available, the default value of gpu/aware
|
||||
becomes "off". For CPUs or Xeon Phis, the option defaults are neigh =
|
||||
half, neigh/qeq = half, newton = on, binsize = 0.0, and comm = no. The
|
||||
option neigh/thread = on when there are 16K atoms or less on an MPI
|
||||
|
||||
@ -16,7 +16,7 @@ Syntax
|
||||
.. parsed-literal::
|
||||
|
||||
*model* values = style filename
|
||||
style = *linear* or *quadratic*
|
||||
style = *linear* or *quadratic* or *mliappy*
|
||||
filename = name of file containing model definitions
|
||||
*descriptor* values = style filename
|
||||
style = *sna*
|
||||
@ -40,12 +40,15 @@ definitions of the interatomic potential functional form (*model*)
|
||||
and the geometric quantities that characterize the atomic positions
|
||||
(*descriptor*). By defining *model* and *descriptor* separately,
|
||||
it is possible to use many different models with a given descriptor,
|
||||
or many different descriptors with a given model. Currently, the pair_style
|
||||
supports just two models, *linear* and *quadratic*,
|
||||
and one descriptor, *sna*, the SNAP descriptor used by :doc:`pair_style snap <pair_snap>`, including the linear, quadratic,
|
||||
and chem variants. Work is currently underway to extend
|
||||
the interface to handle neural network energy models,
|
||||
and it is also straightforward to add new descriptor styles.
|
||||
or many different descriptors with a given model. The
|
||||
pair style currently supports just one descriptor style, but it is
|
||||
straightforward to add new descriptor styles.
|
||||
The SNAP descriptor style *sna* is the same as that used by :doc:`pair_style snap <pair_snap>`,
|
||||
including the linear, quadratic, and chem variants.
|
||||
The available models are *linear*, *quadratic*, and *mliappy*.
|
||||
The *mliappy* style can be used to couple python models,
|
||||
e.g. PyTorch neural network energy models, and requires building
|
||||
LAMMPS with the PYTHON package (see below).
|
||||
In order to train a model, it is useful to know the gradient or derivative
|
||||
of energy, force, and stress w.r.t. model parameters. This information
|
||||
can be accessed using the related :doc:`compute mliap <compute_mliap>` command.
|
||||
@ -59,9 +62,8 @@ that specify the mapping of MLIAP
|
||||
element names to LAMMPS atom types,
|
||||
where N is the number of LAMMPS atom types.
|
||||
|
||||
The *model* keyword is followed by a model style, currently limited to
|
||||
either *linear* or *quadratic*. In both cases,
|
||||
this is followed by a single argument specifying the model filename containing the
|
||||
The *model* keyword is followed by the model style. This is followed
|
||||
by a single argument specifying the model filename containing the
|
||||
parameters for a set of elements.
|
||||
The model filename usually ends in the *.mliap.model* extension.
|
||||
It may contain parameters for many elements. The only requirement is that it
|
||||
@ -82,6 +84,16 @@ for the :doc:`pair_style snap <pair_snap>` coefficient file.
|
||||
Specifically, the line containing the element weight and radius is omitted,
|
||||
since these are handled by the *descriptor*.
|
||||
|
||||
Notes on mliappy models:
|
||||
When the *model* keyword is *mliappy*, the filename should end in '.pt' or
|
||||
'.pth' for pytorch models, or be a pickle file. To load a model from
|
||||
memory (i.e. an existing python object), specify the filename as
|
||||
"LATER", and then call `lammps.mliap.load_model(model)` from python
|
||||
before using the pair style. When using lammps via the library mode, you will need to call
|
||||
`lammps.mliappy.activate_mliappy(lmp)` on the active lammps object
|
||||
before the pair style is defined. This call locates and loads the mliap-specific
|
||||
python module that is built into lammps.
|
||||
|
||||
The *descriptor* keyword is followed by a descriptor style, and additional arguments.
|
||||
Currently the only descriptor style is *sna*, indicating the bispectrum component
|
||||
descriptors used by the Spectral Neighbor Analysis Potential (SNAP) potentials of
|
||||
@ -138,11 +150,13 @@ This pair style can only be used via the *pair* keyword of the
|
||||
Restrictions
|
||||
""""""""""""
|
||||
|
||||
This style is part of the MLIAP package. It is only enabled if LAMMPS
|
||||
This pair style is part of the MLIAP package. It is only enabled if LAMMPS
|
||||
was built with that package. In addition, building LAMMPS with the MLIAP package
|
||||
requires building LAMMPS with the SNAP package.
|
||||
The *mliappy* model requires building LAMMPS with the PYTHON package.
|
||||
See the :doc:`Build package <Build_package>` doc page for more info.
|
||||
|
||||
|
||||
Related commands
|
||||
""""""""""""""""
|
||||
|
||||
|
||||
@ -152,7 +152,7 @@ The default values for these keywords are
|
||||
* *chemflag* = 0
|
||||
* *bnormflag* = 0
|
||||
* *wselfallflag* = 0
|
||||
* *chunksize* = 2000
|
||||
* *chunksize* = 4096
|
||||
|
||||
If *quadraticflag* is set to 1, then the SNAP energy expression includes additional quadratic terms
|
||||
that have been shown to increase the overall accuracy of the potential without much increase
|
||||
@ -189,8 +189,8 @@ pair style *snap* with the KOKKOS package and is ignored otherwise.
|
||||
This keyword controls
|
||||
the number of atoms in each pass used to compute the bispectrum
|
||||
components and is used to avoid running out of memory. For example
|
||||
if there are 4000 atoms in the simulation and the *chunksize*
|
||||
is set to 2000, the bispectrum calculation will be broken up
|
||||
if there are 8192 atoms in the simulation and the *chunksize*
|
||||
is set to 4096, the bispectrum calculation will be broken up
|
||||
into two passes.
|
||||
|
||||
Detailed definitions for all the other keywords
|
||||
|
||||
@ -558,6 +558,7 @@ Cygwin
|
||||
cylindrically
|
||||
Cyrot
|
||||
cyrstals
|
||||
cython
|
||||
Daivis
|
||||
Dammak
|
||||
dampflag
|
||||
@ -1918,6 +1919,7 @@ mK
|
||||
mkdir
|
||||
mkv
|
||||
mliap
|
||||
mliappy
|
||||
mlparks
|
||||
Mniszewski
|
||||
mnt
|
||||
@ -2508,6 +2510,7 @@ Pstart
|
||||
Pstop
|
||||
pstyle
|
||||
Ptarget
|
||||
pth
|
||||
pthread
|
||||
pthreads
|
||||
ptm
|
||||
@ -2536,6 +2539,7 @@ pymodule
|
||||
pymol
|
||||
pypar
|
||||
pythonic
|
||||
pytorch
|
||||
Pyy
|
||||
pz
|
||||
Pz
|
||||
|
||||
103
examples/mliap/README
Normal file
103
examples/mliap/README
Normal file
@ -0,0 +1,103 @@
|
||||
This directory contains multiple examples of
|
||||
machine-learning potentials defined using the
|
||||
MLIAP package in LAMMPS. The input files
|
||||
are described below.
|
||||
|
||||
in.mliap.snap.Ta06A
|
||||
-------------------
|
||||
Run linear SNAP, equivalent to examples/snap/in.snap.Ta06A
|
||||
|
||||
in.mliap.snap.WBe.PRB2019
|
||||
-------------------------
|
||||
Run linear SNAP, equivalent to examples/snap/in.snap.WBe.PRB2019
|
||||
|
||||
in.mliap.snap.quadratic
|
||||
-----------------------
|
||||
Run quadratic SNAP
|
||||
|
||||
in.mliap.snap.chem
|
||||
------------------
|
||||
Run EME-SNAP, equivalent to examples/snap/in.snap.InP.JCPA2020
|
||||
|
||||
in.mliap.snap.compute
|
||||
---------------------
|
||||
Generate the A matrix, the gradients (w.r.t. coefficients)
|
||||
of total potential energy, forces, and stress tensor for
|
||||
linear SNAP, equivalent to in.snap.compute
|
||||
|
||||
in.mliap.quadratic.compute
|
||||
--------------------------
|
||||
Generate the A matrix, the gradients (w.r.t. coefficients)
|
||||
of total potential energy, forces, and stress tensor for
|
||||
quadratic SNAP, equivalent to in.snap.compute.quadratic
|
||||
|
||||
in.mliap.pytorch.Ta06A
|
||||
-----------------------
|
||||
This reproduces the output of in.mliap.snap.Ta06A above,
|
||||
but using the Python coupling to PyTorch.
|
||||
|
||||
This example can be run in two different ways:
|
||||
|
||||
1: Running a LAMMPS executable: in.mliap.pytorch.Ta06A
|
||||
|
||||
First run ``python convert_mliap_Ta06A.py``. It creates
|
||||
a PyTorch energy model that replicates the
|
||||
SNAP Ta06A potential and saves it in the file
|
||||
"Ta06A.mliap.pytorch.model.pt".
|
||||
|
||||
You can then run the example as follows
|
||||
|
||||
`lmp -in in.mliap.pytorch.Ta06A -echo both`
|
||||
|
||||
The resultant log.lammps output should be identical to that generated
|
||||
by in.mliap.snap.Ta06A.
|
||||
|
||||
If this fails, see the instructions for building the MLIAP package
|
||||
with Python support enabled. Also, confirm that the
|
||||
Python interpreter embedded in LAMMPS is
|
||||
working by running ../python/in.python.
|
||||
|
||||
2: Running a Python script: mliap_pytorch_Ta06A.py
|
||||
|
||||
Before testing this, ensure that the previous method
|
||||
(running a LAMMPS executable) works.
|
||||
|
||||
You can run the example in serial:
|
||||
|
||||
`python mliap_pytorch_Ta06A.py`
|
||||
|
||||
or in parallel:
|
||||
|
||||
`mpirun -np 4 python mliap_pytorch_Ta06A.py`
|
||||
|
||||
The resultant log.lammps output should be identical to that generated
|
||||
by in.mliap.snap.Ta06A and in.mliap.pytorch.Ta06A.
|
||||
|
||||
Not all Python installations support this mode of operation.
|
||||
It requires that the Python interpreter be initialized. If not,
|
||||
the script will exit with an error message.
|
||||
|
||||
in.mliap.pytorch.relu1hidden
|
||||
----------------------------
|
||||
This example demonstrates a simple neural network potential
|
||||
using PyTorch and SNAP descriptors.
|
||||
|
||||
`lmp -in in.mliap.pytorch.relu1hidden -echo both`
|
||||
|
||||
It was trained on just the energy component (no forces) of
|
||||
the data used in the original SNAP Ta06A potential for
|
||||
tantalum (Thompson, Swiler, Trott, Foiles, Tucker,
|
||||
J Comp Phys, 285, 316 (2015)). Because of the very small amount
|
||||
of energy training data, it uses just 1 hidden layer with
|
||||
a ReLU activation function. It is not expected to be
|
||||
very accurate for forces.
|
||||
|
||||
NOTE: Unlike the previous example, this example uses
|
||||
a pre-built PyTorch file `relu1hidden.mliap.pytorch.model.pt`.
|
||||
It is read using `torch.load`,
|
||||
which implicitly uses the Python `pickle` module.
|
||||
This is known to be insecure. It is possible to construct malicious
|
||||
pickle data that will execute arbitrary code during unpickling. Never
|
||||
load data that could have come from an untrusted source, or that
|
||||
could have been tampered with. Only load data you trust.
|
||||
|
||||
18
examples/mliap/Ta06A.mliap.pytorch
Normal file
18
examples/mliap/Ta06A.mliap.pytorch
Normal file
@ -0,0 +1,18 @@
|
||||
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
|
||||
|
||||
# Definition of SNAP potential Ta_Cand06A
|
||||
# Assumes 1 LAMMPS atom type
|
||||
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 73
|
||||
|
||||
# Specify hybrid with SNAP, ZBL
|
||||
|
||||
pair_style hybrid/overlay &
|
||||
zbl ${zblcutinner} ${zblcutouter} &
|
||||
mliap model mliappy Ta06A.mliap.pytorch.model.pt &
|
||||
descriptor sna Ta06A.mliap.descriptor
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff * * mliap Ta
|
||||
|
||||
26
examples/mliap/convert_mliap_Ta06A.py
Normal file
26
examples/mliap/convert_mliap_Ta06A.py
Normal file
@ -0,0 +1,26 @@
|
||||
import sys
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
# torch.nn.modules useful for defining a MLIAPPY model.
|
||||
from lammps.mliap.pytorch import TorchWrapper, IgnoreElems
|
||||
|
||||
# Load the linear SNAP coefficients, skipping the 6 header lines of the model file.
coeffs = np.genfromtxt("Ta06A.mliap.model", skip_header=6)

# The first coefficient is used as the bias; the remaining ones are the weights.
bias, weights = coeffs[0], coeffs[1:]

# Build a double-precision linear layer with one input per weight and a single
# output, then install the SNAP coefficients as its parameters.
lin = torch.nn.Linear(weights.shape[0], 1)
lin.to(torch.float64)
with torch.autograd.no_grad():
    weight_row = torch.from_numpy(weights).unsqueeze(0)
    bias_vec = torch.as_tensor(bias, dtype=torch.float64).unsqueeze(0)
    lin.weight.set_(weight_row)
    lin.bias.set_(bias_vec)

# Wrap the pytorch model for usage with the mliappy coupling.
# The linear module does not use the types, hence IgnoreElems.
model = IgnoreElems(lin)
n_descriptors = lin.weight.shape[1]
n_elements = 1
linked_model = TorchWrapper(
    model,
    n_descriptors=n_descriptors,
    n_elements=n_elements,
)

# Save the wrapped model under the file name used by the example input.
torch.save(linked_model, "Ta06A.mliap.pytorch.model.pt")
|
||||
53
examples/mliap/in.mliap.pytorch.Ta06A
Normal file
53
examples/mliap/in.mliap.pytorch.Ta06A
Normal file
@ -0,0 +1,53 @@
|
||||
# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.316
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable ny equal ${nrep}
|
||||
variable nz equal ${nrep}
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
mass 1 180.88
|
||||
|
||||
# choose potential
|
||||
|
||||
include Ta06A.mliap.pytorch
|
||||
|
||||
# Setup output
|
||||
|
||||
compute eatom all pe/atom
|
||||
compute energy all reduce sum c_eatom
|
||||
|
||||
compute satom all stress/atom NULL
|
||||
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
|
||||
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
|
||||
|
||||
thermo_style custom step temp epair c_energy etotal press v_press
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459 loop geom
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
|
||||
53
examples/mliap/in.mliap.pytorch.relu1hidden
Normal file
53
examples/mliap/in.mliap.pytorch.relu1hidden
Normal file
@ -0,0 +1,53 @@
|
||||
# Demonstrate MLIAP/PyTorch interface to a neural network potential (1 hidden layer, ReLU)
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.316
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable ny equal ${nrep}
|
||||
variable nz equal ${nrep}
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
mass 1 180.88
|
||||
|
||||
# choose potential
|
||||
|
||||
include relu1hidden.mliap.pytorch
|
||||
|
||||
# Setup output
|
||||
|
||||
compute eatom all pe/atom
|
||||
compute energy all reduce sum c_eatom
|
||||
|
||||
compute satom all stress/atom NULL
|
||||
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
|
||||
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
|
||||
|
||||
thermo_style custom step temp epair c_energy etotal press v_press
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459 loop geom
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
# Demonstrate MLIAP interface to kinear SNAP potential
|
||||
# Demonstrate MLIAP interface to linear SNAP potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
|
||||
157
examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.1
Normal file
157
examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.1
Normal file
@ -0,0 +1,157 @@
|
||||
LAMMPS (30 Nov 2020)
|
||||
using 48 OpenMP thread(s) per MPI task
|
||||
# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.316
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable nx equal 4
|
||||
variable ny equal ${nrep}
|
||||
variable ny equal 4
|
||||
variable nz equal ${nrep}
|
||||
variable nz equal 4
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
lattice bcc 3.316
|
||||
Lattice spacing in x,y,z = 3.3160000 3.3160000 3.3160000
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 4 0 ${nz}
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 1 box
|
||||
Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (13.264000 13.264000 13.264000)
|
||||
1 by 1 by 1 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 128 atoms
|
||||
create_atoms CPU = 0.002 seconds
|
||||
|
||||
mass 1 180.88
|
||||
|
||||
# choose potential
|
||||
|
||||
include Ta06A.mliap.pytorch
|
||||
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
|
||||
|
||||
# Definition of SNAP potential Ta_Cand06A
|
||||
# Assumes 1 LAMMPS atom type
|
||||
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 73
|
||||
|
||||
# Specify hybrid with SNAP, ZBL
|
||||
|
||||
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
|
||||
pair_style hybrid/overlay zbl 4 ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
|
||||
pair_style hybrid/overlay zbl 4 4.8 mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
|
||||
Loading python model complete.
|
||||
Reading potential file Ta06A.mliap.descriptor with DATE: 2014-09-05
|
||||
SNAP keyword rcutfac 4.67637
|
||||
SNAP keyword twojmax 6
|
||||
SNAP keyword nelems 1
|
||||
SNAP keyword elems Ta
|
||||
SNAP keyword radelems 0.5
|
||||
SNAP keyword welems 1
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0
|
||||
SNAP keyword bzeroflag 0
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff 1 1 zbl 73 ${zblz}
|
||||
pair_coeff 1 1 zbl 73 73
|
||||
pair_coeff * * mliap Ta
|
||||
|
||||
|
||||
# Setup output
|
||||
|
||||
compute eatom all pe/atom
|
||||
compute energy all reduce sum c_eatom
|
||||
|
||||
compute satom all stress/atom NULL
|
||||
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
|
||||
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
|
||||
|
||||
thermo_style custom step temp epair c_energy etotal press v_press
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459 loop geom
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 5.8
|
||||
ghost atom cutoff = 5.8
|
||||
binsize = 2.9, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair zbl, perpetual, half/full from (2)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair mliap, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 159.8 | 159.8 | 159.8 Mbytes
|
||||
Step Temp E_pair c_energy TotEng Press v_press
|
||||
0 300 -11.85157 -11.85157 -11.813095 2717.1661 -2717.1661
|
||||
10 296.01467 -11.851059 -11.851059 -11.813095 2697.4796 -2697.4796
|
||||
20 284.53666 -11.849587 -11.849587 -11.813095 2289.1527 -2289.1527
|
||||
30 266.51577 -11.847275 -11.847275 -11.813095 1851.7131 -1851.7131
|
||||
40 243.05007 -11.844266 -11.844266 -11.813095 1570.684 -1570.684
|
||||
50 215.51032 -11.840734 -11.840734 -11.813094 1468.1899 -1468.1899
|
||||
60 185.48331 -11.836883 -11.836883 -11.813094 1524.8757 -1524.8757
|
||||
70 154.6736 -11.832931 -11.832931 -11.813094 1698.3351 -1698.3351
|
||||
80 124.79303 -11.829099 -11.829099 -11.813094 1947.0715 -1947.0715
|
||||
90 97.448054 -11.825592 -11.825592 -11.813094 2231.9563 -2231.9563
|
||||
100 74.035418 -11.822589 -11.822589 -11.813094 2515.8526 -2515.8526
|
||||
Loop time of 2.00236 on 48 procs for 100 steps with 128 atoms
|
||||
|
||||
Performance: 2.157 ns/day, 11.124 hours/ns, 49.941 timesteps/s
|
||||
288.8% CPU use with 1 MPI tasks x 48 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 1.9998 | 1.9998 | 1.9998 | 0.0 | 99.87
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.0011814 | 0.0011814 | 0.0011814 | 0.0 | 0.06
|
||||
Output | 0.00059724 | 0.00059724 | 0.00059724 | 0.0 | 0.03
|
||||
Modify | 0.00047352 | 0.00047352 | 0.00047352 | 0.0 | 0.02
|
||||
Other | | 0.0003468 | | | 0.02
|
||||
|
||||
Nlocal: 128.000 ave 128 max 128 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 727.000 ave 727 max 727 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 3712.00 ave 3712 max 3712 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 7424.00 ave 7424 max 7424 min
|
||||
Histogram: 1 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 7424
|
||||
Ave neighs/atom = 58.000000
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
|
||||
Total wall time: 0:00:03
|
||||
157
examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.4
Normal file
157
examples/mliap/log.04Dec20.mliap.pytorch.Ta06A.g++.4
Normal file
@ -0,0 +1,157 @@
|
||||
LAMMPS (30 Nov 2020)
|
||||
using 48 OpenMP thread(s) per MPI task
|
||||
# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.316
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable nx equal 4
|
||||
variable ny equal ${nrep}
|
||||
variable ny equal 4
|
||||
variable nz equal ${nrep}
|
||||
variable nz equal 4
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
lattice bcc 3.316
|
||||
Lattice spacing in x,y,z = 3.3160000 3.3160000 3.3160000
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 ${ny} 0 ${nz}
|
||||
region box block 0 4 0 4 0 ${nz}
|
||||
region box block 0 4 0 4 0 4
|
||||
create_box 1 box
|
||||
Created orthogonal box = (0.0000000 0.0000000 0.0000000) to (13.264000 13.264000 13.264000)
|
||||
1 by 2 by 2 MPI processor grid
|
||||
create_atoms 1 box
|
||||
Created 128 atoms
|
||||
create_atoms CPU = 0.002 seconds
|
||||
|
||||
mass 1 180.88
|
||||
|
||||
# choose potential
|
||||
|
||||
include Ta06A.mliap.pytorch
|
||||
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
|
||||
|
||||
# Definition of SNAP potential Ta_Cand06A
|
||||
# Assumes 1 LAMMPS atom type
|
||||
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 73
|
||||
|
||||
# Specify hybrid with SNAP, ZBL
|
||||
|
||||
pair_style hybrid/overlay zbl ${zblcutinner} ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
|
||||
pair_style hybrid/overlay zbl 4 ${zblcutouter} mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
|
||||
pair_style hybrid/overlay zbl 4 4.8 mliap model mliappy Ta06A.mliap.pytorch.model.pkl descriptor sna Ta06A.mliap.descriptor
|
||||
Loading python model complete.
|
||||
Reading potential file Ta06A.mliap.descriptor with DATE: 2014-09-05
|
||||
SNAP keyword rcutfac 4.67637
|
||||
SNAP keyword twojmax 6
|
||||
SNAP keyword nelems 1
|
||||
SNAP keyword elems Ta
|
||||
SNAP keyword radelems 0.5
|
||||
SNAP keyword welems 1
|
||||
SNAP keyword rfac0 0.99363
|
||||
SNAP keyword rmin0 0
|
||||
SNAP keyword bzeroflag 0
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff 1 1 zbl 73 ${zblz}
|
||||
pair_coeff 1 1 zbl 73 73
|
||||
pair_coeff * * mliap Ta
|
||||
|
||||
|
||||
# Setup output
|
||||
|
||||
compute eatom all pe/atom
|
||||
compute energy all reduce sum c_eatom
|
||||
|
||||
compute satom all stress/atom NULL
|
||||
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
|
||||
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
|
||||
|
||||
thermo_style custom step temp epair c_energy etotal press v_press
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459 loop geom
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
run 100
|
||||
Neighbor list info ...
|
||||
update every 1 steps, delay 0 steps, check yes
|
||||
max neighbors/atom: 2000, page size: 100000
|
||||
master list distance cutoff = 5.8
|
||||
ghost atom cutoff = 5.8
|
||||
binsize = 2.9, bins = 5 5 5
|
||||
2 neighbor lists, perpetual/occasional/extra = 2 0 0
|
||||
(1) pair zbl, perpetual, half/full from (2)
|
||||
attributes: half, newton on
|
||||
pair build: halffull/newton
|
||||
stencil: none
|
||||
bin: none
|
||||
(2) pair mliap, perpetual
|
||||
attributes: full, newton on
|
||||
pair build: full/bin/atomonly
|
||||
stencil: full/bin/3d
|
||||
bin: standard
|
||||
Per MPI rank memory allocation (min/avg/max) = 159.7 | 159.7 | 159.7 Mbytes
|
||||
Step Temp E_pair c_energy TotEng Press v_press
|
||||
0 300 -11.85157 -11.85157 -11.813095 2717.1661 -2717.1661
|
||||
10 296.01467 -11.851059 -11.851059 -11.813095 2697.4796 -2697.4796
|
||||
20 284.53666 -11.849587 -11.849587 -11.813095 2289.1527 -2289.1527
|
||||
30 266.51577 -11.847275 -11.847275 -11.813095 1851.7131 -1851.7131
|
||||
40 243.05007 -11.844266 -11.844266 -11.813095 1570.684 -1570.684
|
||||
50 215.51032 -11.840734 -11.840734 -11.813094 1468.1899 -1468.1899
|
||||
60 185.48331 -11.836883 -11.836883 -11.813094 1524.8757 -1524.8757
|
||||
70 154.6736 -11.832931 -11.832931 -11.813094 1698.3351 -1698.3351
|
||||
80 124.79303 -11.829099 -11.829099 -11.813094 1947.0715 -1947.0715
|
||||
90 97.448054 -11.825592 -11.825592 -11.813094 2231.9563 -2231.9563
|
||||
100 74.035418 -11.822589 -11.822589 -11.813094 2515.8526 -2515.8526
|
||||
Loop time of 0.562802 on 192 procs for 100 steps with 128 atoms
|
||||
|
||||
Performance: 7.676 ns/day, 3.127 hours/ns, 177.682 timesteps/s
|
||||
99.7% CPU use with 4 MPI tasks x 48 OpenMP threads
|
||||
|
||||
MPI task timing breakdown:
|
||||
Section | min time | avg time | max time |%varavg| %total
|
||||
---------------------------------------------------------------
|
||||
Pair | 0.53583 | 0.54622 | 0.55401 | 0.9 | 97.05
|
||||
Neigh | 0 | 0 | 0 | 0.0 | 0.00
|
||||
Comm | 0.0071442 | 0.01491 | 0.025289 | 5.4 | 2.65
|
||||
Output | 0.00092525 | 0.00095771 | 0.0010166 | 0.0 | 0.17
|
||||
Modify | 0.00014479 | 0.00015043 | 0.00015893 | 0.0 | 0.03
|
||||
Other | | 0.0005624 | | | 0.10
|
||||
|
||||
Nlocal: 32.0000 ave 32 max 32 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Nghost: 431.000 ave 431 max 431 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
Neighs: 928.000 ave 928 max 928 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
FullNghs: 1856.00 ave 1856 max 1856 min
|
||||
Histogram: 4 0 0 0 0 0 0 0 0 0
|
||||
|
||||
Total # of neighbors = 7424
|
||||
Ave neighs/atom = 58.000000
|
||||
Neighbor list builds = 0
|
||||
Dangerous builds = 0
|
||||
|
||||
Total wall time: 0:00:02
|
||||
104
examples/mliap/mliap_pytorch_Ta06A.py
Normal file
104
examples/mliap/mliap_pytorch_Ta06A.py
Normal file
@ -0,0 +1,104 @@
|
||||
# Demonstrate how to load a model from the python side.
|
||||
# This is essentially the same as in.mliap.pytorch.Ta06A
|
||||
# except that python is the driving program, and lammps
|
||||
# is in library mode.
|
||||
|
||||
before_loading =\
|
||||
"""# Demonstrate MLIAP/PyTorch interface to linear SNAP potential
|
||||
|
||||
# Initialize simulation
|
||||
|
||||
variable nsteps index 100
|
||||
variable nrep equal 4
|
||||
variable a equal 3.316
|
||||
units metal
|
||||
|
||||
# generate the box and atom positions using a BCC lattice
|
||||
|
||||
variable nx equal ${nrep}
|
||||
variable ny equal ${nrep}
|
||||
variable nz equal ${nrep}
|
||||
|
||||
boundary p p p
|
||||
|
||||
lattice bcc $a
|
||||
region box block 0 ${nx} 0 ${ny} 0 ${nz}
|
||||
create_box 1 box
|
||||
create_atoms 1 box
|
||||
|
||||
mass 1 180.88
|
||||
|
||||
# choose potential
|
||||
|
||||
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
|
||||
|
||||
# Definition of SNAP potential Ta_Cand06A
|
||||
# Assumes 1 LAMMPS atom type
|
||||
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 73
|
||||
|
||||
# Specify hybrid with SNAP, ZBL
|
||||
|
||||
pair_style hybrid/overlay &
|
||||
zbl ${zblcutinner} ${zblcutouter} &
|
||||
mliap model mliappy LATER &
|
||||
descriptor sna Ta06A.mliap.descriptor
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff * * mliap Ta
|
||||
"""
|
||||
after_loading =\
|
||||
"""
|
||||
|
||||
# Setup output
|
||||
|
||||
compute eatom all pe/atom
|
||||
compute energy all reduce sum c_eatom
|
||||
|
||||
compute satom all stress/atom NULL
|
||||
compute str all reduce sum c_satom[1] c_satom[2] c_satom[3]
|
||||
variable press equal (c_str[1]+c_str[2]+c_str[3])/(3*vol)
|
||||
|
||||
thermo_style custom step temp epair c_energy etotal press v_press
|
||||
thermo 10
|
||||
thermo_modify norm yes
|
||||
|
||||
# Set up NVE run
|
||||
|
||||
timestep 0.5e-3
|
||||
neighbor 1.0 bin
|
||||
neigh_modify once no every 1 delay 0 check yes
|
||||
|
||||
# Run MD
|
||||
|
||||
velocity all create 300.0 4928459 loop geom
|
||||
fix 1 all nve
|
||||
run ${nsteps}
|
||||
"""
|
||||
|
||||
import lammps
|
||||
|
||||
lmp = lammps.lammps(cmdargs=['-echo','both'])
|
||||
|
||||
# Before defining the pair style, one must do the following:
|
||||
import lammps.mliap
|
||||
lammps.mliap.activate_mliappy(lmp)
|
||||
# Otherwise, when running lammps in library mode,
|
||||
# you will get an error:
|
||||
# "ERROR: Loading MLIAPPY coupling module failure."
|
||||
|
||||
# Setup the simulation and declare an empty model
|
||||
# by specifying model filename as "LATER"
|
||||
lmp.commands_string(before_loading)
|
||||
|
||||
# Define the model however you like. In this example
|
||||
# we load it from disk:
|
||||
import torch
|
||||
model = torch.load('Ta06A.mliap.pytorch.model.pt')
|
||||
|
||||
# Connect the PyTorch model to the mliap pair style.
|
||||
lammps.mliap.load_model(model)
|
||||
|
||||
# run the simulation with the mliap pair style
|
||||
lmp.commands_string(after_loading)
|
||||
18
examples/mliap/relu1hidden.mliap.pytorch
Normal file
18
examples/mliap/relu1hidden.mliap.pytorch
Normal file
@ -0,0 +1,18 @@
|
||||
# DATE: 2014-09-05 UNITS: metal CONTRIBUTOR: Aidan Thompson athomps@sandia.gov CITATION: Thompson, Swiler, Trott, Foiles and Tucker, arxiv.org, 1409.3880 (2014)
|
||||
|
||||
# Definition of SNAP potential Ta_Cand06A
|
||||
# Assumes 1 LAMMPS atom type
|
||||
|
||||
variable zblcutinner equal 4
|
||||
variable zblcutouter equal 4.8
|
||||
variable zblz equal 73
|
||||
|
||||
# Specify hybrid with SNAP, ZBL
|
||||
|
||||
pair_style hybrid/overlay &
|
||||
zbl ${zblcutinner} ${zblcutouter} &
|
||||
mliap model mliappy relu1hidden.mliap.pytorch.model.pt &
|
||||
descriptor sna Ta06A.mliap.descriptor
|
||||
pair_coeff 1 1 zbl ${zblz} ${zblz}
|
||||
pair_coeff * * mliap Ta
|
||||
|
||||
BIN
examples/mliap/relu1hidden.mliap.pytorch.model.pt
Normal file
BIN
examples/mliap/relu1hidden.mliap.pytorch.model.pt
Normal file
Binary file not shown.
3
lib/python/Makefile.mliap_python
Normal file
3
lib/python/Makefile.mliap_python
Normal file
@ -0,0 +1,3 @@
|
||||
|
||||
# Regenerate the C++ coupling source from its Cython source.
# cythonize must be given the .pyx prerequisite (not the generated .cpp target);
# -3 selects Python 3 language level.
# NOTE(review): a .cpp (rather than .c) output presumably relies on a
# "distutils: language = c++" directive inside the .pyx -- confirm.
../mliap_model_python_couple.cpp: ../mliap_model_python_couple.pyx
	cythonize -3 ../mliap_model_python_couple.pyx
|
||||
@ -98,19 +98,23 @@ os.chdir(os.path.dirname(args.package))
|
||||
from distutils.core import setup
|
||||
from distutils.sysconfig import get_python_lib
|
||||
import site
|
||||
tryuser=False
|
||||
|
||||
#Arguments common to global or user install -- everything but data_files
|
||||
setup_kwargs= dict(name="lammps",
|
||||
version=verstr,
|
||||
author="Steve Plimpton",
|
||||
author_email="sjplimp@sandia.gov",
|
||||
url="https://lammps.sandia.gov",
|
||||
description="LAMMPS Molecular Dynamics Python package",
|
||||
license="GPL",
|
||||
packages=["lammps","lammps.mliap"],
|
||||
)
|
||||
|
||||
tryuser=False
|
||||
try:
|
||||
sys.argv = ["setup.py","install"] # as if had run "python setup.py install"
|
||||
setup(name = "lammps",
|
||||
version = verstr,
|
||||
author = "Steve Plimpton",
|
||||
author_email = "sjplimp@sandia.gov",
|
||||
url = "https://lammps.sandia.gov",
|
||||
description = "LAMMPS Molecular Dynamics Python package",
|
||||
license = "GPL",
|
||||
packages=['lammps'],
|
||||
data_files = [(os.path.join(get_python_lib(), 'lammps'), [args.lib])])
|
||||
setup_kwargs['data_files']=[(os.path.join(get_python_lib(), 'lammps'), [args.lib])]
|
||||
setup(**setup_kwargs)
|
||||
except:
|
||||
tryuser=True
|
||||
print ("Installation into global site-packages folder failed.\nTrying user folder %s now." % site.USER_SITE)
|
||||
@ -118,14 +122,7 @@ except:
|
||||
if tryuser:
|
||||
try:
|
||||
sys.argv = ["setup.py","install","--user"] # as if had run "python setup.py install --user"
|
||||
setup(name = "lammps",
|
||||
version = verstr,
|
||||
author = "Steve Plimpton",
|
||||
author_email = "sjplimp@sandia.gov",
|
||||
url = "https://lammps.sandia.gov",
|
||||
description = "LAMMPS Molecular Dynamics Python package",
|
||||
license = "GPL",
|
||||
packages=['lammps'],
|
||||
data_files = [(os.path.join(site.USER_SITE, 'lammps'), [args.lib])])
|
||||
setup_kwargs['data_files']=[(os.path.join(site.USER_SITE, 'lammps'), [args.lib])]
|
||||
setup(**setup_kwargs)
|
||||
except:
|
||||
print("Installation into user site package folder failed.")
|
||||
|
||||
13
python/lammps/mliap/__init__.py
Normal file
13
python/lammps/mliap/__init__.py
Normal file
@ -0,0 +1,13 @@
|
||||
|
||||
# Check compatibility of this build with the python shared library.
|
||||
# If this fails, lammps will segfault because its library will
|
||||
# try to improperly start up a new interpreter.
|
||||
import sysconfig
|
||||
import ctypes
|
||||
library = sysconfig.get_config_vars('INSTSONAME')[0]
|
||||
pylib = ctypes.CDLL(library)
|
||||
if not pylib.Py_IsInitialized():
|
||||
raise RuntimeError("This interpreter is not compatible with python-based mliap for LAMMPS.")
|
||||
del sysconfig, ctypes, library, pylib
|
||||
|
||||
from .loader import load_model, activate_mliappy
|
||||
52
python/lammps/mliap/loader.py
Normal file
52
python/lammps/mliap/loader.py
Normal file
@ -0,0 +1,52 @@
|
||||
# ----------------------------------------------------------------------
|
||||
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
# http://lammps.sandia.gov, Sandia National Laboratories
|
||||
# Steve Plimpton, sjplimp@sandia.gov
|
||||
#
|
||||
# Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
# certain rights in this software. This software is distributed under
|
||||
# the GNU General Public License.
|
||||
#
|
||||
# See the README file in the top-level LAMMPS directory.
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Contributing author: Nicholas Lubbers (LANL)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
|
||||
import sys
|
||||
import importlib.util
|
||||
import importlib.machinery
|
||||
|
||||
def activate_mliappy(lmp):
    """Load the embedded MLIAP Python coupling module and register it.

    The extension module 'mliap_model_python_couple' is loaded from the file
    named by ``lmp.lib._name`` and stored in ``sys.modules`` under that name,
    so that a later ``import mliap_model_python_couple`` finds it.

    Args:
        lmp: object providing ``lib._name``, the file name of the LAMMPS
            library that contains the extension module.

    Raises:
        ImportError: if any step of loading fails; the original exception
            is attached as the cause.
    """
    name = 'mliap_model_python_couple'
    module = None
    try:
        # Begin Importlib magic to find the embedded python module
        # This is needed because the filename for liblammps does not
        # match the spec for normal python modules, wherein
        # file names match with PyInit function names.
        # Also, python normally doesn't look for extensions besides '.so'
        # We fix both of these problems by providing an explicit
        # path to the extension module 'mliap_model_python_couple'
        # inside the LAMMPS library file.
        path = lmp.lib._name
        loader = importlib.machinery.ExtensionFileLoader(name, path)
        spec = importlib.util.spec_from_loader(name, loader)
        module = importlib.util.module_from_spec(spec)
        # Register before executing, as the regular import machinery does.
        sys.modules[name] = module
        spec.loader.exec_module(module)
        # End Importlib magic to find the embedded python module

    except Exception as ee:
        # Do not leave a partially initialized module registered: otherwise a
        # later "import mliap_model_python_couple" would silently succeed.
        # Only remove the entry created by this call.
        if module is not None and sys.modules.get(name) is module:
            del sys.modules[name]
        raise ImportError("Could not load MLIAP python coupling module.") from ee
|
||||
|
||||
def load_model(model):
    """Pass a Python model object to the MLIAP coupling module.

    Imports 'mliap_model_python_couple' and calls its ``load_from_python``
    function with ``model``.

    Raises:
        ImportError: if the coupling module cannot be imported; the message
            points the caller to ``lammps.mliap.activate_mliappy(lmp)``.
    """
    try:
        import mliap_model_python_couple as coupling
    except ImportError as ie:
        message = ("MLIAP python module must be activated before loading\n"
                   "the pair style. Call lammps.mliap.activate_mliappy(lmp).")
        raise ImportError(message) from ie

    coupling.load_from_python(model)
|
||||
|
||||
65
python/lammps/mliap/pytorch.py
Normal file
65
python/lammps/mliap/pytorch.py
Normal file
@ -0,0 +1,65 @@
|
||||
# ----------------------------------------------------------------------
|
||||
# LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
|
||||
# http://lammps.sandia.gov, Sandia National Laboratories
|
||||
# Steve Plimpton, sjplimp@sandia.gov
|
||||
#
|
||||
# Copyright (2003) Sandia Corporation. Under the terms of Contract
|
||||
# DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
|
||||
# certain rights in this software. This software is distributed under
|
||||
# the GNU General Public License.
|
||||
#
|
||||
# See the README file in the top-level LAMMPS directory.
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Contributing author: Nicholas Lubbers (LANL)
|
||||
# -------------------------------------------------------------------------
|
||||
|
||||
import numpy as np
|
||||
import torch
|
||||
|
||||
def calc_n_params(model):
    """Return the total element count over everything in ``model.parameters()``."""
    total = 0
    for param in model.parameters():
        total += param.nelement()
    return total
|
||||
|
||||
class TorchWrapper(torch.nn.Module):
    """Adapter that evaluates a torch model on numpy inputs.

    The wrapped ``model`` is called as ``model(bispectrum, elems)`` and its
    output is treated as one energy value per row. ``forward`` writes both the
    energies and their gradient with respect to the descriptors into
    caller-provided arrays instead of returning them.
    """

    def __init__(self, model,n_descriptors,n_elements,n_params=None,device=None,dtype=torch.float64):
        """Store the model and its metadata, and move it to ``device``/``dtype``.

        When ``n_params`` is None it is computed with ``calc_n_params(model)``.
        """
        super().__init__()

        self.model = model
        self.dtype = dtype
        self.device = device
        self.n_descriptors = n_descriptors
        self.n_elements = n_elements

        # Convert the wrapped model to the requested dtype, then move it
        # to the requested device.
        self.to(self.dtype)
        self.to(self.device)

        self.n_params = calc_n_params(model) if n_params is None else n_params

    def forward(self, elems, bispectrum, beta, energy):
        """Evaluate the model and fill ``beta`` and ``energy`` in place.

        ``elems`` and ``bispectrum`` are numpy arrays. ``beta`` receives the
        gradient of the summed energy with respect to ``bispectrum``;
        ``energy`` receives the (flattened) model output. Both are written
        as float64. Nothing is returned.
        """
        descriptors = torch.from_numpy(bispectrum)
        descriptors = descriptors.to(dtype=self.dtype, device=self.device)
        descriptors = descriptors.requires_grad_(True)

        # Shift the incoming element indices down by one
        # (presumably they arrive 1-based -- TODO confirm against the caller).
        species = torch.from_numpy(elems).to(dtype=torch.long, device=self.device) - 1

        # Gradients are required below even if the caller disabled them.
        with torch.autograd.enable_grad():
            per_row_energy = self.model(descriptors, species)
            if per_row_energy.ndim > 1:
                per_row_energy = per_row_energy.flatten()

            (descriptor_grad,) = torch.autograd.grad(per_row_energy.sum(), descriptors)

        beta[:] = descriptor_grad.detach().cpu().numpy().astype(np.float64)
        energy[:] = per_row_energy.detach().cpu().numpy().astype(np.float64)
|
||||
|
||||
class IgnoreElems(torch.nn.Module):
    """Wrap a network that takes only descriptors so it accepts ``(bispectrum, elems)``."""

    def __init__(self,subnet):
        """Keep ``subnet`` as the wrapped module."""
        super().__init__()
        self.subnet = subnet

    def forward(self,bispectrum,elems):
        """Call the wrapped module on ``bispectrum``; ``elems`` is not used."""
        result = self.subnet(bispectrum)
        return result
|
||||
@ -22,5 +22,5 @@ setup(
|
||||
url = "https://lammps.sandia.gov",
|
||||
description = "LAMMPS Molecular Dynamics Python package",
|
||||
license = "GPL",
|
||||
packages=["lammps"]
|
||||
packages=["lammps","lammps.mliap"],
|
||||
)
|
||||
|
||||
@ -99,20 +99,20 @@ void PairBrownian::compute(int eflag, int vflag)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -494,7 +494,7 @@ void PairBrownian::init_style()
|
||||
// are re-calculated at every step.
|
||||
|
||||
flagdeform = flagwall = 0;
|
||||
for (int i = 0; i < modify->nfix; i++){
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
if (strcmp(modify->fix[i]->style,"deform") == 0)
|
||||
flagdeform = 1;
|
||||
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
|
||||
@ -514,14 +514,14 @@ void PairBrownian::init_style()
|
||||
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
|
||||
else {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
|
||||
// Since fix->wall->init happens after pair->init_style
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
|
||||
@ -82,20 +82,20 @@ void PairBrownianPoly::compute(int eflag, int vflag)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (j = 0; j < 3; j++){
|
||||
for (j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -355,7 +355,7 @@ void PairBrownianPoly::init_style()
|
||||
// are re-calculated at every step.
|
||||
|
||||
flagdeform = flagwall = 0;
|
||||
for (int i = 0; i < modify->nfix; i++){
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
if (strcmp(modify->fix[i]->style,"deform") == 0)
|
||||
flagdeform = 1;
|
||||
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
|
||||
@ -375,14 +375,14 @@ void PairBrownianPoly::init_style()
|
||||
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
|
||||
else {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
|
||||
// Since fix->wall->init happens after pair->init_style
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
|
||||
@ -155,20 +155,20 @@ void PairLubricate::compute(int eflag, int vflag)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -559,7 +559,7 @@ void PairLubricate::init_style()
|
||||
// are re-calculated at every step.
|
||||
|
||||
shearing = flagdeform = flagwall = 0;
|
||||
for (int i = 0; i < modify->nfix; i++){
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
if (strcmp(modify->fix[i]->style,"deform") == 0) {
|
||||
shearing = flagdeform = 1;
|
||||
if (((FixDeform *) modify->fix[i])->remapflag != Domain::V_REMAP)
|
||||
@ -584,15 +584,15 @@ void PairLubricate::init_style()
|
||||
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
|
||||
else {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
|
||||
//Since fix->wall->init happens after pair->init_style
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
|
||||
@ -158,8 +158,8 @@ void PairLubricateU::compute(int eflag, int vflag)
|
||||
|
||||
// store back the saved forces and torques in original arrays
|
||||
|
||||
for(i=0;i<nlocal+nghost;i++) {
|
||||
for(j=0;j<3;j++) {
|
||||
for (i=0;i<nlocal+nghost;i++) {
|
||||
for (j=0;j<3;j++) {
|
||||
f[i][j] = fl[i][j];
|
||||
torque[i][j] = Tl[i][j];
|
||||
}
|
||||
@ -223,7 +223,7 @@ void PairLubricateU::stage_one()
|
||||
// Find the right hand side= -ve of all forces/torques
|
||||
// b = 6*Npart in overall size
|
||||
|
||||
for(ii = 0; ii < inum; ii++) {
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
for (j = 0; j < 3; j++) {
|
||||
bcg[6*ii+j] = -f[i][j];
|
||||
@ -407,7 +407,7 @@ void PairLubricateU::stage_two(double **x)
|
||||
// Find the right hand side= -ve of all forces/torques
|
||||
// b = 6*Npart in overall size
|
||||
|
||||
for(ii = 0; ii < inum; ii++) {
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
for (j = 0; j < 3; j++) {
|
||||
bcg[6*ii+j] = -f[i][j];
|
||||
@ -581,20 +581,20 @@ void PairLubricateU::compute_Fh(double **x)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -813,20 +813,20 @@ void PairLubricateU::compute_RU()
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -1013,7 +1013,7 @@ void PairLubricateU::compute_RU()
|
||||
torque[i][1] -= vxmu2f*ty;
|
||||
torque[i][2] -= vxmu2f*tz;
|
||||
|
||||
if(newton_pair || j < nlocal) {
|
||||
if (newton_pair || j < nlocal) {
|
||||
torque[j][0] -= vxmu2f*tx;
|
||||
torque[j][1] -= vxmu2f*ty;
|
||||
torque[j][2] -= vxmu2f*tz;
|
||||
@ -1084,20 +1084,20 @@ void PairLubricateU::compute_RU(double **x)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -1284,7 +1284,7 @@ void PairLubricateU::compute_RU(double **x)
|
||||
torque[i][1] -= vxmu2f*ty;
|
||||
torque[i][2] -= vxmu2f*tz;
|
||||
|
||||
if(newton_pair || j < nlocal) {
|
||||
if (newton_pair || j < nlocal) {
|
||||
torque[j][0] -= vxmu2f*tx;
|
||||
torque[j][1] -= vxmu2f*ty;
|
||||
torque[j][2] -= vxmu2f*tz;
|
||||
@ -1791,7 +1791,7 @@ void PairLubricateU::init_style()
|
||||
// are re-calculated at every step.
|
||||
|
||||
flagdeform = flagwall = 0;
|
||||
for (int i = 0; i < modify->nfix; i++){
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
if (strcmp(modify->fix[i]->style,"deform") == 0)
|
||||
flagdeform = 1;
|
||||
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
|
||||
@ -1811,14 +1811,14 @@ void PairLubricateU::init_style()
|
||||
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
|
||||
else {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
|
||||
//Since fix->wall->init happens after pair->init_style
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
|
||||
@ -126,8 +126,8 @@ void PairLubricateUPoly::compute(int eflag, int vflag)
|
||||
|
||||
// Store back the saved forces and torques in original arrays
|
||||
|
||||
for(i=0;i<nlocal+nghost;i++) {
|
||||
for(j=0;j<3;j++) {
|
||||
for (i=0;i<nlocal+nghost;i++) {
|
||||
for (j=0;j<3;j++) {
|
||||
f[i][j] = fl[i][j];
|
||||
torque[i][j] = Tl[i][j];
|
||||
}
|
||||
@ -172,7 +172,7 @@ void PairLubricateUPoly::iterate(double **x, int stage)
|
||||
// Find the right hand side= -ve of all forces/torques
|
||||
// b = 6*Npart in overall size
|
||||
|
||||
for(ii = 0; ii < inum; ii++) {
|
||||
for (ii = 0; ii < inum; ii++) {
|
||||
i = ilist[ii];
|
||||
for (j = 0; j < 3; j++) {
|
||||
bcg[6*ii+j] = -f[i][j];
|
||||
@ -351,20 +351,20 @@ void PairLubricateUPoly::compute_Fh(double **x)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -626,20 +626,20 @@ void PairLubricateUPoly::compute_RU(double **x)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (j = 0; j < 3; j++){
|
||||
for (j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -1155,10 +1155,10 @@ void PairLubricateUPoly::init_style()
|
||||
// are re-calculated at every step.
|
||||
|
||||
flagdeform = flagwall = 0;
|
||||
for (int i = 0; i < modify->nfix; i++){
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
if (strcmp(modify->fix[i]->style,"deform") == 0)
|
||||
flagdeform = 1;
|
||||
else if (strstr(modify->fix[i]->style,"wall") != nullptr){
|
||||
else if (strstr(modify->fix[i]->style,"wall") != nullptr) {
|
||||
if (flagwall)
|
||||
error->all(FLERR,
|
||||
"Cannot use multiple fix wall commands with "
|
||||
@ -1176,14 +1176,14 @@ void PairLubricateUPoly::init_style()
|
||||
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
|
||||
else {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
|
||||
//Since fix->wall->init happens after pair->init_style
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
@ -1214,7 +1214,7 @@ void PairLubricateUPoly::init_style()
|
||||
if (!flagVF) vol_f = 0;
|
||||
|
||||
if (!comm->me) {
|
||||
if(logfile)
|
||||
if (logfile)
|
||||
fprintf(logfile, "lubricateU: vol_f = %g, vol_p = %g, vol_T = %g\n",
|
||||
vol_f,vol_P,vol_T);
|
||||
if (screen)
|
||||
|
||||
@ -137,20 +137,20 @@ void PairLubricatePoly::compute(int eflag, int vflag)
|
||||
|
||||
double dims[3], wallcoord;
|
||||
if (flagVF) // Flag for volume fraction corrections
|
||||
if (flagdeform || flagwall == 2){ // Possible changes in volume fraction
|
||||
if (flagdeform || flagwall == 2) { // Possible changes in volume fraction
|
||||
if (flagdeform && !flagwall)
|
||||
for (j = 0; j < 3; j++)
|
||||
dims[j] = domain->prd[j];
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)){
|
||||
else if (flagwall == 2 || (flagdeform && flagwall == 1)) {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
}
|
||||
else wallcoord = wallfix->coord0[m];
|
||||
@ -461,7 +461,7 @@ void PairLubricatePoly::init_style()
|
||||
// are re-calculated at every step.
|
||||
|
||||
shearing = flagdeform = flagwall = 0;
|
||||
for (int i = 0; i < modify->nfix; i++){
|
||||
for (int i = 0; i < modify->nfix; i++) {
|
||||
if (strcmp(modify->fix[i]->style,"deform") == 0) {
|
||||
shearing = flagdeform = 1;
|
||||
if (((FixDeform *) modify->fix[i])->remapflag != Domain::V_REMAP)
|
||||
@ -478,9 +478,9 @@ void PairLubricatePoly::init_style()
|
||||
if (wallfix->xflag) flagwall = 2; // Moving walls exist
|
||||
}
|
||||
|
||||
if (strstr(modify->fix[i]->style,"wall") != nullptr){
|
||||
if (strstr(modify->fix[i]->style,"wall") != nullptr) {
|
||||
flagwall = 1; // Walls exist
|
||||
if (((FixWall *) modify->fix[i])->xflag ) {
|
||||
if (((FixWall *) modify->fix[i])->xflag) {
|
||||
flagwall = 2; // Moving walls exist
|
||||
wallfix = (FixWall *) modify->fix[i];
|
||||
}
|
||||
@ -492,14 +492,14 @@ void PairLubricatePoly::init_style()
|
||||
if (!flagwall) vol_T = domain->xprd*domain->yprd*domain->zprd;
|
||||
else {
|
||||
double wallhi[3], walllo[3];
|
||||
for (int j = 0; j < 3; j++){
|
||||
for (int j = 0; j < 3; j++) {
|
||||
wallhi[j] = domain->prd[j];
|
||||
walllo[j] = 0;
|
||||
}
|
||||
for (int m = 0; m < wallfix->nwall; m++){
|
||||
for (int m = 0; m < wallfix->nwall; m++) {
|
||||
int dim = wallfix->wallwhich[m] / 2;
|
||||
int side = wallfix->wallwhich[m] % 2;
|
||||
if (wallfix->xstyle[m] == VARIABLE){
|
||||
if (wallfix->xstyle[m] == VARIABLE) {
|
||||
wallfix->xindex[m] = input->variable->find(wallfix->xstr[m]);
|
||||
//Since fix->wall->init happens after pair->init_style
|
||||
wallcoord = input->variable->compute_equal(wallfix->xindex[m]);
|
||||
|
||||
@ -168,7 +168,7 @@ void DumpAtomGZ::write()
|
||||
int DumpAtomGZ::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpAtom::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
if (strcmp(arg[0],"compression_level") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
int min_level = Z_DEFAULT_COMPRESSION;
|
||||
|
||||
@ -171,7 +171,7 @@ void DumpAtomZstd::write()
|
||||
int DumpAtomZstd::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpAtom::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
try {
|
||||
if (strcmp(arg[0],"checksum") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
|
||||
@ -176,7 +176,7 @@ void DumpCFGGZ::write()
|
||||
int DumpCFGGZ::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpCFG::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
if (strcmp(arg[0],"compression_level") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
int min_level = Z_DEFAULT_COMPRESSION;
|
||||
|
||||
@ -173,7 +173,7 @@ void DumpCFGZstd::write()
|
||||
int DumpCFGZstd::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpCFG::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
try {
|
||||
if (strcmp(arg[0],"checksum") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
|
||||
@ -168,7 +168,7 @@ void DumpCustomGZ::write()
|
||||
int DumpCustomGZ::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpCustom::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
if (strcmp(arg[0],"compression_level") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
int min_level = Z_DEFAULT_COMPRESSION;
|
||||
|
||||
@ -171,7 +171,7 @@ void DumpCustomZstd::write()
|
||||
int DumpCustomZstd::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpCustom::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
try {
|
||||
if (strcmp(arg[0],"checksum") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
|
||||
@ -183,7 +183,7 @@ void DumpLocalGZ::write()
|
||||
int DumpLocalGZ::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpLocal::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
if (strcmp(arg[0],"compression_level") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
int min_level = Z_DEFAULT_COMPRESSION;
|
||||
|
||||
@ -171,7 +171,7 @@ void DumpLocalZstd::write()
|
||||
int DumpLocalZstd::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpLocal::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
try {
|
||||
if (strcmp(arg[0],"checksum") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
|
||||
@ -147,7 +147,7 @@ void DumpXYZGZ::write()
|
||||
int DumpXYZGZ::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpXYZ::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
if (strcmp(arg[0],"compression_level") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
int min_level = Z_DEFAULT_COMPRESSION;
|
||||
|
||||
@ -145,7 +145,7 @@ void DumpXYZZstd::write()
|
||||
int DumpXYZZstd::modify_param(int narg, char **arg)
|
||||
{
|
||||
int consumed = DumpXYZ::modify_param(narg, arg);
|
||||
if(consumed == 0) {
|
||||
if (consumed == 0) {
|
||||
try {
|
||||
if (strcmp(arg[0],"checksum") == 0) {
|
||||
if (narg < 2) error->all(FLERR,"Illegal dump_modify command");
|
||||
|
||||
@ -48,7 +48,7 @@ ZstdFileWriter::~ZstdFileWriter()
|
||||
|
||||
void ZstdFileWriter::open(const std::string &path)
|
||||
{
|
||||
if(isopen()) return;
|
||||
if (isopen()) return;
|
||||
|
||||
fp = fopen(path.c_str(), "wb");
|
||||
|
||||
@ -72,7 +72,7 @@ void ZstdFileWriter::open(const std::string &path)
|
||||
|
||||
size_t ZstdFileWriter::write(const void * buffer, size_t length)
|
||||
{
|
||||
if(!isopen()) return 0;
|
||||
if (!isopen()) return 0;
|
||||
|
||||
ZSTD_inBuffer input = { buffer, length, 0 };
|
||||
ZSTD_EndDirective mode = ZSTD_e_continue;
|
||||
@ -81,7 +81,7 @@ size_t ZstdFileWriter::write(const void * buffer, size_t length)
|
||||
ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
|
||||
ZSTD_compressStream2(cctx, &output, &input, mode);
|
||||
fwrite(out_buffer, sizeof(char), output.pos, fp);
|
||||
} while(input.pos < input.size);
|
||||
} while (input.pos < input.size);
|
||||
|
||||
return length;
|
||||
}
|
||||
@ -90,7 +90,7 @@ size_t ZstdFileWriter::write(const void * buffer, size_t length)
|
||||
|
||||
void ZstdFileWriter::flush()
|
||||
{
|
||||
if(!isopen()) return;
|
||||
if (!isopen()) return;
|
||||
|
||||
size_t remaining;
|
||||
ZSTD_inBuffer input = { nullptr, 0, 0 };
|
||||
@ -100,7 +100,7 @@ void ZstdFileWriter::flush()
|
||||
ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
|
||||
remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
|
||||
fwrite(out_buffer, sizeof(char), output.pos, fp);
|
||||
} while(remaining);
|
||||
} while (remaining);
|
||||
|
||||
fflush(fp);
|
||||
}
|
||||
@ -109,7 +109,7 @@ void ZstdFileWriter::flush()
|
||||
|
||||
void ZstdFileWriter::close()
|
||||
{
|
||||
if(!isopen()) return;
|
||||
if (!isopen()) return;
|
||||
|
||||
size_t remaining;
|
||||
ZSTD_inBuffer input = { nullptr, 0, 0 };
|
||||
@ -119,7 +119,7 @@ void ZstdFileWriter::close()
|
||||
ZSTD_outBuffer output = { out_buffer, out_buffer_size, 0 };
|
||||
remaining = ZSTD_compressStream2(cctx, &output, &input, mode);
|
||||
fwrite(out_buffer, sizeof(char), output.pos, fp);
|
||||
} while(remaining);
|
||||
} while (remaining);
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
cctx = nullptr;
|
||||
@ -144,7 +144,7 @@ void ZstdFileWriter::setCompressionLevel(int level)
|
||||
const int min_level = ZSTD_minCLevel();
|
||||
const int max_level = ZSTD_maxCLevel();
|
||||
|
||||
if(level < min_level || level > max_level)
|
||||
if (level < min_level || level > max_level)
|
||||
throw FileWriterException(fmt::format("Compression level must in the range of [{}, {}]", min_level, max_level));
|
||||
|
||||
compression_level = level;
|
||||
|
||||
@ -232,7 +232,7 @@ double ComputeTempCS::compute_scalar()
|
||||
|
||||
double t = 0.0;
|
||||
|
||||
for (int i = 0; i < nlocal; i++){
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & groupbit) {
|
||||
vthermal[0] = v[i][0] - vint[i][0];
|
||||
vthermal[1] = v[i][1] - vint[i][1];
|
||||
@ -271,7 +271,7 @@ void ComputeTempCS::compute_vector()
|
||||
double t[6];
|
||||
for (int i = 0; i < 6; i++) t[i] = 0.0;
|
||||
|
||||
for (int i = 0; i < nlocal; i++){
|
||||
for (int i = 0; i < nlocal; i++) {
|
||||
if (mask[i] & groupbit) {
|
||||
if (rmass) massone = rmass[i];
|
||||
else massone = mass[type[i]];
|
||||
|
||||
@ -106,6 +106,10 @@ if (test $1 = "PERI") then
|
||||
depend USER-OMP
|
||||
fi
|
||||
|
||||
if (test $1 = "PYTHON") then
|
||||
depend MLIAP
|
||||
fi
|
||||
|
||||
if (test $1 = "RIGID") then
|
||||
depend KOKKOS
|
||||
depend USER-OMP
|
||||
@ -114,6 +118,7 @@ fi
|
||||
|
||||
if (test $1 = "SNAP") then
|
||||
depend KOKKOS
|
||||
depend MLIAP
|
||||
fi
|
||||
|
||||
if (test $1 = "USER-CGSDK") then
|
||||
|
||||
@ -370,7 +370,7 @@ void PairEAMAlloyGPU::read_file(char *filename)
|
||||
Setfl *file = setfl;
|
||||
|
||||
// read potential file
|
||||
if(comm->me == 0) {
|
||||
if (comm->me == 0) {
|
||||
PotentialFileReader reader(PairEAM::lmp, filename,
|
||||
"eam/alloy", unit_convert_flag);
|
||||
|
||||
|
||||
@ -370,7 +370,7 @@ void PairEAMFSGPU::read_file(char *filename)
|
||||
Fs *file = fs;
|
||||
|
||||
// read potential file
|
||||
if(comm->me == 0) {
|
||||
if (comm->me == 0) {
|
||||
PotentialFileReader reader(PairEAM::lmp, filename, "eam/fs",
|
||||
unit_convert_flag);
|
||||
|
||||
|
||||
@ -141,7 +141,7 @@ void PairVashishtaGPU::compute(int eflag, int vflag)
|
||||
|
||||
void PairVashishtaGPU::allocate()
|
||||
{
|
||||
if(!allocated) {
|
||||
if (!allocated) {
|
||||
PairVashishta::allocate();
|
||||
}
|
||||
int n = atom->ntypes;
|
||||
@ -260,7 +260,7 @@ void PairVashishtaGPU::init_style()
|
||||
|
||||
double PairVashishtaGPU::init_one(int i, int j)
|
||||
{
|
||||
if(!gpu_allocated) {
|
||||
if (!gpu_allocated) {
|
||||
allocate();
|
||||
}
|
||||
if (setflag[i][j] == 0) error->all(FLERR,"All pair coeffs are not set");
|
||||
|
||||
@ -1303,7 +1303,7 @@ void FixWallGran::granular(double rsq, double dx, double dy, double dz,
|
||||
relrot2 = omega[1];
|
||||
relrot3 = omega[2];
|
||||
}
|
||||
if (roll_model != ROLL_NONE){
|
||||
if (roll_model != ROLL_NONE) {
|
||||
|
||||
// rolling velocity, see eq. 31 of Wang et al, Particuology v 23, p 49 (2015)
|
||||
// This is different from the Marshall papers,
|
||||
|
||||
@ -536,7 +536,7 @@ void PairGranular::compute(int eflag, int vflag)
|
||||
}
|
||||
|
||||
if (roll_model[itype][jtype] != ROLL_NONE ||
|
||||
twist_model[itype][jtype] != TWIST_NONE){
|
||||
twist_model[itype][jtype] != TWIST_NONE) {
|
||||
relrot1 = omega[i][0] - omega[j][0];
|
||||
relrot2 = omega[i][1] - omega[j][1];
|
||||
relrot3 = omega[i][2] - omega[j][2];
|
||||
|
||||
@ -252,7 +252,7 @@ char *do_query(char *qfunction, char * model_name, int narg, char **arg,
|
||||
}
|
||||
} else {
|
||||
query += fmt::format("&{}=[", key);
|
||||
while (n != std::string::npos){
|
||||
while (n != std::string::npos) {
|
||||
std::string sval = val.substr(0, n);
|
||||
if (utils::is_integer(sval) ||
|
||||
utils::is_double(sval) ||
|
||||
|
||||
@ -230,9 +230,9 @@ void AtomKokkos::sort()
|
||||
reallocate memory to the pointer selected by the mask
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
void AtomKokkos::grow(unsigned int mask){
|
||||
void AtomKokkos::grow(unsigned int mask) {
|
||||
|
||||
if (mask & SPECIAL_MASK){
|
||||
if (mask & SPECIAL_MASK) {
|
||||
memoryKK->destroy_kokkos(k_special, special);
|
||||
sync(Device, mask);
|
||||
modified(Device, mask);
|
||||
|
||||
@ -83,16 +83,16 @@ struct SortFunctor {
|
||||
ViewType source;
|
||||
Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type> dest;
|
||||
IndexView index;
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind){
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==1,IndexView>::type ind):source(src),index(ind) {
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0));
|
||||
}
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind){
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==2,IndexView>::type ind):source(src),index(ind) {
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1));
|
||||
}
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind){
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==3,IndexView>::type ind):source(src),index(ind) {
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1),src.extent(2));
|
||||
}
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind){
|
||||
SortFunctor(ViewType src, typename std::enable_if<ViewType::dynamic_rank==4,IndexView>::type ind):source(src),index(ind) {
|
||||
dest = Kokkos::View<typename ViewType::non_const_data_type,typename ViewType::array_type,device_type>("",src.extent(0),src.extent(1),src.extent(2),src.extent(3));
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -100,18 +100,18 @@ struct SortFunctor {
|
||||
dest(i) = source(index(i));
|
||||
}
|
||||
void operator()(const typename std::enable_if<ViewType::rank==2, int>::type& i) {
|
||||
for(int j=0; j < (int)source.extent(1); j++)
|
||||
for (int j=0; j < (int)source.extent(1); j++)
|
||||
dest(i,j) = source(index(i),j);
|
||||
}
|
||||
void operator()(const typename std::enable_if<ViewType::rank==3, int>::type& i) {
|
||||
for(int j=0; j < (int)source.extent(1); j++)
|
||||
for(int k=0; k < (int)source.extent(2); k++)
|
||||
for (int j=0; j < (int)source.extent(1); j++)
|
||||
for (int k=0; k < (int)source.extent(2); k++)
|
||||
dest(i,j,k) = source(index(i),j,k);
|
||||
}
|
||||
void operator()(const typename std::enable_if<ViewType::rank==4, int>::type& i) {
|
||||
for(int j=0; j < (int)source.extent(1); j++)
|
||||
for(int k=0; k < (int)source.extent(2); k++)
|
||||
for(int l=0; l < (int)source.extent(3); l++)
|
||||
for (int j=0; j < (int)source.extent(1); j++)
|
||||
for (int k=0; k < (int)source.extent(2); k++)
|
||||
for (int l=0; l < (int)source.extent(3); l++)
|
||||
dest(i,j,k,l) = source(index(i),j,k,l);
|
||||
}
|
||||
};
|
||||
|
||||
@ -281,10 +281,10 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
||||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -296,7 +296,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -310,8 +310,8 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
||||
}
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -323,7 +323,7 @@ int AtomVecAngleKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -396,11 +396,11 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
||||
const int & iswap,
|
||||
const int nfirst, const int &pbc_flag,
|
||||
const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,1,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
@ -414,7 +414,7 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPHostType,0,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
@ -431,8 +431,8 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,1,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
@ -446,7 +446,7 @@ int AtomVecAngleKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &li
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecAngleKokkos_PackCommSelf<LMPDeviceType,0,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
@ -491,8 +491,8 @@ struct AtomVecAngleKokkos_UnpackComm {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecAngleKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
const DAT::tdual_xfloat_2d &buf) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
struct AtomVecAngleKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
@ -641,7 +641,7 @@ void AtomVecAngleKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
|
||||
int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -658,7 +658,7 @@ int AtomVecAngleKokkos::pack_reverse(int n, int first, double *buf)
|
||||
|
||||
void AtomVecAngleKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -742,7 +742,7 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecAngleKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
@ -756,7 +756,7 @@ int AtomVecAngleKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecAngleKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
@ -939,7 +939,7 @@ struct AtomVecAngleKokkos_UnpackBorder {
|
||||
typename AT::t_tagint_1d &molecule,
|
||||
const int& first):
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
|
||||
_first(first){
|
||||
_first(first) {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -963,7 +963,7 @@ void AtomVecAngleKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecAngleKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -1129,7 +1129,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
@ -1178,7 +1178,7 @@ struct AtomVecAngleKokkos_PackExchangeFunctor {
|
||||
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -1220,12 +1220,12 @@ int AtomVecAngleKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_
|
||||
X_FLOAT hi )
|
||||
{
|
||||
const int elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecAngleKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
@ -1333,7 +1333,7 @@ struct AtomVecAngleKokkos_UnpackExchangeFunctor {
|
||||
_angle_atom2(atom->k_angle_atom2.view<DeviceType>()),
|
||||
_angle_atom3(atom->k_angle_atom3.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
elements =17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
@ -1386,7 +1386,7 @@ int AtomVecAngleKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int n
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 17+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom;
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAngleKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -200,7 +200,7 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecAtomicKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
|
||||
@ -214,7 +214,7 @@ int AtomVecAtomicKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecAtomicKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,dx,dy,dz);
|
||||
@ -374,7 +374,7 @@ struct AtomVecAtomicKokkos_UnpackBorder {
|
||||
typename ArrayTypes<DeviceType>::t_int_1d &type,
|
||||
typename ArrayTypes<DeviceType>::t_int_1d &mask,
|
||||
const int& first):
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first){
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_first(first) {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -396,7 +396,7 @@ void AtomVecAtomicKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecAtomicKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
} else {
|
||||
@ -504,7 +504,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 11;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
|
||||
@ -527,7 +527,7 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
||||
_buf(mysend,10) = d_ubuf(_image[i]).d;
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -546,11 +546,11 @@ struct AtomVecAtomicKokkos_PackExchangeFunctor {
|
||||
|
||||
int AtomVecAtomicKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
|
||||
{
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/11) {
|
||||
int newsize = nsend*11/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecAtomicKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
return nsend*11;
|
||||
@ -615,7 +615,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 11;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
|
||||
@ -644,7 +644,7 @@ struct AtomVecAtomicKokkos_UnpackExchangeFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecAtomicKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecAtomicKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/11,f);
|
||||
|
||||
@ -250,7 +250,7 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecBondKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
@ -264,7 +264,7 @@ int AtomVecBondKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecBondKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
@ -447,7 +447,7 @@ struct AtomVecBondKokkos_UnpackBorder {
|
||||
typename AT::t_tagint_1d &molecule,
|
||||
const int& first):
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
|
||||
_first(first){
|
||||
_first(first) {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -471,7 +471,7 @@ void AtomVecBondKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecBondKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -621,7 +621,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 to store buffer length
|
||||
@ -661,7 +661,7 @@ struct AtomVecBondKokkos_PackExchangeFunctor {
|
||||
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -696,12 +696,12 @@ int AtomVecBondKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
|
||||
X_FLOAT hi )
|
||||
{
|
||||
const int elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecBondKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
@ -794,7 +794,7 @@ struct AtomVecBondKokkos_UnpackExchangeFunctor {
|
||||
_bond_type(atom->k_bond_type.view<DeviceType>()),
|
||||
_bond_atom(atom->k_bond_atom.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
elements = 16+atom->maxspecial+atom->bond_per_atom+atom->bond_per_atom;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
@ -840,7 +840,7 @@ int AtomVecBondKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 16+atomKK->maxspecial+atomKK->bond_per_atom+atomKK->bond_per_atom;
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecBondKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -267,7 +267,7 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecChargeKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
|
||||
@ -281,7 +281,7 @@ int AtomVecChargeKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecChargeKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,dx,dy,dz);
|
||||
@ -463,7 +463,7 @@ struct AtomVecChargeKokkos_UnpackBorder {
|
||||
typename ArrayTypes<DeviceType>::t_int_1d &mask,
|
||||
typename ArrayTypes<DeviceType>::t_float_1d &q,
|
||||
const int& first):
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first){
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_first(first) {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -485,7 +485,7 @@ void AtomVecChargeKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
if (first+n >= nmax) {
|
||||
grow(first+n+100);
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecChargeKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -618,7 +618,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 12;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*
|
||||
buf.template view<DeviceType>().extent(1))/elements;
|
||||
@ -643,7 +643,7 @@ struct AtomVecChargeKokkos_PackExchangeFunctor {
|
||||
_buf(mysend,11) = _q[i];
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -667,11 +667,11 @@ int AtomVecChargeKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat
|
||||
ExecutionSpace space,int dim,
|
||||
X_FLOAT lo,X_FLOAT hi )
|
||||
{
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/12) {
|
||||
int newsize = nsend*12/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecChargeKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
@ -740,7 +740,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_q(atom->k_q.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 12;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
|
||||
@ -772,7 +772,7 @@ struct AtomVecChargeKokkos_UnpackExchangeFunctor {
|
||||
int AtomVecChargeKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,
|
||||
int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,
|
||||
ExecutionSpace space) {
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecChargeKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/12,f);
|
||||
@ -1131,7 +1131,7 @@ void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned
|
||||
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
|
||||
if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPDeviceType>())
|
||||
perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
|
||||
if ((mask & MOLECULE_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
|
||||
if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPDeviceType>())
|
||||
perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
|
||||
} else {
|
||||
if ((mask & X_MASK) && atomKK->k_x.need_sync<LMPHostType>())
|
||||
@ -1148,7 +1148,7 @@ void AtomVecChargeKokkos::sync_overlapping_device(ExecutionSpace space, unsigned
|
||||
perform_async_copy<DAT::tdual_int_1d>(atomKK->k_mask,space);
|
||||
if ((mask & IMAGE_MASK) && atomKK->k_image.need_sync<LMPHostType>())
|
||||
perform_async_copy<DAT::tdual_imageint_1d>(atomKK->k_image,space);
|
||||
if ((mask & MOLECULE_MASK) && atomKK->k_q.need_sync<LMPHostType>())
|
||||
if ((mask & Q_MASK) && atomKK->k_q.need_sync<LMPHostType>())
|
||||
perform_async_copy<DAT::tdual_float_1d>(atomKK->k_q,space);
|
||||
}
|
||||
}
|
||||
|
||||
@ -267,10 +267,10 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
||||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,list,iswap,
|
||||
@ -286,7 +286,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,list,iswap,
|
||||
@ -304,8 +304,8 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
||||
}
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,list,iswap,
|
||||
@ -321,7 +321,7 @@ int AtomVecDPDKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
buf,list,iswap,
|
||||
@ -409,11 +409,11 @@ struct AtomVecDPDKokkos_PackCommSelf {
|
||||
|
||||
int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
||||
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
nfirst,list,iswap,
|
||||
@ -429,7 +429,7 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
nfirst,list,iswap,
|
||||
@ -448,8 +448,8 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Device,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
nfirst,list,iswap,
|
||||
@ -465,7 +465,7 @@ int AtomVecDPDKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecDPDKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,
|
||||
atomKK->k_dpdTheta,atomKK->k_uCond,atomKK->k_uMech,atomKK->k_uChem,
|
||||
nfirst,list,iswap,
|
||||
@ -526,8 +526,8 @@ struct AtomVecDPDKokkos_UnpackComm {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecDPDKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
const DAT::tdual_xfloat_2d &buf) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
atomKK->modified(Host,X_MASK|DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK);
|
||||
struct AtomVecDPDKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,
|
||||
@ -716,7 +716,7 @@ void AtomVecDPDKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
|
||||
int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -733,7 +733,7 @@ int AtomVecDPDKokkos::pack_reverse(int n, int first, double *buf)
|
||||
|
||||
void AtomVecDPDKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
if (n > 0) {
|
||||
atomKK->sync(Host,F_MASK);
|
||||
atomKK->modified(Host,F_MASK);
|
||||
}
|
||||
@ -831,7 +831,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecDPDKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,
|
||||
@ -849,7 +849,7 @@ int AtomVecDPDKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist, DA
|
||||
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecDPDKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,
|
||||
@ -1134,7 +1134,7 @@ void AtomVecDPDKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|
|
||||
DPDTHETA_MASK|UCOND_MASK|UMECH_MASK|UCHEM_MASK|
|
||||
UCG_MASK|UCGNEW_MASK|DVECTOR_MASK);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecDPDKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),
|
||||
h_x,h_tag,h_type,h_mask,
|
||||
h_dpdTheta,h_uCond,h_uMech,h_uChem,h_uCG,h_uCGnew,
|
||||
@ -1326,7 +1326,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 17;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
|
||||
@ -1355,7 +1355,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
|
||||
_buf(mysend,16) = _uCGnew[i];
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -1380,7 +1380,7 @@ struct AtomVecDPDKokkos_PackExchangeFunctor {
|
||||
|
||||
int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d &k_buf, DAT::tdual_int_1d k_sendlist,DAT::tdual_int_1d k_copylist,ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi )
|
||||
{
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/17) {
|
||||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
@ -1388,7 +1388,7 @@ int AtomVecDPDKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2d
|
||||
MASK_MASK | IMAGE_MASK| DPDTHETA_MASK | UCOND_MASK |
|
||||
UMECH_MASK | UCHEM_MASK | UCG_MASK | UCGNEW_MASK |
|
||||
DVECTOR_MASK);
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecDPDKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
} else {
|
||||
@ -1469,7 +1469,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
|
||||
_mask(atom->k_mask.view<DeviceType>()),
|
||||
_image(atom->k_image.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
const size_t elements = 17;
|
||||
const int maxsendlist = (buf.template view<DeviceType>().extent(0)*buf.template view<DeviceType>().extent(1))/elements;
|
||||
|
||||
@ -1504,7 +1504,7 @@ struct AtomVecDPDKokkos_UnpackExchangeFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecDPDKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecDPDKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/17,f);
|
||||
|
||||
@ -381,7 +381,7 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecFullKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
|
||||
@ -395,7 +395,7 @@ int AtomVecFullKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendlist,
|
||||
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecFullKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_q,h_molecule,dx,dy,dz);
|
||||
@ -586,7 +586,7 @@ struct AtomVecFullKokkos_UnpackBorder {
|
||||
typename AT::t_tagint_1d &molecule,
|
||||
const int& first):
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_q(q),_molecule(molecule),
|
||||
_first(first){
|
||||
_first(first) {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -611,7 +611,7 @@ void AtomVecFullKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|Q_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecFullKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_q,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -824,7 +824,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
@ -895,7 +895,7 @@ struct AtomVecFullKokkos_PackExchangeFunctor {
|
||||
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -955,12 +955,12 @@ int AtomVecFullKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfloat_2
|
||||
{
|
||||
const int elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecFullKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
@ -1106,7 +1106,7 @@ struct AtomVecFullKokkos_UnpackExchangeFunctor {
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
|
||||
elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
@ -1178,7 +1178,7 @@ int AtomVecFullKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nr
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 20+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecFullKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -112,10 +112,10 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
|
||||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackComm<LMPHostType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -127,7 +127,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackComm<LMPHostType,0,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -141,8 +141,8 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackComm<LMPDeviceType,1,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -154,7 +154,7 @@ int AtomVecKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackComm<LMPDeviceType,0,1> f(atomKK->k_x,buf,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -225,11 +225,11 @@ struct AtomVecKokkos_PackCommSelf {
|
||||
|
||||
int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
||||
const int nfirst, const int &pbc_flag, const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelf<LMPHostType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -241,7 +241,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelf<LMPHostType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -256,8 +256,8 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,1,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -269,7 +269,7 @@ int AtomVecKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d &list, c
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelf<LMPDeviceType,0,1> f(atomKK->k_x,nfirst,list,iswap,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -361,10 +361,10 @@ struct AtomVecKokkos_PackCommSelfFused {
|
||||
int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &list, const DAT::tdual_int_1d &sendnum_scan,
|
||||
const DAT::tdual_int_1d &firstrecv, const DAT::tdual_int_1d &pbc_flag, const DAT::tdual_int_2d &pbc,
|
||||
const DAT::tdual_int_1d &g2l) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPHostType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
@ -378,7 +378,7 @@ int AtomVecKokkos::pack_comm_self_fused(const int &n, const DAT::tdual_int_2d &l
|
||||
} else {
|
||||
sync(Device,X_MASK);
|
||||
modified(Device,X_MASK);
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommSelfFused<LMPDeviceType,1> f(atomKK->k_x,list,pbc,pbc_flag,firstrecv,sendnum_scan,g2l,
|
||||
domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz);
|
||||
@ -420,8 +420,8 @@ struct AtomVecKokkos_UnpackComm {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
const DAT::tdual_xfloat_2d &buf) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK);
|
||||
modified(Host,X_MASK);
|
||||
struct AtomVecKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
@ -530,7 +530,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
|
||||
const int &pbc_flag,
|
||||
const int* const pbc)
|
||||
{
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|V_MASK);
|
||||
if (pbc_flag) {
|
||||
if (deform_vremap) {
|
||||
@ -552,7 +552,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommVel<LMPHostType,1,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
atomKK->k_v,
|
||||
@ -571,7 +571,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommVel<LMPHostType,0,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
atomKK->k_v,
|
||||
@ -591,9 +591,9 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
} else {
|
||||
sync(Device,X_MASK|V_MASK);
|
||||
if(pbc_flag) {
|
||||
if(deform_vremap) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (deform_vremap) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,1> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
atomKK->k_v,
|
||||
@ -611,7 +611,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommVel<LMPDeviceType,1,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
atomKK->k_v,
|
||||
@ -630,7 +630,7 @@ int AtomVecKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecKokkos_PackCommVel<LMPDeviceType,0,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
atomKK->k_v,
|
||||
@ -691,8 +691,8 @@ struct AtomVecKokkos_UnpackCommVel {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecKokkos::unpack_comm_vel_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
const DAT::tdual_xfloat_2d &buf) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
sync(Host,X_MASK|V_MASK);
|
||||
modified(Host,X_MASK|V_MASK);
|
||||
struct AtomVecKokkos_UnpackCommVel<LMPHostType> f(atomKK->k_x,atomKK->k_v,buf,first);
|
||||
@ -864,8 +864,8 @@ struct AtomVecKokkos_PackReverse {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecKokkos::pack_reverse_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_ffloat_2d &buf ) {
|
||||
if(commKK->reverse_comm_on_host) {
|
||||
const DAT::tdual_ffloat_2d &buf) {
|
||||
if (commKK->reverse_comm_on_host) {
|
||||
sync(Host,F_MASK);
|
||||
struct AtomVecKokkos_PackReverse<LMPHostType> f(atomKK->k_f,buf,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -911,7 +911,7 @@ struct AtomVecKokkos_UnPackReverseSelf {
|
||||
|
||||
int AtomVecKokkos::unpack_reverse_self(const int &n, const DAT::tdual_int_2d &list, const int & iswap,
|
||||
const int nfirst) {
|
||||
if(commKK->reverse_comm_on_host) {
|
||||
if (commKK->reverse_comm_on_host) {
|
||||
sync(Host,F_MASK);
|
||||
struct AtomVecKokkos_UnPackReverseSelf<LMPHostType> f(atomKK->k_f,nfirst,list,iswap);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -966,7 +966,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
|
||||
// Check whether to always run reverse communication on the host
|
||||
// Choose correct reverse UnPackReverse kernel
|
||||
|
||||
if(commKK->reverse_comm_on_host) {
|
||||
if (commKK->reverse_comm_on_host) {
|
||||
struct AtomVecKokkos_UnPackReverse<LMPHostType> f(atomKK->k_f,buf,list,iswap);
|
||||
Kokkos::parallel_for(n,f);
|
||||
modified(Host,F_MASK);
|
||||
@ -981,7 +981,7 @@ void AtomVecKokkos::unpack_reverse_kokkos(const int &n,
|
||||
|
||||
int AtomVecKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -1007,7 +1007,7 @@ void AtomVecKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
h_f(j,2) += buf[m++];
|
||||
}
|
||||
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
modified(Host,F_MASK);
|
||||
}
|
||||
|
||||
|
||||
@ -178,7 +178,7 @@ class AtomVecKokkos : public AtomVec {
|
||||
}
|
||||
mirror_type tmp_view((typename ViewType::value_type*)buffer, src.d_view.layout());
|
||||
|
||||
if(space == Device) {
|
||||
if (space == Device) {
|
||||
Kokkos::deep_copy(LMPHostType(),tmp_view,src.h_view),
|
||||
Kokkos::deep_copy(LMPHostType(),src.d_view,tmp_view);
|
||||
src.clear_sync_state();
|
||||
@ -191,7 +191,7 @@ class AtomVecKokkos : public AtomVec {
|
||||
#else
|
||||
template<class ViewType>
|
||||
void perform_async_copy(ViewType& src, unsigned int space) {
|
||||
if(space == Device)
|
||||
if (space == Device)
|
||||
src.template sync<LMPDeviceType>();
|
||||
else
|
||||
src.template sync<LMPHostType>();
|
||||
|
||||
@ -360,10 +360,10 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
||||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPHostType,1,1>
|
||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -375,7 +375,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPHostType,0,1>
|
||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -389,8 +389,8 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
||||
}
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,1,1>
|
||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -402,7 +402,7 @@ int AtomVecMolecularKokkos::pack_comm_kokkos(const int &n,
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackComm<LMPDeviceType,0,1>
|
||||
f(atomKK->k_x,buf,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -476,11 +476,11 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
||||
const int & iswap,
|
||||
const int nfirst, const int &pbc_flag,
|
||||
const int* const pbc) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,1,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -492,7 +492,7 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPHostType,0,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -507,8 +507,8 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK);
|
||||
atomKK->modified(Device,X_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,1,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -520,7 +520,7 @@ int AtomVecMolecularKokkos::pack_comm_self(const int &n, const DAT::tdual_int_2d
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecMolecularKokkos_PackCommSelf<LMPDeviceType,0,1>
|
||||
f(atomKK->k_x,nfirst,list,iswap,domain->xprd,domain->yprd,domain->zprd,
|
||||
domain->xy,domain->xz,domain->yz,pbc);
|
||||
@ -563,8 +563,8 @@ struct AtomVecMolecularKokkos_UnpackComm {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void AtomVecMolecularKokkos::unpack_comm_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
const DAT::tdual_xfloat_2d &buf) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK);
|
||||
atomKK->modified(Host,X_MASK);
|
||||
struct AtomVecMolecularKokkos_UnpackComm<LMPHostType> f(atomKK->k_x,buf,first);
|
||||
@ -713,7 +713,7 @@ void AtomVecMolecularKokkos::unpack_comm_vel(int n, int first, double *buf)
|
||||
|
||||
int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
atomKK->sync(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -730,7 +730,7 @@ int AtomVecMolecularKokkos::pack_reverse(int n, int first, double *buf)
|
||||
|
||||
void AtomVecMolecularKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
atomKK->modified(Host,F_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -814,7 +814,7 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecMolecularKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
@ -828,7 +828,7 @@ int AtomVecMolecularKokkos::pack_border_kokkos(int n, DAT::tdual_int_2d k_sendli
|
||||
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecMolecularKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,h_molecule,dx,dy,dz);
|
||||
@ -1011,7 +1011,7 @@ struct AtomVecMolecularKokkos_UnpackBorder {
|
||||
typename AT::t_tagint_1d &molecule,
|
||||
const int& first):
|
||||
_buf(buf),_x(x),_tag(tag),_type(type),_mask(mask),_molecule(molecule),
|
||||
_first(first){
|
||||
_first(first) {
|
||||
};
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
@ -1035,7 +1035,7 @@ void AtomVecMolecularKokkos::unpack_border_kokkos(const int &n, const int &first
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
while (first+n >= nmax) grow(0);
|
||||
atomKK->modified(space,X_MASK|TAG_MASK|TYPE_MASK|MASK_MASK|MOLECULE_MASK);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecMolecularKokkos_UnpackBorder<LMPHostType>
|
||||
f(buf.view<LMPHostType>(),h_x,h_tag,h_type,h_mask,h_molecule,first);
|
||||
Kokkos::parallel_for(n,f);
|
||||
@ -1240,7 +1240,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
|
||||
_sendlist(sendlist.template view<DeviceType>()),
|
||||
_copylist(copylist.template view<DeviceType>()),
|
||||
_nlocal(nlocal),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
// 3 comp of x, 3 comp of v, 1 tag, 1 type, 1 mask, 1 image, 1 molecule, 3 nspecial,
|
||||
// maxspecial special, 1 num_bond, bond_per_atom bond_type, bond_per_atom bond_atom,
|
||||
// 1 num_angle, angle_per_atom angle_type, angle_per_atom angle_atom1, angle_atom2,
|
||||
@ -1309,7 +1309,7 @@ struct AtomVecMolecularKokkos_PackExchangeFunctor {
|
||||
|
||||
const int j = _copylist(mysend);
|
||||
|
||||
if(j>-1) {
|
||||
if (j>-1) {
|
||||
_xw(i,0) = _x(j,0);
|
||||
_xw(i,1) = _x(j,1);
|
||||
_xw(i,2) = _x(j,2);
|
||||
@ -1368,12 +1368,12 @@ int AtomVecMolecularKokkos::pack_exchange_kokkos(const int &nsend,DAT::tdual_xfl
|
||||
{
|
||||
const int elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*
|
||||
k_buf.view<LMPHostType>().extent(1))/elements) {
|
||||
int newsize = nsend*elements/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecMolecularKokkos_PackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
@ -1517,7 +1517,7 @@ struct AtomVecMolecularKokkos_UnpackExchangeFunctor {
|
||||
_improper_atom3(atom->k_improper_atom3.view<DeviceType>()),
|
||||
_improper_atom4(atom->k_improper_atom4.view<DeviceType>()),
|
||||
_nlocal(nlocal.template view<DeviceType>()),_dim(dim),
|
||||
_lo(lo),_hi(hi){
|
||||
_lo(lo),_hi(hi) {
|
||||
|
||||
elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
@ -1589,7 +1589,7 @@ int AtomVecMolecularKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,i
|
||||
ExecutionSpace space) {
|
||||
const size_t elements = 19+atom->maxspecial+2*atom->bond_per_atom+4*atom->angle_per_atom+
|
||||
5*atom->dihedral_per_atom + 5*atom->improper_per_atom;
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecMolecularKokkos_UnpackExchangeFunctor<LMPHostType>
|
||||
f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
|
||||
@ -274,10 +274,10 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
||||
return AtomVecKokkos::pack_comm_kokkos(n,list,iswap,buf,pbc_flag,pbc);
|
||||
// Check whether to always run forward communication on the host
|
||||
// Choose correct forward PackComm kernel
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPHostType,1,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -295,7 +295,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPHostType,0,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -315,8 +315,8 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
||||
}
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPDeviceType,1,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -334,7 +334,7 @@ int AtomVecSphereKokkos::pack_comm_kokkos(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackComm<LMPDeviceType,0,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -461,11 +461,11 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
const int &pbc_flag,
|
||||
const int* const pbc)
|
||||
{
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if(pbc_flag) {
|
||||
if(deform_vremap) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (deform_vremap) {
|
||||
if (domain->triclinic) {
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,1> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
@ -507,7 +507,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,1,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
@ -550,7 +550,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_PackCommVel<LMPHostType,0,0,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
@ -594,9 +594,9 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if(pbc_flag) {
|
||||
if(deform_vremap) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (deform_vremap) {
|
||||
if (domain->triclinic) {
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,1> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
@ -638,7 +638,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,1,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
@ -681,7 +681,7 @@ int AtomVecSphereKokkos::pack_comm_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_PackCommVel<LMPDeviceType,0,0,1,0> f(
|
||||
atomKK->k_x,atomKK->k_mask,
|
||||
@ -792,11 +792,11 @@ int AtomVecSphereKokkos::pack_comm_self(
|
||||
// Fallback to AtomVecKokkos if radvary == 0
|
||||
if (radvary == 0)
|
||||
return AtomVecKokkos::pack_comm_self(n,list,iswap,nfirst,pbc_flag,pbc);
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->sync(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,1,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -814,7 +814,7 @@ int AtomVecSphereKokkos::pack_comm_self(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPHostType,0,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -835,8 +835,8 @@ int AtomVecSphereKokkos::pack_comm_self(
|
||||
} else {
|
||||
atomKK->sync(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
atomKK->modified(Device,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
if(pbc_flag) {
|
||||
if(domain->triclinic) {
|
||||
if (pbc_flag) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,1,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -854,7 +854,7 @@ int AtomVecSphereKokkos::pack_comm_self(
|
||||
Kokkos::parallel_for(n,f);
|
||||
}
|
||||
} else {
|
||||
if(domain->triclinic) {
|
||||
if (domain->triclinic) {
|
||||
struct AtomVecSphereKokkos_PackCommSelf<LMPDeviceType,0,1> f(
|
||||
atomKK->k_x,
|
||||
atomKK->k_radius,atomKK->k_rmass,
|
||||
@ -917,13 +917,13 @@ struct AtomVecSphereKokkos_UnpackComm {
|
||||
|
||||
void AtomVecSphereKokkos::unpack_comm_kokkos(
|
||||
const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
const DAT::tdual_xfloat_2d &buf) {
|
||||
// Fallback to AtomVecKokkos if radvary == 0
|
||||
if (radvary == 0) {
|
||||
AtomVecKokkos::unpack_comm_kokkos(n,first,buf);
|
||||
return;
|
||||
}
|
||||
if(commKK->forward_comm_on_host) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK);
|
||||
struct AtomVecSphereKokkos_UnpackComm<LMPHostType> f(
|
||||
atomKK->k_x,
|
||||
@ -994,8 +994,8 @@ struct AtomVecSphereKokkos_UnpackCommVel {
|
||||
|
||||
void AtomVecSphereKokkos::unpack_comm_vel_kokkos(
|
||||
const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf ) {
|
||||
if(commKK->forward_comm_on_host) {
|
||||
const DAT::tdual_xfloat_2d &buf) {
|
||||
if (commKK->forward_comm_on_host) {
|
||||
atomKK->modified(Host,X_MASK|RADIUS_MASK|RMASS_MASK|V_MASK|OMEGA_MASK);
|
||||
if (radvary == 0) {
|
||||
struct AtomVecSphereKokkos_UnpackCommVel<LMPHostType,0> f(
|
||||
@ -1352,7 +1352,7 @@ int AtomVecSphereKokkos::unpack_comm_hybrid(int n, int first, double *buf)
|
||||
|
||||
int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
atomKK->sync(Host,F_MASK|TORQUE_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -1372,7 +1372,7 @@ int AtomVecSphereKokkos::pack_reverse(int n, int first, double *buf)
|
||||
|
||||
int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
|
||||
{
|
||||
if(n > 0)
|
||||
if (n > 0)
|
||||
atomKK->sync(Host,TORQUE_MASK);
|
||||
|
||||
int m = 0;
|
||||
@ -1389,7 +1389,7 @@ int AtomVecSphereKokkos::pack_reverse_hybrid(int n, int first, double *buf)
|
||||
|
||||
void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
if (n > 0) {
|
||||
atomKK->modified(Host,F_MASK|TORQUE_MASK);
|
||||
}
|
||||
|
||||
@ -1409,7 +1409,7 @@ void AtomVecSphereKokkos::unpack_reverse(int n, int *list, double *buf)
|
||||
|
||||
int AtomVecSphereKokkos::unpack_reverse_hybrid(int n, int *list, double *buf)
|
||||
{
|
||||
if(n > 0) {
|
||||
if (n > 0) {
|
||||
atomKK->modified(Host,TORQUE_MASK);
|
||||
}
|
||||
|
||||
@ -1502,7 +1502,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
|
||||
dy = pbc[1];
|
||||
dz = pbc[2];
|
||||
}
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecSphereKokkos_PackBorder<LMPHostType,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,
|
||||
@ -1519,7 +1519,7 @@ int AtomVecSphereKokkos::pack_border_kokkos(
|
||||
}
|
||||
} else {
|
||||
dx = dy = dz = 0;
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecSphereKokkos_PackBorder<LMPHostType,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,
|
||||
@ -1697,7 +1697,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
|
||||
dz = pbc[2];
|
||||
}
|
||||
if (!deform_vremap) {
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,
|
||||
@ -1721,7 +1721,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
|
||||
dvx = pbc[0]*h_rate[0] + pbc[5]*h_rate[5] + pbc[4]*h_rate[4];
|
||||
dvy = pbc[1]*h_rate[1] + pbc[3]*h_rate[3];
|
||||
dvz = pbc[2]*h_rate[2];
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecSphereKokkos_PackBorderVel<LMPHostType,1,1> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,
|
||||
@ -1742,7 +1742,7 @@ int AtomVecSphereKokkos::pack_border_vel_kokkos(
|
||||
}
|
||||
}
|
||||
} else {
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
AtomVecSphereKokkos_PackBorderVel<LMPHostType,0,0> f(
|
||||
buf.view<LMPHostType>(), k_sendlist.view<LMPHostType>(),
|
||||
iswap,h_x,h_tag,h_type,h_mask,
|
||||
@ -1926,7 +1926,7 @@ struct AtomVecSphereKokkos_UnpackBorder {
|
||||
void AtomVecSphereKokkos::unpack_border_kokkos(const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
|
||||
while (first+n >= nmax) grow(0);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecSphereKokkos_UnpackBorder<LMPHostType> f(buf.view<LMPHostType>(),
|
||||
h_x,h_tag,h_type,h_mask,
|
||||
h_radius,h_rmass,
|
||||
@ -2034,7 +2034,7 @@ void AtomVecSphereKokkos::unpack_border_vel_kokkos(
|
||||
const int &n, const int &first,
|
||||
const DAT::tdual_xfloat_2d &buf,ExecutionSpace space) {
|
||||
while (first+n >= nmax) grow(0);
|
||||
if(space==Host) {
|
||||
if (space==Host) {
|
||||
struct AtomVecSphereKokkos_UnpackBorderVel<LMPHostType> f(buf.view<LMPHostType>(),
|
||||
h_x,h_tag,h_type,h_mask,
|
||||
h_radius,h_rmass,
|
||||
@ -2212,7 +2212,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
||||
DAT::tdual_int_1d k_copylist,
|
||||
ExecutionSpace space,int dim,X_FLOAT lo,X_FLOAT hi)
|
||||
{
|
||||
if(nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
|
||||
if (nsend > (int) (k_buf.view<LMPHostType>().extent(0)*k_buf.view<LMPHostType>().extent(1))/16) {
|
||||
int newsize = nsend*17/k_buf.view<LMPHostType>().extent(1)+1;
|
||||
k_buf.resize(newsize,k_buf.view<LMPHostType>().extent(1));
|
||||
}
|
||||
@ -2220,7 +2220,7 @@ int AtomVecSphereKokkos::pack_exchange_kokkos(
|
||||
MASK_MASK | IMAGE_MASK| RADIUS_MASK | RMASS_MASK |
|
||||
OMEGA_MASK);
|
||||
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
AtomVecSphereKokkos_PackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_sendlist,k_copylist,atom->nlocal,dim,lo,hi);
|
||||
Kokkos::parallel_for(nsend,f);
|
||||
} else {
|
||||
@ -2338,7 +2338,7 @@ struct AtomVecSphereKokkos_UnpackExchangeFunctor {
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
int AtomVecSphereKokkos::unpack_exchange_kokkos(DAT::tdual_xfloat_2d &k_buf,int nrecv,int nlocal,int dim,X_FLOAT lo,X_FLOAT hi,ExecutionSpace space) {
|
||||
if(space == Host) {
|
||||
if (space == Host) {
|
||||
k_count.h_view(0) = nlocal;
|
||||
AtomVecSphereKokkos_UnpackExchangeFunctor<LMPHostType> f(atomKK,k_buf,k_count,dim,lo,hi);
|
||||
Kokkos::parallel_for(nrecv/16,f);
|
||||
|
||||
@ -107,6 +107,8 @@ void CommKokkos::init()
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
exchange_comm_classic = lmp->kokkos->exchange_comm_classic;
|
||||
forward_comm_classic = lmp->kokkos->forward_comm_classic;
|
||||
forward_pair_comm_classic = lmp->kokkos->forward_pair_comm_classic;
|
||||
forward_fix_comm_classic = lmp->kokkos->forward_fix_comm_classic;
|
||||
reverse_comm_classic = lmp->kokkos->reverse_comm_classic;
|
||||
exchange_comm_on_host = lmp->kokkos->exchange_comm_on_host;
|
||||
forward_comm_on_host = lmp->kokkos->forward_comm_on_host;
|
||||
@ -361,12 +363,12 @@ void CommKokkos::reverse_comm_device()
|
||||
|
||||
void CommKokkos::forward_comm_fix(Fix *fix, int size)
|
||||
{
|
||||
if (fix->execution_space == Device && fix->forward_comm_device) {
|
||||
k_sendlist.sync<LMPDeviceType>();
|
||||
forward_comm_fix_device<LMPDeviceType>(fix,size);
|
||||
} else {
|
||||
if (fix->execution_space == Host || !fix->forward_comm_device || forward_fix_comm_classic) {
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm_fix(fix,size);
|
||||
} else {
|
||||
k_sendlist.sync<LMPDeviceType>();
|
||||
forward_comm_fix_device<LMPDeviceType>(fix);
|
||||
}
|
||||
}
|
||||
|
||||
@ -456,10 +458,10 @@ void CommKokkos::reverse_comm_compute(Compute *compute)
|
||||
|
||||
void CommKokkos::forward_comm_pair(Pair *pair)
|
||||
{
|
||||
if (pair->execution_space == Host) {
|
||||
if (pair->execution_space == Host || forward_pair_comm_classic) {
|
||||
k_sendlist.sync<LMPHostType>();
|
||||
CommBrick::forward_comm_pair(pair);
|
||||
} else if (pair->execution_space == Device) {
|
||||
} else {
|
||||
k_sendlist.sync<LMPDeviceType>();
|
||||
forward_comm_pair_device<LMPDeviceType>(pair);
|
||||
}
|
||||
@ -571,10 +573,10 @@ void CommKokkos::reverse_comm_dump(Dump *dump)
|
||||
|
||||
void CommKokkos::exchange()
|
||||
{
|
||||
if(atom->nextra_grow + atom->nextra_border) {
|
||||
if(!exchange_comm_classic) {
|
||||
if (atom->nextra_grow + atom->nextra_border) {
|
||||
if (!exchange_comm_classic) {
|
||||
static int print = 1;
|
||||
if(print && comm->me==0) {
|
||||
if (print && comm->me==0) {
|
||||
error->warning(FLERR,"Fixes cannot yet send exchange data in Kokkos communication, "
|
||||
"switching to classic exchange/border communication");
|
||||
}
|
||||
@ -625,7 +627,7 @@ struct BuildExchangeListFunctor {
|
||||
void operator() (int i) const {
|
||||
if (_x(i,_dim) < _lo || _x(i,_dim) >= _hi) {
|
||||
const int mysend=Kokkos::atomic_fetch_add(&_nsend(),1);
|
||||
if(mysend < (int)_sendlist.extent(0)) {
|
||||
if (mysend < (int)_sendlist.extent(0)) {
|
||||
_sendlist(mysend) = i;
|
||||
_sendflag(i) = 1;
|
||||
}
|
||||
@ -713,7 +715,7 @@ void CommKokkos::exchange_device()
|
||||
|
||||
int sendpos = nlocal-1;
|
||||
nlocal -= k_count.h_view();
|
||||
for(int i = 0; i < k_count.h_view(); i++) {
|
||||
for (int i = 0; i < k_count.h_view(); i++) {
|
||||
if (k_exchange_sendlist.h_view(i)<nlocal) {
|
||||
while (k_sendflag.h_view(sendpos)) sendpos--;
|
||||
k_exchange_copylist.h_view(i) = sendpos;
|
||||
@ -887,7 +889,7 @@ struct BuildBorderListFunctor {
|
||||
|
||||
if (my_store_pos+mysend < maxsendlist) {
|
||||
mysend = my_store_pos;
|
||||
for(int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()){
|
||||
for (int i=teamstart + dev.team_rank(); i<teamend; i+=dev.team_size()) {
|
||||
if (x(i,dim) >= lo && x(i,dim) <= hi) {
|
||||
sendlist(iswap,mysend++) = i;
|
||||
}
|
||||
@ -979,7 +981,7 @@ void CommKokkos::borders_device() {
|
||||
|
||||
k_sendlist.modify<DeviceType>();
|
||||
|
||||
if(k_total_send.h_view() >= maxsendlist[iswap]) {
|
||||
if (k_total_send.h_view() >= maxsendlist[iswap]) {
|
||||
grow_list(iswap,k_total_send.h_view());
|
||||
|
||||
k_total_send.h_view() = 0;
|
||||
@ -1227,7 +1229,7 @@ void CommKokkos::grow_send_kokkos(int n, int flag, ExecutionSpace space)
|
||||
maxsend = static_cast<int> (BUFFACTOR * n);
|
||||
int maxsend_border = (maxsend+BUFEXTRA+5)/atom->avec->size_border + 2;
|
||||
if (flag) {
|
||||
if(space == Device)
|
||||
if (space == Device)
|
||||
k_buf_send.modify<LMPDeviceType>();
|
||||
else
|
||||
k_buf_send.modify<LMPHostType>();
|
||||
@ -1280,7 +1282,7 @@ void CommKokkos::grow_list(int /*iswap*/, int n)
|
||||
|
||||
memoryKK->grow_kokkos(k_sendlist,sendlist,maxswap,size,"comm:sendlist");
|
||||
|
||||
for(int i=0;i<maxswap;i++) {
|
||||
for (int i=0;i<maxswap;i++) {
|
||||
maxsendlist[i]=size; sendlist[i]=&k_sendlist.view<LMPHostType>()(i,0);
|
||||
}
|
||||
}
|
||||
|
||||
@ -25,6 +25,8 @@ class CommKokkos : public CommBrick {
|
||||
|
||||
bool exchange_comm_classic;
|
||||
bool forward_comm_classic;
|
||||
bool forward_pair_comm_classic;
|
||||
bool forward_fix_comm_classic;
|
||||
bool reverse_comm_classic;
|
||||
bool exchange_comm_on_host;
|
||||
bool forward_comm_on_host;
|
||||
|
||||
@ -241,7 +241,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
|
||||
int ncount = 0;
|
||||
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team,jnum),
|
||||
[&] (const int jj, int& count) {
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] (){
|
||||
Kokkos::single(Kokkos::PerThread(team), [&] () {
|
||||
int j = d_neighbors(i,jj);
|
||||
j &= NEIGHMASK;
|
||||
const F_FLOAT delx = x(j,0) - xtmp;
|
||||
@ -341,17 +341,17 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::operator() (TagComputeOrientOrder
|
||||
|
||||
#define SWAP(view,i,j) do { \
|
||||
tmp = view(i); view(i) = view(j); view(j) = tmp; \
|
||||
} while(0)
|
||||
} while (0)
|
||||
|
||||
#define ISWAP(view,i,j) do { \
|
||||
itmp = view(i); view(i) = view(j); view(j) = itmp; \
|
||||
} while(0)
|
||||
} while (0)
|
||||
|
||||
#define SWAP3(view,i,j) do { \
|
||||
tmp = view(i,0); view(i,0) = view(j,0); view(j,0) = tmp; \
|
||||
tmp = view(i,1); view(i,1) = view(j,1); view(j,1) = tmp; \
|
||||
tmp = view(i,2); view(i,2) = view(j,2); view(j,2) = tmp; \
|
||||
} while(0)
|
||||
} while (0)
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -439,14 +439,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
|
||||
const double r1 = d_rlist(ii,ineigh,1);
|
||||
const double r2 = d_rlist(ii,ineigh,2);
|
||||
const double rmag = sqrt(r0*r0 + r1*r1 + r2*r2);
|
||||
if(rmag <= MY_EPSILON) {
|
||||
if (rmag <= MY_EPSILON) {
|
||||
return;
|
||||
}
|
||||
|
||||
const double costheta = r2 / rmag;
|
||||
SNAcomplex expphi = {r0,r1};
|
||||
const double rxymag = sqrt(expphi.re*expphi.re+expphi.im*expphi.im);
|
||||
if(rxymag <= MY_EPSILON) {
|
||||
if (rxymag <= MY_EPSILON) {
|
||||
expphi.re = 1.0;
|
||||
expphi.im = 0.0;
|
||||
} else {
|
||||
@ -466,14 +466,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop1(int /*ncount*/, int ii
|
||||
const double polar_pf = polar_prefactor(l, 0, costheta);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,l).re), polar_pf);
|
||||
SNAcomplex expphim = {expphi.re,expphi.im};
|
||||
for(int m = 1; m <= +l; m++) {
|
||||
for (int m = 1; m <= +l; m++) {
|
||||
const double prefactor = polar_prefactor(l, m, costheta);
|
||||
SNAcomplex ylm = {prefactor * expphim.re, prefactor * expphim.im};
|
||||
//d_qnm(ii,il,m+l).re += ylm.re;
|
||||
//d_qnm(ii,il,m+l).im += ylm.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).re), ylm.re);
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,m+l).im), ylm.im);
|
||||
if(m & 1) {
|
||||
if (m & 1) {
|
||||
//d_qnm(ii,il,-m+l).re -= ylm.re;
|
||||
//d_qnm(ii,il,-m+l).im += ylm.im;
|
||||
Kokkos::atomic_add(&(d_qnm(ii,il,-m+l).re), -ylm.re);
|
||||
@ -508,7 +508,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
double facn = 1.0 / ncount;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
for(int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
d_qnm(ii,il,m).re *= facn;
|
||||
d_qnm(ii,il,m).im *= facn;
|
||||
}
|
||||
@ -522,7 +522,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
int l = d_qlist[il];
|
||||
double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
double qm_sum = 0.0;
|
||||
for(int m = 0; m < 2*l+1; m++)
|
||||
for (int m = 0; m < 2*l+1; m++)
|
||||
qm_sum += d_qnm(ii,il,m).re*d_qnm(ii,il,m).re + d_qnm(ii,il,m).im*d_qnm(ii,il,m).im;
|
||||
d_qnarray(i,jj++) = qnormfac * sqrt(qm_sum);
|
||||
}
|
||||
@ -534,8 +534,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
double wlsum = 0.0;
|
||||
for(int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
int m = m1 + m2 - l;
|
||||
SNAcomplex qm1qm2;
|
||||
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||
@ -555,8 +555,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = d_qlist[il];
|
||||
double wlsum = 0.0;
|
||||
for(int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
const int m = m1 + m2 - l;
|
||||
SNAcomplex qm1qm2;
|
||||
qm1qm2.re = d_qnm(ii,il,m1).re*d_qnm(ii,il,m2).re - d_qnm(ii,il,m1).im*d_qnm(ii,il,m2).im;
|
||||
@ -581,14 +581,14 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::calc_boop2(int ncount, int ii) co
|
||||
const int il = iqlcomp;
|
||||
const int l = qlcomp;
|
||||
if (d_qnarray(i,il) < QEPSILON)
|
||||
for(int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
d_qnarray(i,jj++) = 0.0;
|
||||
d_qnarray(i,jj++) = 0.0;
|
||||
}
|
||||
else {
|
||||
const double qnormfac = sqrt(MY_4PI/(2*l+1));
|
||||
const double qnfac = qnormfac/d_qnarray(i,il);
|
||||
for(int m = 0; m < 2*l+1; m++) {
|
||||
for (int m = 0; m < 2*l+1; m++) {
|
||||
d_qnarray(i,jj++) = d_qnm(ii,il,m).re * qnfac;
|
||||
d_qnarray(i,jj++) = d_qnm(ii,il,m).im * qnfac;
|
||||
}
|
||||
@ -665,8 +665,8 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
|
||||
idxcg_count = 0;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for(int m1 = 0; m1 < 2*l+1; m1++)
|
||||
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++)
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++)
|
||||
idxcg_count++;
|
||||
}
|
||||
idxcg_max = idxcg_count;
|
||||
@ -676,9 +676,9 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::init_clebsch_gordan()
|
||||
idxcg_count = 0;
|
||||
for (int il = 0; il < nqlist; il++) {
|
||||
int l = qlist[il];
|
||||
for(int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
for (int m1 = 0; m1 < 2*l+1; m1++) {
|
||||
aa2 = m1 - l;
|
||||
for(int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
for (int m2 = MAX(0,l-m1); m2 < MIN(2*l+1,3*l-m1+1); m2++) {
|
||||
bb2 = m2 - l;
|
||||
m = aa2 + bb2 + l;
|
||||
|
||||
@ -727,7 +727,7 @@ void ComputeOrientOrderAtomKokkos<DeviceType>::check_team_size_for(int inum, int
|
||||
|
||||
team_size_max = Kokkos::TeamPolicy<DeviceType,TagStyle>(inum,Kokkos::AUTO).team_size_max(*this,Kokkos::ParallelForTag());
|
||||
|
||||
if(team_size*vector_length > team_size_max)
|
||||
if (team_size*vector_length > team_size_max)
|
||||
team_size = team_size_max/vector_length;
|
||||
}
|
||||
|
||||
|
||||
@ -34,7 +34,7 @@ namespace LAMMPS_NS {
|
||||
t0 = t1 = t2 = t3 = t4 = t5 = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_CTEMP& operator+=(const s_CTEMP &rhs){
|
||||
s_CTEMP& operator+=(const s_CTEMP &rhs) {
|
||||
t0 += rhs.t0;
|
||||
t1 += rhs.t1;
|
||||
t2 += rhs.t2;
|
||||
|
||||
@ -235,7 +235,7 @@ struct DomainPBCFunctor {
|
||||
x(_x.view<DeviceType>()), v(_v.view<DeviceType>()),
|
||||
mask(_mask.view<DeviceType>()), image(_image.view<DeviceType>()),
|
||||
deform_groupbit(_deform_groupbit),
|
||||
xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic){
|
||||
xperiodic(_xperiodic), yperiodic(_yperiodic), zperiodic(_zperiodic) {
|
||||
lo[0]=_lo[0]; lo[1]=_lo[1]; lo[2]=_lo[2];
|
||||
hi[0]=_hi[0]; hi[1]=_hi[1]; hi[2]=_hi[2];
|
||||
period[0]=_period[0]; period[1]=_period[1]; period[2]=_period[2];
|
||||
|
||||
@ -127,7 +127,7 @@ void FFT3dKokkos<DeviceType>::timing1d(typename FFT_AT::t_FFT_SCALAR_1d d_in, in
|
||||
in starting address of input data on this proc
|
||||
out starting address of where output data for this proc
|
||||
will be placed (can be same as in)
|
||||
flag 1 for forward FFT, -1 for inverse FFT
|
||||
flag 1 for forward FFT, -1 for backward FFT
|
||||
plan plan returned by previous call to fft_3d_create_plan
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -215,22 +215,22 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
length = plan->length1;
|
||||
|
||||
#if defined(FFT_MKL)
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
DftiComputeForward(plan->handle_fast,d_data.data());
|
||||
else
|
||||
DftiComputeBackward(plan->handle_fast,d_data.data());
|
||||
#elif defined(FFT_FFTW3)
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
FFTW_API(execute_dft)(plan->plan_fast_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
else
|
||||
FFTW_API(execute_dft)(plan->plan_fast_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
#elif defined(FFT_CUFFT)
|
||||
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
|
||||
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
typename FFT_AT::t_FFT_DATA_1d d_tmp =
|
||||
typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
|
||||
kiss_fft_functor<DeviceType> f;
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_forward,length);
|
||||
else
|
||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_fast_backward,length);
|
||||
@ -238,7 +238,6 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
d_data = d_tmp;
|
||||
#endif
|
||||
|
||||
|
||||
// 1st mid-remap to prepare for 2nd FFTs
|
||||
// copy = loc for remap result
|
||||
|
||||
@ -260,20 +259,20 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
length = plan->length2;
|
||||
|
||||
#if defined(FFT_MKL)
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
DftiComputeForward(plan->handle_mid,d_data.data());
|
||||
else
|
||||
DftiComputeBackward(plan->handle_mid,d_data.data());
|
||||
#elif defined(FFT_FFTW3)
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
FFTW_API(execute_dft)(plan->plan_mid_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
else
|
||||
FFTW_API(execute_dft)(plan->plan_mid_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
#elif defined(FFT_CUFFT)
|
||||
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
|
||||
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_forward,length);
|
||||
else
|
||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_mid_backward,length);
|
||||
@ -302,20 +301,20 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
length = plan->length3;
|
||||
|
||||
#if defined(FFT_MKL)
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
DftiComputeForward(plan->handle_slow,d_data.data());
|
||||
else
|
||||
DftiComputeBackward(plan->handle_slow,d_data.data());
|
||||
#elif defined(FFT_FFTW3)
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
FFTW_API(execute_dft)(plan->plan_slow_forward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
else
|
||||
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
#elif defined(FFT_CUFFT)
|
||||
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
|
||||
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
d_tmp = typename FFT_AT::t_FFT_DATA_1d(Kokkos::view_alloc("fft_3d:tmp",Kokkos::WithoutInitializing),d_data.extent(0));
|
||||
if (flag == -1)
|
||||
if (flag == 1)
|
||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_forward,length);
|
||||
else
|
||||
f = kiss_fft_functor<DeviceType>(d_data,d_tmp,plan->cfg_slow_backward,length);
|
||||
@ -323,7 +322,6 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
d_data = d_tmp;
|
||||
#endif
|
||||
|
||||
|
||||
// post-remap to put data in output format if needed
|
||||
// destination is always out
|
||||
|
||||
@ -338,7 +336,7 @@ void FFT3dKokkos<DeviceType>::fft_3d_kokkos(typename FFT_AT::t_FFT_DATA_1d d_in,
|
||||
|
||||
// scaling if required
|
||||
|
||||
if (flag == 1 && plan->scaled) {
|
||||
if (flag == -1 && plan->scaled) {
|
||||
FFT_SCALAR norm = plan->norm;
|
||||
int num = plan->normnum;
|
||||
|
||||
@ -807,7 +805,7 @@ void FFT3dKokkos<DeviceType>::bifactor(int n, int *factor1, int *factor2)
|
||||
Arguments:
|
||||
in starting address of input data on this proc, all set to 0.0
|
||||
nsize size of in
|
||||
flag 1 for forward FFT, -1 for inverse FFT
|
||||
flag 1 for forward FFT, -1 for backward FFT
|
||||
plan plan returned by previous call to fft_3d_create_plan
|
||||
------------------------------------------------------------------------- */
|
||||
|
||||
@ -861,9 +859,9 @@ void FFT3dKokkos<DeviceType>::fft_3d_1d_only_kokkos(typename FFT_AT::t_FFT_DATA_
|
||||
FFTW_API(execute_dft)(plan->plan_slow_backward,(FFT_DATA*)d_data.data(),(FFT_DATA*)d_data.data());
|
||||
}
|
||||
#elif defined(FFT_CUFFT)
|
||||
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),flag);
|
||||
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),flag);
|
||||
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),flag);
|
||||
cufftExec(plan->plan_fast,d_data.data(),d_data.data(),-flag);
|
||||
cufftExec(plan->plan_mid,d_data.data(),d_data.data(),-flag);
|
||||
cufftExec(plan->plan_slow,d_data.data(),d_data.data(),-flag);
|
||||
#else
|
||||
kiss_fft_functor<DeviceType> f;
|
||||
typename FFT_AT::t_FFT_DATA_1d d_tmp =
|
||||
|
||||
@ -72,6 +72,7 @@ struct fft_plan_3d_kokkos {
|
||||
template<class DeviceType>
|
||||
class FFT3dKokkos : protected Pointers {
|
||||
public:
|
||||
enum{FORWARD=1,BACKWARD=-1};
|
||||
typedef DeviceType device_type;
|
||||
typedef FFTArrayTypes<DeviceType> FFT_AT;
|
||||
|
||||
|
||||
@ -77,7 +77,7 @@ void FixEnforce2DKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
if (atomKK->torque_flag) flag_mask |= 4;
|
||||
|
||||
copymode = 1;
|
||||
switch( flag_mask ){
|
||||
switch (flag_mask) {
|
||||
case 0:{
|
||||
FixEnforce2DKokkosPostForceFunctor<DeviceType,0,0,0> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
@ -139,21 +139,21 @@ template <int omega_flag, int angmom_flag, int torque_flag>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEnforce2DKokkos<DeviceType>::post_force_item( int i ) const
|
||||
{
|
||||
if (mask[i] & groupbit){
|
||||
if (mask[i] & groupbit) {
|
||||
v(i,2) = 0.0;
|
||||
f(i,2) = 0.0;
|
||||
|
||||
if(omega_flag){
|
||||
if (omega_flag) {
|
||||
omega(i,0) = 0.0;
|
||||
omega(i,1) = 0.0;
|
||||
}
|
||||
|
||||
if(angmom_flag){
|
||||
if (angmom_flag) {
|
||||
angmom(i,0) = 0.0;
|
||||
angmom(i,1) = 0.0;
|
||||
}
|
||||
|
||||
if(torque_flag){
|
||||
if (torque_flag) {
|
||||
torque(i,0) = 0.0;
|
||||
torque(i,1) = 0.0;
|
||||
}
|
||||
|
||||
@ -193,7 +193,7 @@ KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXInit, const int &i) const {
|
||||
double tmp;
|
||||
if (mask[i] & groupbit) {
|
||||
if(dpdTheta[i] <= 0.0)
|
||||
if (dpdTheta[i] <= 0.0)
|
||||
k_error_flag.template view<DeviceType>()() = 1;
|
||||
energy_lookup(i,dpdTheta[i],tmp);
|
||||
uCond[i] = 0.0;
|
||||
@ -233,7 +233,7 @@ void FixEOStableRXKokkos<DeviceType>::post_integrate()
|
||||
template<class DeviceType>
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void FixEOStableRXKokkos<DeviceType>::operator()(TagFixEOStableRXTemperatureLookup2, const int &i) const {
|
||||
if (mask[i] & groupbit){
|
||||
if (mask[i] & groupbit) {
|
||||
temperature_lookup(i,uCond[i]+uMech[i]+uChem[i],dpdTheta[i]);
|
||||
if (dpdTheta[i] <= 0.0)
|
||||
k_error_flag.template view<DeviceType>()() = 1;
|
||||
@ -303,7 +303,7 @@ void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, doubl
|
||||
nPG = 0;
|
||||
|
||||
if (rx_flag) {
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++ ) {
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++) {
|
||||
nTotal += dvector(ispecies,id);
|
||||
if (fabs(d_moleculeCorrCoeff[ispecies]) > tolerance) {
|
||||
nPG++;
|
||||
@ -314,7 +314,7 @@ void FixEOStableRXKokkos<DeviceType>::energy_lookup(int id, double thetai, doubl
|
||||
nTotal = 1.0;
|
||||
}
|
||||
|
||||
for(int ispecies=0;ispecies<nspecies;ispecies++){
|
||||
for (int ispecies=0;ispecies<nspecies;ispecies++) {
|
||||
//Table *tb = &tables[ispecies];
|
||||
//thetai = MAX(thetai,tb->lo);
|
||||
thetai = MAX(thetai,d_table_const.lo(ispecies));
|
||||
@ -364,7 +364,7 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
|
||||
// Store the current thetai in t1
|
||||
t1 = MAX(thetai,lo);
|
||||
t1 = MIN(t1,hi);
|
||||
if(t1==hi) delta = -delta;
|
||||
if (t1==hi) delta = -delta;
|
||||
|
||||
// Compute u1 at thetai
|
||||
energy_lookup(id,t1,u1);
|
||||
@ -382,9 +382,9 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
|
||||
f2 = u2 - ui;
|
||||
|
||||
// Apply the Secant Method
|
||||
for(it=0; it<maxit; it++){
|
||||
if(fabs(f2-f1) < MY_EPSILON){
|
||||
if(std::isnan(f1) || std::isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
|
||||
for (it=0; it<maxit; it++) {
|
||||
if (fabs(f2-f1) < MY_EPSILON) {
|
||||
if (std::isnan(f1) || std::isnan(f2)) k_error_flag.template view<DeviceType>()() = 2;
|
||||
temp = t1;
|
||||
temp = MAX(temp,lo);
|
||||
temp = MIN(temp,hi);
|
||||
@ -392,15 +392,15 @@ void FixEOStableRXKokkos<DeviceType>::temperature_lookup(int id, double ui, doub
|
||||
break;
|
||||
}
|
||||
temp = t2 - f2*(t2-t1)/(f2-f1);
|
||||
if(fabs(temp-t2) < tolerance) break;
|
||||
if (fabs(temp-t2) < tolerance) break;
|
||||
f1 = f2;
|
||||
t1 = t2;
|
||||
t2 = temp;
|
||||
energy_lookup(id,t2,u2);
|
||||
f2 = u2 - ui;
|
||||
}
|
||||
if(it==maxit){
|
||||
if(std::isnan(f1) || std::isnan(f2) || std::isnan(ui) || std::isnan(thetai) || std::isnan(t1) || std::isnan(t2))
|
||||
if (it==maxit) {
|
||||
if (std::isnan(f1) || std::isnan(f2) || std::isnan(ui) || std::isnan(thetai) || std::isnan(t1) || std::isnan(t2))
|
||||
k_error_flag.template view<DeviceType>()() = 2;
|
||||
else
|
||||
k_error_flag.template view<DeviceType>()() = 3;
|
||||
@ -440,7 +440,7 @@ void FixEOStableRXKokkos<DeviceType>::unpack_forward_comm(int n, int first, doub
|
||||
|
||||
m = 0;
|
||||
last = first + n ;
|
||||
for (ii = first; ii < last; ii++){
|
||||
for (ii = first; ii < last; ii++) {
|
||||
h_uChem[ii] = buf[m++];
|
||||
h_uCG[ii] = buf[m++];
|
||||
h_uCGnew[ii] = buf[m++];
|
||||
@ -518,24 +518,24 @@ void FixEOStableRXKokkos<DeviceType>::create_kokkos_tables()
|
||||
memoryKK->create_kokkos(d_table->hi,h_table->hi,ntables,"Table::hi");
|
||||
memoryKK->create_kokkos(d_table->invdelta,h_table->invdelta,ntables,"Table::invdelta");
|
||||
|
||||
if(tabstyle == LINEAR) {
|
||||
if (tabstyle == LINEAR) {
|
||||
memoryKK->create_kokkos(d_table->r,h_table->r,ntables,tablength,"Table::r");
|
||||
memoryKK->create_kokkos(d_table->e,h_table->e,ntables,tablength,"Table::e");
|
||||
memoryKK->create_kokkos(d_table->de,h_table->de,ntables,tlm1,"Table::de");
|
||||
}
|
||||
|
||||
for(int i=0; i < ntables; i++) {
|
||||
for (int i=0; i < ntables; i++) {
|
||||
Table* tb = &tables[i];
|
||||
|
||||
h_table->lo[i] = tb->lo;
|
||||
h_table->hi[i] = tb->hi;
|
||||
h_table->invdelta[i] = tb->invdelta;
|
||||
|
||||
for(int j = 0; j<h_table->r.extent(1); j++)
|
||||
for (int j = 0; j<h_table->r.extent(1); j++)
|
||||
h_table->r(i,j) = tb->r[j];
|
||||
for(int j = 0; j<h_table->e.extent(1); j++)
|
||||
for (int j = 0; j<h_table->e.extent(1); j++)
|
||||
h_table->e(i,j) = tb->e[j];
|
||||
for(int j = 0; j<h_table->de.extent(1); j++)
|
||||
for (int j = 0; j<h_table->de.extent(1); j++)
|
||||
h_table->de(i,j) = tb->de[j];
|
||||
}
|
||||
|
||||
|
||||
@ -61,7 +61,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
|
||||
for (int i = 1; i <= ntypes; i++) ratio[i] = 1.0;
|
||||
k_ratio.template modify<LMPHostType>();
|
||||
|
||||
if(gjfflag){
|
||||
if (gjfflag) {
|
||||
grow_arrays(atomKK->nmax);
|
||||
atom->add_callback(Atom::GROW);
|
||||
// initialize franprev to zero
|
||||
@ -76,7 +76,7 @@ FixLangevinKokkos<DeviceType>::FixLangevinKokkos(LAMMPS *lmp, int narg, char **a
|
||||
k_franprev.template modify<LMPHostType>();
|
||||
k_lv.template modify<LMPHostType>();
|
||||
}
|
||||
if(zeroflag){
|
||||
if (zeroflag) {
|
||||
k_fsumall = tdual_double_1d_3n("langevin:fsumall");
|
||||
h_fsumall = k_fsumall.template view<LMPHostType>();
|
||||
d_fsumall = k_fsumall.template view<DeviceType>();
|
||||
@ -97,8 +97,8 @@ FixLangevinKokkos<DeviceType>::~FixLangevinKokkos()
|
||||
memoryKK->destroy_kokkos(k_gfactor2,gfactor2);
|
||||
memoryKK->destroy_kokkos(k_ratio,ratio);
|
||||
memoryKK->destroy_kokkos(k_flangevin,flangevin);
|
||||
if(gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
|
||||
if(gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
|
||||
if (gjfflag) memoryKK->destroy_kokkos(k_franprev,franprev);
|
||||
if (gjfflag) memoryKK->destroy_kokkos(k_lv,lv);
|
||||
memoryKK->destroy_kokkos(k_tforce,tforce);
|
||||
}
|
||||
|
||||
@ -108,13 +108,13 @@ template<class DeviceType>
|
||||
void FixLangevinKokkos<DeviceType>::init()
|
||||
{
|
||||
FixLangevin::init();
|
||||
if(oflag)
|
||||
if (oflag)
|
||||
error->all(FLERR,"Fix langevin omega is not yet implemented with kokkos");
|
||||
if(ascale)
|
||||
if (ascale)
|
||||
error->all(FLERR,"Fix langevin angmom is not yet implemented with kokkos");
|
||||
if(gjfflag && tbiasflag)
|
||||
if (gjfflag && tbiasflag)
|
||||
error->all(FLERR,"Fix langevin gjf + tbias is not yet implemented with kokkos");
|
||||
if(gjfflag && tbiasflag)
|
||||
if (gjfflag && tbiasflag)
|
||||
error->warning(FLERR,"Fix langevin gjf + kokkos is not implemented with random gaussians");
|
||||
|
||||
// prefactors are modified in the init
|
||||
@ -182,8 +182,8 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
k_gfactor1.template sync<DeviceType>();
|
||||
k_gfactor2.template sync<DeviceType>();
|
||||
k_ratio.template sync<DeviceType>();
|
||||
if(gjfflag) k_franprev.template sync<DeviceType>();
|
||||
if(gjfflag) k_lv.template sync<DeviceType>();
|
||||
if (gjfflag) k_franprev.template sync<DeviceType>();
|
||||
if (gjfflag) k_lv.template sync<DeviceType>();
|
||||
|
||||
boltz = force->boltz;
|
||||
dt = update->dt;
|
||||
@ -217,7 +217,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
}
|
||||
|
||||
// account for bias velocity
|
||||
if(tbiasflag == BIAS){
|
||||
if (tbiasflag == BIAS) {
|
||||
atomKK->sync(temperature->execution_space,temperature->datamask_read);
|
||||
temperature->compute_scalar();
|
||||
temperature->remove_bias_all(); // modifies velocities
|
||||
@ -516,7 +516,7 @@ void FixLangevinKokkos<DeviceType>::post_force(int /*vflag*/)
|
||||
}
|
||||
|
||||
|
||||
if(tbiasflag == BIAS){
|
||||
if (tbiasflag == BIAS) {
|
||||
atomKK->sync(temperature->execution_space,temperature->datamask_read);
|
||||
temperature->restore_bias_all(); // modifies velocities
|
||||
atomKK->modified(temperature->execution_space,temperature->datamask_modify);
|
||||
@ -566,8 +566,8 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
|
||||
|
||||
if (mask[i] & groupbit) {
|
||||
rand_type rand_gen = rand_pool.get_state();
|
||||
if(Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
|
||||
if(Tp_RMASS){
|
||||
if (Tp_TSTYLEATOM) tsqrt_t = sqrt(d_tforce[i]);
|
||||
if (Tp_RMASS) {
|
||||
gamma1 = -rmass[i] / t_period / ftm2v;
|
||||
gamma2 = sqrt(rmass[i]) * fran_prop_const / ftm2v;
|
||||
gamma1 *= 1.0/d_ratio[type[i]];
|
||||
@ -581,7 +581,7 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
|
||||
fran[1] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
fran[2] = gamma2 * (rand_gen.drand() - 0.5); //(random->uniform()-0.5);
|
||||
|
||||
if(Tp_BIAS){
|
||||
if (Tp_BIAS) {
|
||||
fdrag[0] = gamma1*v(i,0);
|
||||
fdrag[1] = gamma1*v(i,1);
|
||||
fdrag[2] = gamma1*v(i,2);
|
||||
@ -625,7 +625,7 @@ FSUM FixLangevinKokkos<DeviceType>::post_force_item(int i) const
|
||||
f(i,2) += fdrag[2] + fran[2];
|
||||
|
||||
if (Tp_TALLY) {
|
||||
if (Tp_GJF){
|
||||
if (Tp_GJF) {
|
||||
fdrag[0] = gamma1*d_lv(i,0)/gjfsib/gjfsib;
|
||||
fdrag[1] = gamma1*d_lv(i,1)/gjfsib/gjfsib;
|
||||
fdrag[2] = gamma1*d_lv(i,2)/gjfsib/gjfsib;
|
||||
@ -794,7 +794,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step()
|
||||
FixLangevinKokkosTallyEnergyFunctor<DeviceType> tally_functor(this);
|
||||
Kokkos::parallel_reduce(nlocal,tally_functor,energy_onestep);
|
||||
|
||||
if (gjfflag){
|
||||
if (gjfflag) {
|
||||
if (rmass.data()) {
|
||||
FixLangevinKokkosEndOfStepFunctor<DeviceType,1> functor(this);
|
||||
Kokkos::parallel_for(nlocal,functor);
|
||||
@ -817,7 +817,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_item(int i) const {
|
||||
tmp[0] = v(i,0);
|
||||
tmp[1] = v(i,1);
|
||||
tmp[2] = v(i,2);
|
||||
if (!osflag){
|
||||
if (!osflag) {
|
||||
v(i,0) = d_lv(i,0);
|
||||
v(i,1) = d_lv(i,1);
|
||||
v(i,2) = d_lv(i,2);
|
||||
@ -848,7 +848,7 @@ void FixLangevinKokkos<DeviceType>::end_of_step_rmass_item(int i) const
|
||||
tmp[0] = v(i,0);
|
||||
tmp[1] = v(i,1);
|
||||
tmp[2] = v(i,2);
|
||||
if (!osflag){
|
||||
if (!osflag) {
|
||||
v(i,0) = d_lv(i,0);
|
||||
v(i,1) = d_lv(i,1);
|
||||
v(i,2) = d_lv(i,2);
|
||||
|
||||
@ -36,7 +36,7 @@ namespace LAMMPS_NS {
|
||||
fx = fy = fz = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_FSUM& operator+=(const s_FSUM &rhs){
|
||||
s_FSUM& operator+=(const s_FSUM &rhs) {
|
||||
fx += rhs.fx;
|
||||
fy += rhs.fy;
|
||||
fz += rhs.fz;
|
||||
@ -175,7 +175,7 @@ namespace LAMMPS_NS {
|
||||
|
||||
FixLangevinKokkosPostForceFunctor(FixLangevinKokkos<DeviceType>* c_ptr):
|
||||
c(*c_ptr) {}
|
||||
~FixLangevinKokkosPostForceFunctor(){c.cleanup_copy();}
|
||||
~FixLangevinKokkosPostForceFunctor() {c.cleanup_copy();}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
void operator()(const int i) const {
|
||||
|
||||
@ -43,11 +43,7 @@ FixNeighHistoryKokkos<DeviceType>::FixNeighHistoryKokkos(LAMMPS *lmp, int narg,
|
||||
grow_arrays(atom->nmax);
|
||||
|
||||
d_resize = typename ArrayTypes<DeviceType>::t_int_scalar("FixNeighHistoryKokkos::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(d_resize);
|
||||
#else
|
||||
h_resize = d_resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
}
|
||||
|
||||
|
||||
@ -51,8 +51,8 @@ FixQEqReaxKokkos(LAMMPS *lmp, int narg, char **arg) :
|
||||
atomKK = (AtomKokkos *) atom;
|
||||
execution_space = ExecutionSpaceFromDevice<DeviceType>::space;
|
||||
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | MASK_MASK | Q_MASK | TYPE_MASK | TAG_MASK;
|
||||
datamask_modify = Q_MASK | X_MASK;
|
||||
datamask_read = X_MASK | V_MASK | F_MASK | Q_MASK | MASK_MASK | TYPE_MASK | TAG_MASK;
|
||||
datamask_modify = X_MASK;
|
||||
|
||||
nmax = m_cap = 0;
|
||||
allocated_flag = 0;
|
||||
@ -81,8 +81,7 @@ FixQEqReaxKokkos<DeviceType>::~FixQEqReaxKokkos()
|
||||
template<class DeviceType>
|
||||
void FixQEqReaxKokkos<DeviceType>::init()
|
||||
{
|
||||
atomKK->k_q.modify<LMPHostType>();
|
||||
atomKK->k_q.sync<DeviceType>();
|
||||
atomKK->sync(execution_space,Q_MASK);
|
||||
|
||||
FixQEqReax::init();
|
||||
|
||||
@ -139,8 +138,8 @@ void FixQEqReaxKokkos<DeviceType>::init_shielding_k()
|
||||
k_shield = DAT::tdual_ffloat_2d("qeq/kk:shield",ntypes+1,ntypes+1);
|
||||
d_shield = k_shield.template view<DeviceType>();
|
||||
|
||||
for( i = 1; i <= ntypes; ++i )
|
||||
for( j = 1; j <= ntypes; ++j )
|
||||
for (i = 1; i <= ntypes; ++i)
|
||||
for (j = 1; j <= ntypes; ++j)
|
||||
k_shield.h_view(i,j) = pow( gamma[i] * gamma[j], -1.5 );
|
||||
|
||||
k_shield.template modify<LMPHostType>();
|
||||
@ -263,15 +262,15 @@ void FixQEqReaxKokkos<DeviceType>::pre_force(int /*vflag*/)
|
||||
|
||||
// comm->forward_comm_fix(this); //Dist_vector( s );
|
||||
pack_flag = 2;
|
||||
k_s.template sync<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_s.template modify<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_s.template sync<DeviceType>();
|
||||
|
||||
// comm->forward_comm_fix(this); //Dist_vector( t );
|
||||
pack_flag = 3;
|
||||
k_t.template sync<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_t.template modify<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_t.template sync<DeviceType>();
|
||||
|
||||
need_dup = lmp->kokkos->need_dup<DeviceType>();
|
||||
|
||||
@ -752,9 +751,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
||||
|
||||
if (neighflag != FULL) {
|
||||
k_o.template modify<DeviceType>();
|
||||
k_o.template sync<LMPHostType>();
|
||||
comm->reverse_comm_fix(this); //Coll_vector( q );
|
||||
k_o.template modify<LMPHostType>();
|
||||
k_o.template sync<DeviceType>();
|
||||
}
|
||||
|
||||
@ -781,9 +778,9 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
||||
|
||||
// comm->forward_comm_fix(this); //Dist_vector( d );
|
||||
pack_flag = 1;
|
||||
k_d.template sync<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_d.template modify<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_d.template sync<DeviceType>();
|
||||
|
||||
// sparse_matvec( &H, d, q );
|
||||
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
|
||||
@ -807,9 +804,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve1()
|
||||
|
||||
if (neighflag != FULL) {
|
||||
k_o.template modify<DeviceType>();
|
||||
k_o.template sync<LMPHostType>();
|
||||
comm->reverse_comm_fix(this); //Coll_vector( q );
|
||||
k_o.template modify<LMPHostType>();
|
||||
k_o.template sync<DeviceType>();
|
||||
}
|
||||
|
||||
@ -888,9 +883,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
||||
|
||||
if (neighflag != FULL) {
|
||||
k_o.template modify<DeviceType>();
|
||||
k_o.template sync<LMPHostType>();
|
||||
comm->reverse_comm_fix(this); //Coll_vector( q );
|
||||
k_o.template modify<LMPHostType>();
|
||||
k_o.template sync<DeviceType>();
|
||||
}
|
||||
|
||||
@ -917,9 +910,9 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
||||
|
||||
// comm->forward_comm_fix(this); //Dist_vector( d );
|
||||
pack_flag = 1;
|
||||
k_d.template sync<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_d.template modify<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
k_d.template sync<DeviceType>();
|
||||
|
||||
// sparse_matvec( &H, d, q );
|
||||
FixQEqReaxKokkosSparse22Functor<DeviceType> sparse22_functor(this);
|
||||
@ -943,9 +936,7 @@ void FixQEqReaxKokkos<DeviceType>::cg_solve2()
|
||||
|
||||
if (neighflag != FULL) {
|
||||
k_o.template modify<DeviceType>();
|
||||
k_o.template sync<LMPHostType>();
|
||||
comm->reverse_comm_fix(this); //Coll_vector( q );
|
||||
k_o.template modify<LMPHostType>();
|
||||
k_o.template sync<DeviceType>();
|
||||
}
|
||||
|
||||
@ -1017,13 +1008,11 @@ void FixQEqReaxKokkos<DeviceType>::calculate_q()
|
||||
// q[i] = s[i] - u * t[i];
|
||||
FixQEqReaxKokkosCalculateQFunctor<DeviceType> calculateQ_functor(this);
|
||||
Kokkos::parallel_for(inum,calculateQ_functor);
|
||||
atomKK->modified(execution_space,Q_MASK);
|
||||
|
||||
pack_flag = 4;
|
||||
//comm->forward_comm_fix( this ); //Dist_vector( atom->q );
|
||||
atomKK->k_q.sync<DeviceType>();
|
||||
comm->forward_comm_fix(this);
|
||||
atomKK->k_q.modify<DeviceType>();
|
||||
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -1053,7 +1042,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse13_item(int ii) const
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
F_FLOAT tmp = 0.0;
|
||||
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
const int j = d_jlist(jj);
|
||||
tmp += d_val(jj) * d_s[j];
|
||||
a_o[j] += d_val(jj) * d_s[i];
|
||||
@ -1106,7 +1095,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse23_item(int ii) const
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
F_FLOAT tmp = 0.0;
|
||||
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
const int j = d_jlist(jj);
|
||||
tmp += d_val(jj) * d_d[j];
|
||||
a_o[j] += d_val(jj) * d_d[i];
|
||||
@ -1166,7 +1155,7 @@ void FixQEqReaxKokkos<DeviceType>::sparse33_item(int ii) const
|
||||
const int i = d_ilist[ii];
|
||||
if (mask[i] & groupbit) {
|
||||
F_FLOAT tmp = 0.0;
|
||||
for(int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
for (int jj = d_firstnbr[i]; jj < d_firstnbr[i] + d_numnbrs[i]; jj++) {
|
||||
const int j = d_jlist(jj);
|
||||
tmp += d_val(jj) * d_t[j];
|
||||
a_o[j] += d_val(jj) * d_t[i];
|
||||
@ -1371,11 +1360,11 @@ void FixQEqReaxKokkos<DeviceType>::operator()(TagFixQEqReaxPackForwardComm, cons
|
||||
|
||||
if (pack_flag == 1)
|
||||
d_buf[i] = d_d[j];
|
||||
else if( pack_flag == 2 )
|
||||
else if (pack_flag == 2)
|
||||
d_buf[i] = d_s[j];
|
||||
else if( pack_flag == 3 )
|
||||
else if (pack_flag == 3)
|
||||
d_buf[i] = d_t[j];
|
||||
else if( pack_flag == 4 )
|
||||
else if (pack_flag == 4)
|
||||
d_buf[i] = q[j];
|
||||
}
|
||||
|
||||
@ -1387,6 +1376,9 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm_fix_kokkos(int n, int fir
|
||||
first = first_in;
|
||||
d_buf = buf.view<DeviceType>();
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType, TagFixQEqReaxUnpackForwardComm>(0,n),*this);
|
||||
|
||||
if (pack_flag == 4)
|
||||
atomKK->modified(execution_space,Q_MASK); // needed for auto_sync
|
||||
}
|
||||
|
||||
template<class DeviceType>
|
||||
@ -1394,11 +1386,11 @@ KOKKOS_INLINE_FUNCTION
|
||||
void FixQEqReaxKokkos<DeviceType>::operator()(TagFixQEqReaxUnpackForwardComm, const int &i) const {
|
||||
if (pack_flag == 1)
|
||||
d_d[i + first] = d_buf[i];
|
||||
else if( pack_flag == 2)
|
||||
else if ( pack_flag == 2)
|
||||
d_s[i + first] = d_buf[i];
|
||||
else if( pack_flag == 3)
|
||||
else if ( pack_flag == 3)
|
||||
d_t[i + first] = d_buf[i];
|
||||
else if( pack_flag == 4)
|
||||
else if ( pack_flag == 4)
|
||||
q[i + first] = d_buf[i];
|
||||
|
||||
}
|
||||
@ -1411,14 +1403,19 @@ int FixQEqReaxKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *bu
|
||||
{
|
||||
int m;
|
||||
|
||||
if (pack_flag == 1)
|
||||
for(m = 0; m < n; m++) buf[m] = h_d[list[m]];
|
||||
else if( pack_flag == 2 )
|
||||
for(m = 0; m < n; m++) buf[m] = h_s[list[m]];
|
||||
else if( pack_flag == 3 )
|
||||
for(m = 0; m < n; m++) buf[m] = h_t[list[m]];
|
||||
else if( pack_flag == 4 )
|
||||
for(m = 0; m < n; m++) buf[m] = atom->q[list[m]];
|
||||
if (pack_flag == 1) {
|
||||
k_d.sync_host();
|
||||
for (m = 0; m < n; m++) buf[m] = h_d[list[m]];
|
||||
} else if (pack_flag == 2) {
|
||||
k_s.sync_host();
|
||||
for (m = 0; m < n; m++) buf[m] = h_s[list[m]];
|
||||
} else if (pack_flag == 3) {
|
||||
k_t.sync_host();
|
||||
for (m = 0; m < n; m++) buf[m] = h_t[list[m]];
|
||||
} else if (pack_flag == 4) {
|
||||
atomKK->sync(Host,Q_MASK);
|
||||
for (m = 0; m < n; m++) buf[m] = atom->q[list[m]];
|
||||
}
|
||||
|
||||
return n;
|
||||
}
|
||||
@ -1430,14 +1427,23 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double
|
||||
{
|
||||
int i, m;
|
||||
|
||||
if (pack_flag == 1)
|
||||
for(m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m];
|
||||
else if( pack_flag == 2)
|
||||
for(m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m];
|
||||
else if( pack_flag == 3)
|
||||
for(m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m];
|
||||
else if( pack_flag == 4)
|
||||
for(m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
|
||||
if (pack_flag == 1) {
|
||||
k_d.sync_host();
|
||||
for (m = 0, i = first; m < n; m++, i++) h_d[i] = buf[m];
|
||||
k_d.modify_host();
|
||||
} else if (pack_flag == 2) {
|
||||
k_s.sync_host();
|
||||
for (m = 0, i = first; m < n; m++, i++) h_s[i] = buf[m];
|
||||
k_s.modify_host();
|
||||
} else if (pack_flag == 3) {
|
||||
k_t.sync_host();
|
||||
for (m = 0, i = first; m < n; m++, i++) h_t[i] = buf[m];
|
||||
k_t.modify_host();
|
||||
} else if (pack_flag == 4) {
|
||||
atomKK->sync(Host,Q_MASK);
|
||||
for (m = 0, i = first; m < n; m++, i++) atom->q[i] = buf[m];
|
||||
atomKK->modified(Host,Q_MASK);
|
||||
}
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
@ -1445,8 +1451,10 @@ void FixQEqReaxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double
|
||||
template<class DeviceType>
|
||||
int FixQEqReaxKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *buf)
|
||||
{
|
||||
k_o.sync_host();
|
||||
|
||||
int i, m;
|
||||
for(m = 0, i = first; m < n; m++, i++) {
|
||||
for (m = 0, i = first; m < n; m++, i++) {
|
||||
buf[m] = h_o[i];
|
||||
}
|
||||
return n;
|
||||
@ -1457,9 +1465,13 @@ int FixQEqReaxKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *bu
|
||||
template<class DeviceType>
|
||||
void FixQEqReaxKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double *buf)
|
||||
{
|
||||
k_o.sync_host();
|
||||
|
||||
for(int m = 0; m < n; m++) {
|
||||
h_o[list[m]] += buf[m];
|
||||
}
|
||||
|
||||
k_o.modify_host();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
@ -147,9 +147,9 @@ class FixQEqReaxKokkos : public FixQEqReax, public KokkosBase {
|
||||
|
||||
struct params_qeq{
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_qeq(){chi=0;eta=0;gamma=0;};
|
||||
params_qeq() {chi=0;eta=0;gamma=0;};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_qeq(int /*i*/){chi=0;eta=0;gamma=0;};
|
||||
params_qeq(int /*i*/) {chi=0;eta=0;gamma=0;};
|
||||
F_FLOAT chi, eta, gamma;
|
||||
};
|
||||
|
||||
|
||||
@ -130,7 +130,7 @@ void FixRxKokkos<DeviceType>::init()
|
||||
bool eos_flag = false;
|
||||
for (int i = 0; i < modify->nfix; i++)
|
||||
if (utils::strmatch(modify->fix[i]->style,"^eos/table/rx")) eos_flag = true;
|
||||
if(!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified");
|
||||
if (!eos_flag) error->all(FLERR,"fix rx requires fix eos/table/rx to be specified");
|
||||
|
||||
if (update_kinetics_data)
|
||||
create_kinetics_data();
|
||||
@ -322,7 +322,7 @@ void FixRxKokkos<DeviceType>::k_rkf45_step (const int neq, const double h, Vecto
|
||||
// 1)
|
||||
k_rhs (0.0, y, f1, userData);
|
||||
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
f1[k] *= h;
|
||||
ytmp[k] = y[k] + c21 * f1[k];
|
||||
}
|
||||
@ -330,7 +330,7 @@ void FixRxKokkos<DeviceType>::k_rkf45_step (const int neq, const double h, Vecto
|
||||
// 2)
|
||||
k_rhs(0.0, ytmp, f2, userData);
|
||||
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
f2[k] *= h;
|
||||
ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k];
|
||||
}
|
||||
@ -417,7 +417,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
|
||||
// compute ydot at t=t0
|
||||
k_rhs (t, y, ydot, userData);
|
||||
|
||||
while(1)
|
||||
while (1)
|
||||
{
|
||||
// Estimate y'' with finite-difference ...
|
||||
|
||||
@ -429,7 +429,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
|
||||
|
||||
// Compute WRMS norm of y''
|
||||
double yddnrm = 0.0;
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
double ydd = (ydot1[k] - ydot[k]) / hg;
|
||||
double wterr = ydd / (relTol * fabs( y[k] ) + absTol);
|
||||
yddnrm += wterr * wterr;
|
||||
@ -441,7 +441,7 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
|
||||
//std::cout << "ydot " << ydot[neq-1] << std::endl;
|
||||
|
||||
// should we accept this?
|
||||
if (hnew_is_ok || iter == max_iters){
|
||||
if (hnew_is_ok || iter == max_iters) {
|
||||
hnew = hg;
|
||||
//if (iter == max_iters)
|
||||
// fprintf(stderr, "ERROR_HIN_MAX_ITERS\n");
|
||||
@ -455,11 +455,11 @@ int FixRxKokkos<DeviceType>::k_rkf45_h0 (const int neq, const double t, const do
|
||||
double hrat = hnew / hg;
|
||||
|
||||
// Accept this value ... the bias factor should bring it within range.
|
||||
if ( (hrat > 0.5) && (hrat < 2.0) )
|
||||
if ((hrat > 0.5) && (hrat < 2.0))
|
||||
hnew_is_ok = true;
|
||||
|
||||
// If y'' is still bad after a few iterations, just accept h and give up.
|
||||
if ( (iter > 1) && hrat > 2.0 ) {
|
||||
if ((iter > 1) && hrat > 2.0) {
|
||||
hnew = hg;
|
||||
hnew_is_ok = true;
|
||||
}
|
||||
@ -510,7 +510,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
|
||||
|
||||
double t = 0.0;
|
||||
|
||||
if (h < h_min){
|
||||
if (h < h_min) {
|
||||
//fprintf(stderr,"hin not implemented yet\n");
|
||||
//exit(-1);
|
||||
nfe = k_rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, userData);
|
||||
@ -530,7 +530,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
|
||||
// Estimate the solution error.
|
||||
// ... weighted 2-norm of the error.
|
||||
double err2 = 0.0;
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol);
|
||||
err2 += wterr * wterr;
|
||||
}
|
||||
@ -538,7 +538,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
|
||||
double err = fmax( uround, sqrt( err2 / double(nspecies) ));
|
||||
|
||||
// Accept the solution?
|
||||
if (err <= 1.0 || h <= h_min){
|
||||
if (err <= 1.0 || h <= h_min) {
|
||||
t += h;
|
||||
nst++;
|
||||
|
||||
@ -571,7 +571,7 @@ void FixRxKokkos<DeviceType>::k_rkf45(const int neq, const double t_stop, Vector
|
||||
nit++;
|
||||
nfe += 6;
|
||||
|
||||
if (maxIters && nit > maxIters){
|
||||
if (maxIters && nit > maxIters) {
|
||||
//fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
|
||||
counter.nFails ++;
|
||||
break;
|
||||
@ -643,7 +643,7 @@ void FixRxKokkos<DeviceType>::rkf45_step (const int neq, const double h, double
|
||||
// 1)
|
||||
rhs (0.0, y, f1, v_param);
|
||||
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
f1[k] *= h;
|
||||
ytmp[k] = y[k] + c21 * f1[k];
|
||||
}
|
||||
@ -651,7 +651,7 @@ void FixRxKokkos<DeviceType>::rkf45_step (const int neq, const double h, double
|
||||
// 2)
|
||||
rhs(0.0, ytmp, f2, v_param);
|
||||
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
f2[k] *= h;
|
||||
ytmp[k] = y[k] + c31 * f1[k] + c32 * f2[k];
|
||||
}
|
||||
@ -736,7 +736,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
|
||||
// compute ydot at t=t0
|
||||
rhs (t, y, ydot, v_params);
|
||||
|
||||
while(1)
|
||||
while (1)
|
||||
{
|
||||
// Estimate y'' with finite-difference ...
|
||||
|
||||
@ -748,7 +748,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
|
||||
|
||||
// Compute WRMS norm of y''
|
||||
double yddnrm = 0.0;
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
double ydd = (ydot1[k] - ydot[k]) / hg;
|
||||
double wterr = ydd / (relTol * fabs( y[k] ) + absTol);
|
||||
yddnrm += wterr * wterr;
|
||||
@ -760,7 +760,7 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
|
||||
//std::cout << "ydot " << ydot[neq-1] << std::endl;
|
||||
|
||||
// should we accept this?
|
||||
if (hnew_is_ok || iter == max_iters){
|
||||
if (hnew_is_ok || iter == max_iters) {
|
||||
hnew = hg;
|
||||
if (iter == max_iters)
|
||||
fprintf(stderr, "ERROR_HIN_MAX_ITERS\n");
|
||||
@ -774,11 +774,11 @@ int FixRxKokkos<DeviceType>::rkf45_h0(const int neq, const double t, const doubl
|
||||
double hrat = hnew / hg;
|
||||
|
||||
// Accept this value ... the bias factor should bring it within range.
|
||||
if ( (hrat > 0.5) && (hrat < 2.0) )
|
||||
if ((hrat > 0.5) && (hrat < 2.0))
|
||||
hnew_is_ok = true;
|
||||
|
||||
// If y'' is still bad after a few iterations, just accept h and give up.
|
||||
if ( (iter > 1) && hrat > 2.0 ) {
|
||||
if ((iter > 1) && hrat > 2.0) {
|
||||
hnew = hg;
|
||||
hnew_is_ok = true;
|
||||
}
|
||||
@ -827,7 +827,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
|
||||
|
||||
double t = 0.0;
|
||||
|
||||
if (h < h_min){
|
||||
if (h < h_min) {
|
||||
//fprintf(stderr,"hin not implemented yet\n");
|
||||
//exit(-1);
|
||||
nfe = rkf45_h0 (neq, t, t_stop, h_min, h_max, h, y, rwork, v_param);
|
||||
@ -836,7 +836,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
|
||||
//printf("t= %e t_stop= %e h= %e\n", t, t_stop, h);
|
||||
|
||||
// Integrate until we reach the end time.
|
||||
while (fabs(t - t_stop) > tround){
|
||||
while (fabs(t - t_stop) > tround) {
|
||||
double *yout = rwork;
|
||||
double *eout = yout + neq;
|
||||
|
||||
@ -846,7 +846,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
|
||||
// Estimate the solution error.
|
||||
// ... weighted 2-norm of the error.
|
||||
double err2 = 0.0;
|
||||
for (int k = 0; k < neq; k++){
|
||||
for (int k = 0; k < neq; k++) {
|
||||
const double wterr = eout[k] / (relTol * fabs( y[k] ) + absTol);
|
||||
err2 += wterr * wterr;
|
||||
}
|
||||
@ -854,7 +854,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
|
||||
double err = fmax( uround, sqrt( err2 / double(nspecies) ));
|
||||
|
||||
// Accept the solution?
|
||||
if (err <= 1.0 || h <= h_min){
|
||||
if (err <= 1.0 || h <= h_min) {
|
||||
t += h;
|
||||
nst++;
|
||||
|
||||
@ -887,7 +887,7 @@ void FixRxKokkos<DeviceType>::rkf45(const int neq, const double t_stop, double *
|
||||
nit++;
|
||||
nfe += 6;
|
||||
|
||||
if (maxIters && nit > maxIters){
|
||||
if (maxIters && nit > maxIters) {
|
||||
//fprintf(stderr,"atom[%d] took too many iterations in rkf45 %d %e %e\n", id, nit, t, t_stop);
|
||||
counter.nFails ++;
|
||||
break;
|
||||
@ -928,14 +928,14 @@ int FixRxKokkos<DeviceType>::rhs_dense(double /*t*/, const double *y, double *dy
|
||||
//const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
|
||||
//const int nspecies = atom->nspecies_dpd;
|
||||
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
for (int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
dydt[ispecies] = 0.0;
|
||||
|
||||
// Construct the reaction rate laws
|
||||
for(int jrxn=0; jrxn<nreactions; jrxn++){
|
||||
for (int jrxn=0; jrxn<nreactions; jrxn++) {
|
||||
double rxnRateLawForward = kFor[jrxn];
|
||||
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++){
|
||||
for (int ispecies=0; ispecies<nspecies; ispecies++) {
|
||||
const double concentration = y[ispecies]/VDPD;
|
||||
rxnRateLawForward *= pow( concentration, d_kineticsData.stoichReactants(jrxn,ispecies) );
|
||||
//rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
|
||||
@ -944,8 +944,8 @@ int FixRxKokkos<DeviceType>::rhs_dense(double /*t*/, const double *y, double *dy
|
||||
}
|
||||
|
||||
// Construct the reaction rates for each species
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
for(int jrxn=0; jrxn<nreactions; jrxn++)
|
||||
for (int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
for (int jrxn=0; jrxn<nreactions; jrxn++)
|
||||
{
|
||||
dydt[ispecies] += d_kineticsData.stoich(jrxn,ispecies) *VDPD*rxnRateLaw[jrxn];
|
||||
//dydt[ispecies] += stoich[jrxn][ispecies]*VDPD*rxnRateLaw[jrxn];
|
||||
@ -982,9 +982,9 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
|
||||
for (int i = 0; i < nreactions; ++i)
|
||||
{
|
||||
double rxnRateLawForward;
|
||||
if (isIntegral(i)){
|
||||
if (isIntegral(i)) {
|
||||
rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) );
|
||||
for (int kk = 1; kk < maxReactants; ++kk){
|
||||
for (int kk = 1; kk < maxReactants; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -992,7 +992,7 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
|
||||
}
|
||||
} else {
|
||||
rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) );
|
||||
for (int kk = 1; kk < maxReactants; ++kk){
|
||||
for (int kk = 1; kk < maxReactants; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -1008,10 +1008,10 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
|
||||
for (int k = 0; k < nspecies; ++k)
|
||||
dydt[k] = 0.0;
|
||||
|
||||
for (int i = 0; i < nreactions; ++i){
|
||||
for (int i = 0; i < nreactions; ++i) {
|
||||
// Reactants ...
|
||||
dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i];
|
||||
for (int kk = 1; kk < maxReactants; ++kk){
|
||||
for (int kk = 1; kk < maxReactants; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -1020,7 +1020,7 @@ int FixRxKokkos<DeviceType>::rhs_sparse(double /*t*/, const double *y, double *d
|
||||
|
||||
// Products ...
|
||||
dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i];
|
||||
for (int kk = maxReactants+1; kk < maxSpecies; ++kk){
|
||||
for (int kk = maxReactants+1; kk < maxSpecies; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -1074,14 +1074,14 @@ int FixRxKokkos<DeviceType>::k_rhs_dense(double /*t*/, const VectorType& y, Vect
|
||||
//const double VDPD = domain->xprd * domain->yprd * domain->zprd / atom->natoms;
|
||||
//const int nspecies = atom->nspecies_dpd;
|
||||
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
for (int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
dydt[ispecies] = 0.0;
|
||||
|
||||
// Construct the reaction rate laws
|
||||
for(int jrxn=0; jrxn<nreactions; jrxn++){
|
||||
for (int jrxn=0; jrxn<nreactions; jrxn++) {
|
||||
double rxnRateLawForward = kFor[jrxn];
|
||||
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++){
|
||||
for (int ispecies=0; ispecies<nspecies; ispecies++) {
|
||||
const double concentration = y[ispecies]/VDPD;
|
||||
rxnRateLawForward *= pow( concentration, d_kineticsData.stoichReactants(jrxn,ispecies) );
|
||||
//rxnRateLawForward *= pow(concentration,stoichReactants[jrxn][ispecies]);
|
||||
@ -1090,8 +1090,8 @@ int FixRxKokkos<DeviceType>::k_rhs_dense(double /*t*/, const VectorType& y, Vect
|
||||
}
|
||||
|
||||
// Construct the reaction rates for each species
|
||||
for(int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
for(int jrxn=0; jrxn<nreactions; jrxn++)
|
||||
for (int ispecies=0; ispecies<nspecies; ispecies++)
|
||||
for (int jrxn=0; jrxn<nreactions; jrxn++)
|
||||
{
|
||||
dydt[ispecies] += d_kineticsData.stoich(jrxn,ispecies) *VDPD*rxnRateLaw[jrxn];
|
||||
//dydt[ispecies] += stoich[jrxn][ispecies]*VDPD*rxnRateLaw[jrxn];
|
||||
@ -1129,9 +1129,9 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
|
||||
for (int i = 0; i < nreactions; ++i)
|
||||
{
|
||||
double rxnRateLawForward;
|
||||
if (isIntegral(i)){
|
||||
if (isIntegral(i)) {
|
||||
rxnRateLawForward = kFor[i] * powint( conc[ nuk(i,0) ], inu(i,0) );
|
||||
for (int kk = 1; kk < maxReactants; ++kk){
|
||||
for (int kk = 1; kk < maxReactants; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -1139,7 +1139,7 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
|
||||
}
|
||||
} else {
|
||||
rxnRateLawForward = kFor[i] * pow( conc[ nuk(i,0) ], nu(i,0) );
|
||||
for (int kk = 1; kk < maxReactants; ++kk){
|
||||
for (int kk = 1; kk < maxReactants; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -1155,10 +1155,10 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
|
||||
for (int k = 0; k < nspecies; ++k)
|
||||
dydt[k] = 0.0;
|
||||
|
||||
for (int i = 0; i < nreactions; ++i){
|
||||
for (int i = 0; i < nreactions; ++i) {
|
||||
// Reactants ...
|
||||
dydt[ nuk(i,0) ] -= nu(i,0) * rxnRateLaw[i];
|
||||
for (int kk = 1; kk < maxReactants; ++kk){
|
||||
for (int kk = 1; kk < maxReactants; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -1167,7 +1167,7 @@ int FixRxKokkos<DeviceType>::k_rhs_sparse(double /*t*/, const VectorType& y, Vec
|
||||
|
||||
// Products ...
|
||||
dydt[ nuk(i,maxReactants) ] += nu(i,maxReactants) * rxnRateLaw[i];
|
||||
for (int kk = maxReactants+1; kk < maxSpecies; ++kk){
|
||||
for (int kk = maxReactants+1; kk < maxSpecies; ++kk) {
|
||||
const int k = nuk(i,kk);
|
||||
if (k == SparseKinetics_invalidIndex) break;
|
||||
//if (k != SparseKinetics_invalidIndex)
|
||||
@ -1686,7 +1686,7 @@ void FixRxKokkos<DeviceType>::solve_reactions(const int /*vflag*/, const bool is
|
||||
// getElapsedTime(timer_ODE, timer_stop), nlocal, TotalCounters.nFuncs, TotalCounters.nSteps);
|
||||
|
||||
// Warn the user if a failure was detected in the ODE solver.
|
||||
if (TotalCounters.nFails > 0){
|
||||
if (TotalCounters.nFails > 0) {
|
||||
char sbuf[128];
|
||||
sprintf(sbuf,"in FixRX::pre_force, ODE solver failed for %d atoms.", TotalCounters.nFails);
|
||||
error->warning(FLERR, sbuf);
|
||||
@ -1752,7 +1752,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
double min_per_proc[numCounters];
|
||||
|
||||
// Compute counters per dpd time-step.
|
||||
for (int i = 0; i < numCounters; ++i){
|
||||
for (int i = 0; i < numCounters; ++i) {
|
||||
my_vals[i] = this->diagnosticCounter[i] / nTimes;
|
||||
//printf("my sum[%d] = %f %d\n", i, my_vals[i], comm->me);
|
||||
}
|
||||
@ -1767,7 +1767,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
double avg_per_atom[numCounters], avg_per_proc[numCounters];
|
||||
|
||||
// Averages per-ODE and per-proc per time-step.
|
||||
for (int i = 0; i < numCounters; ++i){
|
||||
for (int i = 0; i < numCounters; ++i) {
|
||||
avg_per_atom[i] = sums[i] / nODEs;
|
||||
avg_per_proc[i] = sums[i] / comm->nprocs;
|
||||
}
|
||||
@ -1775,7 +1775,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
// Sum up the differences from each task.
|
||||
double sum_sq[2*numCounters];
|
||||
double my_sum_sq[2*numCounters];
|
||||
for (int i = 0; i < numCounters; ++i){
|
||||
for (int i = 0; i < numCounters; ++i) {
|
||||
double diff_i = my_vals[i] - avg_per_proc[i];
|
||||
my_sum_sq[i] = diff_i * diff_i;
|
||||
}
|
||||
@ -1835,7 +1835,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
TimerType timer_stop = getTimeStamp();
|
||||
double time_local = getElapsedTime( timer_start, timer_stop );
|
||||
|
||||
if (comm->me == 0){
|
||||
if (comm->me == 0) {
|
||||
char smesg[128];
|
||||
|
||||
#define print_mesg(smesg) {\
|
||||
@ -1849,7 +1849,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
print_mesg(smesg);
|
||||
|
||||
// only valid for single time-step!
|
||||
if (diagnosticFrequency == 1){
|
||||
if (diagnosticFrequency == 1) {
|
||||
double rms_per_ODE[numCounters];
|
||||
for (int i = 0; i < numCounters; ++i)
|
||||
rms_per_ODE[i] = sqrt( sum_sq[i+numCounters] / nODEs );
|
||||
@ -1867,7 +1867,7 @@ void FixRxKokkos<DeviceType>::odeDiagnostics(void)
|
||||
sprintf(smesg, " AVG per Proc : %-12.5g | %-12.5g | %-12.5g | %-12.5g", avg_per_proc[StepSum], avg_per_proc[FuncSum], avg_per_proc[TimeSum], avg_per_proc[AtomSum]);
|
||||
print_mesg(smesg);
|
||||
|
||||
if (comm->nprocs > 1){
|
||||
if (comm->nprocs > 1) {
|
||||
double rms_per_proc[numCounters];
|
||||
for (int i = 0; i < numCounters; ++i)
|
||||
rms_per_proc[i] = sqrt( sum_sq[i] / comm->nprocs );
|
||||
@ -2206,7 +2206,7 @@ int FixRxKokkos<DeviceType>::pack_forward_comm(int n, int *list, double *buf, in
|
||||
int m = 0;
|
||||
for (int ii = 0; ii < n; ii++) {
|
||||
const int jj = list[ii];
|
||||
for(int ispecies = 0; ispecies < nspecies; ispecies++){
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++) {
|
||||
buf[m++] = h_dvector(ispecies,jj);
|
||||
buf[m++] = h_dvector(ispecies+nspecies,jj);
|
||||
}
|
||||
@ -2228,8 +2228,8 @@ void FixRxKokkos<DeviceType>::unpack_forward_comm(int n, int first, double *buf)
|
||||
|
||||
const int last = first + n ;
|
||||
int m = 0;
|
||||
for (int ii = first; ii < last; ii++){
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++){
|
||||
for (int ii = first; ii < last; ii++) {
|
||||
for (int ispecies = 0; ispecies < nspecies; ispecies++) {
|
||||
h_dvector(ispecies,ii) = buf[m++];
|
||||
h_dvector(ispecies+nspecies,ii) = buf[m++];
|
||||
}
|
||||
|
||||
@ -34,7 +34,7 @@ struct s_double_3 {
|
||||
d0 = d1 = d2 = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_double_3& operator+=(const s_double_3 &rhs){
|
||||
s_double_3& operator+=(const s_double_3 &rhs) {
|
||||
d0 += rhs.d0;
|
||||
d1 += rhs.d1;
|
||||
d2 += rhs.d2;
|
||||
|
||||
@ -79,7 +79,7 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
|
||||
// k_pairDPD = (PairDPDfdtKokkos *) force->pair_match("dpd/fdt",1);
|
||||
k_pairDPDE = dynamic_cast<PairDPDfdtEnergyKokkos<DeviceType> *>(force->pair_match("dpd/fdt/energy",0));
|
||||
|
||||
// if(k_pairDPDE){
|
||||
// if (k_pairDPDE) {
|
||||
comm_forward = 3;
|
||||
comm_reverse = 5;
|
||||
// } else {
|
||||
@ -88,19 +88,14 @@ FixShardlowKokkos<DeviceType>::FixShardlowKokkos(LAMMPS *lmp, int narg, char **a
|
||||
// }
|
||||
|
||||
|
||||
if(/* k_pairDPD == nullptr &&*/ k_pairDPDE == nullptr)
|
||||
if (/* k_pairDPD == nullptr &&*/ k_pairDPDE == nullptr)
|
||||
error->all(FLERR,"Must use pair_style "/*"dpd/fdt/kk or "*/"dpd/fdt/energy/kk with fix shardlow/kk");
|
||||
|
||||
#ifdef DEBUG_SSA_PAIR_CT
|
||||
d_counters = typename AT::t_int_2d("FixShardlowKokkos::d_counters", 2, 3);
|
||||
d_hist = typename AT::t_int_1d("FixShardlowKokkos::d_hist", 32);
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_counters = Kokkos::create_mirror_view(d_counters);
|
||||
h_hist = Kokkos::create_mirror_view(d_hist);
|
||||
#else
|
||||
h_counters = d_counters;
|
||||
h_hist = d_hist;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -160,7 +155,7 @@ void FixShardlowKokkos<DeviceType>::init()
|
||||
|
||||
k_params.h_view(j,i) = k_params.h_view(i,j);
|
||||
|
||||
if(i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
if (i<MAX_TYPES_STACKPARAMS+1 && j<MAX_TYPES_STACKPARAMS+1) {
|
||||
m_params[i][j] = m_params[j][i] = k_params.h_view(i,j);
|
||||
m_cutsq[j][i] = m_cutsq[i][j] = k_pairDPDE->k_cutsq.h_view(i,j);
|
||||
}
|
||||
@ -196,7 +191,7 @@ void FixShardlowKokkos<DeviceType>::pre_neighbor()
|
||||
if (domain->triclinic)
|
||||
error->all(FLERR,"Fix shardlow does not yet support triclinic geometries");
|
||||
|
||||
if(rcut >= bbx || rcut >= bby || rcut>= bbz )
|
||||
if (rcut >= bbx || rcut >= bby || rcut>= bbz )
|
||||
{
|
||||
char fmt[] = {"Shardlow algorithm requires sub-domain length > 2*(rcut+skin). Either reduce the number of processors requested, or change the cutoff/skin: rcut= %e bbx= %e bby= %e bbz= %e\n"};
|
||||
char *msg = (char *) malloc(sizeof(fmt) + 4*15);
|
||||
@ -231,7 +226,7 @@ void FixShardlowKokkos<DeviceType>::pre_neighbor()
|
||||
massPerI = false;
|
||||
masses = atomKK->k_mass.view<DeviceType>();
|
||||
}
|
||||
// if(k_pairDPDE){
|
||||
// if (k_pairDPDE) {
|
||||
dpdTheta = atomKK->k_dpdTheta.view<DeviceType>();
|
||||
|
||||
//} else {
|
||||
@ -632,7 +627,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
for (workPhase = 0; workPhase < ssa_phaseCt; ++workPhase) {
|
||||
int workItemCt = h_ssa_phaseLen[workPhase];
|
||||
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
if (atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<false> >(0,workItemCt),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDE<true> >(0,workItemCt),*this);
|
||||
@ -649,7 +644,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
comm->forward_comm_fix(this);
|
||||
atomKK->modified(Host,V_MASK);
|
||||
|
||||
if(k_pairDPDE){
|
||||
if (k_pairDPDE) {
|
||||
// Zero out the ghosts' uCond & uMech to be used as delta accumulators
|
||||
// memset(&(atom->uCond[nlocal]), 0, sizeof(double)*nghost);
|
||||
// memset(&(atom->uMech[nlocal]), 0, sizeof(double)*nghost);
|
||||
@ -667,7 +662,7 @@ void FixShardlowKokkos<DeviceType>::initial_integrate(int /*vflag*/)
|
||||
|
||||
// process neighbors in this AIR
|
||||
atomKK->sync(execution_space,X_MASK | V_MASK | TYPE_MASK | RMASS_MASK | UCOND_MASK | UMECH_MASK | DPDTHETA_MASK);
|
||||
if(atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
if (atom->ntypes > MAX_TYPES_STACKPARAMS)
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<false> >(0,workItemCt),*this);
|
||||
else
|
||||
Kokkos::parallel_for(Kokkos::RangePolicy<DeviceType,TagFixShardlowSSAUpdateDPDEGhost<true> >(0,workItemCt),*this);
|
||||
@ -759,7 +754,7 @@ int FixShardlowKokkos<DeviceType>::pack_reverse_comm(int n, int first, double *b
|
||||
buf[m++] = h_v(i, 0) - h_v_t0(i - nlocal, 0);
|
||||
buf[m++] = h_v(i, 1) - h_v_t0(i - nlocal, 1);
|
||||
buf[m++] = h_v(i, 2) - h_v_t0(i - nlocal, 2);
|
||||
if(k_pairDPDE){
|
||||
if (k_pairDPDE) {
|
||||
buf[m++] = h_uCond(i); // for ghosts, this is an accumulated delta
|
||||
buf[m++] = h_uMech(i); // for ghosts, this is an accumulated delta
|
||||
}
|
||||
@ -781,7 +776,7 @@ void FixShardlowKokkos<DeviceType>::unpack_reverse_comm(int n, int *list, double
|
||||
h_v(j, 0) += buf[m++];
|
||||
h_v(j, 1) += buf[m++];
|
||||
h_v(j, 2) += buf[m++];
|
||||
if(k_pairDPDE){
|
||||
if (k_pairDPDE) {
|
||||
h_uCond(j) += buf[m++]; // add in the accumulated delta
|
||||
h_uMech(j) += buf[m++]; // add in the accumulated delta
|
||||
}
|
||||
|
||||
@ -62,9 +62,9 @@ class FixShardlowKokkos : public FixShardlow {
|
||||
|
||||
struct params_ssa {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_ssa(){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
|
||||
params_ssa() {cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_ssa(int /*i*/){cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
|
||||
params_ssa(int /*i*/) {cutinv=FLT_MAX;halfsigma=0;kappa=0;alpha=0;};
|
||||
F_FLOAT cutinv,halfsigma,kappa,alpha;
|
||||
};
|
||||
|
||||
|
||||
@ -39,7 +39,7 @@
|
||||
GPU_AWARE_UNKNOWN
|
||||
#elif defined(KOKKOS_ENABLE_CUDA)
|
||||
|
||||
// OpenMPI supports detecting CUDA-aware MPI as of version 2.0.0
|
||||
// OpenMPI supports detecting GPU-aware MPI as of version 2.0.0
|
||||
|
||||
#if (OPEN_MPI)
|
||||
#if (OMPI_MAJOR_VERSION >= 2)
|
||||
@ -77,6 +77,8 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
exchange_comm_changed = 0;
|
||||
forward_comm_changed = 0;
|
||||
forward_pair_comm_changed = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
reverse_comm_changed = 0;
|
||||
|
||||
delete memory;
|
||||
@ -147,7 +149,7 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
if (ngpus > 1 && !set_flag)
|
||||
error->all(FLERR,"Could not determine local MPI rank for multiple "
|
||||
"GPUs with Kokkos CUDA because MPI library not recognized");
|
||||
"GPUs with Kokkos CUDA or HIP because MPI library not recognized");
|
||||
|
||||
} else if (strcmp(arg[iarg],"t") == 0 ||
|
||||
strcmp(arg[iarg],"threads") == 0) {
|
||||
@ -203,7 +205,10 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
neighflag = FULL;
|
||||
neighflag_qeq = FULL;
|
||||
newtonflag = 0;
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else {
|
||||
if (nthreads > 1) {
|
||||
@ -214,14 +219,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
neighflag_qeq = HALF;
|
||||
}
|
||||
newtonflag = 1;
|
||||
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
}
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
|
||||
// check and warn about CUDA-aware MPI availability when using multiple MPI tasks
|
||||
// change default only if we can safely detect that CUDA-aware MPI is not available
|
||||
// check and warn about GPU-aware MPI availability when using multiple MPI tasks
|
||||
// change default only if we can safely detect that GPU-aware MPI is not available
|
||||
|
||||
int nmpi = 0;
|
||||
MPI_Comm_size(world,&nmpi);
|
||||
@ -237,21 +245,21 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
gpu_aware_flag = 0;
|
||||
char* str;
|
||||
if ((str = getenv("OMPI_MCA_pml_pami_enable_cuda")))
|
||||
if((strcmp(str,"1") == 0)) {
|
||||
if ((strcmp(str,"1") == 0)) {
|
||||
have_gpu_aware = 1;
|
||||
gpu_aware_flag = 1;
|
||||
}
|
||||
|
||||
if (!gpu_aware_flag)
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling CUDA-aware MPI");
|
||||
error->warning(FLERR,"The Spectrum MPI '-gpu' flag is not set. Disabling GPU-aware MPI");
|
||||
}
|
||||
#endif
|
||||
|
||||
if (gpu_aware_flag == 1 && have_gpu_aware == 0) {
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Turning off CUDA-aware MPI since it is not detected, "
|
||||
"use '-pk kokkos cuda/aware on' to override");
|
||||
error->warning(FLERR,"Turning off GPU-aware MPI since it is not detected, "
|
||||
"use '-pk kokkos gpu/aware on' to override");
|
||||
gpu_aware_flag = 0;
|
||||
} else if (have_gpu_aware == -1) { // maybe we are dealing with MPICH, MVAPICH2 or some derivative?
|
||||
// MVAPICH2
|
||||
@ -264,17 +272,17 @@ KokkosLMP::KokkosLMP(LAMMPS *lmp, int narg, char **arg) : Pointers(lmp)
|
||||
|
||||
if (!gpu_aware_flag)
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling CUDA-aware MPI");
|
||||
error->warning(FLERR,"MVAPICH2 'MV2_USE_CUDA' environment variable is not set. Disabling GPU-aware MPI");
|
||||
// pure MPICH or some unsupported MPICH derivative
|
||||
#elif defined(MPICH) && !defined(MVAPICH2_VERSION)
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Detected MPICH. Disabling CUDA-aware MPI");
|
||||
error->warning(FLERR,"Detected MPICH. Disabling GPU-aware MPI");
|
||||
gpu_aware_flag = 0;
|
||||
#else
|
||||
if (me == 0)
|
||||
error->warning(FLERR,"Kokkos with CUDA assumes CUDA-aware MPI is available,"
|
||||
error->warning(FLERR,"Kokkos with CUDA or HIP assumes GPU-aware MPI is available,"
|
||||
" but cannot determine if this is the case\n try"
|
||||
" '-pk kokkos cuda/aware off' if getting segmentation faults");
|
||||
" '-pk kokkos gpu/aware off' if getting segmentation faults");
|
||||
|
||||
#endif
|
||||
} // if (-1 == have_gpu_aware)
|
||||
@ -340,12 +348,18 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 1;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 1;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 1;
|
||||
} else if (strcmp(arg[iarg+1],"device") == 0) {
|
||||
exchange_comm_classic = forward_comm_classic = reverse_comm_classic = 0;
|
||||
forward_pair_comm_classic = forward_fix_comm_classic = 0;
|
||||
|
||||
exchange_comm_on_host = forward_comm_on_host = reverse_comm_on_host = 0;
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
iarg += 2;
|
||||
@ -373,9 +387,25 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
forward_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/pair/forward") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) forward_pair_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) forward_pair_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"device") == 0) forward_pair_comm_classic = 0;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
forward_pair_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/fix/forward") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) forward_fix_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) forward_fix_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"device") == 0) forward_fix_comm_classic = 0;
|
||||
else error->all(FLERR,"Illegal package kokkos command");
|
||||
forward_fix_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"comm/reverse") == 0) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"no") == 0) reverse_comm_classic = 1;
|
||||
else if (strcmp(arg[iarg+1],"host") == 0) {
|
||||
reverse_comm_classic = 0;
|
||||
reverse_comm_on_host = 1;
|
||||
@ -385,7 +415,8 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
} else error->all(FLERR,"Illegal package kokkos command");
|
||||
reverse_comm_changed = 0;
|
||||
iarg += 2;
|
||||
} else if (strcmp(arg[iarg],"cuda/aware") == 0) {
|
||||
} else if ((strcmp(arg[iarg],"gpu/aware") == 0)
|
||||
|| (strcmp(arg[iarg],"cuda/aware") == 0)) {
|
||||
if (iarg+2 > narg) error->all(FLERR,"Illegal package kokkos command");
|
||||
if (strcmp(arg[iarg+1],"off") == 0) gpu_aware_flag = 0;
|
||||
else if (strcmp(arg[iarg+1],"on") == 0) gpu_aware_flag = 1;
|
||||
@ -425,7 +456,7 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
int nmpi = 0;
|
||||
MPI_Comm_size(world,&nmpi);
|
||||
|
||||
// if "cuda/aware off" or "pair/only on", and "comm device", change to "comm no"
|
||||
// if "gpu/aware off" or "pair/only on", and "comm device", change to "comm no"
|
||||
|
||||
if ((!gpu_aware_flag && nmpi > 1) || pair_only_flag) {
|
||||
if (exchange_comm_classic == 0 && exchange_comm_on_host == 0) {
|
||||
@ -436,13 +467,21 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
forward_comm_classic = 1;
|
||||
forward_comm_changed = 1;
|
||||
}
|
||||
if (forward_pair_comm_classic == 0) {
|
||||
forward_pair_comm_classic = 1;
|
||||
forward_pair_comm_changed = 1;
|
||||
}
|
||||
if (forward_fix_comm_classic == 0) {
|
||||
forward_fix_comm_classic = 1;
|
||||
forward_fix_comm_changed = 1;
|
||||
}
|
||||
if (reverse_comm_classic == 0 && reverse_comm_on_host == 0) {
|
||||
reverse_comm_classic = 1;
|
||||
reverse_comm_changed = 1;
|
||||
}
|
||||
}
|
||||
|
||||
// if "cuda/aware on" and "pair/only off", and comm flags were changed previously, change them back
|
||||
// if "gpu/aware on" and "pair/only off", and comm flags were changed previously, change them back
|
||||
|
||||
if (gpu_aware_flag && !pair_only_flag) {
|
||||
if (exchange_comm_changed) {
|
||||
@ -453,6 +492,14 @@ void KokkosLMP::accelerator(int narg, char **arg)
|
||||
forward_comm_classic = 0;
|
||||
forward_comm_changed = 0;
|
||||
}
|
||||
if (forward_pair_comm_changed) {
|
||||
forward_pair_comm_classic = 0;
|
||||
forward_pair_comm_changed = 0;
|
||||
}
|
||||
if (forward_fix_comm_changed) {
|
||||
forward_fix_comm_classic = 0;
|
||||
forward_fix_comm_changed = 0;
|
||||
}
|
||||
if (reverse_comm_changed) {
|
||||
reverse_comm_classic = 0;
|
||||
reverse_comm_changed = 0;
|
||||
@ -490,25 +537,15 @@ int KokkosLMP::neigh_count(int m)
|
||||
if (nk->lists[m]->execution_space == Host) {
|
||||
NeighListKokkos<LMPHostType>* nlistKK = (NeighListKokkos<LMPHostType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
#else
|
||||
h_ilist = nlistKK->d_ilist;
|
||||
h_numneigh = nlistKK->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
} else if (nk->lists[m]->execution_space == Device) {
|
||||
NeighListKokkos<LMPDeviceType>* nlistKK = (NeighListKokkos<LMPDeviceType>*) nk->lists[m];
|
||||
inum = nlistKK->inum;
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_ilist = Kokkos::create_mirror_view(nlistKK->d_ilist);
|
||||
h_numneigh = Kokkos::create_mirror_view(nlistKK->d_numneigh);
|
||||
#else
|
||||
h_ilist = nlistKK->d_ilist;
|
||||
h_numneigh = nlistKK->d_numneigh;
|
||||
#endif
|
||||
Kokkos::deep_copy(h_ilist,nlistKK->d_ilist);
|
||||
Kokkos::deep_copy(h_numneigh,nlistKK->d_numneigh);
|
||||
}
|
||||
|
||||
@ -28,12 +28,16 @@ class KokkosLMP : protected Pointers {
|
||||
int neighflag_qeq_set;
|
||||
int exchange_comm_classic;
|
||||
int forward_comm_classic;
|
||||
int forward_pair_comm_classic;
|
||||
int forward_fix_comm_classic;
|
||||
int reverse_comm_classic;
|
||||
int exchange_comm_on_host;
|
||||
int forward_comm_on_host;
|
||||
int reverse_comm_on_host;
|
||||
int exchange_comm_changed;
|
||||
int forward_comm_changed;
|
||||
int forward_pair_comm_changed;
|
||||
int forward_fix_comm_changed;
|
||||
int reverse_comm_changed;
|
||||
int nthreads,ngpus;
|
||||
int numa;
|
||||
|
||||
@ -1068,28 +1068,42 @@ void memset_kokkos (ViewType &view) {
|
||||
|
||||
struct params_lj_coul {
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_lj_coul(){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
params_lj_coul() {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
params_lj_coul(int /*i*/){cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
params_lj_coul(int /*i*/) {cut_ljsq=0;cut_coulsq=0;lj1=0;lj2=0;lj3=0;lj4=0;offset=0;};
|
||||
F_FLOAT cut_ljsq,cut_coulsq,lj1,lj2,lj3,lj4,offset;
|
||||
};
|
||||
|
||||
// Pair SNAP
|
||||
|
||||
#define SNAP_KOKKOS_REAL double
|
||||
#define SNAP_KOKKOS_HOST_VECLEN 1
|
||||
|
||||
#ifdef LMP_KOKKOS_GPU
|
||||
#define SNAP_KOKKOS_DEVICE_VECLEN 32
|
||||
#else
|
||||
#define SNAP_KOKKOS_DEVICE_VECLEN 1
|
||||
#endif
|
||||
|
||||
|
||||
// intentional: SNAreal/complex gets reused beyond SNAP
|
||||
typedef double SNAreal;
|
||||
|
||||
//typedef struct { SNAreal re, im; } SNAcomplex;
|
||||
template <typename real>
|
||||
struct alignas(2*sizeof(real)) SNAComplex
|
||||
template <typename real_type_>
|
||||
struct alignas(2*sizeof(real_type_)) SNAComplex
|
||||
{
|
||||
real re,im;
|
||||
using real_type = real_type_;
|
||||
using complex = SNAComplex<real_type>;
|
||||
real_type re,im;
|
||||
|
||||
SNAComplex() = default;
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex()
|
||||
: re(static_cast<real_type>(0.)), im(static_cast<real_type>(0.)) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re)
|
||||
: re(re), im(static_cast<real>(0.)) { ; }
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re)
|
||||
: re(re), im(static_cast<real_type>(0.)) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real re, real im)
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(real_type re, real_type im)
|
||||
: re(re), im(im) { ; }
|
||||
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex(const SNAComplex& other)
|
||||
@ -1117,27 +1131,24 @@ struct alignas(2*sizeof(real)) SNAComplex
|
||||
return *this;
|
||||
}
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr complex zero() { return complex(static_cast<real_type>(0.), static_cast<real_type>(0.)); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
static constexpr complex one() { return complex(static_cast<real_type>(1.), static_cast<real_type>(0.)); }
|
||||
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
const complex conj() { return complex(re, -im); }
|
||||
|
||||
};
|
||||
|
||||
template <typename real>
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real> operator*(const real& r, const SNAComplex<real>& self) {
|
||||
return SNAComplex<real>(r*self.re, r*self.im);
|
||||
template <typename real_type>
|
||||
KOKKOS_FORCEINLINE_FUNCTION SNAComplex<real_type> operator*(const real_type& r, const SNAComplex<real_type>& self) {
|
||||
return SNAComplex<real_type>(r*self.re, r*self.im);
|
||||
}
|
||||
|
||||
typedef SNAComplex<SNAreal> SNAcomplex;
|
||||
|
||||
// Cayley-Klein pack
|
||||
// Can guarantee it's aligned to 2 complex
|
||||
struct alignas(32) CayleyKleinPack {
|
||||
|
||||
SNAcomplex a, b;
|
||||
SNAcomplex da[3], db[3];
|
||||
SNAreal sfac;
|
||||
SNAreal dsfacu[3];
|
||||
|
||||
};
|
||||
|
||||
|
||||
#if defined(KOKKOS_ENABLE_CXX11)
|
||||
#undef ISFINITE
|
||||
#define ISFINITE(x) std::isfinite(x)
|
||||
|
||||
@ -46,11 +46,7 @@ template <typename TYPE, typename HTYPE>
|
||||
const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1);
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
array = h_data.data();
|
||||
return data;
|
||||
}
|
||||
@ -61,11 +57,7 @@ template <typename TYPE, typename HTYPE>
|
||||
int n1, const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1);
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
return data;
|
||||
}
|
||||
|
||||
@ -100,7 +92,7 @@ void destroy_kokkos(TYPE data, typename TYPE::value_type* &array)
|
||||
template <typename TYPE>
|
||||
TYPE destroy_kokkos(TYPE &data)
|
||||
{
|
||||
/*if(data.data()!=nullptr)
|
||||
/*if (data.data()!=nullptr)
|
||||
free(data.data());*/
|
||||
data = TYPE();
|
||||
return data;
|
||||
@ -167,11 +159,7 @@ template <typename TYPE, typename HTYPE>
|
||||
const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1,n2);
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
return data;
|
||||
}
|
||||
|
||||
@ -185,7 +173,7 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
|
||||
bigint n = 0;
|
||||
for (int i = 0; i < n1; i++) {
|
||||
if(n2==0)
|
||||
if (n2==0)
|
||||
array[i] = nullptr;
|
||||
else
|
||||
array[i] = &data.h_view(i,0);
|
||||
@ -200,17 +188,13 @@ template <typename TYPE, typename HTYPE>
|
||||
const char *name)
|
||||
{
|
||||
data = TYPE(std::string(name),n1,n2);
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_data = Kokkos::create_mirror_view(data);
|
||||
#else
|
||||
h_data = data;
|
||||
#endif
|
||||
bigint nbytes = ((bigint) sizeof(typename TYPE::value_type *)) * n1;
|
||||
array = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
|
||||
bigint n = 0;
|
||||
for (int i = 0; i < n1; i++) {
|
||||
if(n2==0)
|
||||
if (n2==0)
|
||||
array[i] = nullptr;
|
||||
else
|
||||
array[i] = &h_data(i,0);
|
||||
@ -234,7 +218,7 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
array = (typename TYPE::value_type**) srealloc(array,nbytes,name);
|
||||
|
||||
for (int i = 0; i < n1; i++)
|
||||
if(n2==0)
|
||||
if (n2==0)
|
||||
array[i] = nullptr;
|
||||
else
|
||||
array[i] = &data.h_view(i,0);
|
||||
@ -251,7 +235,7 @@ TYPE create_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
array = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
|
||||
for (int i = 0; i < n1; i++)
|
||||
if(data.h_view.extent(1)==0)
|
||||
if (data.h_view.extent(1)==0)
|
||||
array[i] = nullptr;
|
||||
else
|
||||
array[i] = &data.h_view(i,0);
|
||||
@ -271,7 +255,7 @@ TYPE grow_kokkos(TYPE &data, typename TYPE::value_type **&array,
|
||||
array = (typename TYPE::value_type **) smalloc(nbytes,name);
|
||||
|
||||
for (int i = 0; i < n1; i++)
|
||||
if(data.h_view.extent(1)==0)
|
||||
if (data.h_view.extent(1)==0)
|
||||
array[i] = nullptr;
|
||||
else
|
||||
array[i] = &data.h_view(i,0);
|
||||
|
||||
@ -25,7 +25,7 @@ namespace LAMMPS_NS {
|
||||
d0 = d1 = 0.0;
|
||||
}
|
||||
KOKKOS_INLINE_FUNCTION
|
||||
s_double2& operator+=(const s_double2 &rhs){
|
||||
s_double2& operator+=(const s_double2 &rhs) {
|
||||
d0 += rhs.d0;
|
||||
d1 += rhs.d1;
|
||||
return *this;
|
||||
|
||||
@ -30,11 +30,7 @@ NBinKokkos<DeviceType>::NBinKokkos(LAMMPS *lmp) : NBinStandard(lmp) {
|
||||
atoms_per_bin = 16;
|
||||
|
||||
d_resize = typename AT::t_int_scalar("NeighborKokkosFunctor::resize");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(d_resize);
|
||||
#else
|
||||
h_resize = d_resize;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
|
||||
kokkos = 1;
|
||||
@ -92,7 +88,7 @@ void NBinKokkos<DeviceType>::bin_atoms()
|
||||
|
||||
h_resize() = 1;
|
||||
|
||||
while(h_resize() > 0) {
|
||||
while (h_resize() > 0) {
|
||||
h_resize() = 0;
|
||||
deep_copy(d_resize, h_resize);
|
||||
|
||||
@ -111,7 +107,7 @@ void NBinKokkos<DeviceType>::bin_atoms()
|
||||
Kokkos::parallel_for(atom->nlocal+atom->nghost, f);
|
||||
|
||||
deep_copy(h_resize, d_resize);
|
||||
if(h_resize()) {
|
||||
if (h_resize()) {
|
||||
|
||||
atoms_per_bin += 16;
|
||||
k_bins = DAT::tdual_int_2d("bins", mbins, atoms_per_bin);
|
||||
@ -135,7 +131,7 @@ void NBinKokkos<DeviceType>::binatomsItem(const int &i) const
|
||||
|
||||
atom2bin(i) = ibin;
|
||||
const int ac = Kokkos::atomic_fetch_add(&bincount[ibin], (int)1);
|
||||
if(ac < (int)bins.extent(1)) {
|
||||
if (ac < (int)bins.extent(1)) {
|
||||
bins(ibin, ac) = i;
|
||||
} else {
|
||||
d_resize() = 1;
|
||||
|
||||
@ -41,7 +41,6 @@ NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
|
||||
d_lbinxhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinxhi");
|
||||
d_lbinyhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinyhi");
|
||||
d_lbinzhi = typename AT::t_int_scalar("NBinSSAKokkos::d_lbinzhi");
|
||||
#ifndef KOKKOS_USE_CUDA_UVM
|
||||
h_resize = Kokkos::create_mirror_view(d_resize);
|
||||
h_lbinxlo = Kokkos::create_mirror_view(d_lbinxlo);
|
||||
h_lbinylo = Kokkos::create_mirror_view(d_lbinylo);
|
||||
@ -49,15 +48,6 @@ NBinSSAKokkos<DeviceType>::NBinSSAKokkos(LAMMPS *lmp) : NBinStandard(lmp)
|
||||
h_lbinxhi = Kokkos::create_mirror_view(d_lbinxhi);
|
||||
h_lbinyhi = Kokkos::create_mirror_view(d_lbinyhi);
|
||||
h_lbinzhi = Kokkos::create_mirror_view(d_lbinzhi);
|
||||
#else
|
||||
h_resize = d_resize;
|
||||
h_lbinxlo = d_lbinxlo;
|
||||
h_lbinylo = d_lbinylo;
|
||||
h_lbinzlo = d_lbinzlo;
|
||||
h_lbinxhi = d_lbinxhi;
|
||||
h_lbinyhi = d_lbinyhi;
|
||||
h_lbinzhi = d_lbinzhi;
|
||||
#endif
|
||||
h_resize() = 1;
|
||||
|
||||
k_gbincount = DAT::tdual_int_1d("NBinSSAKokkos::gbincount",8);
|
||||
@ -156,7 +146,7 @@ void NBinSSAKokkos<DeviceType>::bin_atoms()
|
||||
|
||||
// actually bin the ghost atoms
|
||||
{
|
||||
if(ghosts_per_gbin > (int) gbins.extent(1)) {
|
||||
if (ghosts_per_gbin > (int) gbins.extent(1)) {
|
||||
k_gbins = DAT::tdual_int_2d("gbins", 8, ghosts_per_gbin);
|
||||
gbins = k_gbins.view<DeviceType>();
|
||||
}
|
||||
@ -293,7 +283,7 @@ void NBinSSAKokkos<DeviceType>::sortBin(
|
||||
child = parent*2+1; /* Find the next child */
|
||||
}
|
||||
gbins(ibin, parent) = t; /* We save t in the heap */
|
||||
} while(1);
|
||||
} while (1);
|
||||
}
|
||||
|
||||
namespace LAMMPS_NS {
|
||||
|
||||
@ -108,20 +108,20 @@ class NBinSSAKokkos : public NBinStandard {
|
||||
if (y >= subhi_[1]) iy = 1;
|
||||
if (x < sublo_[0]) ix = -1;
|
||||
if (x >= subhi_[0]) ix = 1;
|
||||
if(iz < 0){
|
||||
if (iz < 0) {
|
||||
return -1;
|
||||
} else if(iz == 0){
|
||||
if( iy<0 ) return -1; // bottom left/middle/right
|
||||
if( (iy==0) && (ix<0) ) return -1; // left atoms
|
||||
if( (iy==0) && (ix==0) ) return 0; // Locally owned atoms
|
||||
if( (iy==0) && (ix>0) ) return 2; // Right atoms
|
||||
if( (iy>0) && (ix==0) ) return 1; // Top-middle atoms
|
||||
if( (iy>0) && (ix!=0) ) return 3; // Top-right and top-left atoms
|
||||
} else if (iz == 0) {
|
||||
if (iy<0) return -1; // bottom left/middle/right
|
||||
if ((iy==0) && (ix<0) ) return -1; // left atoms
|
||||
if ((iy==0) && (ix==0)) return 0; // Locally owned atoms
|
||||
if ((iy==0) && (ix>0) ) return 2; // Right atoms
|
||||
if ((iy>0) && (ix==0)) return 1; // Top-middle atoms
|
||||
if ((iy>0) && (ix!=0)) return 3; // Top-right and top-left atoms
|
||||
} else { // iz > 0
|
||||
if((ix==0) && (iy==0)) return 4; // Back atoms
|
||||
if((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
|
||||
if((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
|
||||
if((ix!=0) && (iy!=0)) return 7; // Back corner atoms
|
||||
if ((ix==0) && (iy==0)) return 4; // Back atoms
|
||||
if ((ix==0) && (iy!=0)) return 5; // Top-back and bottom-back atoms
|
||||
if ((ix!=0) && (iy==0)) return 6; // Left-back and right-back atoms
|
||||
if ((ix!=0) && (iy!=0)) return 7; // Back corner atoms
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
|
||||
@ -329,7 +329,7 @@ void NeighborKokkos::operator()(TagNeighborXhold<DeviceType>, const int &i) cons
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
|
||||
void NeighborKokkos::modify_ex_type_grow_kokkos(){
|
||||
void NeighborKokkos::modify_ex_type_grow_kokkos() {
|
||||
memoryKK->grow_kokkos(k_ex1_type,ex1_type,maxex_type,"neigh:ex1_type");
|
||||
k_ex1_type.modify<LMPHostType>();
|
||||
memoryKK->grow_kokkos(k_ex2_type,ex2_type,maxex_type,"neigh:ex2_type");
|
||||
@ -337,7 +337,7 @@ void NeighborKokkos::modify_ex_type_grow_kokkos(){
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::modify_ex_group_grow_kokkos(){
|
||||
void NeighborKokkos::modify_ex_group_grow_kokkos() {
|
||||
memoryKK->grow_kokkos(k_ex1_group,ex1_group,maxex_group,"neigh:ex1_group");
|
||||
k_ex1_group.modify<LMPHostType>();
|
||||
memoryKK->grow_kokkos(k_ex2_group,ex2_group,maxex_group,"neigh:ex2_group");
|
||||
@ -345,13 +345,13 @@ void NeighborKokkos::modify_ex_group_grow_kokkos(){
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::modify_mol_group_grow_kokkos(){
|
||||
void NeighborKokkos::modify_mol_group_grow_kokkos() {
|
||||
memoryKK->grow_kokkos(k_ex_mol_group,ex_mol_group,maxex_mol,"neigh:ex_mol_group");
|
||||
k_ex_mol_group.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
/* ---------------------------------------------------------------------- */
|
||||
void NeighborKokkos::modify_mol_intra_grow_kokkos(){
|
||||
void NeighborKokkos::modify_mol_intra_grow_kokkos() {
|
||||
memoryKK->grow_kokkos(k_ex_mol_intra,ex_mol_intra,maxex_mol,"neigh:ex_mol_intra");
|
||||
k_ex_mol_intra.modify<LMPHostType>();
|
||||
}
|
||||
|
||||
@ -207,7 +207,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
||||
data.special_flag[3] = special_flag[3];
|
||||
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
while (data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
@ -303,7 +303,7 @@ void NPairKokkos<DeviceType,HALF_NEIGH,GHOST,TRI,SIZE>::build(NeighList *list_)
|
||||
}
|
||||
Kokkos::deep_copy(h_scalars, d_scalars);
|
||||
|
||||
if(data.h_resize()) {
|
||||
if (data.h_resize()) {
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename AT::t_neighbors_2d(Kokkos::NoInit("neighbors"), list->d_neighbors.extent(0), list->maxneighs);
|
||||
data.neigh_list.d_neighbors = list->d_neighbors;
|
||||
@ -410,24 +410,24 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
= d_stencil;
|
||||
|
||||
// loop over all bins in neighborhood (includes ibin)
|
||||
if(HalfNeigh)
|
||||
for(int m = 0; m < c_bincount(ibin); m++) {
|
||||
if (HalfNeigh)
|
||||
for (int m = 0; m < c_bincount(ibin); m++) {
|
||||
const int j = c_bins(ibin,m);
|
||||
const int jtype = type(j);
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
|
||||
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
|
||||
if ((j == i) || (HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
|
||||
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
@ -436,38 +436,38 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (which == 0) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
// get subview of jbin
|
||||
if(HalfNeigh && (ibin==jbin)) continue;
|
||||
if (HalfNeigh && (ibin==jbin)) continue;
|
||||
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
for (int m = 0; m < c_bincount(jbin); m++) {
|
||||
|
||||
const int j = c_bins(jbin,m);
|
||||
const int jtype = type(j);
|
||||
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(Tri) {
|
||||
if (HalfNeigh && !Newton && (j < i)) continue;
|
||||
if (!HalfNeigh && j==i) continue;
|
||||
if (Tri) {
|
||||
if (x(j,2) < ztmp) continue;
|
||||
if (x(j,2) == ztmp) {
|
||||
if (x(j,1) < ytmp) continue;
|
||||
@ -477,14 +477,14 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
}
|
||||
}
|
||||
}
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
if (!moltemplate)
|
||||
which = NeighborKokkosExecute<DeviceType>::find_special(i,j);
|
||||
@ -493,19 +493,19 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (which == 0) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
@ -515,10 +515,10 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n > neigh_list.maxneighs) {
|
||||
if (n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
}
|
||||
|
||||
neigh_list.d_ilist(i) = i;
|
||||
@ -562,7 +562,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
|
||||
|
||||
if(ibin >= mbins) return;
|
||||
if (ibin >= mbins) return;
|
||||
X_FLOAT* other_x = sharedmem;
|
||||
other_x = other_x + 5*atoms_per_bin*MY_BIN;
|
||||
|
||||
@ -570,7 +570,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
int bincount_current = c_bincount[ibin];
|
||||
|
||||
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
|
||||
for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
|
||||
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
|
||||
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
|
||||
/* if necessary, goto next page and add pages */
|
||||
@ -583,7 +583,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
int itype;
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
|
||||
|
||||
if(i >= 0) {
|
||||
if (i >= 0) {
|
||||
xtmp = x(i, 0);
|
||||
ytmp = x(i, 1);
|
||||
ztmp = x(i, 2);
|
||||
@ -596,23 +596,23 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
other_id[MY_II] = i;
|
||||
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
|
||||
|
||||
if(test) return;
|
||||
if (test) return;
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
if (i >= 0 && i < nlocal) {
|
||||
#pragma unroll 4
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
for (int m = 0; m < bincount_current; m++) {
|
||||
int j = other_id[m];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
|
||||
if((j == i) ||
|
||||
if ((j == i) ||
|
||||
(HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton &&
|
||||
((j < i) ||
|
||||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
if(Tri) {
|
||||
if (Tri) {
|
||||
if (x(j,2) < ztmp) continue;
|
||||
if (x(j,2) == ztmp) {
|
||||
if (x(j,1) < ytmp) continue;
|
||||
@ -622,13 +622,13 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
}
|
||||
}
|
||||
}
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
@ -638,19 +638,19 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (which == 0) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
@ -661,15 +661,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= d_stencil;
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
if(ibin == jbin) continue;
|
||||
if (ibin == jbin) continue;
|
||||
|
||||
bincount_current = c_bincount[jbin];
|
||||
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
|
||||
|
||||
if(j >= 0) {
|
||||
if (j >= 0) {
|
||||
other_x[MY_II] = x(j, 0);
|
||||
other_x[MY_II + atoms_per_bin] = x(j, 1);
|
||||
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
|
||||
@ -680,16 +680,16 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
if (i >= 0 && i < nlocal) {
|
||||
#pragma unroll 8
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
for (int m = 0; m < bincount_current; m++) {
|
||||
const int j = other_id[m];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
//if(HalfNeigh && (j < i)) continue;
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(Tri) {
|
||||
if (HalfNeigh && !Newton && (j < i)) continue;
|
||||
if (!HalfNeigh && j==i) continue;
|
||||
if (Tri) {
|
||||
if (x(j,2) < ztmp) continue;
|
||||
if (x(j,2) == ztmp) {
|
||||
if (x(j,1) < ytmp) continue;
|
||||
@ -699,14 +699,14 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
}
|
||||
}
|
||||
}
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
|
||||
const X_FLOAT rsq = delx * delx + dely * dely + delz * delz;
|
||||
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
int which = 0;
|
||||
if (!moltemplate)
|
||||
@ -716,19 +716,19 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (which == 0) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
@ -738,15 +738,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemCuda(typename Kokkos::TeamPoli
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
if (i >= 0 && i < nlocal) {
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
if(n > neigh_list.maxneighs) {
|
||||
if (n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -787,14 +787,14 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
const int ibin = c_atom2bin(i);
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
for (int m = 0; m < c_bincount(jbin); m++) {
|
||||
const int j = c_bins(jbin,m);
|
||||
|
||||
if (HalfNeigh && j <= i) continue;
|
||||
else if (j == i) continue;
|
||||
|
||||
const int jtype = type[j];
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j,0);
|
||||
const X_FLOAT dely = ytmp - x(j,1);
|
||||
@ -810,19 +810,19 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (which == 0) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
@ -843,14 +843,14 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
ybin2 < 0 || ybin2 >= mbiny ||
|
||||
zbin2 < 0 || zbin2 >= mbinz) continue;
|
||||
const int jbin = ibin + stencil[k];
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
for (int m = 0; m < c_bincount(jbin); m++) {
|
||||
const int j = c_bins(jbin,m);
|
||||
|
||||
if (HalfNeigh && j <= i) continue;
|
||||
else if (j == i) continue;
|
||||
|
||||
const int jtype = type[j];
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j,0);
|
||||
const X_FLOAT dely = ytmp - x(j,1);
|
||||
@ -858,7 +858,7 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
|
||||
if (rsq <= cutneighsq(itype,jtype)) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
@ -867,10 +867,10 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n > neigh_list.maxneighs) {
|
||||
if (n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
}
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
@ -902,18 +902,18 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
const int mask_history = 3 << SBBITS;
|
||||
|
||||
// loop over all bins in neighborhood (includes ibin)
|
||||
if(HalfNeigh)
|
||||
for(int m = 0; m < c_bincount(ibin); m++) {
|
||||
if (HalfNeigh)
|
||||
for (int m = 0; m < c_bincount(ibin); m++) {
|
||||
const int j = c_bins(ibin,m);
|
||||
const int jtype = type(j);
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using HalfNeighborlists
|
||||
if((j == i) || (HalfNeigh && !Newton && (j < i)) ||
|
||||
if ((j == i) || (HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton && ((j < i) || ((j >= nlocal) &&
|
||||
((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
@ -922,29 +922,29 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
const X_FLOAT radsum = radi + radius(j);
|
||||
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
|
||||
|
||||
if(rsq <= cutsq) {
|
||||
if(n<neigh_list.maxneighs) {
|
||||
if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
if (rsq <= cutsq) {
|
||||
if (n<neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
}
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
// get subview of jbin
|
||||
if(HalfNeigh && (ibin==jbin)) continue;
|
||||
if (HalfNeigh && (ibin==jbin)) continue;
|
||||
//const ArrayTypes<DeviceType>::t_int_1d_const_um =Kokkos::subview<t_int_1d_const_um>(bins,jbin,ALL);
|
||||
for(int m = 0; m < c_bincount(jbin); m++) {
|
||||
for (int m = 0; m < c_bincount(jbin); m++) {
|
||||
|
||||
const int j = c_bins(jbin,m);
|
||||
const int jtype = type(j);
|
||||
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(Tri) {
|
||||
if (HalfNeigh && !Newton && (j < i)) continue;
|
||||
if (!HalfNeigh && j==i) continue;
|
||||
if (Tri) {
|
||||
if (x(j,2) < ztmp) continue;
|
||||
if (x(j,2) == ztmp) {
|
||||
if (x(j,1) < ytmp) continue;
|
||||
@ -954,7 +954,7 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
}
|
||||
}
|
||||
}
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
@ -963,9 +963,9 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
const X_FLOAT radsum = radi + radius(j);
|
||||
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
|
||||
|
||||
if(rsq <= cutsq) {
|
||||
if(n<neigh_list.maxneighs) {
|
||||
if(neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
if (rsq <= cutsq) {
|
||||
if (n<neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
}
|
||||
else n++;
|
||||
@ -975,10 +975,10 @@ void NeighborKokkosExecute<DeviceType>::
|
||||
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
|
||||
if(n > neigh_list.maxneighs) {
|
||||
if (n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
}
|
||||
|
||||
neigh_list.d_ilist(i) = i;
|
||||
@ -1005,7 +1005,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
|
||||
const int ibin = dev.league_rank()*BINS_PER_TEAM+MY_BIN;
|
||||
|
||||
if(ibin >= mbins) return;
|
||||
if (ibin >= mbins) return;
|
||||
X_FLOAT* other_x = sharedmem;
|
||||
other_x = other_x + 6*atoms_per_bin*MY_BIN;
|
||||
|
||||
@ -1013,7 +1013,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
|
||||
int bincount_current = c_bincount[ibin];
|
||||
|
||||
for(int kk = 0; kk < TEAMS_PER_BIN; kk++) {
|
||||
for (int kk = 0; kk < TEAMS_PER_BIN; kk++) {
|
||||
const int MY_II = dev.team_rank()%atoms_per_bin+kk*dev.team_size();
|
||||
const int i = MY_II < bincount_current ? c_bins(ibin, MY_II) : -1;
|
||||
/* if necessary, goto next page and add pages */
|
||||
@ -1028,7 +1028,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
const AtomNeighbors neighbors_i = neigh_list.get_neighbors((i>=0&&i<nlocal)?i:0);
|
||||
const int mask_history = 3 << SBBITS;
|
||||
|
||||
if(i >= 0) {
|
||||
if (i >= 0) {
|
||||
xtmp = x(i, 0);
|
||||
ytmp = x(i, 1);
|
||||
ztmp = x(i, 2);
|
||||
@ -1043,23 +1043,23 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
other_id[MY_II] = i;
|
||||
int test = (__syncthreads_count(i >= 0 && i <= nlocal) == 0);
|
||||
|
||||
if(test) return;
|
||||
if (test) return;
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
if (i >= 0 && i < nlocal) {
|
||||
#pragma unroll 4
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
for (int m = 0; m < bincount_current; m++) {
|
||||
int j = other_id[m];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
//for same bin as atom i skip j if i==j and skip atoms "below and to the left" if using halfneighborlists
|
||||
if((j == i) ||
|
||||
if ((j == i) ||
|
||||
(HalfNeigh && !Newton && (j < i)) ||
|
||||
(HalfNeigh && Newton &&
|
||||
((j < i) ||
|
||||
((j >= nlocal) && ((x(j, 2) < ztmp) || (x(j, 2) == ztmp && x(j, 1) < ytmp) ||
|
||||
(x(j, 2) == ztmp && x(j, 1) == ytmp && x(j, 0) < xtmp)))))
|
||||
) continue;
|
||||
if(Tri) {
|
||||
if (Tri) {
|
||||
if (x(j,2) < ztmp) continue;
|
||||
if (x(j,2) == ztmp) {
|
||||
if (x(j,1) < ytmp) continue;
|
||||
@ -1069,7 +1069,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
}
|
||||
}
|
||||
}
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
const X_FLOAT delz = ztmp - other_x[m + 2 * atoms_per_bin];
|
||||
@ -1077,8 +1077,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
|
||||
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
|
||||
|
||||
if(rsq <= cutsq) {
|
||||
if(n<neigh_list.maxneighs) {
|
||||
if (rsq <= cutsq) {
|
||||
if (n<neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
}
|
||||
@ -1090,15 +1090,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
|
||||
const typename ArrayTypes<DeviceType>::t_int_1d_const_um stencil
|
||||
= d_stencil;
|
||||
for(int k = 0; k < nstencil; k++) {
|
||||
for (int k = 0; k < nstencil; k++) {
|
||||
const int jbin = ibin + stencil[k];
|
||||
|
||||
if(ibin == jbin) continue;
|
||||
if (ibin == jbin) continue;
|
||||
|
||||
bincount_current = c_bincount[jbin];
|
||||
int j = MY_II < bincount_current ? c_bins(jbin, MY_II) : -1;
|
||||
|
||||
if(j >= 0) {
|
||||
if (j >= 0) {
|
||||
other_x[MY_II] = x(j, 0);
|
||||
other_x[MY_II + atoms_per_bin] = x(j, 1);
|
||||
other_x[MY_II + 2 * atoms_per_bin] = x(j, 2);
|
||||
@ -1110,16 +1110,16 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
if (i >= 0 && i < nlocal) {
|
||||
#pragma unroll 8
|
||||
for(int m = 0; m < bincount_current; m++) {
|
||||
for (int m = 0; m < bincount_current; m++) {
|
||||
const int j = other_id[m];
|
||||
const int jtype = other_x[m + 3 * atoms_per_bin];
|
||||
|
||||
if(HalfNeigh && (j < i)) continue;
|
||||
if(HalfNeigh && !Newton && (j < i)) continue;
|
||||
if(!HalfNeigh && j==i) continue;
|
||||
if(Tri) {
|
||||
if (HalfNeigh && (j < i)) continue;
|
||||
if (HalfNeigh && !Newton && (j < i)) continue;
|
||||
if (!HalfNeigh && j==i) continue;
|
||||
if (Tri) {
|
||||
if (x(j,2) < ztmp) continue;
|
||||
if (x(j,2) == ztmp) {
|
||||
if (x(j,1) < ytmp) continue;
|
||||
@ -1129,7 +1129,7 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
}
|
||||
}
|
||||
}
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - other_x[m];
|
||||
const X_FLOAT dely = ytmp - other_x[m + atoms_per_bin];
|
||||
@ -1138,8 +1138,8 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
const X_FLOAT radsum = radi + other_x[m + 4 * atoms_per_bin];
|
||||
const X_FLOAT cutsq = (radsum + skin) * (radsum + skin);
|
||||
|
||||
if(rsq <= cutsq) {
|
||||
if(n<neigh_list.maxneighs) {
|
||||
if (rsq <= cutsq) {
|
||||
if (n<neigh_list.maxneighs) {
|
||||
if (neigh_list.history && rsq < radsum*radsum) neighbors_i(n++) = j ^ mask_history;
|
||||
else neighbors_i(n++) = j;
|
||||
}
|
||||
@ -1150,15 +1150,15 @@ void NeighborKokkosExecute<DeviceType>::build_ItemSizeCuda(typename Kokkos::Team
|
||||
__syncthreads();
|
||||
}
|
||||
|
||||
if(i >= 0 && i < nlocal) {
|
||||
if (i >= 0 && i < nlocal) {
|
||||
neigh_list.d_numneigh(i) = n;
|
||||
neigh_list.d_ilist(i) = i;
|
||||
}
|
||||
|
||||
if(n > neigh_list.maxneighs) {
|
||||
if (n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
|
||||
if(n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
if (n > new_maxneighs()) new_maxneighs() = n; // avoid atomics, safe because in while loop
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -452,7 +452,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
|
||||
|
||||
bool firstTry = true;
|
||||
data.h_resize()=1;
|
||||
while(data.h_resize()) {
|
||||
while (data.h_resize()) {
|
||||
data.h_new_maxneighs() = list->maxneighs;
|
||||
data.h_resize() = 0;
|
||||
|
||||
@ -489,7 +489,7 @@ fprintf(stdout, "tota%03d total %3d could use %6d inums, expected %6d inums. inu
|
||||
|
||||
deep_copy(data.h_resize, data.resize);
|
||||
|
||||
if(data.h_resize()) {
|
||||
if (data.h_resize()) {
|
||||
deep_copy(data.h_new_maxneighs, data.new_maxneighs);
|
||||
list->maxneighs = data.h_new_maxneighs() * 1.2;
|
||||
list->d_neighbors = typename ArrayTypes<DeviceType>::t_neighbors_2d("neighbors", list->d_neighbors.extent(0), list->maxneighs);
|
||||
@ -571,13 +571,13 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
|
||||
for (; jl < c_bincount(jbin); ++jl) {
|
||||
const int j = c_bins(jbin, jl);
|
||||
const int jtype = type(j);
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
if (!moltemplate)
|
||||
which = find_special(i,j);
|
||||
@ -586,19 +586,19 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
|
||||
/* onemols[imol]->nspecial[iatom], */
|
||||
/* tag[j]-tagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (which == 0) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
@ -608,9 +608,9 @@ void NPairSSAKokkosExecute<DeviceType>::build_locals_onePhase(const bool firstTr
|
||||
if (n > 0) {
|
||||
neigh_list.d_numneigh(inum) = n;
|
||||
neigh_list.d_ilist(inum++) = i;
|
||||
if(n > neigh_list.maxneighs) {
|
||||
if (n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
if (n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -699,13 +699,13 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
|
||||
for (int jl = 0; jl < c_bincount(jbin); ++jl) {
|
||||
const int j = c_bins(jbin, jl);
|
||||
const int jtype = type(j);
|
||||
if(exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
if (exclude && exclusion(i,j,itype,jtype)) continue;
|
||||
|
||||
const X_FLOAT delx = xtmp - x(j, 0);
|
||||
const X_FLOAT dely = ytmp - x(j, 1);
|
||||
const X_FLOAT delz = ztmp - x(j, 2);
|
||||
const X_FLOAT rsq = delx*delx + dely*dely + delz*delz;
|
||||
if(rsq <= cutneighsq(itype,jtype)) {
|
||||
if (rsq <= cutneighsq(itype,jtype)) {
|
||||
if (molecular != Atom::ATOMIC) {
|
||||
if (!moltemplate)
|
||||
which = find_special(j,i);
|
||||
@ -714,19 +714,19 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
|
||||
/* onemols[jmol]->nspecial[jatom], */
|
||||
/* tag[i]-jtagprev); */
|
||||
/* else which = 0; */
|
||||
if (which == 0){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (which == 0) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
} else if (minimum_image_check(delx,dely,delz)){
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
} else if (minimum_image_check(delx,dely,delz)) {
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
else if (which > 0) {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j ^ (which << SBBITS);
|
||||
else n++;
|
||||
}
|
||||
} else {
|
||||
if(n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
if (n<neigh_list.maxneighs) neighbors_i(n++) = j;
|
||||
else n++;
|
||||
}
|
||||
}
|
||||
@ -736,9 +736,9 @@ void NPairSSAKokkosExecute<DeviceType>::build_ghosts_onePhase(int workPhase) con
|
||||
if (n > 0) {
|
||||
neigh_list.d_numneigh(gNdx) = n;
|
||||
neigh_list.d_ilist(gNdx++) = i;
|
||||
if(n > neigh_list.maxneighs) {
|
||||
if (n > neigh_list.maxneighs) {
|
||||
resize() = 1;
|
||||
if(n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
if (n > new_maxneighs()) Kokkos::atomic_fetch_max(&new_maxneighs(),n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user