Compare commits

...

139 Commits

Author SHA1 Message Date
2b9646097c Merge branch 'master' of github.com:lammps/lammps 2017-09-01 09:16:11 -06:00
f58088ae61 patch 1Sep17 2017-09-01 09:15:37 -06:00
21893539cb Merge pull request #633 from lammps/doc-user-meso
tweak an eq in USER-MESO, also allow neigh to be built w/out styles
2017-08-25 17:41:00 -06:00
dd101db79c Merge pull request #582 from stanmoore1/kk_changes
Fixing execution space issues in KOKKOS package
2017-08-25 17:06:26 -06:00
81ba181349 tweak an eq in USER-MESO, also allow neigh to be built w/out styles 2017-08-25 17:04:36 -06:00
6195b3c0f6 Merge pull request #631 from akohlmey/small-fixes-and-updates
Small fixes and updates for patch release
2017-08-24 08:35:32 -06:00
de0fdbed6c moltemplate bugfix update 2017-08-24 07:12:45 -04:00
dea92fbf52 print write dump system init warning only on MPI rank 0 2017-08-23 21:01:55 -04:00
6db3bd87ee correct typo in header formatting 2017-08-23 17:52:09 -04:00
b6c93f96cf Merge pull request #629 from stanmoore1/kokkos_update
Update Kokkos library to v2.04.00
2017-08-23 14:39:42 -06:00
7b7a5076d0 Merge pull request #624 from akohlmey/compute-fragment
Add computes fragment/atom and aggregate/atom
2017-08-23 14:14:33 -06:00
1536eb5de5 Merge pull request #630 from akohlmey/moltemplate-update
Update for moltemplate to version 2017-8-22
2017-08-23 14:13:07 -06:00
f7a0a6eb70 Merge pull request #622 from akohlmey/collected-small-fixes
Collected small fixes
2017-08-23 14:12:45 -06:00
d80a6cbd31 Merge pull request #621 from lammps/user-meso
new USER-MESO package
2017-08-23 14:12:18 -06:00
c4b37039ab Merge pull request #619 from jrgissing/molecule_maxspecial
molecule maxspecial value corrected when specials autogenerated
2017-08-23 14:11:47 -06:00
b52efa2850 add compatibility to NetCDF 4.3.3 as bundled with RHEL 7.x 2017-08-23 15:28:27 -04:00
e88ff8d6f9 correct embedded help for GPU lib Install.py 2017-08-23 15:20:30 -04:00
97edf90a73 update fix qeq docs for the new, more flexible parameter file format 2017-08-23 11:22:09 -04:00
12150ffa73 add moltemplate version 2017-8-22 2017-08-23 07:54:56 -04:00
57aafba7c3 remove old moltemplate version 2017-08-23 07:54:05 -04:00
cdfb1aa043 make links unique, fix doc format warnings and add entries to administrative files 2017-08-23 00:39:52 -04:00
b80752a40c restore changes to read_data docs, that would have been lost due to merging an outdated version 2017-08-23 00:01:19 -04:00
79d5ca669d fix issues indicated by compiler warnings 2017-08-22 23:46:32 -04:00
26c15140be add USER-MESO files to .gitignore 2017-08-22 23:46:09 -04:00
090c792d90 Update Kokkos library to v2.04.00 2017-08-22 13:42:02 -06:00
24c00b1f7a fix typo 2017-08-21 13:12:48 -04:00
5a0c3aea8a add a compute aggregate/atom, that combines the rules for compute cluster/atom and fragment/atom 2017-08-21 13:12:43 -04:00
d1a0c040c9 add initializers for nmatch/nwant variables in molecule file parser 2017-08-20 23:28:13 -04:00
f5b8f722ee remove non-portable non-ascii blanks from fix wall/ees docs 2017-08-20 10:09:21 -04:00
39e51df2c0 add missing entry in pdf manual 2017-08-20 10:02:11 -04:00
c895df73d6 skip over disabled bonds 2017-08-20 09:49:01 -04:00
4dc1195cd8 add docs for compute fragment/atom 2017-08-20 09:41:49 -04:00
35fd82b602 trim unneeded includes 2017-08-20 09:19:04 -04:00
c5ce3ffe60 use list of bonds per atom instead of bondlist, as that will work with shake as well 2017-08-20 09:18:04 -04:00
aa1ce09b12 more cleanup, checks and generalization of QEQ parameter file parsing 2017-08-18 20:03:47 -04:00
f945d4567d new USER-MESO package 2017-08-18 17:33:42 -06:00
44ccdb86df add checks when reading QEQ parameter file to avoid segfaults and memory corruption on incorrect files 2017-08-18 19:24:39 -04:00
2e6a928aa3 ignore file recently added to USER-INTEL 2017-08-18 19:23:33 -04:00
6f66e6c454 add new compute fragment/atom 2017-08-18 19:07:32 -04:00
d671a04274 whitespace cleanup 2017-08-18 14:24:09 -04:00
bf7cff73d4 allocate the temporary special array on the heap and not the stack (it may get too big quickly) and clear the array before use. 2017-08-18 14:23:48 -04:00
9e85b3178a molecule maxspecial value corrected when specials autogenerated 2017-08-17 21:39:25 -06:00
b11fe2eddb Merge pull request #573 from junghans/cmake
Add secondary, cmake based build system
2017-08-17 16:06:19 -06:00
5cd1dc93dc Revert 7edb294b44 temporarily 2017-07-31 15:32:20 -06:00
93190a548a Merge branch 'cmake' of github.com:junghans/lammps into cmake 2017-07-31 11:48:27 -06:00
72f50c91ee Add -DLAMMPS_EXCEPTIONS flag 2017-07-31 11:48:22 -06:00
408d9d99a9 cmake: improve some error messages 2017-07-31 11:35:41 -06:00
7edb294b44 Fix execution space issues in comm_kokkos 2017-07-28 10:44:08 -06:00
8e834d8be1 Add missing sync to neigh_bond_kokkos 2017-07-27 09:37:16 -06:00
8f59c0e188 Fix modify/sync issue in neigh_bond_kokkos 2017-07-27 08:51:17 -06:00
0231cc38a3 cmake: some more typo fixes 2017-07-25 19:09:20 -06:00
c3c9f357fd Fix concurrent modification issue in comm_kokkos 2017-07-25 14:37:41 -06:00
76fb4e0815 Merge branch 'master' into kk_changes 2017-07-25 14:16:31 -06:00
d3fa882280 Fix execution space issues in Kokkos EAM pairstyles 2017-07-25 13:49:23 -06:00
557e5b964a Fixing Kokkos execution space issue with regions 2017-07-25 13:37:01 -06:00
633ca33f2f Fix issue in Kokkos neighborlist 2017-07-25 13:08:24 -06:00
6716de5320 allow user to override PYTHON_INSTDIR 2017-07-24 20:17:17 -06:00
f2023431f6 cmake: fixed another typo 2017-07-24 12:54:26 -06:00
126d9cd3bc add GZIP and FFMPEG status 2017-07-22 13:57:15 -06:00
8d485ea128 Remove hardcoded execution spaces 2017-07-21 16:04:18 -06:00
f4b6b67f6e Fixing more execution space issues in KOKKOS package 2017-07-21 15:04:14 -06:00
8ed881947f Fix execution space in manybody potentials 2017-07-21 14:28:13 -06:00
74deeeca58 cmake: fixed another typo 2017-07-21 11:50:13 -06:00
c010edc4fd cmake: fixed two typos 2017-07-21 11:38:02 -06:00
e8e9ea8392 added one trivial test 2017-07-20 16:14:02 -06:00
c88d1e5510 make ENABLE_ALL work out of the box 2017-07-20 15:15:29 -06:00
427ca88dd4 cmake: error for POEMS + BODY package 2017-07-20 15:02:41 -06:00
1749d643c7 GPU: bring back CUDPP_OPT 2017-07-20 14:30:52 -06:00
f037f89f5f fix GPU + BUILD_SHARED_LIBS
X-Thanks: Robert Maynard <robert.maynard@kitware.com>
2017-07-20 14:12:23 -06:00
488609a5fd make FFT a selective option 2017-07-19 18:54:15 -06:00
01bcb79bdc cmake: clean up and updated comments 2017-07-19 18:34:07 -06:00
babba1870e added FFTW2 support 2017-07-19 15:33:02 -06:00
a99e3ef4f0 cmake: fix typo and pkg info 2017-07-19 15:15:59 -06:00
c95db97b83 fix PYTHON install 2017-07-19 15:15:24 -06:00
2961ba7ebb added MKL support 2017-07-19 10:35:48 -06:00
e2ad4fa745 GPU: cubin not needed 2017-07-18 19:29:40 -06:00
acbc60319f GPU: clean up part 2 2017-07-18 18:43:51 -06:00
e6f5f77edf GPU: clean up 2017-07-18 18:38:36 -06:00
864fd9cd87 remove cubin_headers hack 2017-07-18 18:20:23 -06:00
4d65c327f5 added minimal README 2017-07-18 16:06:29 -06:00
c3d9786616 GPU compiles 2017-07-18 16:01:35 -06:00
a9eaeb4d95 working on GPU build 2017-07-18 13:47:03 -06:00
1b34bec81a Prefer DeviceType specification over RangePolicy 2017-07-18 10:59:34 -06:00
9df61b642e Removing stray file 2017-07-18 10:44:45 -06:00
3c88b2a980 Fixing execution space issues in KOKKOS package 2017-07-18 09:53:26 -06:00
3c3baf34c4 GPU: added cuda includedir 2017-07-17 15:04:07 -06:00
8bc3184611 added support for LAMMPS_FFMPEG 2017-07-17 14:01:05 -06:00
050a82af58 fix LAMMPS_LONGLONG_TO_LONG description 2017-07-17 13:28:34 -06:00
99f5f82b02 added support for LAMMPS_MEMALIGN and LAMMPS_LONGLONG_TO_LONG 2017-07-17 13:26:46 -06:00
a3885b78ad added support -DLAMMPS_XDR and -DPACK_* 2017-07-17 13:21:42 -06:00
08c920029f added lammps size option 2017-07-17 12:22:28 -06:00
2411192ab4 cmake: add cross-configure warning 2017-07-17 11:52:06 -06:00
5ee2c31038 split PACKAGES, get ENABLE-ALL right 2017-07-17 11:01:08 -06:00
1c1a1db366 Fix typo 2017-07-17 11:55:05 -04:00
80e4448614 added GPU 2017-07-16 23:03:11 -06:00
385c6f7f2b cmake: clean up 2017-07-16 22:53:53 -06:00
fc1be074b2 added USER-INTEL 2017-07-16 22:52:59 -06:00
7605f72c9a finish USER-QMMM 2017-07-16 22:37:51 -06:00
22ecd9b8d2 started on USER-QMMM 2017-07-16 22:07:21 -06:00
7dd5068740 allow internal lapack 2017-07-16 21:43:29 -06:00
fa0f8a9e2a added USER-QUIP 2017-07-16 21:31:57 -06:00
d6f05ea309 USER_OMP -> USER-OMP 2017-07-16 20:19:20 -06:00
b6385d6ce2 add OpenKIM support 2017-07-16 20:17:41 -06:00
2978cce8db Added OPT 2017-07-16 20:52:38 -04:00
9991f679ae added USER-VTK 2017-07-16 20:41:42 -04:00
fc2e8b3c5e more USER packages 2017-07-16 17:52:43 -06:00
bb87bd4ac7 enable more user packages 2017-07-16 17:01:28 -06:00
d50b62837b add USER-AWPMD 2017-07-16 16:45:28 -06:00
c64424754d added USER-ATC 2017-07-16 16:18:58 -06:00
c549a16a85 enable REPLICA RIGID SHOCK SNAP SRD VORONOI 2017-07-16 11:30:09 -06:00
140182fb0b added support for QEQ 2017-07-16 11:14:08 -06:00
742eee1966 added support for POEMS 2017-07-16 11:07:36 -06:00
4812d4c659 enable PERI 2017-07-16 10:48:29 -06:00
95d9d32307 add support for MSCG 2017-07-16 10:37:24 -06:00
f50a757dc6 added MPIIO support 2017-07-16 08:32:14 -06:00
01f5136584 cmake: clean up 2017-07-16 08:22:19 -06:00
fa1f38596c Add support for PYTHON in CMake build 2017-07-15 18:29:33 -05:00
23540cfc94 enable MISC 2017-07-15 16:54:27 -06:00
fdb5ccefc7 Merge branch 'cmake' of github.com:junghans/lammps into cmake 2017-07-15 17:47:53 -05:00
629f112915 add support for MEAM 2017-07-15 16:44:03 -06:00
acd315e97a Add basic KOKKOS support to CMake build 2017-07-15 15:33:36 -05:00
b15f75d37b Merge branch 'cmake' of github.com:junghans/lammps into cmake 2017-07-14 22:08:06 -05:00
5e841bfe15 Added USER-OMP support to CMake build 2017-07-14 22:07:53 -05:00
d079b2f758 CreateStyleHeader: use temp file 2017-07-14 18:37:06 -06:00
54f2b02ac8 cmake: fix install 2017-07-14 18:33:27 -06:00
0a6e9c8bf6 added ENABLE_ALL option 2017-07-14 17:41:13 -06:00
b85979503f Add CMake support for more packages
BODY, COLLOID, CLASS2, COMPRESS, CORESHELL, DIPOLE, GRANULAR, MC, MOLECULE, MANYBODY, RIGID
2017-07-14 18:19:59 -05:00
c7e218f310 Merge branch 'cmake' of github.com:junghans/lammps into cmake 2017-07-14 18:06:27 -05:00
335ef11a7b Added style header generation with CMake 2017-07-14 17:49:05 -05:00
d5dcb3d329 add support for KSPACE 2017-07-14 15:55:36 -06:00
c07adac22d add support for LAMMPS_GZIP 2017-07-14 14:49:53 -06:00
e4e1252152 fix LAMMPS_PNG 2017-07-14 14:44:44 -06:00
842dc1b58c cmake: collect link libs 2017-07-14 14:21:21 -06:00
a566419ca6 Add LAMMPS_LIB_SOURCE_DIR variable in CMakeLists.txt 2017-07-14 04:36:52 -05:00
fdd3d802f0 Clean up CMakeList.txt by introducing LAMMPS_SOURCE_DIR variable 2017-07-14 04:00:38 -05:00
a86b0d4c1b Add PNG library detection to CMakeList.txt 2017-07-14 03:53:07 -05:00
7f1789a0c4 cmake: add support for REAX and hence Fortran 2017-07-13 23:30:46 -06:00
bfb449cec9 cmake: further improvements
* Add support for one package
* Add support for JPEG as external package
* Use pre-generated style header
  * TODO write a script to generate them
2017-07-13 22:54:51 -06:00
6b19016deb cmake: initial commit 2017-07-12 16:23:38 -06:00
1124 changed files with 76034 additions and 42082 deletions

8
.gitignore vendored
View File

@ -32,3 +32,11 @@ log.cite
.Trashes
ehthumbs.db
Thumbs.db
# cmake: build directories, files generated by running cmake in the
# repository root, and the lmp executable
/build*
/CMakeCache.txt
/CMakeFiles/
/Makefile
/cmake_install.cmake
/lmp

547
cmake/CMakeLists.txt Normal file
View File

@ -0,0 +1,547 @@
########################################
# CMake build system
# This file is part of LAMMPS
# Created by Christoph Junghans and Richard Berger
cmake_minimum_required(VERSION 3.1)

project(lammps)
set(SOVERSION 0)

# Locations of the LAMMPS sources, the bundled libraries and the
# directory in which bundled libraries are built.
set(LAMMPS_SOURCE_DIR     ${CMAKE_SOURCE_DIR}/../src)
set(LAMMPS_LIB_SOURCE_DIR ${CMAKE_SOURCE_DIR}/../lib)
set(LAMMPS_LIB_BINARY_DIR ${CMAKE_BINARY_DIR}/lib)

# Everything is built from here so that the traditional Makefile based
# build system is left untouched. main.cpp goes into the executable
# only; every other top-level source file goes into the library.
file(GLOB LIB_SOURCES ${LAMMPS_SOURCE_DIR}/*.cpp)
file(GLOB LMP_SOURCES ${LAMMPS_SOURCE_DIR}/main.cpp)
list(REMOVE_ITEM LIB_SOURCES ${LMP_SOURCES})

# Helper modules/macros live in a subdirectory to keep this file short.
set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/Modules)

# Default to a Release build (-O3) unless the user picked a build type
# or supplied their own C++ flags.
if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CXX_FLAGS)
  set(CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()

# Refuse to configure if any style_*.h header is present in the source
# directory; the user is asked to clean it up first.
foreach(STYLE_FILE
    style_angle.h
    style_atom.h
    style_body.h
    style_bond.h
    style_command.h
    style_compute.h
    style_dihedral.h
    style_dump.h
    style_fix.h
    style_improper.h
    style_integrate.h
    style_kspace.h
    style_minimize.h
    style_nbin.h
    style_npair.h
    style_nstencil.h
    style_ntopo.h
    style_pair.h
    style_reader.h
    style_region.h)
  if(EXISTS ${LAMMPS_SOURCE_DIR}/${STYLE_FILE})
    message(FATAL_ERROR "There is a ${STYLE_FILE} in ${LAMMPS_SOURCE_DIR}, please clean up the source directory first")
  endif()
endforeach()

enable_language(CXX)
######################################################################
# Compiler tests
# These need to be done early (before any further tests).
######################################################################
include(CheckCCompilerFlag)

######################################################################
# User input options
######################################################################
option(BUILD_SHARED_LIBS "Build shared libs" OFF)
option(INSTALL_LIB "Install lammps library and header" ON)

include(GNUInstallDirs)

# Libraries the lammps target is linked against; the sections below
# append to this list.
set(LAMMPS_LINK_LIBS)
# MPI support: either use a real MPI installation or fall back to the
# serial stub implementation shipped in src/STUBS.
option(ENABLE_MPI "Build MPI version" OFF)
if(ENABLE_MPI)
  find_package(MPI REQUIRED)
  # Use the C++ include path so that the headers match the C++ MPI
  # libraries appended to the link line below.
  include_directories(${MPI_CXX_INCLUDE_PATH})
  list(APPEND LAMMPS_LINK_LIBS ${MPI_CXX_LIBRARIES})
  option(LAMMPS_LONGLONG_TO_LONG "Workaround if your system or MPI version does not recognize 'long long' data types" OFF)
  if(LAMMPS_LONGLONG_TO_LONG)
    add_definitions(-DLAMMPS_LONGLONG_TO_LONG)
  endif()
else()
  # No MPI requested: compile the stub mpi.c into the library and put
  # the STUBS directory on the include path.
  file(GLOB MPI_SOURCES ${LAMMPS_SOURCE_DIR}/STUBS/mpi.c)
  list(APPEND LIB_SOURCES ${MPI_SOURCES})
  include_directories(${LAMMPS_SOURCE_DIR}/STUBS)
endif()
# Integer size configuration; the chosen value is passed to the
# compiler verbatim as a -D definition.
set(LAMMPS_SIZE_LIMIT "LAMMPS_SMALLBIG" CACHE STRING "Lammps size limit")
set_property(CACHE LAMMPS_SIZE_LIMIT PROPERTY STRINGS
  LAMMPS_SMALLBIG
  LAMMPS_BIGBIG
  LAMMPS_SMALLSMALL)
add_definitions(-D${LAMMPS_SIZE_LIMIT})

# Value handed to the sources as -DLAMMPS_MEMALIGN=<value>.
set(LAMMPS_MEMALIGN "64" CACHE STRING "enables the use of the posix_memalign() call instead of malloc() when large chunks or memory are allocated by LAMMPS")
add_definitions(-DLAMMPS_MEMALIGN=${LAMMPS_MEMALIGN})

option(LAMMPS_EXCEPTIONS "enable the use of C++ exceptions for error messages (useful for library interface)" OFF)
if(LAMMPS_EXCEPTIONS)
  add_definitions(-DLAMMPS_EXCEPTIONS)
endif()

option(CMAKE_VERBOSE_MAKEFILE "Verbose makefile" OFF)

# Testing is opt-in.
option(ENABLE_TESTING "Enable testing" OFF)
if(ENABLE_TESTING)
  enable_testing()
endif()
set(DEFAULT_PACKAGES ASPHERE BODY CLASS2 COLLOID COMPRESS CORESHELL DIPOLE GRANULAR
KSPACE MANYBODY MC MEAM MISC MOLECULE PERI QEQ
REAX REPLICA RIGID SHOCK SNAP SRD)
set(OTHER_PACKAGES KIM PYTHON MSCG MPIIO VORONOI POEMS
USER-ATC USER-AWPMD USER-CGDNA
USER-CGSDK USER-COLVARS USER-DIFFRACTION USER-DPD USER-DRUDE USER-EFF
USER-FEP USER-H5MD USER-LB USER-MANIFOLD USER-MEAMC USER-MGPT USER-MISC
USER-MOLFILE USER-NETCDF USER-PHONON USER-QTB USER-REAXC USER-SMD
USER-SMTBQ USER-SPH USER-TALLY USER-VTK USER-QUIP USER-QMMM)
set(ACCEL_PACKAGES USER-OMP KOKKOS OPT USER-INTEL GPU)
foreach(PKG ${DEFAULT_PACKAGES})
option(ENABLE_${PKG} "Build ${PKG} Package" ${ENABLE_ALL})
endforeach()
foreach(PKG ${ACCEL_PACKAGES} ${OTHER_PACKAGES})
option(ENABLE_${PKG} "Build ${PKG} Package" OFF)
endforeach()
# pkg_depends(PKG1 PKG2)
# Abort configuration when ENABLE_<PKG1> is set but ENABLE_<PKG2> is not.
macro(pkg_depends PKG1 PKG2)
  if(ENABLE_${PKG1} AND NOT ENABLE_${PKG2})
    message(FATAL_ERROR "${PKG1} package needs LAMMPS to be build with ${PKG2}")
  endif()
endmacro()
# Dependencies between ENABLE_* switches.
pkg_depends(MPIIO       MPI)
pkg_depends(QEQ         MANYBODY)
pkg_depends(USER-ATC    MANYBODY)
pkg_depends(USER-H5MD   MPI)
pkg_depends(USER-LB     MPI)
pkg_depends(USER-MISC   MANYBODY)
pkg_depends(USER-PHONON KSPACE)

# BODY and POEMS are mutually exclusive.
if(ENABLE_BODY AND ENABLE_POEMS)
  message(FATAL_ERROR "BODY and POEMS cannot be enabled at the same time")
endif()
######################################################
# Packages with special compiler needs or external libs
######################################################

# These packages need a Fortran compiler.
if(ENABLE_REAX OR ENABLE_MEAM OR ENABLE_USER-QUIP OR ENABLE_USER-QMMM)
  enable_language(Fortran)
endif()

# Enforce C++11 without compiler extensions (e.g. gnu++11) for KOKKOS
# and MSCG; these three variables are all that is needed with
# CMake >= 3.1.
if(ENABLE_KOKKOS OR ENABLE_MSCG)
  set(CMAKE_CXX_STANDARD 11)
  set(CMAKE_CXX_STANDARD_REQUIRED ON)
  set(CMAKE_CXX_EXTENSIONS OFF)
endif()

# These packages require OpenMP; its flags are added to the global
# C and C++ compiler flags.
if(ENABLE_USER-OMP OR ENABLE_KOKKOS OR ENABLE_USER-INTEL)
  find_package(OpenMP REQUIRED)
  set(CMAKE_C_FLAGS   "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()
# KSPACE: choose the FFT backend and the pack/unpack variant.
if(ENABLE_KSPACE)
  set(FFT "KISSFFT" CACHE STRING "FFT library for KSPACE package")
  set_property(CACHE FFT PROPERTY STRINGS KISSFFT FFTW3 MKL FFTW2)
  # Any choice other than KISSFFT is located through a find module
  # named after the selected value.
  if(NOT FFT STREQUAL "KISSFFT")
    find_package(${FFT} REQUIRED)
    add_definitions(-DFFT_${FFT})
    include_directories(${${FFT}_INCLUDE_DIRS})
    list(APPEND LAMMPS_LINK_LIBS ${${FFT}_LIBRARIES})
  endif()

  set(PACK_OPTIMIZATION "PACK_ARRAY" CACHE STRING "Optimization for FFT")
  set_property(CACHE PACK_OPTIMIZATION PROPERTY STRINGS
    PACK_ARRAY
    PACK_POINTER
    PACK_MEMCPY)
  # PACK_ARRAY gets no definition; the other values are passed as -D.
  if(NOT PACK_OPTIMIZATION STREQUAL "PACK_ARRAY")
    add_definitions(-D${PACK_OPTIMIZATION})
  endif()
endif()
# MISC: optional XDR compatibility layer, offered only when the
# package is enabled.
if(ENABLE_MISC)
  option(LAMMPS_XDR "include XDR compatibility files for doing particle dumps in XTC format" OFF)
  if(LAMMPS_XDR)
    add_definitions(-DLAMMPS_XDR)
  endif()
endif()
# Packages that need LAPACK: use a LAPACK found on the system, otherwise
# compile the Fortran sources bundled in lib/linalg into the library.
if(ENABLE_MSCG OR ENABLE_USER-ATC OR ENABLE_USER-AWPMD OR ENABLE_USER-QUIP)
  find_package(LAPACK)
  if(NOT LAPACK_FOUND)
    enable_language(Fortran)
    file(GLOB LAPACK_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/linalg/*.f)
    list(APPEND LIB_SOURCES ${LAPACK_SOURCES})
  else()
    list(APPEND LAMMPS_LINK_LIBS ${LAPACK_LIBRARIES})
  endif()
endif()
# PYTHON: link against libpython and install the lammps.py module.
if(ENABLE_PYTHON)
  find_package(PythonInterp REQUIRED)
  find_package(PythonLibs REQUIRED)
  add_definitions(-DLMP_PYTHON)
  include_directories(${PYTHON_INCLUDE_DIR})
  list(APPEND LAMMPS_LINK_LIBS ${PYTHON_LIBRARY})

  # The user may preset PYTHON_INSTDIR; otherwise ask the interpreter
  # for its library directory relative to CMAKE_INSTALL_PREFIX.
  if(NOT PYTHON_INSTDIR)
    execute_process(
      COMMAND ${PYTHON_EXECUTABLE}
        -c "import distutils.sysconfig as cg; print(cg.get_python_lib(1,0,prefix='${CMAKE_INSTALL_PREFIX}'))"
      OUTPUT_VARIABLE PYTHON_INSTDIR
      OUTPUT_STRIP_TRAILING_WHITESPACE)
  endif()
  install(FILES ${CMAKE_SOURCE_DIR}/../python/lammps.py DESTINATION ${PYTHON_INSTDIR})

  # This package is only accepted together with a shared library build.
  if(NOT BUILD_SHARED_LIBS)
    message(FATAL_ERROR "Python package need lammps to be build shared, use -DBUILD_SHARED_LIBS=ON")
  endif()
endif()
# Optional features that are switched on automatically when the
# corresponding library or program is found.

# Provides find_package_handle_standard_args(), used below for gzip
# and ffmpeg. Include it explicitly instead of relying on one of the
# find modules to have loaded it as a side effect.
include(FindPackageHandleStandardArgs)

# JPEG support
find_package(JPEG)
if(JPEG_FOUND)
  add_definitions(-DLAMMPS_JPEG)
  include_directories(${JPEG_INCLUDE_DIR})
  list(APPEND LAMMPS_LINK_LIBS ${JPEG_LIBRARIES})
endif()

# PNG support is enabled only when both libpng and zlib are found
find_package(PNG)
find_package(ZLIB)
if(PNG_FOUND AND ZLIB_FOUND)
  include_directories(${PNG_INCLUDE_DIRS} ${ZLIB_INCLUDE_DIRS})
  list(APPEND LAMMPS_LINK_LIBS ${PNG_LIBRARIES} ${ZLIB_LIBRARIES})
  add_definitions(-DLAMMPS_PNG)
endif()

# gzip executable
find_program(GZIP_EXECUTABLE gzip)
find_package_handle_standard_args(GZIP REQUIRED_VARS GZIP_EXECUTABLE)
if(GZIP_FOUND)
  add_definitions(-DLAMMPS_GZIP)
endif()

# ffmpeg executable
find_program(FFMPEG_EXECUTABLE ffmpeg)
find_package_handle_standard_args(FFMPEG REQUIRED_VARS FFMPEG_EXECUTABLE)
if(FFMPEG_FOUND)
  add_definitions(-DLAMMPS_FFMPEG)
endif()
# VORONOI: requires an external VORO package.
if(ENABLE_VORONOI)
  find_package(VORO REQUIRED) #some distros
  include_directories(${VORO_INCLUDE_DIRS})
  list(APPEND LAMMPS_LINK_LIBS ${VORO_LIBRARIES})
endif()

# USER-MOLFILE: links against the platform's dynamic loading library.
if(ENABLE_USER-MOLFILE)
  list(APPEND LAMMPS_LINK_LIBS ${CMAKE_DL_LIBS})
endif()
# USER-NETCDF: requires an external NetCDF installation.
if(ENABLE_USER-NETCDF)
  find_package(NetCDF REQUIRED)
  # Dereference the variable; the bare name would add a literal
  # "NETCDF_INCLUDE_DIR" directory to the include path.
  include_directories(${NETCDF_INCLUDE_DIR})
  list(APPEND LAMMPS_LINK_LIBS ${NETCDF_LIBRARY})
  add_definitions(-DLMP_HAS_NETCDF -DNC_64BIT_DATA=0x0020)
endif()
# USER-SMD: only the Eigen3 include directory is added.
if(ENABLE_USER-SMD)
  find_package(Eigen3 REQUIRED)
  include_directories(${EIGEN3_INCLUDE_DIR})
endif()

# USER-QUIP: external QUIP library plus the Fortran implicit link
# libraries.
if(ENABLE_USER-QUIP)
  find_package(QUIP REQUIRED)
  list(APPEND LAMMPS_LINK_LIBS
    ${QUIP_LIBRARIES}
    ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
endif()

# USER-QMMM: external QE package plus the Fortran implicit link
# libraries.
if(ENABLE_USER-QMMM)
  find_package(QE REQUIRED)
  include_directories(${QE_INCLUDE_DIRS})
  list(APPEND LAMMPS_LINK_LIBS
    ${QE_LIBRARIES}
    ${CMAKE_Fortran_IMPLICIT_LINK_LIBRARIES})
endif()

# USER-AWPMD: extra include directories inside the bundled library.
if(ENABLE_USER-AWPMD)
  include_directories(
    ${LAMMPS_LIB_SOURCE_DIR}/awpmd/systems/interact
    ${LAMMPS_LIB_SOURCE_DIR}/awpmd/ivutils/include)
endif()

# USER-H5MD: external HDF5 plus the bundled h5md headers.
if(ENABLE_USER-H5MD)
  find_package(HDF5 REQUIRED)
  list(APPEND LAMMPS_LINK_LIBS ${HDF5_LIBRARIES})
  include_directories(
    ${HDF5_INCLUDE_DIRS}
    ${LAMMPS_LIB_SOURCE_DIR}/h5md/include)
endif()

# USER-VTK: uses the VTK package configuration (NO_MODULE) and its
# use-file.
if(ENABLE_USER-VTK)
  find_package(VTK REQUIRED NO_MODULE)
  include(${VTK_USE_FILE})
  add_definitions(-DLAMMPS_VTK)
  list(APPEND LAMMPS_LINK_LIBS ${VTK_LIBRARIES})
endif()

# KIM: external KIM library.
if(ENABLE_KIM)
  find_package(KIM REQUIRED)
  list(APPEND LAMMPS_LINK_LIBS ${KIM_LIBRARIES})
  include_directories(${KIM_INCLUDE_DIRS})
endif()
# MSCG: download and unpack the MSCG-release sources into the build
# tree, then compile them directly into the lammps library.
if(ENABLE_MSCG)
  find_package(GSL REQUIRED)
  set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/mscg)
  set(MSCG_TARBALL ${LAMMPS_LIB_MSCG_BIN_DIR}/MS-CG-master.zip)
  # From here on LAMMPS_LIB_MSCG_BIN_DIR points at the unpacked sources.
  set(LAMMPS_LIB_MSCG_BIN_DIR ${LAMMPS_LIB_MSCG_BIN_DIR}/MSCG-release-master/src)
  if(NOT EXISTS ${LAMMPS_LIB_MSCG_BIN_DIR})
    if(NOT EXISTS ${MSCG_TARBALL})
      message(STATUS "Downloading ${MSCG_TARBALL}")
      # EXPECTED_MD5 cannot be used because the archive tracks master.
      file(DOWNLOAD
        https://github.com/uchicago-voth/MSCG-release/archive/master.zip
        ${MSCG_TARBALL} SHOW_PROGRESS STATUS MSCG_DOWNLOAD_STATUS)
      # STATUS is a two-element list: numeric code (0 means success)
      # and a message. Without this check a failed download would leave
      # a broken archive behind that is never fetched again.
      list(GET MSCG_DOWNLOAD_STATUS 0 MSCG_DOWNLOAD_CODE)
      if(NOT MSCG_DOWNLOAD_CODE EQUAL 0)
        file(REMOVE ${MSCG_TARBALL})
        message(FATAL_ERROR "Downloading ${MSCG_TARBALL} failed: ${MSCG_DOWNLOAD_STATUS}")
      endif()
    endif()
    message(STATUS "Unpacking ${MSCG_TARBALL}")
    execute_process(COMMAND ${CMAKE_COMMAND} -E tar xvf ${MSCG_TARBALL}
      WORKING_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/mscg)
  endif()
  file(GLOB MSCG_SOURCES ${LAMMPS_LIB_MSCG_BIN_DIR}/*.cpp)
  list(APPEND LIB_SOURCES ${MSCG_SOURCES})
  # These definitions are attached to the MSCG sources only.
  foreach(MSCG_SOURCE ${MSCG_SOURCES})
    set_property(SOURCE ${MSCG_SOURCE} APPEND PROPERTY COMPILE_DEFINITIONS
      DIMENSION=3 _exclude_gromacs=1)
  endforeach()
  include_directories(${LAMMPS_LIB_MSCG_BIN_DIR} ${GSL_INCLUDE_DIRS})
  list(APPEND LAMMPS_LINK_LIBS ${GSL_LIBRARIES})
endif()
########################################################################
# Basic system tests (standard libraries, headers, functions, types)
########################################################################
include(CheckIncludeFile)
include(CheckLibraryExists)

# Headers that must be present.
foreach(HEADER math.h)
  check_include_file(${HEADER} FOUND_${HEADER})
  if(NOT FOUND_${HEADER})
    message(FATAL_ERROR "Could not find needed header - ${HEADER}")
  endif()
endforeach()

# Name of the math library (user-overridable, hidden as advanced).
set(MATH_LIBRARIES "m" CACHE STRING "math library")
mark_as_advanced(MATH_LIBRARIES)

# Functions the math library must provide.
foreach(FUNC sin cos)
  check_library_exists(${MATH_LIBRARIES} ${FUNC} "" FOUND_${FUNC}_${MATH_LIBRARIES})
  if(NOT FOUND_${FUNC}_${MATH_LIBRARIES})
    message(FATAL_ERROR "Could not find needed math function - ${FUNC}")
  endif()
endforeach()

list(APPEND LAMMPS_LINK_LIBS ${MATH_LIBRARIES})
######################################
# Generate basic style files
######################################
include(StyleHeaderUtils)
RegisterStyles(${LAMMPS_SOURCE_DIR})

##############################################
# Add sources of enabled packages
##############################################
foreach(PKG ${DEFAULT_PACKAGES} ${OTHER_PACKAGES})
  if(ENABLE_${PKG})
    set(${PKG}_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/${PKG})

    # Detect the styles in the package and add them to the global list.
    RegisterStyles(${${PKG}_SOURCES_DIR})

    # All .cpp files of the package are compiled into the library.
    file(GLOB ${PKG}_SOURCES ${${PKG}_SOURCES_DIR}/*.cpp)
    list(APPEND LIB_SOURCES ${${PKG}_SOURCES})
    include_directories(${${PKG}_SOURCES_DIR})
  endif()
endforeach()
##############################################
# Add lib sources of (simple) enabled packages
##############################################
foreach(SIMPLE_LIB
    REAX MEAM POEMS
    USER-ATC USER-AWPMD USER-COLVARS USER-H5MD USER-MOLFILE USER-QMMM)
  if(ENABLE_${SIMPLE_LIB})
    # Library directory name: package name without the "USER-" prefix,
    # in lower case (e.g. USER-ATC -> atc).
    string(REGEX REPLACE "^USER-" "" SIMPLE_LIB "${SIMPLE_LIB}")
    string(TOLOWER "${SIMPLE_LIB}" INC_DIR)

    # Collect Fortran, C and C++ sources recursively.
    file(GLOB_RECURSE ${SIMPLE_LIB}_SOURCES
      ${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR}/*.F
      ${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR}/*.c
      ${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR}/*.cpp)
    list(APPEND LIB_SOURCES ${${SIMPLE_LIB}_SOURCES})
    include_directories(${LAMMPS_LIB_SOURCE_DIR}/${INC_DIR})
  endif()
endforeach()
######################################################################
# Packages which selectively include variants based on enabled styles
# e.g. accelerator packages
######################################################################
if(ENABLE_USER-OMP)
  set(USER-OMP_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-OMP)

  # Sources listed explicitly; they seed the OMP_SOURCES global property.
  set(USER-OMP_SOURCES
    ${USER-OMP_SOURCES_DIR}/thr_data.cpp
    ${USER-OMP_SOURCES_DIR}/thr_omp.cpp
    ${USER-OMP_SOURCES_DIR}/fix_nh_omp.cpp
    ${USER-OMP_SOURCES_DIR}/fix_nh_sphere_omp.cpp)
  set_property(GLOBAL PROPERTY "OMP_SOURCES" "${USER-OMP_SOURCES}")

  # Detect the styles that have a USER-OMP version, then read the
  # global property back into USER-OMP_SOURCES.
  RegisterStylesExt(${USER-OMP_SOURCES_DIR} omp OMP_SOURCES)
  get_property(USER-OMP_SOURCES GLOBAL PROPERTY OMP_SOURCES)

  list(APPEND LIB_SOURCES ${USER-OMP_SOURCES})
  include_directories(${USER-OMP_SOURCES_DIR})
endif()
# KOKKOS: build the bundled Kokkos library as a subdirectory and add
# the KOKKOS package sources.
if(ENABLE_KOKKOS)
  set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)
  set(LAMMPS_LIB_KOKKOS_BIN_DIR ${LAMMPS_LIB_BINARY_DIR}/kokkos)
  add_definitions(-DLMP_KOKKOS)
  add_subdirectory(${LAMMPS_LIB_KOKKOS_SRC_DIR} ${LAMMPS_LIB_KOKKOS_BIN_DIR})

  set(Kokkos_INCLUDE_DIRS
    ${LAMMPS_LIB_KOKKOS_SRC_DIR}/core/src
    ${LAMMPS_LIB_KOKKOS_SRC_DIR}/containers/src
    ${LAMMPS_LIB_KOKKOS_SRC_DIR}/algorithms/src
    ${LAMMPS_LIB_KOKKOS_BIN_DIR})
  include_directories(${Kokkos_INCLUDE_DIRS})
  list(APPEND LAMMPS_LINK_LIBS kokkos)

  # Sources listed explicitly; they seed the KOKKOS_PKG_SOURCES global
  # property.
  set(KOKKOS_PKG_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/KOKKOS)
  set(KOKKOS_PKG_SOURCES
    ${KOKKOS_PKG_SOURCES_DIR}/kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/atom_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/atom_vec_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/comm_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/comm_tiled_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/neighbor_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/neigh_list_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/neigh_bond_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/fix_nh_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/domain_kokkos.cpp
    ${KOKKOS_PKG_SOURCES_DIR}/modify_kokkos.cpp)
  set_property(GLOBAL PROPERTY "KOKKOS_PKG_SOURCES" "${KOKKOS_PKG_SOURCES}")

  # Detect the styles that have a KOKKOS version, then read the global
  # property back into KOKKOS_PKG_SOURCES.
  RegisterStylesExt(${KOKKOS_PKG_SOURCES_DIR} kokkos KOKKOS_PKG_SOURCES)
  get_property(KOKKOS_PKG_SOURCES GLOBAL PROPERTY KOKKOS_PKG_SOURCES)

  list(APPEND LIB_SOURCES ${KOKKOS_PKG_SOURCES})
  include_directories(${KOKKOS_PKG_SOURCES_DIR})
endif()
# OPT: no explicitly listed sources; the OPT_SOURCES global property
# starts out empty.
if(ENABLE_OPT)
  set(OPT_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/OPT)
  set(OPT_SOURCES)
  set_property(GLOBAL PROPERTY "OPT_SOURCES" "${OPT_SOURCES}")

  # Detect the styles that have an OPT version, then read the global
  # property back into OPT_SOURCES.
  RegisterStylesExt(${OPT_SOURCES_DIR} opt OPT_SOURCES)
  get_property(OPT_SOURCES GLOBAL PROPERTY OPT_SOURCES)

  list(APPEND LIB_SOURCES ${OPT_SOURCES})
  include_directories(${OPT_SOURCES_DIR})
endif()
# USER-INTEL: explicitly listed sources plus the detected styles.
if(ENABLE_USER-INTEL)
  set(USER-INTEL_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/USER-INTEL)
  set(USER-INTEL_SOURCES ${USER-INTEL_SOURCES_DIR}/intel_preprocess.h
    ${USER-INTEL_SOURCES_DIR}/intel_buffers.h
    ${USER-INTEL_SOURCES_DIR}/intel_buffers.cpp
    ${USER-INTEL_SOURCES_DIR}/math_extra_intel.h
    ${USER-INTEL_SOURCES_DIR}/nbin_intel.h
    ${USER-INTEL_SOURCES_DIR}/nbin_intel.cpp
    ${USER-INTEL_SOURCES_DIR}/npair_intel.h
    ${USER-INTEL_SOURCES_DIR}/npair_intel.cpp
    ${USER-INTEL_SOURCES_DIR}/intel_simd.h
    ${USER-INTEL_SOURCES_DIR}/intel_intrinsics.h)
  set_property(GLOBAL PROPERTY "USER-INTEL_SOURCES" "${USER-INTEL_SOURCES}")
  # Detect the styles that have a USER-INTEL version. The second
  # argument is the package's style suffix (omp, kokkos and opt in the
  # sibling calls of this file), so it must be "intel" here, not "opt".
  # NOTE(review): RegisterStylesExt is defined in StyleHeaderUtils --
  # confirm the meaning of the suffix argument there.
  RegisterStylesExt(${USER-INTEL_SOURCES_DIR} intel USER-INTEL_SOURCES)
  get_property(USER-INTEL_SOURCES GLOBAL PROPERTY USER-INTEL_SOURCES)
  list(APPEND LIB_SOURCES ${USER-INTEL_SOURCES})
  include_directories(${USER-INTEL_SOURCES_DIR})
endif()
# GPU: compile the CUDA kernels of lib/gpu, convert the compiled
# objects into *_cubin.h headers with bin2c, and add the GPU package
# and library sources to the lammps library.
if(ENABLE_GPU)
  find_package(CUDA REQUIRED)
  find_program(BIN2C bin2c)
  if(NOT BIN2C)
    message(FATAL_ERROR "Couldn't find bin2c, use -DBIN2C helping cmake to find it.")
  endif()
  include_directories(${CUDA_INCLUDE_DIRS})
  list(APPEND LAMMPS_LINK_LIBS ${CUDA_LIBRARIES} ${CUDA_CUDA_LIBRARY})
  set(GPU_PREC "SINGLE_DOUBLE" CACHE STRING "Lammps gpu precision size")
  set_property(CACHE GPU_PREC PROPERTY STRINGS SINGLE_DOUBLE SINGLE_SINGLE DOUBLE_DOUBLE)
  # The precision is passed as -D_<GPU_PREC>.
  add_definitions(-D_${GPU_PREC})
  add_definitions(-DNV_KERNEL -DUCL_CUDADR)
  option(CUDPP_OPT "Enable CUDPP_OPT" ON)
  set(GPU_SOURCES_DIR ${LAMMPS_SOURCE_DIR}/GPU)
  set(GPU_SOURCES ${GPU_SOURCES_DIR}/gpu_extra.h)
  set_property(GLOBAL PROPERTY "GPU_SOURCES" "${GPU_SOURCES}")
  # Detect the styles that have a GPU version. The second argument is
  # the package's style suffix (omp, kokkos and opt in the sibling
  # calls of this file), so it must be "gpu" here, not "opt".
  # NOTE(review): RegisterStylesExt is defined in StyleHeaderUtils --
  # confirm the meaning of the suffix argument there.
  RegisterStylesExt(${GPU_SOURCES_DIR} gpu GPU_SOURCES)
  get_property(GPU_SOURCES GLOBAL PROPERTY GPU_SOURCES)
  file(GLOB GPU_LIB_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cpp)
  file(GLOB GPU_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/*.cu ${CMAKE_SOURCE_DIR}/gpu/*.cu)
  # lal_pppm.cu is removed from the list of kernels compiled below.
  file(GLOB_RECURSE GPU_NOT_LIB_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/lal_pppm.cu)
  list(REMOVE_ITEM GPU_LIB_CU ${GPU_NOT_LIB_CU})
  include_directories(${GPU_SOURCES_DIR} ${LAMMPS_LIB_SOURCE_DIR}/gpu ${LAMMPS_LIB_BINARY_DIR}/gpu)
  if(CUDPP_OPT)
    include_directories(${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini)
    add_definitions(-DCUDPP_OPT)
    file(GLOB GPU_LIB_CUDPP_SOURCES ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/*.cpp)
    file(GLOB GPU_LIB_CUDPP_CU ${LAMMPS_LIB_SOURCE_DIR}/gpu/cudpp_mini/*.cu)
  endif()
  # -fPIC is passed to the host compiler only for shared library builds.
  cuda_compile(GPU_OBJS ${GPU_LIB_CU} ${GPU_LIB_CUDPP_CU} OPTIONS $<$<BOOL:${BUILD_SHARED_LIBS}>:-Xcompiler=-fPIC>)
  file(MAKE_DIRECTORY ${LAMMPS_LIB_BINARY_DIR}/gpu)
  # One <name>_cubin.h header per compiled CUDA object.
  foreach(CU_OBJ ${GPU_OBJS})
    get_filename_component(CU_NAME ${CU_OBJ} NAME_WE)
    # Strip everything up to and including "_lal_" from the object name.
    string(REGEX REPLACE "^.*_lal_" "" CU_NAME "${CU_NAME}")
    add_custom_command(OUTPUT ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
      COMMAND ${BIN2C} -c -n ${CU_NAME} ${CU_OBJ} > ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h
      DEPENDS ${CU_OBJ}
      COMMENT "Generating ${CU_NAME}_cubin.h")
    list(APPEND LIB_SOURCES ${LAMMPS_LIB_BINARY_DIR}/gpu/${CU_NAME}_cubin.h)
    if(${CU_NAME} STREQUAL "pppm_d") #pppm_d doesn't get linked into the lib
      set(CU_FORBIDDEN_OBJ "${CU_OBJ}")
    endif()
  endforeach()
  list(REMOVE_ITEM GPU_OBJS "${CU_FORBIDDEN_OBJ}")
  list(APPEND LIB_SOURCES ${GPU_SOURCES} ${GPU_LIB_SOURCES} ${GPU_LIB_CUDPP_SOURCES} ${GPU_OBJS})
  set_directory_properties(PROPERTIES ADDITIONAL_MAKE_CLEAN_FILES "${LAMMPS_LIB_BINARY_DIR}/gpu/*_cubin.h")
endif()
######################################################
# Generate style headers based on the global list of
# styles registered during package selection
######################################################
set(LAMMPS_STYLE_HEADERS_DIR ${CMAKE_CURRENT_BINARY_DIR}/styles)
GenerateStyleHeaders(${LAMMPS_STYLE_HEADERS_DIR})

include_directories(${LAMMPS_SOURCE_DIR})
include_directories(${LAMMPS_STYLE_HEADERS_DIR})

###########################################
# Actually add executable and lib to build
###########################################
add_library(lammps ${LIB_SOURCES})
target_link_libraries(lammps ${LAMMPS_LINK_LIBS})
set_target_properties(lammps PROPERTIES SOVERSION ${SOVERSION})

# The library and its header are installed on request; a shared library
# build without installing the library is rejected.
if(INSTALL_LIB)
  install(TARGETS lammps
    LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
    ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR})
  install(FILES ${LAMMPS_SOURCE_DIR}/lammps.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR})
elseif(BUILD_SHARED_LIBS)
  message(FATAL_ERROR "Shared library has to be installed, use -DINSTALL_LIB=ON to install lammps with a library")
endif()

# The lmp executable is built from main.cpp and linked to the library.
add_executable(lmp ${LMP_SOURCES})
target_link_libraries(lmp lammps)
install(TARGETS lmp DESTINATION ${CMAKE_INSTALL_BINDIR})

# Single test: run the freshly built executable with -help.
if(ENABLE_TESTING)
  add_test(ShowHelp ${CMAKE_CURRENT_BINARY_DIR}/lmp -help)
endif()
##################################
# Print package summary
##################################
# One status line for every enabled package.
foreach(PKG ${DEFAULT_PACKAGES} ${OTHER_PACKAGES} ${ACCEL_PACKAGES})
  if(ENABLE_${PKG})
    message(STATUS "Building package: ${PKG}")
  endif()
endforeach()

View File

@ -0,0 +1,22 @@
# - Find fftw2
# Find the native FFTW2 headers and libraries.
#
# FFTW2_INCLUDE_DIRS - where to find fftw.h, etc.
# FFTW2_LIBRARIES - List of libraries when using fftw2.
# FFTW2_FOUND - True if fftw2 found.
#
# Search for the header (fftw.h) and the library (fftw).
find_path(FFTW2_INCLUDE_DIR fftw.h)
find_library(FFTW2_LIBRARY NAMES fftw)
# Publish the search results under the plural names documented above.
set(FFTW2_LIBRARIES ${FFTW2_LIBRARY})
set(FFTW2_INCLUDE_DIRS ${FFTW2_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set FFTW2_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(FFTW2 DEFAULT_MSG FFTW2_LIBRARY FFTW2_INCLUDE_DIR)
mark_as_advanced(FFTW2_INCLUDE_DIR FFTW2_LIBRARY )

View File

@ -0,0 +1,25 @@
# - Find fftw3
# Find the native FFTW3 headers and libraries.
#
# FFTW3_INCLUDE_DIRS - where to find fftw3.h, etc.
# FFTW3_LIBRARIES - List of libraries when using fftw3.
# FFTW3_FOUND - True if fftw3 found.
#
# Query pkg-config for fftw3; its include and library directories are only
# used as search hints for the find_path/find_library calls below.
find_package(PkgConfig)
pkg_check_modules(PC_FFTW3 fftw3)
find_path(FFTW3_INCLUDE_DIR fftw3.h HINTS ${PC_FFTW3_INCLUDE_DIRS})
find_library(FFTW3_LIBRARY NAMES fftw3 HINTS ${PC_FFTW3_LIBRARY_DIRS})
# Publish the search results under the plural names documented above.
set(FFTW3_LIBRARIES ${FFTW3_LIBRARY})
set(FFTW3_INCLUDE_DIRS ${FFTW3_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set FFTW3_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(FFTW3 DEFAULT_MSG FFTW3_LIBRARY FFTW3_INCLUDE_DIR)
mark_as_advanced(FFTW3_INCLUDE_DIR FFTW3_LIBRARY )

View File

@ -0,0 +1,22 @@
# - Find kim
# Find the native KIM headers and libraries.
#
# KIM_INCLUDE_DIRS - where to find KIM_API.h, etc.
# KIM_LIBRARIES - List of libraries when using kim.
# KIM_FOUND - True if kim found.
#
# The header is also searched for in a "kim-api-v1" subdirectory of each
# include location; the library is looked up by the name kim-api-v1.
find_path(KIM_INCLUDE_DIR KIM_API.h PATH_SUFFIXES kim-api-v1)
find_library(KIM_LIBRARY NAMES kim-api-v1)
# Publish the search results under the plural names documented above.
set(KIM_LIBRARIES ${KIM_LIBRARY})
set(KIM_INCLUDE_DIRS ${KIM_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set KIM_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(KIM DEFAULT_MSG KIM_LIBRARY KIM_INCLUDE_DIR)
mark_as_advanced(KIM_INCLUDE_DIR KIM_LIBRARY )

View File

@ -0,0 +1,22 @@
# - Find mkl
# Find the native MKL headers and libraries.
#
# MKL_INCLUDE_DIRS - where to find mkl_dfti.h, etc.
# MKL_LIBRARIES - List of libraries when using mkl.
# MKL_FOUND - True if mkl found.
#
# The MKLROOT environment variable, when set, supplies search hints for the
# header (include/) and for the mkl_rt library (lib/ and lib/intel64/).
find_path(MKL_INCLUDE_DIR mkl_dfti.h HINTS $ENV{MKLROOT}/include)
find_library(MKL_LIBRARY NAMES mkl_rt HINTS $ENV{MKLROOT}/lib $ENV{MKLROOT}/lib/intel64)
# Publish the search results under the plural names documented above.
set(MKL_LIBRARIES ${MKL_LIBRARY})
set(MKL_INCLUDE_DIRS ${MKL_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set MKL_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(MKL DEFAULT_MSG MKL_LIBRARY MKL_INCLUDE_DIR)
mark_as_advanced(MKL_INCLUDE_DIR MKL_LIBRARY )

View File

@ -0,0 +1,118 @@
# - Find NetCDF
# Find the native NetCDF includes and library
#
# NETCDF_INCLUDE_DIR - user modifiable choice of where netcdf headers are
# NETCDF_LIBRARY - user modifiable choice of where netcdf libraries are
#
# Your package can require certain interfaces to be FOUND by setting these
#
# NETCDF_CXX - require the C++ interface and link the C++ library
# NETCDF_F77 - require the F77 interface and link the fortran library
# NETCDF_F90 - require the F90 interface and link the fortran library
#
# Or equivalently by calling FindNetCDF with a COMPONENTS argument containing one or
# more of "CXX;F77;F90".
#
# When interfaces are requested the user has access to interface specific hints:
#
# NETCDF_${LANG}_INCLUDE_DIR - where to search for interface header files
# NETCDF_${LANG}_LIBRARY - where to search for interface libraries
#
# This module returns these variables for the rest of the project to use.
#
# NETCDF_FOUND - True if NetCDF found including required interfaces (see below)
# NETCDF_LIBRARIES - All netcdf related libraries.
# NETCDF_INCLUDE_DIRS - All directories to include.
# NETCDF_HAS_INTERFACES - Whether requested interfaces were found or not.
# NETCDF_${LANG}_INCLUDE_DIRS/NETCDF_${LANG}_LIBRARIES - C/C++/F77/F90 only interface
#
# Normal usage would be:
# set (NETCDF_F90 "YES")
# find_package (NetCDF REQUIRED)
# target_link_libraries (uses_everything ${NETCDF_LIBRARIES})
# target_link_libraries (only_uses_f90 ${NETCDF_F90_LIBRARIES})
#search starting from user editable cache var
if (NETCDF_INCLUDE_DIR AND NETCDF_LIBRARY)
# Already in cache, be silent
set (NETCDF_FIND_QUIETLY TRUE)
endif ()
# USE_DEFAULT_PATHS is passed to the interface searches in
# NetCDF_check_interface: they run with NO_DEFAULT_PATH unless
# NETCDF_USE_DEFAULT_PATHS is set.
set(USE_DEFAULT_PATHS "NO_DEFAULT_PATH")
if(NETCDF_USE_DEFAULT_PATHS)
set(USE_DEFAULT_PATHS "")
endif()
# Locate the C header and library; NETCDF_DIR, when set, provides the hints.
find_path (NETCDF_INCLUDE_DIR netcdf.h
HINTS "${NETCDF_DIR}/include")
mark_as_advanced (NETCDF_INCLUDE_DIR)
set (NETCDF_C_INCLUDE_DIRS ${NETCDF_INCLUDE_DIR})
find_library (NETCDF_LIBRARY NAMES netcdf
HINTS "${NETCDF_DIR}/lib")
mark_as_advanced (NETCDF_LIBRARY)
set (NETCDF_C_LIBRARIES ${NETCDF_LIBRARY})
#start finding requested language components
set (NetCDF_libs "")
set (NetCDF_includes "${NETCDF_INCLUDE_DIR}")
get_filename_component (NetCDF_lib_dirs "${NETCDF_LIBRARY}" PATH)
set (NETCDF_HAS_INTERFACES "YES") # will be set to NO if we're missing any interfaces
# NetCDF_check_interface(lang header libs)
# Does nothing unless NETCDF_<lang> is set. Otherwise it searches for <header>
# and <libs>, hinting at the C include/library directories found above and at
# NETCDF_<lang>_ROOT, and exports NETCDF_<lang>_LIBRARIES and
# NETCDF_<lang>_INCLUDE_DIRS. When both are found they are appended to
# NetCDF_libs / NetCDF_includes; otherwise NETCDF_HAS_INTERFACES becomes "NO".
# This is a macro, so all of these variables are set in the including scope.
macro (NetCDF_check_interface lang header libs)
if (NETCDF_${lang})
#search starting from user modifiable cache var
find_path (NETCDF_${lang}_INCLUDE_DIR NAMES ${header}
HINTS "${NETCDF_INCLUDE_DIR}"
HINTS "${NETCDF_${lang}_ROOT}/include"
${USE_DEFAULT_PATHS})
find_library (NETCDF_${lang}_LIBRARY NAMES ${libs}
HINTS "${NetCDF_lib_dirs}"
HINTS "${NETCDF_${lang}_ROOT}/lib"
${USE_DEFAULT_PATHS})
mark_as_advanced (NETCDF_${lang}_INCLUDE_DIR NETCDF_${lang}_LIBRARY)
#export to internal variables that the rest of the project can use directly
set (NETCDF_${lang}_LIBRARIES ${NETCDF_${lang}_LIBRARY})
set (NETCDF_${lang}_INCLUDE_DIRS ${NETCDF_${lang}_INCLUDE_DIR})
if (NETCDF_${lang}_INCLUDE_DIR AND NETCDF_${lang}_LIBRARY)
list (APPEND NetCDF_libs ${NETCDF_${lang}_LIBRARY})
list (APPEND NetCDF_includes ${NETCDF_${lang}_INCLUDE_DIR})
else ()
set (NETCDF_HAS_INTERFACES "NO")
message (STATUS "Failed to find NetCDF interface for ${lang}")
endif ()
endif ()
endmacro ()
# Each component named in find_package(NetCDF COMPONENTS ...) switches on the
# matching NETCDF_<lang> request flag.
list (FIND NetCDF_FIND_COMPONENTS "CXX" _nextcomp)
if (_nextcomp GREATER -1)
set (NETCDF_CXX 1)
endif ()
list (FIND NetCDF_FIND_COMPONENTS "F77" _nextcomp)
if (_nextcomp GREATER -1)
set (NETCDF_F77 1)
endif ()
list (FIND NetCDF_FIND_COMPONENTS "F90" _nextcomp)
if (_nextcomp GREATER -1)
set (NETCDF_F90 1)
endif ()
# Check every interface; the macro skips the ones that were not requested.
NetCDF_check_interface (CXX netcdfcpp.h netcdf_c++)
NetCDF_check_interface (F77 netcdf.inc netcdff)
NetCDF_check_interface (F90 netcdf.mod netcdff)
#export accumulated results to internal variables that the rest of the project can depend on
# The C library is appended last, after any interface libraries.
list (APPEND NetCDF_libs "${NETCDF_C_LIBRARIES}")
set (NETCDF_LIBRARIES ${NetCDF_libs})
set (NETCDF_INCLUDE_DIRS ${NetCDF_includes})
# handle the QUIETLY and REQUIRED arguments and set NETCDF_FOUND to TRUE if
# all listed variables are TRUE
include (FindPackageHandleStandardArgs)
find_package_handle_standard_args (NetCDF
DEFAULT_MSG NETCDF_LIBRARIES NETCDF_INCLUDE_DIRS NETCDF_HAS_INTERFACES)

View File

@ -0,0 +1,29 @@
# - Find quantum-espresso
# Find the native QE headers and libraries.
#
# QE_INCLUDE_DIRS - where to find libqecouple.h, etc.
# QE_LIBRARIES - List of libraries when using quantum-espresso.
# QE_FOUND - True if quantum-espresso found.
#
# The header is also searched for in a COUPLE/include subdirectory.
find_path(QE_INCLUDE_DIR libqecouple.h PATH_SUFFIXES COUPLE/include)
# Each of the seven libraries is looked up separately; all of them must be
# found for QE_FOUND to become true (see the argument list further down).
find_library(QECOUPLE_LIBRARY NAMES qecouple)
find_library(PW_LIBRARY NAMES pw)
find_library(QEMOD_LIBRARY NAMES qemod)
find_library(QEFFT_LIBRARY NAMES qefft)
find_library(QELA_LIBRARY NAMES qela)
find_library(CLIB_LIBRARY NAMES clib)
find_library(IOTK_LIBRARY NAMES iotk)
# Publish the search results under the plural names documented above.
set(QE_LIBRARIES ${QECOUPLE_LIBRARY} ${PW_LIBRARY} ${QEMOD_LIBRARY} ${QEFFT_LIBRARY} ${QELA_LIBRARY} ${CLIB_LIBRARY} ${IOTK_LIBRARY})
set(QE_INCLUDE_DIRS ${QE_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set QE_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(QE DEFAULT_MSG QECOUPLE_LIBRARY PW_LIBRARY QEMOD_LIBRARY QEFFT_LIBRARY QELA_LIBRARY CLIB_LIBRARY IOTK_LIBRARY QE_INCLUDE_DIR)
mark_as_advanced(QE_INCLUDE_DIR QECOUPLE_LIBRARY PW_LIBRARY QEMOD_LIBRARY QEFFT_LIBRARY QELA_LIBRARY CLIB_LIBRARY IOTK_LIBRARY)

View File

@ -0,0 +1,18 @@
# - Find quip
# Find the native QUIP libraries.
#
# QUIP_LIBRARIES - List of libraries when using quip.
# QUIP_FOUND - True if quip found.
#
# Only a library is searched for; this module locates no header.
find_library(QUIP_LIBRARY NAMES quip)
set(QUIP_LIBRARIES ${QUIP_LIBRARY})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set QUIP_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(QUIP DEFAULT_MSG QUIP_LIBRARY)
mark_as_advanced(QUIP_LIBRARY)

View File

@ -0,0 +1,22 @@
# - Find voro++
# Find the native VORO headers and libraries.
#
# VORO_INCLUDE_DIRS - where to find voro++.hh, etc.
# VORO_LIBRARIES - List of libraries when using voro++.
# VORO_FOUND - True if voro++ found.
#
# The header is also searched for in a "voro++" subdirectory of each include
# location.
find_path(VORO_INCLUDE_DIR voro++.hh PATH_SUFFIXES voro++)
find_library(VORO_LIBRARY NAMES voro++)
# Publish the search results under the plural names documented above.
set(VORO_LIBRARIES ${VORO_LIBRARY})
set(VORO_INCLUDE_DIRS ${VORO_INCLUDE_DIR})
include(FindPackageHandleStandardArgs)
# handle the QUIETLY and REQUIRED arguments and set VORO_FOUND to TRUE
# if all listed variables are TRUE
find_package_handle_standard_args(VORO DEFAULT_MSG VORO_LIBRARY VORO_INCLUDE_DIR)
mark_as_advanced(VORO_INCLUDE_DIR VORO_LIBRARY )

View File

@ -0,0 +1,132 @@
# FindStyleHeaders(path style_class file_pattern headers)
#
# Scans <path> for files named <file_pattern>*.h and appends every file that
# contains a line matching the regular expression <style_class> to the global
# property named <headers>.
function(FindStyleHeaders path style_class file_pattern headers)
  get_property(known_headers GLOBAL PROPERTY ${headers})
  file(GLOB candidates "${path}/${file_pattern}*.h")
  foreach(candidate ${candidates})
    # Read at most one matching line; the result is empty when nothing matches.
    file(STRINGS ${candidate} marker_line LIMIT_COUNT 1 REGEX ${style_class})
    if(marker_line)
      list(APPEND known_headers ${candidate})
    endif()
  endforeach()
  set_property(GLOBAL PROPERTY ${headers} "${known_headers}")
endfunction(FindStyleHeaders)
# FindStyleHeadersExt(path style_class extension headers sources)
#
# For each header already stored in the global property <headers>, checks
# whether <path> holds a variant named <basename>_<extension>.h containing a
# line that matches the regular expression <style_class>. Matching variants are
# appended to <headers>; if a <basename>_<extension>.cpp exists next to such a
# variant, it is appended to the global property <sources>.
function(FindStyleHeadersExt path style_class extension headers sources)
  get_filename_component(search_dir "${path}" ABSOLUTE)
  get_property(registered_headers GLOBAL PROPERTY ${headers})
  get_property(registered_sources GLOBAL PROPERTY ${sources})
  set(variant_headers)
  foreach(header ${registered_headers})
    get_filename_component(stem ${header} NAME_WE)
    set(variant_header "${search_dir}/${stem}_${extension}.h")
    set(variant_source "${search_dir}/${stem}_${extension}.cpp")
    if(EXISTS "${variant_header}")
      file(STRINGS ${variant_header} marker_line LIMIT_COUNT 1 REGEX ${style_class})
      if(marker_line)
        list(APPEND variant_headers ${variant_header})
        if(EXISTS "${variant_source}")
          list(APPEND registered_sources ${variant_source})
        endif()
      endif()
    endif()
  endforeach()
  # Variants are merged in only after the loop, so the loop never visits them.
  list(APPEND registered_headers ${variant_headers})
  set_property(GLOBAL PROPERTY ${headers} "${registered_headers}")
  set_property(GLOBAL PROPERTY ${sources} "${registered_sources}")
endfunction(FindStyleHeadersExt)
# CreateStyleHeader(path filename [header...])
#
# Writes <path>/<filename> containing one `#include "<name>"` line per extra
# argument, where <name> is the file name of that argument without its
# directory. With no extra arguments an empty file is written.
#
# The extra arguments are taken from ARGN rather than by indexing into ARGV,
# so no index arithmetic or empty-case guard is needed and the list stays
# aligned even if <path> or <filename> contains a list separator.
function(CreateStyleHeader path filename)
  set(contents "")
  foreach(header ${ARGN})
    get_filename_component(header_name "${header}" NAME)
    set(contents "${contents}#include \"${header_name}\"\n")
  endforeach()
  message(STATUS "Generating ${filename}...")
  # Write to a temporary file and copy only on change, so an unchanged header
  # is not rewritten.
  file(WRITE "${path}/${filename}.tmp" "${contents}" )
  execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different "${path}/${filename}.tmp" "${path}/${filename}")
endfunction(CreateStyleHeader)
# GenerateStyleHeader(path property style)
#
# Creates <path>/style_<style>.h from the header list stored in the global
# property named <property>.
function(GenerateStyleHeader path property style)
  get_property(style_headers GLOBAL PROPERTY ${property})
  set(header_name "style_${style}.h")
  CreateStyleHeader("${path}" "${header_name}" ${style_headers})
endfunction(GenerateStyleHeader)
# RegisterStyles(search_path)
#
# Registers the style headers found in <search_path>. There is one
# FindStyleHeaders call per style category; its arguments are the marker text
# searched for (used as a regular expression), the file name prefix, and the
# global property that collects the matching headers.
# COMMAND, INTEGRATE and KSPACE pass an empty prefix, so for those categories
# every *.h file in <search_path> is scanned for the marker.
# NOTE(review): the trailing comments presumably name the generated style
# header and the source file(s) that include it - confirm.
function(RegisterStyles search_path)
FindStyleHeaders(${search_path} ANGLE_CLASS angle_ ANGLE ) # angle ) # force
FindStyleHeaders(${search_path} ATOM_CLASS atom_vec_ ATOM_VEC ) # atom ) # atom atom_vec_hybrid
FindStyleHeaders(${search_path} BODY_CLASS body_ BODY ) # body ) # atom_vec_body
FindStyleHeaders(${search_path} BOND_CLASS bond_ BOND ) # bond ) # force
FindStyleHeaders(${search_path} COMMAND_CLASS "" COMMAND ) # command ) # input
FindStyleHeaders(${search_path} COMPUTE_CLASS compute_ COMPUTE ) # compute ) # modify
FindStyleHeaders(${search_path} DIHEDRAL_CLASS dihedral_ DIHEDRAL ) # dihedral ) # force
FindStyleHeaders(${search_path} DUMP_CLASS dump_ DUMP ) # dump ) # output write_dump
FindStyleHeaders(${search_path} FIX_CLASS fix_ FIX ) # fix ) # modify
FindStyleHeaders(${search_path} IMPROPER_CLASS improper_ IMPROPER ) # improper ) # force
FindStyleHeaders(${search_path} INTEGRATE_CLASS "" INTEGRATE ) # integrate ) # update
FindStyleHeaders(${search_path} KSPACE_CLASS "" KSPACE ) # kspace ) # force
FindStyleHeaders(${search_path} MINIMIZE_CLASS min_ MINIMIZE ) # minimize ) # update
FindStyleHeaders(${search_path} NBIN_CLASS nbin_ NBIN ) # nbin ) # neighbor
FindStyleHeaders(${search_path} NPAIR_CLASS npair_ NPAIR ) # npair ) # neighbor
FindStyleHeaders(${search_path} NSTENCIL_CLASS nstencil_ NSTENCIL ) # nstencil ) # neighbor
FindStyleHeaders(${search_path} NTOPO_CLASS ntopo_ NTOPO ) # ntopo ) # neighbor
FindStyleHeaders(${search_path} PAIR_CLASS pair_ PAIR ) # pair ) # force
FindStyleHeaders(${search_path} READER_CLASS reader_ READER ) # reader ) # read_dump
FindStyleHeaders(${search_path} REGION_CLASS region_ REGION ) # region ) # domain
endfunction(RegisterStyles)
# RegisterStylesExt(search_path extension sources)
#
# Runs FindStyleHeadersExt once per style category, in a fixed order, to pick
# up the <extension> variants located in <search_path>. Matching source files
# are collected in the global property named <sources>.
function(RegisterStylesExt search_path extension sources)
  # The loop runs over the header property names. The marker searched for is
  # "<name>_CLASS", except that the ATOM_VEC property pairs with ATOM_CLASS.
  foreach(header_property
      ANGLE ATOM_VEC BODY BOND COMMAND COMPUTE DIHEDRAL DUMP FIX IMPROPER
      INTEGRATE KSPACE MINIMIZE NBIN NPAIR NSTENCIL NTOPO PAIR READER REGION)
    string(REPLACE "_VEC" "" class_stem "${header_property}")
    FindStyleHeadersExt(${search_path} ${class_stem}_CLASS ${extension} ${header_property} ${sources})
  endforeach()
endfunction(RegisterStylesExt)
# GenerateStyleHeaders(output_path)
#
# Writes one style_<name>.h into <output_path> for every style category, in a
# fixed order, from the header lists stored in the global properties.
function(GenerateStyleHeaders output_path)
  # <name> is the lower-cased property name, except that the ATOM_VEC property
  # produces style_atom.h.
  foreach(header_property
      ANGLE ATOM_VEC BODY BOND COMMAND COMPUTE DIHEDRAL DUMP FIX IMPROPER
      INTEGRATE KSPACE MINIMIZE NBIN NPAIR NSTENCIL NTOPO PAIR READER REGION)
    string(REPLACE "_VEC" "" style_stem "${header_property}")
    string(TOLOWER "${style_stem}" style_name)
    GenerateStyleHeader(${output_path} ${header_property} ${style_name})
  endforeach()
endfunction(GenerateStyleHeaders)

19
cmake/README Normal file
View File

@ -0,0 +1,19 @@
cmake-buildsystem
-----------------
To use the cmake build system instead of the make-driven one, do:
```
cmake /path/to/lammps/source/cmake
```
(please note the cmake directory at the very end)
To enable a package, e.g. GPU, do
```
cmake /path/to/lammps/source/cmake -DENABLE_GPU=ON
```
cmake has many options; to get an overview, use the curses-based cmake interface, ccmake:
```
ccmake /path/to/lammps/source/cmake
```
(Don't forget to press "g" for generate once you are done with configuring)

4
cmake/gpu/lal_pppm_d.cu Normal file
View File

@ -0,0 +1,4 @@
// Sets grdtyp/grdtyp4 to double/double4, then pulls in the shared source
// lal_pppm.cu, which is compiled with those definitions.
#define grdtyp double
#define grdtyp4 double4
#include "lal_pppm.cu"

4
cmake/gpu/lal_pppm_f.cu Normal file
View File

@ -0,0 +1,4 @@
// Sets grdtyp/grdtyp4 to float/float4, then pulls in the shared source
// lal_pppm.cu, which is compiled with those definitions.
#define grdtyp float
#define grdtyp4 float4
#include "lal_pppm.cu"

BIN
doc/src/Eqs/fix_mvv_dpd.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

View File

@ -0,0 +1,21 @@
\documentclass[12pt]{article}
\begin{document}
$$
v(t+\frac{\Delta t}{2}) = v(t) + \frac{\Delta t}{2}\cdot a(t),
$$
$$
r(t+\Delta t) = r(t) + \Delta t\cdot v(t+\frac{\Delta t}{2}),
$$
$$
a(t+\Delta t) = \frac{1}{m}\cdot F\left[ r(t+\Delta t), v(t) +\lambda \cdot \Delta t\cdot a(t)\right],
$$
$$
v(t+\Delta t) = v(t+\frac{\Delta t}{2}) + \frac{\Delta t}{2}\cdot a(t+\Delta t)
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

View File

@ -0,0 +1,33 @@
\documentclass[12pt]{article}
\begin{document}
$$
\mathbf{F}_{ij}^{C} = \alpha_{ij}{\omega_{C}}(r_{ij})\mathbf{e}_{ij},
$$
$$
\mathbf{F}_{ij}^{D} = -\gamma {\omega_{D}}(r_{ij})(\mathbf{e}_{ij} \cdot \mathbf{v}_{ij})\mathbf{e}_{ij},
$$
$$
\mathbf{F}_{ij}^{R} = \sigma {\omega_{R}}(r_{ij}){\xi_{ij}}\Delta t^{-1/2} \mathbf{e}_{ij},
$$
$$
\omega_{C}(r) = 1 - r/r_c,
$$
$$
\alpha_{ij} = A\cdot k_B(T_i + T_j)/2,
$$
$$
\omega_{D}(r) = \omega^2_{R}(r) = (1-r/r_c)^s,
$$
$$
\sigma_{ij}^2 = 4\gamma k_B T_i T_j/(T_i + T_j),
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

View File

@ -0,0 +1,15 @@
\documentclass[12pt]{article}
\begin{document}
$$
\frac{\mathrm{d}^2 \mathbf{r}_i}{\mathrm{d} t^2}=
\frac{\mathrm{d} \mathbf{v}_i}{\mathrm{d} t}
=\mathbf{F}_{i}=\sum_{i\neq j}(\mathbf{F}_{ij}^{C}+\mathbf{F}_{ij}^{D}+\mathbf{F}_{ij}^{R}),
$$
$$
C_v\frac{\mathrm{d} T_i}{\mathrm{d} t}= q_{i} = \sum_{i\neq j}(q_{ij}^{C}+q_{ij}^{V}+q_{ij}^{R}),
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

View File

@ -0,0 +1,29 @@
\documentclass[12pt]{article}
\begin{document}
$$
q_i^C = \sum_{j \ne i} k_{ij} \omega_{CT}(r_{ij}) \left( \frac{1}{T_i} - \frac{1}{T_j} \right),
$$
$$
q_i^V = \frac{1}{2 C_v}\sum_{j \ne i}{ \left\{ \omega_D(r_{ij})\left[\gamma_{ij} \left( \mathbf{e}_{ij} \cdot \mathbf{v}_{ij} \right)^2 - \frac{\left( \sigma _{ij} \right)^2}{m}\right] - \sigma _{ij} \omega_R(r_{ij})\left( \mathbf{e}_{ij} \cdot \mathbf{v}_{ij} \right){\xi_{ij}} \right\} },
$$
$$
q_i^R = \sum_{j \ne i} \beta _{ij} \omega_{RT}(r_{ij}) d {t^{ - 1/2}} \xi_{ij}^e,
$$
$$
\omega_{CT}(r)=\omega_{RT}^2(r)=\left(1-r/r_{ct}\right)^{s_T},
$$
$$
k_{ij}=C_v^2\kappa(T_i + T_j)^2/4k_B,
$$
$$
\beta_{ij}^2=2k_Bk_{ij},
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 5.5 KiB

View File

@ -0,0 +1,9 @@
\documentclass[12pt]{article}
\begin{document}
$$
\kappa = \frac{315k_B\upsilon }{2\pi \rho C_v r_{ct}^5}\frac{1}{Pr},
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 19 KiB

View File

@ -0,0 +1,17 @@
\documentclass[12pt]{article}
\begin{document}
$$
\mathbf{F}_{ij}^C = Aw_c(r_{ij})\mathbf{e}_{ij} + B(\rho_i+\rho_j)w_d(r_{ij})\mathbf{e}_{ij},
$$
$$
\mathbf{F}_{ij}^{D} = -\gamma {\omega_{D}}(r_{ij})(\mathbf{e}_{ij} \cdot \mathbf{v}_{ij})\mathbf{e}_{ij},
$$
$$
\mathbf{F}_{ij}^{R} = \sigma {\omega_{R}}(r_{ij}){\xi_{ij}}\Delta t^{-1/2} \mathbf{e}_{ij},
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

View File

@ -0,0 +1,21 @@
\documentclass[12pt]{article}
\begin{document}
$$
Q_{ij}^D = -\kappa_{ij} w_{DC}(r_{ij}) \left( C_i - C_j \right),
$$
$$
Q_{ij}^R = \epsilon_{ij}\left( C_i + C_j \right) w_{RC}(r_{ij}) \xi_{ij},
$$
$$
w_{DC}(r_{ij})=w^2_{RC}(r_{ij}) = (1 - r/r_{cc})^{\rm power\_{cc}},
$$
$$
\epsilon_{ij}^2 = m_s^2\kappa_{ij}\rho,
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

View File

@ -0,0 +1,29 @@
\documentclass[12pt]{article}
\begin{document}
$$
\mathbf{F}_{ij}^{C} = A{\omega_{C}}(r_{ij})\mathbf{e}_{ij},
$$
$$
\mathbf{F}_{ij}^{D} = -\gamma {\omega_{D}}(r_{ij})(\mathbf{e}_{ij} \cdot \mathbf{v}_{ij})\mathbf{e}_{ij},
$$
$$
\mathbf{F}_{ij}^{R} = \sigma {\omega_{R}}(r_{ij}){\xi_{ij}}\Delta t^{-1/2} \mathbf{e}_{ij},
$$
$$
\omega_{C}(r) = 1 - r/r_c,
$$
$$
\omega_{D}(r) = \omega^2_{R}(r) = (1-r/r_c)^{\rm power\_f},
$$
$$
\sigma^2 = 2\gamma k_B T,
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

View File

@ -0,0 +1,13 @@
\documentclass[12pt]{article}
\begin{document}
$$
\frac{\mathrm{d}^2 \mathbf{r}_i}{\mathrm{d} t^2} = \frac{\mathrm{d} \mathbf{v}_i}{\mathrm{d} t}=\mathbf{F}_{i}=\sum_{i\neq j}(\mathbf{F}_{ij}^{C}+\mathbf{F}_{ij}^{D}+\mathbf{F}_{ij}^{R}),
$$
$$
\frac{\mathrm{d} C_{i}}{\mathrm{d} t}= Q_{i} = \sum_{i\neq j}(Q_{ij}^{D}+Q_{ij}^{R}) + Q_{i}^{S},
$$
\end{document}

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 895 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 10 KiB

View File

@ -1,7 +1,7 @@
<!-- HTML_ONLY -->
<HEAD>
<TITLE>LAMMPS Users Manual</TITLE>
<META NAME="docnumber" CONTENT="17 Aug 2017 version">
<META NAME="docnumber" CONTENT="1 Sep 2017 version">
<META NAME="author" CONTENT="http://lammps.sandia.gov - Sandia National Laboratories">
<META NAME="copyright" CONTENT="Copyright (2003) Sandia Corporation. This software and manual is distributed under the GNU General Public License.">
</HEAD>
@ -21,7 +21,7 @@
<H1></H1>
LAMMPS Documentation :c,h3
17 Aug 2017 version :c,h4
1 Sep 2017 version :c,h4
Version info: :h4

View File

@ -685,6 +685,7 @@ package"_Section_start.html#start_3.
"drude"_fix_drude.html,
"drude/transform/direct"_fix_drude_transform.html,
"drude/transform/reverse"_fix_drude_transform.html,
"edpd/source"_fix_dpd_source.html,
"eos/cv"_fix_eos_cv.html,
"eos/table"_fix_eos_table.html,
"eos/table/rx"_fix_eos_table_rx.html,
@ -704,6 +705,9 @@ package"_Section_start.html#start_3.
"meso"_fix_meso.html,
"manifoldforce"_fix_manifoldforce.html,
"meso/stationary"_fix_meso_stationary.html,
"mvv/dpd"_fix_mvv_dpd.html,
"mvv/edpd"_fix_mvv_dpd.html,
"mvv/tdpd"_fix_mvv_dpd.html,
"nve/dot"_fix_nve_dot.html,
"nve/dotc/langevin"_fix_nve_dotc_langevin.html,
"nve/manifold/rattle"_fix_nve_manifold_rattle.html,
@ -732,6 +736,7 @@ package"_Section_start.html#start_3.
"smd/move/triangulated/surface"_fix_smd_move_triangulated_surface.html,
"smd/setvel"_fix_smd_setvel.html,
"smd/wall/surface"_fix_smd_wall_surface.html,
"tdpd/source"_fix_dpd_source.html,
"temp/rescale/eff"_fix_temp_rescale_eff.html,
"ti/spring"_fix_ti_spring.html,
"ttm/mod"_fix_ttm.html,
@ -775,6 +780,7 @@ KOKKOS, o = USER-OMP, t = OPT.
"erotate/sphere"_compute_erotate_sphere.html,
"erotate/sphere/atom"_compute_erotate_sphere_atom.html,
"event/displace"_compute_event_displace.html,
"fragment/atom"_compute_cluster_atom.html,
"global/atom"_compute_global_atom.html,
"group/group"_compute_group_group.html,
"gyration"_compute_gyration.html,
@ -836,6 +842,7 @@ package"_Section_start.html#start_3.
"cnp/atom"_compute_cnp_atom.html,
"dpd"_compute_dpd.html,
"dpd/atom"_compute_dpd_atom.html,
"edpd/temp/atom"_compute_edpd_temp_atom.html,
"fep"_compute_fep.html,
"force/tally"_compute_tally.html,
"heat/flux/tally"_compute_tally.html,
@ -868,6 +875,7 @@ package"_Section_start.html#start_3.
"smd/ulsph/stress"_compute_smd_ulsph_stress.html,
"smd/vol"_compute_smd_vol.html,
"stress/tally"_compute_tally.html,
"tdpd/cc/atom"_compute_tdpd_cc_atom.html,
"temp/drude"_compute_temp_drude.html,
"temp/eff"_compute_temp_eff.html,
"temp/deform/eff"_compute_temp_deform_eff.html,
@ -1024,6 +1032,7 @@ package"_Section_start.html#start_3.
"eam/cd (o)"_pair_eam.html,
"edip (o)"_pair_edip.html,
"edip/multi"_pair_edip.html,
"edpd"_pair_meso.html,
"eff/cut"_pair_eff.html,
"exp6/rx"_pair_exp6_rx.html,
"gauss/cut"_pair_gauss.html,
@ -1041,6 +1050,8 @@ package"_Section_start.html#start_3.
"lj/sdk (gko)"_pair_sdk.html,
"lj/sdk/coul/long (go)"_pair_sdk.html,
"lj/sdk/coul/msm (o)"_pair_sdk.html,
"mdpd"_pair_meso.html,
"mdpd/rhosum"_pair_meso.html,
"meam/c"_pair_meam.html,
"meam/spline (o)"_pair_meam_spline.html,
"meam/sw/spline"_pair_meam_sw_spline.html,
@ -1074,6 +1085,7 @@ package"_Section_start.html#start_3.
"sph/taitwater/morris"_pair_sph_taitwater_morris.html,
"srp"_pair_srp.html,
"table/rx"_pair_table_rx.html,
"tdpd"_pair_meso.html,
"tersoff/table (o)"_pair_tersoff.html,
"thole"_pair_thole.html,
"tip4p/long/soft (o)"_pair_lj_soft.html :tb(c=4,ea=c)

View File

@ -112,7 +112,7 @@ Package, Description, Doc page, Example, Library
"REPLICA"_#REPLICA, multi-replica methods, "Section 6.6.5"_Section_howto.html#howto_5, tad, -
"RIGID"_#RIGID, rigid bodies and constraints, "fix rigid"_fix_rigid.html, rigid, -
"SHOCK"_#SHOCK, shock loading methods, "fix msst"_fix_msst.html, -, -
"SNAP"_#SNAP, quantum-fitted potential, "pair snap"_pair_snap.html, snap, -
"SNAP"_#SNAP, quantum-fitted potential, "pair_style snap"_pair_snap.html, snap, -
"SRD"_#SRD, stochastic rotation dynamics, "fix srd"_fix_srd.html, srd, -
"VORONOI"_#VORONOI, Voronoi tesselation, "compute voronoi/atom"_compute_voronoi_atom.html, -, ext :tb(ea=c,ca1=l)
@ -134,6 +134,7 @@ Package, Description, Doc page, Example, Library
"USER-LB"_#USER-LB, Lattice Boltzmann fluid,"fix lb/fluid"_fix_lb_fluid.html, USER/lb, -
"USER-MANIFOLD"_#USER-MANIFOLD, motion on 2d surfaces,"fix manifoldforce"_fix_manifoldforce.html, USER/manifold, -
"USER-MEAMC"_#USER-MEAMC, modified EAM potential (C++), "pair_style meam/c"_pair_meam.html, meam, -
"USER-MESO"_#USER-MESO, mesoscale DPD models, "pair_style edpd"_pair_meso.html, USER/meso, -
"USER-MGPT"_#USER-MGPT, fast MGPT multi-ion potentials, "pair_style mgpt"_pair_mgpt.html, USER/mgpt, -
"USER-MISC"_#USER-MISC, single-file contributions, USER-MISC/README, USER/misc, -
"USER-MOLFILE"_#USER-MOLFILE, "VMD"_vmd_home molfile plug-ins,"dump molfile"_dump_molfile.html, -, ext
@ -1342,7 +1343,7 @@ make machine :pre
[Supporting info:]
src/SNAP: filenames -> commands
"pair snap"_pair_snap.html
"pair_style snap"_pair_snap.html
"compute sna/atom"_compute_sna_atom.html
"compute snad/atom"_compute_sna_atom.html
"compute snav/atom"_compute_sna_atom.html
@ -1556,7 +1557,7 @@ make machine :pre
src/USER-AWPMD: filenames -> commands
src/USER-AWPMD/README
"pair awpmd/cut"_pair_awpmd.html
"pair_style awpmd/cut"_pair_awpmd.html
examples/USER/awpmd :ul
:line
@ -1745,12 +1746,12 @@ src/USER-DPD: filenames -> commands
"fix eos/table/rx"_fix_eos_table_rx.html
"fix shardlow"_fix_shardlow.html
"fix rx"_fix_rx.html
"pair table/rx"_pair_table_rx.html
"pair dpd/fdt"_pair_dpd_fdt.html
"pair dpd/fdt/energy"_pair_dpd_fdt.html
"pair exp6/rx"_pair_exp6_rx.html
"pair multi/lucy"_pair_multi_lucy.html
"pair multi/lucy/rx"_pair_multi_lucy_rx.html
"pair_style table/rx"_pair_table_rx.html
"pair_style dpd/fdt"_pair_dpd_fdt.html
"pair_style dpd/fdt/energy"_pair_dpd_fdt.html
"pair_style exp6/rx"_pair_exp6_rx.html
"pair_style multi/lucy"_pair_multi_lucy.html
"pair_style multi/lucy/rx"_pair_multi_lucy_rx.html
examples/USER/dpd :ul
:line
@ -1785,8 +1786,8 @@ src/USER-DRUDE/README
"fix drude"_fix_drude.html
"fix drude/transform/*"_fix_drude_transform.html
"compute temp/drude"_compute_temp_drude.html
"pair thole"_pair_thole.html
"pair lj/cut/thole/long"_pair_thole.html
"pair_style thole"_pair_thole.html
"pair_style lj/cut/thole/long"_pair_thole.html
examples/USER/drude
tools/drude :ul
@ -1824,8 +1825,8 @@ src/USER-EFF/README
"fix npt/eff"_fix_nh_eff.html
"fix langevin/eff"_fix_langevin_eff.html
"compute temp/eff"_compute_temp_eff.html
"pair eff/cut"_pair_eff.html
"pair eff/inline"_pair_eff.html
"pair_style eff/cut"_pair_eff.html
"pair_style eff/inline"_pair_eff.html
examples/USER/eff
tools/eff/README
tools/eff
@ -2155,11 +2156,47 @@ make machine :pre
src/USER-MEAMC: filenames -> commands
src/USER-MEAMC/README
"pair meam/c"_pair_meam.html
"pair_style meam/c"_pair_meam.html
examples/meam :ul
:line
USER-MESO package :link(USER-MESO),h4
[Contents:]
Several extensions of the dissipative particle dynamics (DPD)
method. Specifically, energy-conserving DPD (eDPD) that can model
non-isothermal processes, many-body DPD (mDPD) for simulating
vapor-liquid coexistence, and transport DPD (tDPD) for modeling
advection-diffusion-reaction systems. The equations of motion of these
DPD extensions are integrated through a modified velocity-Verlet (MVV)
algorithm.
[Author:] Zhen Li (Division of Applied Mathematics, Brown University)
[Install or un-install:]
make yes-user-meso
make machine :pre
make no-user-meso
make machine :pre
[Supporting info:]
src/USER-MESO: filenames -> commands
src/USER-MESO/README
"atom_style edpd"_atom_style.html
"pair_style edpd"_pair_meso.html
"pair_style mdpd"_pair_meso.html
"pair_style tdpd"_pair_meso.html
"fix mvv/dpd"_fix_mvv_dpd.html
examples/USER/meso
http://lammps.sandia.gov/movies.html#mesodpd :ul
:line
USER-MOLFILE package :link(USER-MOLFILE),h4
[Contents:]

View File

@ -536,7 +536,7 @@ You should get the executable lmp_foo when the build is complete.
:line
Errors that can occur when making LAMMPS: h5 :link(start_2_3)
Errors that can occur when making LAMMPS :h5 :link(start_2_3)
If an error occurs when building LAMMPS, the compiler or linker will
state very explicitly what the problem is. The error message should

View File

@ -13,17 +13,19 @@ atom_style command :h3
atom_style style args :pre
style = {angle} or {atomic} or {body} or {bond} or {charge} or {dipole} or \
{dpd} or {electron} or {ellipsoid} or {full} or {line} or {meso} or \
{molecular} or {peri} or {smd} or {sphere} or {tri} or \
{template} or {hybrid} :ulb,l
{dpd} or {edpd} or {mdpd} or {tdpd} or {electron} or {ellipsoid} or \
{full} or {line} or {meso} or {molecular} or {peri} or {smd} or \
{sphere} or {tri} or {template} or {hybrid} :ulb,l
args = none for any style except the following
{body} args = bstyle bstyle-args
bstyle = style of body particles
bstyle-args = additional arguments specific to the bstyle
see the "body"_body.html doc page for details
{template} args = template-ID
template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command
{hybrid} args = list of one or more sub-styles, each with their args :pre
{body} args = bstyle bstyle-args
bstyle = style of body particles
bstyle-args = additional arguments specific to the bstyle
see the "body"_body.html doc page for details
{tdpd} arg = Nspecies
Nspecies = # of chemical species
{template} arg = template-ID
template-ID = ID of molecule template specified in a separate "molecule"_molecule.html command
{hybrid} args = list of one or more sub-styles, each with their args :pre
accelerated styles (with same args) = {angle/kk} or {atomic/kk} or {bond/kk} or {charge/kk} or {full/kk} or {molecular/kk} :l
:ule
@ -36,7 +38,8 @@ atom_style full
atom_style body nparticle 2 10
atom_style hybrid charge bond
atom_style hybrid charge body nparticle 2 5
atom_style template myMols :pre
atom_style template myMols
atom_style tdpd 2 :pre
[Description:]
@ -74,6 +77,9 @@ quantities.
{charge} | charge | atomic system with charges |
{dipole} | charge and dipole moment | system with dipolar particles |
{dpd} | internal temperature and internal energies | DPD particles |
{edpd} | temperature and heat capacity | eDPD particles |
{mdpd} | density | mDPD particles |
{tdpd} | chemical concentration | tDPD particles |
{electron} | charge and spin and eradius | electronic force field |
{ellipsoid} | shape, quaternion, angular momentum | aspherical particles |
{full} | molecular + charge | bio-molecules |
@ -145,6 +151,19 @@ properties with internal temperature (dpdTheta), internal conductive
energy (uCond), internal mechanical energy (uMech), and internal
chemical energy (uChem).
The {edpd} style is for energy-conserving dissipative particle
dynamics (eDPD) particles which store a temperature (edpd_temp), and
heat capacity (edpd_cv).
The {mdpd} style is for many-body dissipative particle dynamics (mDPD)
particles which store a density (rho) for considering
density-dependent many-body interactions.
The {tdpd} style is for transport dissipative particle dynamics (tDPD)
particles which store a set of chemical concentrations. An integer
"cc_species" is required to specify the number of chemical species
involved in a tDPD system.
The {meso} style is for smoothed particle hydrodynamics (SPH)
particles which store a density (rho), energy (e), and heat capacity
(cv).
@ -284,6 +303,11 @@ force fields"_pair_eff.html.
The {dpd} style is part of the USER-DPD package for dissipative
particle dynamics (DPD).
The {edpd}, {mdpd}, and {tdpd} styles are part of the USER-MESO package
for energy-conserving dissipative particle dynamics (eDPD), many-body
dissipative particle dynamics (mDPD), and transport dissipative particle
dynamics (tDPD), respectively.
The {meso} style is part of the USER-SPH package for smoothed particle
hydrodynamics (SPH). See "this PDF
guide"_USER/sph/SPH_LAMMPS_userguide.pdf to using SPH in LAMMPS.

View File

@ -7,37 +7,62 @@
:line
compute cluster/atom command :h3
compute fragment/atom command :h3
compute aggregate/atom command :h3
[Syntax:]
compute ID group-ID cluster/atom cutoff :pre
compute ID group-ID cluster/atom cutoff
compute ID group-ID fragment/atom
compute ID group-ID aggregate/atom cutoff :pre
ID, group-ID are documented in "compute"_compute.html command
cluster/atom = style name of this compute command
{cluster/atom} or {fragment/atom} or {aggregate/atom} = style name of this compute command
cutoff = distance within which to label atoms as part of same cluster (distance units) :ul
[Examples:]
compute 1 all cluster/atom 1.0 :pre
compute 1 all cluster/atom 3.5
compute 1 all fragment/atom
compute 1 all aggregate/atom 3.5 :pre
[Description:]
Define a computation that assigns each atom a cluster ID.
Define a computation that assigns each atom a cluster, fragment,
or aggregate ID.
A cluster is defined as a set of atoms, each of which is within the
cutoff distance from one or more other atoms in the cluster. If an
atom has no neighbors within the cutoff distance, then it is a 1-atom
cluster. The ID of every atom in the cluster will be the smallest
atom ID of any atom in the cluster.
cluster.
A fragment is similarly defined as a set of atoms, each of which has an
explicit bond to another atom in the fragment (i.e. defined via a "data file"_read_data.html,
the "create_bonds"_create_bonds.html command, or through fixes like
"fix bond/create"_fix_bond_create.html, "fix bond/swap"_fix_bond_swap.html,
or "fix bond/break"_fix_bond_break.html). The cluster ID or fragment ID
of every atom in the cluster will be set to the smallest atom ID of any atom
in the cluster or fragment, respectively.
An aggregate is defined by combining the rules for clusters and
fragments, i.e. a set of atoms, where each atom is within the cutoff
distance from one or more atoms within a fragment that is part of
the same cluster. This measure can be used to track molecular assemblies
like micelles.
Only atoms in the compute group are clustered and assigned cluster
IDs. Atoms not in the compute group are assigned a cluster ID = 0.
IDs. Atoms not in the compute group are assigned a cluster ID = 0.
For fragments, only bonds where [both] atoms of the bond are included
in the compute group are assigned to fragments, so that only fragments
are detected where [all] atoms are in the compute group. Thus atoms
may be included in the compute group, yet still have a fragment ID of 0.
The neighbor list needed to compute this quantity is constructed each
time the calculation is performed (i.e. each time a snapshot of atoms
is dumped). Thus it can be inefficient to compute/dump this quantity
too frequently or to have multiple compute/dump commands, each of a
{cluster/atom} style.
For computes {cluster/atom} and {aggregate/atom} the neighbor list needed
to compute this quantity is constructed each time the calculation is
performed (i.e. each time a snapshot of atoms is dumped). Thus it can be
inefficient to compute/dump this quantity too frequently or to have
multiple compute/dump commands, each of a {cluster/atom} or
{aggregate/atom} style.
NOTE: If you have a bonded system, then the settings of
"special_bonds"_special_bonds.html command can remove pairwise

View File

@ -0,0 +1,62 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
compute edpd/temp/atom command :h3
[Syntax:]
compute ID group-ID edpd/temp/atom :pre
ID, group-ID are documented in "compute"_compute.html command
edpd/temp/atom = style name of this compute command :ul
[Examples:]
compute 1 all edpd/temp/atom :pre
[Description:]
Define a computation that calculates the per-atom temperature
for each eDPD particle in a group.
The temperature is a local temperature derived from the internal energy
of each eDPD particle based on the local equilibrium hypothesis.
For more details please see "(Espanol1997)"_#Espanol1997 and
"(Li2014)"_#Li2014a.
[Output info:]
This compute calculates a per-atom vector, which can be accessed by
any command that uses per-atom values from a compute as input. See
"Section 6.15"_Section_howto.html#howto_15 for an overview of
LAMMPS output options.
The per-atom vector values will be in temperature "units"_units.html.
[Restrictions:]
This compute is part of the USER-MESO package. It is only enabled if
LAMMPS was built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
[Related commands:]
"pair_style edpd"_pair_meso.html
[Default:] none
:line
:link(Espanol1997)
[(Espanol1997)] Espanol, Europhys Lett, 40(6): 631-636 (1997). DOI:
10.1209/epl/i1997-00515-8
:link(Li2014a)
[(Li2014)] Li, Tang, Lei, Caswell, Karniadakis, J Comput Phys, 265:
113-127 (2014). DOI: 10.1016/j.jcp.2014.02.003.

View File

@ -0,0 +1,60 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
compute tdpd/cc/atom command :h3
[Syntax:]
compute ID group-ID tdpd/cc/atom index :pre
ID, group-ID are documented in "compute"_compute.html command
tdpd/cc/atom = style name of this compute command
index = index of chemical species (1 to Nspecies) :ul
[Examples:]
compute 1 all tdpd/cc/atom 2 :pre
[Description:]
Define a computation that calculates the per-atom chemical
concentration of a specified species for each tDPD particle in a
group.
The chemical concentration of each species is defined as the number of
molecules carried by a tDPD particle for dilute solution. For more
details see "(Li2015)"_#Li2015a.
[Output info:]
This compute calculates a per-atom vector, which can be accessed by
any command that uses per-atom values from a compute as input. See
"Section 6.15"_Section_howto.html#howto_15 for an overview of
LAMMPS output options.
The per-atom vector values will be in the units of chemical species
per unit mass.
[Restrictions:]
This compute is part of the USER-MESO package. It is only enabled if
LAMMPS was built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
[Related commands:]
"pair_style tdpd"_pair_meso.html
[Default:] none
:line
:link(Li2015a)
[(Li2015)] Li, Yazdani, Tartakovsky, Karniadakis, J Chem Phys, 143:
014101 (2015). DOI: 10.1063/1.4923254

View File

@ -30,6 +30,7 @@ Computes :h1
compute_displace_atom
compute_dpd
compute_dpd_atom
compute_edpd_temp_atom
compute_erotate_asphere
compute_erotate_rigid
compute_erotate_sphere
@ -95,6 +96,7 @@ Computes :h1
compute_sna_atom
compute_stress_atom
compute_tally
compute_tdpd_cc_atom
compute_temp
compute_temp_asphere
compute_temp_body

101
doc/src/fix_dpd_source.txt Normal file
View File

@ -0,0 +1,101 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
fix edpd/source command :h3
fix tdpd/source command :h3
[Syntax:]
fix ID group-ID edpd/source keyword values ...
fix ID group-ID tdpd/source index keyword values ... :pre
ID, group-ID are documented in "fix"_fix.html command :ulb,l
edpd/source or tdpd/source = style name of this fix command :l
index (only specified for tdpd/source) = index of chemical species (1 to Nspecies) :l
keyword = {sphere} or {cuboid} :l
{sphere} values = cx,cy,cz,radius,source
cx,cy,cz = x,y,z center of spherical domain (distance units)
radius = radius of a spherical domain (distance units)
source = heat source or concentration source (flux units, see below)
{cuboid} values = cx,cy,cz,dLx,dLy,dLz,source
cx,cy,cz = x,y,z lower left corner of a cuboid domain (distance units)
dLx,dLy,dLz = x,y,z side length of a cuboid domain (distance units)
source = heat source or concentration source (flux units, see below) :pre
:ule
[Examples:]
fix 1 all edpd/source sphere 0.0 0.0 0.0 5.0 0.01
fix 1 all edpd/source cuboid 0.0 0.0 0.0 20.0 10.0 10.0 -0.01
fix 1 all tdpd/source 1 sphere 5.0 0.0 0.0 5.0 0.01
fix 1 all tdpd/source 2 cuboid 0.0 0.0 0.0 20.0 10.0 10.0 0.01 :pre
[Description:]
Fix {edpd/source} adds a heat source as an external heat flux to each
atom in a spherical or cuboid domain, where the {source} is in units
of energy/time. Fix {tdpd/source} adds an external concentration
source of the chemical species specified by {index} as an external
concentration flux for each atom in a spherical or cuboid domain,
where the {source} is in units of mole/volume/time.
This command can be used to give an additional heat/concentration
source term to atoms in a simulation, such as for a simulation of
heat conduction with a source term (see Fig.12 in "(Li2014)"_#Li2014b)
or diffusion with a source term (see Fig.1 in "(Li2015)"_#Li2015b), as
an analog of a periodic Poiseuille flow problem.
If the {sphere} keyword is used, the {cx,cy,cz,radius} defines a
spherical domain to apply the source flux to.
If the {cuboid} keyword is used, the {cx,cy,cz,dLx,dLy,dLz} defines a
cuboid domain to apply the source flux to.
:line
[Restart, fix_modify, output, run start/stop, minimize info:]
No information about this fix is written to "binary restart
files"_restart.html. None of the "fix_modify"_fix_modify.html options
are relevant to this fix. No global or per-atom quantities are stored
by this fix for access by various "output
commands"_Section_howto.html#howto_15. No parameter of this fix can
be used with the {start/stop} keywords of the "run"_run.html command.
This fix is not invoked during "energy minimization"_minimize.html.
[Restrictions:]
This fix is part of the USER-MESO package. It is only enabled if
LAMMPS was built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
Fix {edpd/source} must be used with the "pair_style
edpd"_pair_meso.html command. Fix {tdpd/source} must be used with the
"pair_style tdpd"_pair_meso.html command.
[Related commands:]
"pair_style edpd"_pair_meso.html, "pair_style tdpd"_pair_meso.html,
"compute edpd/temp/atom"_compute_edpd_temp_atom.html, "compute
tdpd/cc/atom"_compute_tdpd_cc_atom.html
[Default:] none
:line
:link(Li2014b)
[(Li2014)] Z. Li, Y.-H. Tang, H. Lei, B. Caswell and G.E. Karniadakis,
"Energy-conserving dissipative particle dynamics with
temperature-dependent properties", J. Comput. Phys., 265: 113-127
(2014). DOI: 10.1016/j.jcp.2014.02.003
:link(Li2015b)
[(Li2015)] Z. Li, A. Yazdani, A. Tartakovsky and G.E. Karniadakis,
"Transport dissipative particle dynamics model for mesoscopic
advection-diffusion-reaction problems", J. Chem. Phys., 143: 014101
(2015). DOI: 10.1063/1.4923254

97
doc/src/fix_mvv_dpd.txt Normal file
View File

@ -0,0 +1,97 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
fix mvv/dpd command :h3
fix mvv/edpd command :h3
fix mvv/tdpd command :h3
[Syntax:]
fix ID group-ID mvv/dpd lambda :pre
fix ID group-ID mvv/edpd lambda :pre
fix ID group-ID mvv/tdpd lambda :pre
ID, group-ID are documented in "fix"_fix.html command
mvv/dpd, mvv/edpd, mvv/tdpd = style name of this fix command
lambda = (optional) relaxation parameter (unitless) :ul
[Examples:]
fix 1 all mvv/dpd
fix 1 all mvv/dpd 0.5
fix 1 all mvv/edpd
fix 1 all mvv/edpd 0.5
fix 1 all mvv/tdpd
fix 1 all mvv/tdpd 0.5 :pre
[Description:]
Perform time integration using the modified velocity-Verlet (MVV)
algorithm to update position and velocity (fix mvv/dpd), or position,
velocity and temperature (fix mvv/edpd), or position, velocity and
concentration (fix mvv/tdpd) for particles in the group each timestep.
The modified velocity-Verlet (MVV) algorithm aims to improve the
stability of the time integrator by using an extrapolated version of
the velocity for the force evaluation:
:c,image(Eqs/fix_mvv_dpd.jpg)
where the parameter <font size="4">&lambda;</font> depends on the
specific choice of DPD parameters, and needs to be tuned on a
case-by-case basis. Specification of a {lambda} value is optional.
If specified, the setting must be from 0.0 to 1.0. If not specified,
a default value of 0.5 is used, which effectively reproduces the
standard velocity-Verlet (VV) scheme. For more details, see
"(Groot)"_#Groot2.
Fix {mvv/dpd} updates the position and velocity of each atom. It can
be used with the "pair_style mdpd"_pair_meso.html command or other
pair styles such as "pair dpd"_pair_dpd.html.
Fix {mvv/edpd} updates the per-atom temperature, in addition to
position and velocity, and must be used with the "pair_style
edpd"_pair_meso.html command.
Fix {mvv/tdpd} updates the per-atom chemical concentration, in
addition to position and velocity, and must be used with the
"pair_style tdpd"_pair_meso.html command.
:line
[Restart, fix_modify, output, run start/stop, minimize info:]
No information about this fix is written to "binary restart
files"_restart.html. None of the "fix_modify"_fix_modify.html options
are relevant to this fix. No global or per-atom quantities are stored
by this fix for access by various "output
commands"_Section_howto.html#howto_15. No parameter of this fix can
be used with the {start/stop} keywords of the "run"_run.html command.
This fix is not invoked during "energy minimization"_minimize.html.
[Restrictions:]
This fix is part of the USER-MESO package. It is only enabled if
LAMMPS was built with that package. See the "Making
LAMMPS"_Section_start.html#start_3 section for more info.
[Related commands:]
"pair_style mdpd"_pair_meso.html, "pair_style edpd"_pair_meso.html,
"pair_style tdpd"_pair_meso.html
[Default:]
The default value for the optional {lambda} parameter is 0.5.
:line
:link(Groot2)
[(Groot)] Groot and Warren, J Chem Phys, 107: 4423-4435 (1997). DOI:
10.1063/1.474784

View File

@ -90,9 +90,14 @@ file specified by {qfile}. The file has the following format
...
Ntype chi eta gamma zeta qcore :pre
There is one line per atom type with the following parameters.
There have to be parameters given for every atom type. Wildcard entries
are possible using the same syntax as elsewhere in LAMMPS
(i.e., n*m, n*, *m, *). Later entries will overwrite previous ones.
Empty lines or any text following the pound sign (#) are ignored.
Each line starts with the atom type followed by five parameters.
Only a subset of the parameters is used by each QEq style as described
below, thus the others can be set to 0.0 if desired.
below, thus the others can be set to 0.0 if desired, but all five
entries per line are required.
{chi} = electronegativity in energy units
{eta} = self-Coulomb potential in energy units

View File

@ -50,17 +50,17 @@ fix ees_cube all wall/region/ees myCube 1.0 1.0 2.5 :pre
Fix {wall/ees} bounds the simulation domain on one or more of its
faces with a flat wall that interacts with the ellipsoidal atoms in the
group by generating a force on the atom in a direction perpendicular to
the wall and a torque parallel with the wall.  The energy of
the wall and a torque parallel with the wall. The energy of
wall-particle interactions E is given by:
:c,image(Eqs/fix_wall_ees.jpg)
Introduced by Babadi and Ejtehadi in "(Babadi)"_#BabadiEjtehadi. Here,
{r} is the distance from the particle to the wall at position {coord},
and Rc is the {cutoff} distance at which the  particle and wall no
longer interact. Also,  sigma_n is the distance between center of
ellipsoid and the nearest point of its surface to the wall  The energy
of the wall (see the image below).
and Rc is the {cutoff} distance at which the particle and wall no
longer interact. Also, sigma_n is the distance between center of
ellipsoid and the nearest point of its surface to the wall. The energy
of the wall is:
:c,image(JPG/fix_wall_ees_image.jpg)
@ -68,20 +68,21 @@ Details of using this command and specifications are the same as
fix/wall command. You can also find an example in USER/ees/ under
examples/ directory.
The prefactor {epsilon} can be thought of as an
effective Hamaker constant with energy units for the strength of the
ellipsoid-wall interaction.  More specifically, the {epsilon} pre-factor
= 8 * pi^2 * rho_wall * rho_ellipsoid * epsilon
* sigma_a * sigma_b * sigma_c, where epsilon is the LJ parameters for
the constituent LJ particles and sigma_a, sigma_b, and sigma_c are radii
of ellipsoidal particles. Rho_wall and rho_ellipsoid are the number
The prefactor {epsilon} can be thought of as an
effective Hamaker constant with energy units for the strength of the
ellipsoid-wall interaction. More specifically, the {epsilon} pre-factor
= 8 * pi^2 * rho_wall * rho_ellipsoid * epsilon
* sigma_a * sigma_b * sigma_c, where epsilon is the LJ parameters for
the constituent LJ particles and sigma_a, sigma_b, and sigma_c are radii
of ellipsoidal particles. Rho_wall and rho_ellipsoid are the number
density of the constituent particles, in the wall and ellipsoid
respectively, in units of 1/volume.
NOTE: You must insure that r is always bigger than sigma_n for
all particles in the group, or LAMMPS will generate an error.  This
all particles in the group, or LAMMPS will generate an error. This
means you cannot start your simulation with particles touching the wall
position {coord} (r = sigma_n) or with particles penetrating the wall (0 =< r < sigma_n) or with particles on the wrong side of the
position {coord} (r = sigma_n) or with particles penetrating the wall
(0 <= r < sigma_n) or with particles on the wrong side of the
wall (r < 0).

View File

@ -33,6 +33,7 @@ Fixes :h1
fix_drude
fix_drude_transform
fix_dpd_energy
fix_dpd_source
fix_dt_reset
fix_efield
fix_ehex
@ -71,6 +72,7 @@ Fixes :h1
fix_move
fix_mscg
fix_msst
fix_mvv_dpd
fix_neb
fix_nh
fix_nh_eff

View File

@ -21,6 +21,7 @@ Section_python.html
Section_errors.html
Section_history.html
tutorial_bash_on_windows.html
tutorial_drude.html
tutorial_github.html
tutorial_pylammps.html
@ -156,6 +157,7 @@ fix_controller.html
fix_deform.html
fix_deposit.html
fix_dpd_energy.html
fix_dpd_source.html
fix_drag.html
fix_drude.html
fix_drude_transform.html
@ -197,6 +199,7 @@ fix_momentum.html
fix_move.html
fix_mscg.html
fix_msst.html
fix_mvv_dpd.html
fix_neb.html
fix_nh.html
fix_nh_eff.html
@ -315,6 +318,7 @@ compute_dipole_chunk.html
compute_displace_atom.html
compute_dpd.html
compute_dpd_atom.html
compute_edpd_temp_atom.html
compute_erotate_asphere.html
compute_erotate_rigid.html
compute_erotate_sphere.html
@ -380,6 +384,7 @@ compute_smd_vol.html
compute_sna_atom.html
compute_stress_atom.html
compute_tally.html
compute_tdpd_cc_atom.html
compute_temp.html
compute_temp_asphere.html
compute_temp_body.html
@ -457,6 +462,7 @@ pair_mdf.html
pair_meam.html
pair_meam_spline.html
pair_meam_sw_spline.html
pair_meso.html
pair_mgpt.html
pair_mie.html
pair_momb.html
@ -644,4 +650,3 @@ USER/atc/man_unfix_flux.html
USER/atc/man_unfix_nodes.html
USER/atc/man_write_atom_weights.html
USER/atc/man_write_restart.html

View File

@ -36,7 +36,7 @@ pair_coeff 1 1 1.0 1.0 :pre
[Description:]
Style {dpd} computes a force field for dissipative particle dynamics
(DPD) following the exposition in "(Groot)"_#Groot.
(DPD) following the exposition in "(Groot)"_#Groot1.
Style {dpd/tstat} invokes a DPD thermostat on pairwise interactions,
which is equivalent to the non-conservative portion of the DPD force
@ -196,7 +196,7 @@ langevin"_fix_langevin.html, "pair_style srp"_pair_srp.html
:line
:link(Groot)
:link(Groot1)
[(Groot)] Groot and Warren, J Chem Phys, 107, 4423-35 (1997).
:link(Afshar)

277
doc/src/pair_meso.txt Normal file
View File

@ -0,0 +1,277 @@
"LAMMPS WWW Site"_lws - "LAMMPS Documentation"_ld - "LAMMPS Commands"_lc :c
:link(lws,http://lammps.sandia.gov)
:link(ld,Manual.html)
:link(lc,Section_commands.html#comm)
:line
pair_style edpd command :h3
pair_style mdpd command :h3
pair_style mdpd/rhosum command :h3
pair_style tdpd command :h3
[Syntax:]
pair_style style args :pre
style = {edpd} or {mdpd} or {mdpd/rhosum} or {tdpd} :ulb,l
args = list of arguments for a particular style :l
{edpd} args = cutoff seed
cutoff = global cutoff for eDPD interactions (distance units)
seed = random # seed (integer) (if <= 0, eDPD will use current time as the seed)
{mdpd} args = T cutoff seed
T = temperature (temperature units)
cutoff = global cutoff for mDPD interactions (distance units)
seed = random # seed (integer) (if <= 0, mDPD will use current time as the seed)
{mdpd/rhosum} args = none
{tdpd} args = T cutoff seed
T = temperature (temperature units)
cutoff = global cutoff for tDPD interactions (distance units)
seed = random # seed (integer) (if <= 0, tDPD will use current time as the seed) :pre
:ule
[Examples:]
pair_style edpd 1.58 9872598
pair_coeff * * 18.75 4.5 0.41 1.58 1.42E-5 2.0 1.58
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.42E-5 2.0 1.58 power 10.54 -3.66 3.44 -4.10
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.42E-5 2.0 1.58 power 10.54 -3.66 3.44 -4.10 kappa -0.44 -3.21 5.04 0.00 :pre
pair_style hybrid/overlay mdpd/rhosum mdpd 1.0 1.0 65689
pair_coeff 1 1 mdpd/rhosum 0.75
pair_coeff 1 1 mdpd -40.0 25.0 18.0 1.0 0.75 :pre
pair_style tdpd 1.0 1.58 935662
pair_coeff * * 18.75 4.5 0.41 1.58 1.58 1.0 1.0E-5 2.0
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.58 1.0 1.0E-5 2.0 3.0 1.0E-5 2.0 :pre
[Description:]
The {edpd} style computes the pairwise interactions and heat fluxes
for eDPD particles following the formulations in
"(Li2014_JCP)"_#Li2014_JCP and "(Li2015_CC)"_#Li2015_CC. The time
evolution of an eDPD particle is governed by the conservation of
momentum and energy given by
:c,image(Eqs/pair_edpd_gov.jpg)
where the three components of <font size="4">F<sub>i</sub></font>
including the conservative force <font
size="4">F<sub>ij</sub><sup>C</sup></font>, dissipative force <font
size="4">F<sub>ij</sub><sup>D</sup></font> and random force <font
size="4">F<sub>ij</sub><sup>R</sup></font> are expressed as
:c,image(Eqs/pair_edpd_force.jpg)
in which the exponent of the weighting function <font
size="4"><i>s</i></font> can be defined as a temperature-dependent
variable. The heat flux between particles accounting for the
collisional heat flux <font size="4">q<sup>C</sup></font>, viscous
heat flux <font size="4">q<sup>V</sup></font>, and random heat flux
<font size="4">q<sup>R</sup></font> are given by
:c,image(Eqs/pair_edpd_heat.jpg)
where the mesoscopic heat friction <font size="4">&kappa;</font> is given by
:c,image(Eqs/pair_edpd_kappa.jpg)
with <font size="4">&upsilon;</font> being the kinematic
viscosity. For more details, see Eq.(15) in "(Li2014_JCP)"_#Li2014_JCP.
The following coefficients must be defined in eDPD system for each
pair of atom types via the "pair_coeff"_pair_coeff.html command as in
the examples above.
A (force units)
gamma (force/velocity units)
power_f (positive real)
cutoff (distance units)
kappa (thermal conductivity units)
power_T (positive real)
cutoff_T (distance units)
optional keyword = power or kappa :ul
The keyword {power} or {kappa} is optional. Both "power" and "kappa"
require 4 parameters <font size="4">c<sub>1</sub>, c<sub>2</sub>,
c<sub>3</sub>, c<sub>4</sub></font> showing the temperature dependence
of the exponent <center><font size="4"> <i>s</i>(<i>T</i>) =
power_f*(1+c<sub>1</sub>*(T-1)+c<sub>2</sub>*(T-1)<sup>2</sup>
+c<sub>3</sub>*(T-1)<sup>3</sup>+c<sub>4</sub>*(T-1)<sup>4</sup>)</font></center>
and of the mesoscopic heat friction <center><font size="4">
<i>s<sub>T</sub>(T)</i> =
kappa*(1+c<sub>1</sub>*(T-1)+c<sub>2</sub>*(T-1)<sup>2</sup>
+c<sub>3</sub>*(T-1)<sup>3</sup>+c<sub>4</sub>*(T-1)<sup>4</sup>)</font></center>
If the keyword {power} or {kappa} is not specified, the eDPD system
will use constant power_f and kappa, which are independent of
temperature changes.
:line
The {mdpd/rhosum} style computes the local particle mass density rho
for mDPD particles by kernel function interpolation.
The following coefficients must be defined for each pair of atom types
via the "pair_coeff"_pair_coeff.html command as in the examples above.
cutoff (distance units) :ul
:line
The {mdpd} style computes the many-body interactions between mDPD
particles following the formulations in
"(Li2013_POF)"_#Li2013_POF. The dissipative and random forces have
the same form as in classical DPD, but the conservative force
depends on the local density. The forces are given by
:c,image(Eqs/pair_mdpd_force.jpg)
where the first term in <font size="4">F<sup>C</sup></font> with a
negative coefficient A < 0 stands for an attractive force within an
interaction range <font size="4">r<sub>c</sub></font>, and the second
term with B > 0 is the density-dependent repulsive force within an
interaction range <font size="4">r<sub>d</sub></font>.
The following coefficients must be defined for each pair of atom types via the
"pair_coeff"_pair_coeff.html command as in the examples above.
A (force units)
B (force units)
gamma (force/velocity units)
cutoff_c (distance units)
cutoff_d (distance units) :ul
:line
The {tdpd} style computes the pairwise interactions and chemical
concentration fluxes for tDPD particles following the formulations in
"(Li2015_JCP)"_#Li2015_JCP. The time evolution of a tDPD particle is
governed by the conservation of momentum and concentration given by
:c,image(Eqs/pair_tdpd_gov.jpg)
where the three components of <font size="4">F<sub>i</sub></font>
including the conservative force <font
size="4">F<sub>ij</sub><sup>C</sup></font>, dissipative force <font
size="4">F<sub>ij</sub><sup>D</sup></font> and random force <font
size="4">F<sub>ij</sub><sup>R</sup></font> are expressed as
:c,image(Eqs/pair_tdpd_force.jpg)
The concentration flux between two tDPD particles includes the Fickian
flux <font size="4">Q<sub>ij</sub><sup>D</sup></font> and random flux
<font size="4">Q<sub>ij</sub><sup>R</sup></font>, which are given by
:c,image(Eqs/pair_tdpd_flux.jpg)
where the parameters kappa and epsilon determine the strength of the
Fickian and random fluxes. <font size="4"><i>m</i><sub>s</sub></font>
is the mass of a single solute molecule. In general, <font
size="4"><i>m</i><sub>s</sub></font> is much smaller than the mass of
a tDPD particle <font size="4"><i>m</i></font>. For more details, see
"(Li2015_JCP)"_#Li2015_JCP.
The following coefficients must be defined for each pair of atom types via the
"pair_coeff"_pair_coeff.html command as in the examples above.
A (force units)
gamma (force/velocity units)
power_f (positive real)
cutoff (distance units)
cutoff_CC (distance units)
kappa_i (diffusivity units)
epsilon_i (diffusivity units)
power_cc_i (positive real) :ul
The last 3 values must be repeated Nspecies times, so that values for
each of the Nspecies chemical species are specified, as indicated by
the "i" suffix. In the first pair_coeff example above for pair_style
tdpd, Nspecies = 1. In the second example, Nspecies = 2, so 3
additional coeffs are specified (for species 2).
:line
[Example scripts]
There are example scripts for using all these pair styles in
examples/USER/meso. The example for an eDPD simulation models heat
conduction with source terms, analogous to the periodic Poiseuille flow
problem. The setup follows Fig.12 in "(Li2014_JCP)"_#Li2014_JCP. The
output of the short eDPD simulation (about 2 minutes on a single core)
gives temperature and density profiles as
:c,image(JPG/examples_edpd.jpg)
The example for a mDPD simulation models the oscillations of a liquid
droplet started from a liquid film. The mDPD parameters are adopted
from "(Li2013_POF)"_#Li2013_POF. The short mDPD run (about 2 minutes
on a single core) generates a particle trajectory which can
be visualized as follows.
:c,image(JPG/examples_mdpd_first.jpg,JPG/examples_mdpd.gif)
:c,image(JPG/examples_mdpd_last.jpg)
The first image is the initial state of the simulation. If you
click it a GIF movie should play in your browser. The second image
is the final state of the simulation.
The example for a tDPD simulation computes the effective diffusion
coefficient of a tDPD system using a method analogous to the periodic
Poiseuille flow. The tDPD system is specified with two chemical
species, and the setup follows Fig.1 in
"(Li2015_JCP)"_#Li2015_JCP. The output of the short tDPD simulation
(about one and a half minutes on a single core) gives the
concentration profiles of the two chemical species as
:c,image(JPG/examples_tdpd.jpg)
:line
[Mixing, shift, table, tail correction, restart, rRESPA info]:
The styles {edpd}, {mdpd}, {mdpd/rhosum} and {tdpd} do not support
mixing. Thus, coefficients for all I,J pairs must be specified explicitly.
The styles {edpd}, {mdpd}, {mdpd/rhosum} and {tdpd} do not support
the "pair_modify"_pair_modify.html shift, table, and tail options.
The styles {edpd}, {mdpd}, {mdpd/rhosum} and {tdpd} do not write
information to "binary restart files"_restart.html. Thus, you need
to re-specify the pair_style and pair_coeff commands in an input script
that reads a restart file.
[Restrictions:]
The pair styles {edpd}, {mdpd}, {mdpd/rhosum} and {tdpd} are part of
the USER-MESO package. They are only enabled if LAMMPS was built with
that package. See the "Making LAMMPS"_Section_start.html#start_3
section for more info.
[Related commands:]
"pair_coeff"_pair_coeff.html, "fix mvv/dpd"_fix_mvv_dpd.html,
"fix mvv/edpd"_fix_mvv_dpd.html, "fix mvv/tdpd"_fix_mvv_dpd.html,
"fix edpd/source"_fix_dpd_source.html, "fix tdpd/source"_fix_dpd_source.html,
"compute edpd/temp/atom"_compute_edpd_temp_atom.html,
"compute tdpd/cc/atom"_compute_tdpd_cc_atom.html
[Default:] none
:line
:link(Li2014_JCP)
[(Li2014_JCP)] Li, Tang, Lei, Caswell, Karniadakis, J Comput Phys,
265: 113-127 (2014). DOI: 10.1016/j.jcp.2014.02.003.
:link(Li2015_CC)
[(Li2015_CC)] Li, Tang, Li, Karniadakis, Chem Commun, 51: 11038-11040
(2015). DOI: 10.1039/C5CC01684C.
:link(Li2013_POF)
[(Li2013_POF)] Li, Hu, Wang, Ma, Zhou, Phys Fluids, 25: 072103 (2013).
DOI: 10.1063/1.4812366.
:link(Li2015_JCP)
[(Li2015_JCP)] Li, Yazdani, Tartakovsky, Karniadakis, J Chem Phys,
143: 014101 (2015). DOI: 10.1063/1.4923254.

View File

@ -10,8 +10,7 @@ pair_style snap command :h3
[Syntax:]
pair_style snap
:pre
pair_style snap :pre
[Examples:]
@ -20,17 +19,16 @@ pair_coeff * * InP.snapcoeff In P InP.snapparam In In P P :pre
[Description:]
Pair style {snap} computes interactions
using the spectral neighbor analysis potential (SNAP)
"(Thompson)"_#Thompson20142. Like the GAP framework of Bartok et al.
"(Bartok2010)"_#Bartok20102, "(Bartok2013)"_#Bartok2013
which uses bispectrum components
Pair style {snap} computes interactions using the spectral
neighbor analysis potential (SNAP) "(Thompson)"_#Thompson20142.
Like the GAP framework of Bartok et al. "(Bartok2010)"_#Bartok20102,
"(Bartok2013)"_#Bartok2013, SNAP uses bispectrum components
to characterize the local neighborhood of each atom
in a very general way. The mathematical definition of the
bispectrum calculation used by SNAP is identical
to that used by "compute sna/atom"_compute_sna_atom.html.
In SNAP, the total energy is decomposed into a sum over
atom energies. The energy of atom {i } is
atom energies. The energy of atom {i} is
expressed as a weighted sum over bispectrum components.
:c,image(Eqs/pair_snap.jpg)

View File

@ -58,6 +58,7 @@ Pair Styles :h1
pair_meam
pair_meam_spline
pair_meam_sw_spline
pair_meso
pair_mgpt
pair_mie
pair_momb

View File

@ -374,10 +374,9 @@ needed if new bonds (angles, dihedrals, impropers) will be added to
the system when a simulation runs, e.g. by using the "fix
bond/create"_fix_bond_create.html command. Using this header flag
is deprecated; please use the {extra/bond/per/atom} keyword (and
correspondingly for angles, dihedrals and impropers) in the
read_data command instead. Either will pre-allocate space in LAMMPS
data structures for storing the new bonds (angles,
dihedrals, impropers).
correspondingly for angles, dihedrals and impropers) in the read_data
command instead. Either will pre-allocate space in LAMMPS data
structures for storing the new bonds (angles, dihedrals, impropers).
The "extra special per atom" setting is typically only needed if new
bonds/angles/etc will be added to the system, e.g. by using the "fix
@ -547,6 +546,9 @@ bond: atom-ID molecule-ID atom-type x y z
charge: atom-ID atom-type q x y z
dipole: atom-ID atom-type q x y z mux muy muz
dpd: atom-ID atom-type theta x y z
edpd: atom-ID atom-type edpd_temp edpd_cv x y z
mdpd: atom-ID atom-type x y z
tdpd: atom-ID atom-type x y z cc1 cc2 ... ccNspecies
electron: atom-ID atom-type q spin eradius x y z
ellipsoid: atom-ID atom-type ellipsoidflag density x y z
full: atom-ID molecule-ID atom-type q x y z
@ -566,12 +568,15 @@ The per-atom values have these meanings and units, listed alphabetically:
atom-ID = integer ID of atom
atom-type = type of atom (1-Ntype)
bodyflag = 1 for body particles, 0 for point particles
cc = chemical concentration for tDPD particles for each species (mole/volume units)
contact-radius = ??? (distance units)
cs_re,cs_im = real/imaginary parts of wavepacket coefficients
cv = heat capacity (need units) for SPH particles
density = density of particle (mass/distance^3 or mass/distance^2 or mass/distance units, depending on dimensionality of particle)
diameter = diameter of spherical atom (distance units)
e = energy (need units) for SPH particles
edpd_temp = temperature for eDPD particles (temperature units)
edpd_cv = volumetric heat capacity for eDPD particles (energy/temperature/volume units)
ellipsoidflag = 1 for ellipsoidal particles, 0 for point particles
eradius = electron radius (or fixed-core radius)
etag = integer ID of electron that each wavepacket belongs to

View File

@ -24,7 +24,7 @@ keyword = {type} or {type/fraction} or {mol} or {x} or {y} or {z} or \
{bond} or {angle} or {dihedral} or {improper} or \
{meso/e} or {meso/cv} or {meso/rho} or \
{smd/contact/radius} or {smd/mass/density} or {dpd/theta} or \
{i_name} or {d_name} :l
{edpd/temp} or {edpd/cv} or {cc} or {i_name} or {d_name} :l
{type} value = atom type
value can be an atom-style variable (see below)
{type/fraction} values = type fraction seed
@ -98,6 +98,13 @@ keyword = {type} or {type/fraction} or {mol} or {x} or {y} or {z} or \
{dpd/theta} value = internal temperature of DPD particles (temperature units)
value can be an atom-style variable (see below)
value can be NULL which sets internal temp of each particle to KE temp
{edpd/temp} value = temperature of eDPD particles (temperature units)
value can be an atom-style variable (see below)
{edpd/cv} value = volumetric heat capacity of eDPD particles (energy/temperature/volume units)
value can be an atom-style variable (see below)
{cc} values = index cc
index = index of a chemical species (1 to Nspecies)
cc = chemical concentration of tDPD particles for a species (mole/volume units)
{i_name} value = value for custom integer vector with name
{d_name} value = value for custom floating-point vector with name :pre
:ule
@ -418,6 +425,19 @@ value >= 0.0, the internal temperature is set to that value. If it is
< 0.0, the computation of Tkin is performed and the internal
temperature is set to that value.
Keywords {edpd/temp} and {edpd/cv} set the temperature and volumetric
heat capacity of an eDPD particle as defined by the USER-MESO package.
Currently, only "atom_style edpd"_atom_style.html defines particles
with these attributes. The values for the temperature and heat
capacity must be positive.
Keyword {cc} sets the chemical concentration of a tDPD particle for a
specified species as defined by the USER-MESO package. Currently, only
"atom_style tdpd"_atom_style.html defines particles with this
attribute. An integer for "index" selects a chemical species (1 to
Nspecies) where Nspecies is set by the atom_style command. The value
for the chemical concentration must be >= 0.0.
Keywords {i_name} and {d_name} refer to custom integer and
floating-point properties that have been added to each atom via the
"fix property/atom"_fix_property_atom.html command. When that command

40
examples/USER/meso/README Normal file
View File

@ -0,0 +1,40 @@
This directory contains input scripts for performing
simulations with these models:
eDPD - energy-conserving dissipative particle dynamics
mDPD - many-body dissipative particle dynamics
tDPD - transport dissipative particle dynamics
1) eDPD: The input script in.edpd is an example simulation of
measuring the thermal conductivity by heat conduction analog of
periodic Poiseuille flow. The initial eDPD system is randomly filled
by many eDPD particles, and a set command "edpd/temp" gives the
initial temperature and a set command "edpd/cv" gives the heat
capacity of eDPD particles. A non-contact heat source/sink term is
applied by a fix command "edpd/source". A compute command
"edpd/temp/atom" obtains the temperature on each eDPD particle. The
simulation will generate a file named "temp.profile" showing the
temperature profile. For details please see online LAMMPS
documentation and Fig.12 in the paper Z. Li, et al. J Comput Phys,
2014, 265: 113-127. DOI: 10.1016/j.jcp.2014.02.003
2) mDPD: The input script "in.mdpd" is an example simulation of
oscillations of a free liquid droplet. The initial configuration is a
liquid film whose particles are in a fcc lattice created by the
command "create_atoms". Then the liquid film has a tendency to form a
spherical droplet under the effect of surface tension. For details
please see online LAMMPS documentation and the paper Z. Li, et
al. Phys Fluids, 2013, 25: 072103. DOI: 10.1063/1.4812366
3) tDPD: The input script in.tdpd is an example simulation of
computing the effective diffusion coefficient of a tDPD system using a
method analogous to the periodic Poiseuille flow. Command "atom_style
tdpd 2" specifies the tDPD system with two chemical species. The
initial tDPD system is randomly filled by many tDPD particles, and a
set "cc" command gives initial concentration for each chemical
species. Fix commands "tdpd/source" add source terms and compute
commands "tdpd/cc/atom" obtain the chemical concentration on each tDPD
particle. The simulation will generate a file named "cc.profile"
showing the concentration profiles of the two chemical species. For
details please see online LAMMPS documentation and Fig.1 in the paper
Z. Li, et al. J Chem Phys, 2015, 143: 014101. DOI: 10.1063/1.4923254

View File

@ -0,0 +1,54 @@
########################################################################
### Heat conduction analog of periodic Poiseuille flow problem ###
### using energy-conserving DPD (eDPD) simulation ###
### ###
### Created : Zhen Li (zhen_li@brown.edu) ###
### Division of Applied Mathematics, Brown University. ###
### ###
### eDPD system setup follows Fig.12 in the publication: ###
### Z. Li, Y.-H. Tang, H. Lei, B. Caswell and G.E. Karniadakis. ###
### "Energy-conserving dissipative particle dynamics with ###
### temperature-dependent properties". J. Comput. Phys., ###
### 2014, 265: 113-127. DOI: 10.1016/j.jcp.2014.02.003 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.2 bin
neigh_modify every 1 delay 0 check yes
atom_style edpd
region edpd block -10 10 -10 10 -5 5 units box
create_box 1 edpd
create_atoms 1 random 16000 276438 NULL
mass 1 1.0
set atom * edpd/temp 1.0
set atom * edpd/cv 1.0E5
pair_style edpd 1.58 9872598
#pair_coeff 1 1 18.75 4.5 0.41 1.58 1.45E-5 2.0 1.58
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.41E-5 2.0 1.58 &
power 10.54 -3.66 3.44 -4.10 &
kappa -0.44 -3.21 5.04 0.00
compute mythermo all temp
thermo 100
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 432982 loop local dist gaussian
fix mvv all mvv/edpd 0.5
fix upper all edpd/source cuboid 0.0 5.0 0.0 20.0 10.0 10.0 0.01
fix lower all edpd/source cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 -0.01
timestep 0.01
run 500
reset_timestep 0
compute temp all edpd/temp/atom
compute ccT all chunk/atom bin/1d y 0.0 1.0
fix stat all ave/chunk 1 500 500 ccT c_temp density/number norm sample file temp.profile
run 500

View File

@ -0,0 +1,142 @@
LAMMPS (11 Aug 2017)
########################################################################
### Heat conduction analog of periodic Poiseuille flow problem ###
### using energy-conserving DPD (eDPD) simulation ###
### ###
### Created : Zhen Li (zhen_li@brown.edu) ###
### Division of Applied Mathematics, Brown University. ###
### ###
### mDPD system setup follows Fig.12 in the publication: ###
### Z. Li, Y.-H. Tang, H. Lei, B. Caswell and G.E. Karniadakis. ###
### "Energy-conserving dissipative particle dynamics with ###
### temperature-dependent properties". J. Comput. Phys., ###
### 2014, 265: 113-127. DOI: 10.1016/j.jcp.2014.02.003 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.2 bin
neigh_modify every 1 delay 0 check yes
atom_style edpd
region edpd block -10 10 -10 10 -5 5 units box
create_box 1 edpd
Created orthogonal box = (-10 -10 -5) to (10 10 5)
1 by 1 by 1 MPI processor grid
create_atoms 1 random 16000 276438 NULL
Created 16000 atoms
mass 1 1.0
set atom * edpd/temp 1.0
16000 settings made for edpd/temp
set atom * edpd/cv 1.0E5
16000 settings made for edpd/cv
pair_style edpd 1.58 9872598
#pair_coeff 1 1 18.75 4.5 0.41 1.58 1.45E-5 2.0 1.58
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.41E-5 2.0 1.58 power 10.54 -3.66 3.44 -4.10 kappa -0.44 -3.21 5.04 0.00
compute mythermo all temp
thermo 100
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 432982 loop local dist gaussian
fix mvv all mvv/edpd 0.5
fix upper all edpd/source cuboid 0.0 5.0 0.0 20.0 10.0 10.0 0.01
fix lower all edpd/source cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 -0.01
timestep 0.01
run 500
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.78
ghost atom cutoff = 1.78
binsize = 0.89, bins = 23 23 12
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair edpd, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 11.64 | 11.64 | 11.64 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1 48.948932 0 50.448838 201.73366
100 1.0069712 43.754293 0 45.264656 199.5369
200 0.98667561 43.716052 0 45.195973 196.72854
300 1.0036944 43.706299 0 45.211746 195.35714
400 1.0024228 43.697014 0 45.200554 197.0062
500 0.99968161 43.687445 0 45.186873 193.80596
Loop time of 80.7995 on 1 procs for 500 steps with 16000 atoms
Performance: 5346.567 tau/day, 6.188 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 75.106 | 75.106 | 75.106 | 0.0 | 92.95
Neigh | 4.9836 | 4.9836 | 4.9836 | 0.0 | 6.17
Comm | 0.31199 | 0.31199 | 0.31199 | 0.0 | 0.39
Output | 0.00048232 | 0.00048232 | 0.00048232 | 0.0 | 0.00
Modify | 0.29985 | 0.29985 | 0.29985 | 0.0 | 0.37
Other | | 0.09751 | | | 0.12
Nlocal: 16000 ave 16000 max 16000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 14091 ave 14091 max 14091 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 749111 ave 749111 max 749111 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 749111
Ave neighs/atom = 46.8194
Neighbor list builds = 181
Dangerous builds = 0
reset_timestep 0
compute temp all edpd/temp/atom
compute ccT all chunk/atom bin/1d y 0.0 1.0
fix stat all ave/chunk 1 500 500 ccT c_temp density/number norm sample file temp.profile
run 500
Per MPI rank memory allocation (min/avg/max) = 12.14 | 12.14 | 12.14 Mbytes
Step Temp E_pair E_mol TotEng Press
0 0.99968161 43.687397 0 45.186825 196.38426
100 1.0041443 43.668196 0 45.174318 195.38066
200 0.99628392 43.666173 0 45.160505 197.84675
300 1.0029116 43.66224 0 45.166513 199.67414
400 0.99922193 43.64406 0 45.142799 196.94404
500 0.99355431 43.623266 0 45.113505 195.94136
Loop time of 80.7742 on 1 procs for 500 steps with 16000 atoms
Performance: 5348.242 tau/day, 6.190 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 75.073 | 75.073 | 75.073 | 0.0 | 92.94
Neigh | 4.8786 | 4.8786 | 4.8786 | 0.0 | 6.04
Comm | 0.31086 | 0.31086 | 0.31086 | 0.0 | 0.38
Output | 0.00045919 | 0.00045919 | 0.00045919 | 0.0 | 0.00
Modify | 0.4139 | 0.4139 | 0.4139 | 0.0 | 0.51
Other | | 0.09731 | | | 0.12
Nlocal: 16000 ave 16000 max 16000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 14091 ave 14091 max 14091 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 749667 ave 749667 max 749667 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 749667
Ave neighs/atom = 46.8542
Neighbor list builds = 178
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:02:41

View File

@ -0,0 +1,142 @@
LAMMPS (11 Aug 2017)
########################################################################
### Heat conduction analog of periodic Poiseuille flow problem ###
### using energy-conserving DPD (eDPD) simulation ###
### ###
### Created : Zhen Li (zhen_li@brown.edu) ###
### Division of Applied Mathematics, Brown University. ###
### ###
### mDPD system setup follows Fig.12 in the publication: ###
### Z. Li, Y.-H. Tang, H. Lei, B. Caswell and G.E. Karniadakis. ###
### "Energy-conserving dissipative particle dynamics with ###
### temperature-dependent properties". J. Comput. Phys., ###
### 2014, 265: 113-127. DOI: 10.1016/j.jcp.2014.02.003 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.2 bin
neigh_modify every 1 delay 0 check yes
atom_style edpd
region edpd block -10 10 -10 10 -5 5 units box
create_box 1 edpd
Created orthogonal box = (-10 -10 -5) to (10 10 5)
2 by 2 by 1 MPI processor grid
create_atoms 1 random 16000 276438 NULL
Created 16000 atoms
mass 1 1.0
set atom * edpd/temp 1.0
16000 settings made for edpd/temp
set atom * edpd/cv 1.0E5
16000 settings made for edpd/cv
pair_style edpd 1.58 9872598
#pair_coeff 1 1 18.75 4.5 0.41 1.58 1.45E-5 2.0 1.58
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.41E-5 2.0 1.58 power 10.54 -3.66 3.44 -4.10 kappa -0.44 -3.21 5.04 0.00
compute mythermo all temp
thermo 100
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 432982 loop local dist gaussian
fix mvv all mvv/edpd 0.5
fix upper all edpd/source cuboid 0.0 5.0 0.0 20.0 10.0 10.0 0.01
fix lower all edpd/source cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 -0.01
timestep 0.01
run 500
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.78
ghost atom cutoff = 1.78
binsize = 0.89, bins = 23 23 12
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair edpd, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.969 | 4.979 | 4.985 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1 48.948932 0 50.448838 199.51547
100 1.0106415 43.744371 0 45.260239 196.39598
200 1.0053215 43.714413 0 45.222301 195.35298
300 0.99886399 43.713356 0 45.211559 196.74821
400 1.0035264 43.699086 0 45.204282 195.47446
500 1.0025285 43.698051 0 45.20175 197.27042
Loop time of 21.165 on 4 procs for 500 steps with 16000 atoms
Performance: 20411.046 tau/day, 23.624 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 18.713 | 19.101 | 19.41 | 6.0 | 90.25
Neigh | 1.2687 | 1.2925 | 1.3177 | 1.5 | 6.11
Comm | 0.33013 | 0.66337 | 1.0747 | 34.3 | 3.13
Output | 0.00023484 | 0.00028092 | 0.00036526 | 0.0 | 0.00
Modify | 0.073931 | 0.075277 | 0.076306 | 0.3 | 0.36
Other | | 0.03227 | | | 0.15
Nlocal: 4000 ave 4067 max 3930 min
Histogram: 1 1 0 0 0 0 0 0 0 2
Nghost: 5997.5 ave 6052 max 5943 min
Histogram: 1 0 1 0 0 0 0 1 0 1
Neighs: 187388 ave 193157 max 181221 min
Histogram: 1 1 0 0 0 0 0 0 0 2
Total # of neighbors = 749552
Ave neighs/atom = 46.847
Neighbor list builds = 181
Dangerous builds = 0
reset_timestep 0
compute temp all edpd/temp/atom
compute ccT all chunk/atom bin/1d y 0.0 1.0
fix stat all ave/chunk 1 500 500 ccT c_temp density/number norm sample file temp.profile
run 500
Per MPI rank memory allocation (min/avg/max) = 5.221 | 5.23 | 5.236 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1.0025285 43.69801 0 45.201708 194.00452
100 0.9885969 43.679927 0 45.16273 196.28442
200 1.0028463 43.663067 0 45.167242 198.25592
300 1.0027516 43.648817 0 45.152851 198.82226
400 0.99695312 43.641469 0 45.136805 197.97499
500 0.98202292 43.627163 0 45.100105 199.16319
Loop time of 21.576 on 4 procs for 500 steps with 16000 atoms
Performance: 20022.203 tau/day, 23.174 timesteps/s
99.8% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 18.438 | 19.121 | 19.812 | 14.1 | 88.62
Neigh | 1.2568 | 1.2885 | 1.325 | 2.5 | 5.97
Comm | 0.29482 | 1.0219 | 1.7352 | 63.9 | 4.74
Output | 0.00027728 | 0.00029719 | 0.0003531 | 0.0 | 0.00
Modify | 0.11153 | 0.11265 | 0.1135 | 0.2 | 0.52
Other | | 0.03194 | | | 0.15
Nlocal: 4000 ave 4092 max 3899 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Nghost: 5974 ave 6019 max 5915 min
Histogram: 1 0 0 1 0 0 0 0 0 2
Neighs: 187414 ave 196149 max 178418 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Total # of neighbors = 749658
Ave neighs/atom = 46.8536
Neighbor list builds = 181
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:42

View File

@ -0,0 +1,24 @@
# Chunk-averaged data for fix stat and group density/number
# Timestep Number-of-chunks Total-count
# Chunk Coord1 Ncount c_temp density/number
500 20 16000
1 -9.5 801.636 0.986368 4.00818
2 -8.5 809.788 0.966281 4.04894
3 -7.5 819.754 0.952764 4.09877
4 -6.5 820.364 0.944592 4.10182
5 -5.5 826.146 0.940968 4.13073
6 -4.5 819.52 0.941415 4.0976
7 -3.5 815.182 0.945887 4.07591
8 -2.5 817.168 0.95487 4.08584
9 -1.5 817.282 0.969225 4.08641
10 -0.5 804.204 0.989552 4.02102
11 0.5 793.266 1.01015 3.96633
12 1.5 789.056 1.0308 3.94528
13 2.5 784.344 1.04568 3.92172
14 3.5 780.592 1.05508 3.90296
15 4.5 772.218 1.05968 3.86109
16 5.5 776.968 1.06003 3.88484
17 6.5 780.858 1.05612 3.90429
18 7.5 786.174 1.04752 3.93087
19 8.5 788.922 1.03347 3.94461
20 9.5 796.558 1.01278 3.98279

View File

@ -0,0 +1,24 @@
# Chunk-averaged data for fix stat and group density/number
# Timestep Number-of-chunks Total-count
# Chunk Coord1 Ncount c_temp density/number
500 20 16000
1 -9.5 801.642 0.986089 4.00821
2 -8.5 819.168 0.966072 4.09584
3 -7.5 817.382 0.952718 4.08691
4 -6.5 818 0.944633 4.09
5 -5.5 817.806 0.941105 4.08903
6 -4.5 826.11 0.941499 4.13055
7 -3.5 821.946 0.945922 4.10973
8 -2.5 816.202 0.954889 4.08101
9 -1.5 813.202 0.969281 4.06601
10 -0.5 798.904 0.989463 3.99452
11 0.5 798.056 1.01005 3.99028
12 1.5 793.114 1.03073 3.96557
13 2.5 782.812 1.04569 3.91406
14 3.5 775.69 1.05498 3.87845
15 4.5 778.094 1.05965 3.89047
16 5.5 778.856 1.06002 3.89428
17 6.5 780.51 1.05621 3.90255
18 7.5 780.518 1.04782 3.90259
19 8.5 789.698 1.03348 3.94849
20 9.5 792.29 1.01261 3.96145

View File

@ -0,0 +1,52 @@
########################################################################
#### 3D droplet oscillation using many-body DPD simulation ###
#### ###
#### Created : Zhen Li (zhen_li@brown.edu) ###
#### Division of Applied Mathematics, Brown University. ###
#### ###
#### mDPD parameters follow the choice of the publication: ###
#### Z. Li et al. "Three dimensional flow structures in a moving ###
#### droplet on substrate: a dissipative particle dynamics study" ###
#### Physics of Fluids, 2013, 25: 072103. DOI: 10.1063/1.4812366 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.3 bin
neigh_modify every 1 delay 0 check yes
atom_style mdpd
region mdpd block -25 25 -10 10 -10 10 units box
create_box 1 mdpd
lattice fcc 6
region film block -20 20 -7.5 7.5 -2.0 2.0 units box
create_atoms 1 region film
pair_style hybrid/overlay mdpd/rhosum mdpd 1.0 1.0 9872598
pair_coeff 1 1 mdpd/rhosum 0.75
pair_coeff 1 1 mdpd -40 25 18.0 1.0 0.75
mass 1 1.0
compute mythermo all temp
thermo 100
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 38497 loop local dist gaussian
fix mvv all mvv/dpd
#dump mydump all atom 100 atom.lammpstrj
#dump jpg all image 200 image.*.jpg type type zoom 5 adiam 0.5 &
# view 90 90 box no 0 size 600 200
#dump_modify jpg pad 4
#dump avi all movie 200 movie.avi type type zoom 5 adiam 0.5 &
# view 90 90 box no 0 size 600 200
#dump_modify avi pad 4
timestep 0.01
run 4000

View File

@ -0,0 +1,147 @@
LAMMPS (11 Aug 2017)
########################################################################
#### 3D droplet oscilation using many-body DPD simulation ###
#### ###
#### Created : Zhen Li (zhen_li@brown.edu) ###
#### Division of Applied Mathematics, Brown University. ###
#### ###
#### mDPD parameters follow the choice of the publication: ###
#### Z. Li et al. "Three dimensional flow structures in a moving ###
#### droplet on substrate: a dissipative particle dynamics study" ###
#### Physics of Fluids, 2013, 25: 072103. DOI: 10.1063/1.4812366 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.3 bin
neigh_modify every 1 delay 0 check yes
atom_style mdpd
region mdpd block -25 25 -10 10 -10 10 units box
create_box 1 mdpd
Created orthogonal box = (-25 -10 -10) to (25 10 10)
1 by 1 by 1 MPI processor grid
lattice fcc 6
Lattice spacing in x,y,z = 0.87358 0.87358 0.87358
region film block -20 20 -7.5 7.5 -2.0 2.0 units box
create_atoms 1 region film
Created 14333 atoms
pair_style hybrid/overlay mdpd/rhosum mdpd 1.0 1.0 9872598
pair_coeff 1 1 mdpd/rhosum 0.75
pair_coeff 1 1 mdpd -40 25 18.0 1.0 0.75
mass 1 1.0
compute mythermo all temp
thermo 100
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 38497 loop local dist gaussian
fix mvv all mvv/dpd
dump mydump all atom 100 atom.lammpstrj
#dump jpg all image 200 image.*.jpg type type zoom 5 adiam 0.5 # view 90 90 box no 0 size 600 200
#dump_modify jpg pad 4
#dump avi all movie 200 movie.avi type type zoom 5 adiam 0.5 # view 90 90 box no 0 size 600 200
#dump_modify avi pad 4
timestep 0.01
run 4000
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.3
ghost atom cutoff = 1.3
binsize = 0.65, bins = 77 31 31
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair mdpd/rhosum, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
(2) pair mdpd, perpetual, half/full from (1)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 9.931 | 9.931 | 9.931 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1 -13.346542 0 -11.846647 -6.8495478
100 1.0321029 -7.2846779 0 -5.7366316 -0.77640205
200 1.042287 -6.9534532 0 -5.3901317 -0.27750815
300 1.0583027 -6.8483105 0 -5.2609672 -0.30347708
400 1.0493719 -6.8648608 0 -5.2909127 -0.15312495
500 1.0723786 -6.8341085 0 -5.2256528 0.017227511
600 1.0545695 -6.8152957 0 -5.2335517 -0.024362439
700 1.0507193 -6.8076033 0 -5.2316344 -0.07101536
800 1.0531856 -6.9378568 0 -5.3581886 -0.053943939
900 1.0442995 -6.8501126 0 -5.2837726 -0.13347942
1000 1.0335049 -6.8883554 0 -5.3382062 -0.18420426
1100 1.0287276 -6.8298226 0 -5.2868389 -0.12081558
1200 1.0322527 -6.9462828 0 -5.3980117 -0.18047625
1300 1.0599443 -6.9449975 0 -5.355192 -0.011763589
1400 1.0560932 -6.845479 0 -5.2614498 0.032130055
1500 1.0432786 -6.9035877 0 -5.338779 -0.10268662
1600 1.064183 -6.9116836 0 -5.3155205 -0.060722129
1700 1.0586249 -6.8768278 0 -5.2890013 0.037005566
1800 1.0576064 -7.0060193 0 -5.4197204 -0.036211254
1900 1.0595141 -6.838741 0 -5.2495807 -0.12395681
2000 1.0650509 -6.897976 0 -5.3005111 0.003594807
2100 1.0768273 -6.8874245 0 -5.2722962 0.033283489
2200 1.0511606 -6.9823162 0 -5.4056854 0.015008427
2300 1.0461138 -6.8820601 0 -5.3129988 0.064646933
2400 1.0485369 -6.9437148 0 -5.3710191 -0.16534939
2500 1.0507221 -6.9394786 0 -5.3635054 -0.098289859
2600 1.0518352 -6.8947578 0 -5.3171152 -0.011666785
2700 1.0402369 -6.9273377 0 -5.3670913 0.035267073
2800 1.0426109 -6.912024 0 -5.3482168 0.049597305
2900 1.0358928 -6.9574778 0 -5.4037471 -0.063216561
3000 1.0351023 -6.9844192 0 -5.4318742 -0.10323465
3100 1.0255005 -6.9382486 0 -5.4001052 -0.073954735
3200 1.0150616 -6.9843183 0 -5.4618321 -0.095136405
3300 1.0118112 -6.9522082 0 -5.4345973 -0.12686179
3400 1.0071522 -6.970158 0 -5.4595351 -0.012487475
3500 1.0041758 -6.9773019 0 -5.4711433 -0.098027653
3600 1.0189298 -6.9393039 0 -5.4110158 0.061631719
3700 1.012442 -6.9341423 0 -5.4155852 0.10442772
3800 1.0021246 -6.9594374 0 -5.4563553 -0.081535223
3900 1.0165002 -6.9045321 0 -5.3798882 -0.0088283303
4000 1.0077099 -6.9145511 0 -5.4030918 0.048349691
Loop time of 135.409 on 1 procs for 4000 steps with 14333 atoms
Performance: 25522.736 tau/day, 29.540 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 93.074 | 93.074 | 93.074 | 0.0 | 68.74
Neigh | 40.192 | 40.192 | 40.192 | 0.0 | 29.68
Comm | 0.19625 | 0.19625 | 0.19625 | 0.0 | 0.14
Output | 0.41756 | 0.41756 | 0.41756 | 0.0 | 0.31
Modify | 1.0706 | 1.0706 | 1.0706 | 0.0 | 0.79
Other | | 0.4581 | | | 0.34
Nlocal: 14333 ave 14333 max 14333 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 11 ave 11 max 11 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 401803 ave 401803 max 401803 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 803606 ave 803606 max 803606 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 803606
Ave neighs/atom = 56.0668
Neighbor list builds = 1050
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:02:15

View File

@ -0,0 +1,147 @@
LAMMPS (11 Aug 2017)
########################################################################
#### 3D droplet oscilation using many-body DPD simulation ###
#### ###
#### Created : Zhen Li (zhen_li@brown.edu) ###
#### Division of Applied Mathematics, Brown University. ###
#### ###
#### mDPD parameters follow the choice of the publication: ###
#### Z. Li et al. "Three dimensional flow structures in a moving ###
#### droplet on substrate: a dissipative particle dynamics study" ###
#### Physics of Fluids, 2013, 25: 072103. DOI: 10.1063/1.4812366 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.3 bin
neigh_modify every 1 delay 0 check yes
atom_style mdpd
region mdpd block -25 25 -10 10 -10 10 units box
create_box 1 mdpd
Created orthogonal box = (-25 -10 -10) to (25 10 10)
4 by 1 by 1 MPI processor grid
lattice fcc 6
Lattice spacing in x,y,z = 0.87358 0.87358 0.87358
region film block -20 20 -7.5 7.5 -2.0 2.0 units box
create_atoms 1 region film
Created 14333 atoms
pair_style hybrid/overlay mdpd/rhosum mdpd 1.0 1.0 9872598
pair_coeff 1 1 mdpd/rhosum 0.75
pair_coeff 1 1 mdpd -40 25 18.0 1.0 0.75
mass 1 1.0
compute mythermo all temp
thermo 100
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 38497 loop local dist gaussian
fix mvv all mvv/dpd
dump mydump all atom 100 atom.lammpstrj
#dump jpg all image 200 image.*.jpg type type zoom 5 adiam 0.5 # view 90 90 box no 0 size 600 200
#dump_modify jpg pad 4
#dump avi all movie 200 movie.avi type type zoom 5 adiam 0.5 # view 90 90 box no 0 size 600 200
#dump_modify avi pad 4
timestep 0.01
run 4000
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.3
ghost atom cutoff = 1.3
binsize = 0.65, bins = 77 31 31
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair mdpd/rhosum, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
(2) pair mdpd, perpetual, half/full from (1)
attributes: half, newton on
pair build: halffull/newton
stencil: none
bin: none
Per MPI rank memory allocation (min/avg/max) = 6.265 | 6.655 | 7.045 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1 -13.346542 0 -11.846647 -6.9757225
100 1.0406108 -7.2500697 0 -5.6892624 -0.80306477
200 1.0535506 -6.9452928 0 -5.3650772 -0.39911584
300 1.0644295 -6.8599907 0 -5.2634577 -0.2997968
400 1.0780123 -6.9471342 0 -5.3302286 -0.06274869
500 1.0672153 -6.8269984 0 -5.2262872 0.021251762
600 1.0634304 -6.8366569 0 -5.2416226 -0.021863333
700 1.0544807 -6.8272074 0 -5.2455967 -0.0064688066
800 1.0556172 -6.8859788 0 -5.3026634 0.023983333
900 1.0436201 -6.9246523 0 -5.3593313 -0.12409618
1000 1.0617016 -6.8632331 0 -5.2707919 -0.1145505
1100 1.0323831 -6.951554 0 -5.4030874 -0.030031884
1200 1.0407785 -6.931048 0 -5.3699892 -0.018362136
1300 1.0380953 -6.8785296 0 -5.3214953 -0.099308737
1400 1.0418898 -6.8998 0 -5.3370743 -0.14199421
1500 1.0487254 -6.9671212 0 -5.3941429 -0.12132644
1600 1.0561042 -6.8948881 0 -5.3108424 -0.09627292
1700 1.0524479 -6.9531441 0 -5.3745823 -0.11959782
1800 1.0541197 -6.9219819 0 -5.3409126 0.032964029
1900 1.0531221 -6.8805815 0 -5.3010085 0.030124685
2000 1.0531819 -6.8612868 0 -5.2816242 -0.076876781
2100 1.0757791 -6.919875 0 -5.3063189 -0.04060439
2200 1.069423 -6.9005754 0 -5.2965527 0.015347467
2300 1.0403109 -6.9015402 0 -5.3411827 0.0034687897
2400 1.0547448 -6.9325539 0 -5.3505471 -0.021202325
2500 1.0404195 -6.8494675 0 -5.2889472 0.086947847
2600 1.0499828 -6.9861392 0 -5.4112749 -0.018079308
2700 1.0294278 -6.8525151 0 -5.3084811 0.16911472
2800 1.0220652 -6.8993978 0 -5.366407 0.064820531
2900 1.0347904 -6.9322703 0 -5.3801929 -0.11384964
3000 1.0391372 -6.9519088 0 -5.3933117 0.003050577
3100 1.0335828 -7.0090074 0 -5.4587413 -0.17366664
3200 1.0211896 -6.9421289 0 -5.4104513 0.025299853
3300 1.0019232 -6.9426488 0 -5.4398688 -0.098334724
3400 1.0203541 -6.9310981 0 -5.4006737 -0.0015544982
3500 1.0076794 -6.9519932 0 -5.4405796 -0.056956902
3600 1.0086525 -6.9620979 0 -5.4492247 0.020014884
3700 1.0046112 -7.0011625 0 -5.4943508 -0.083936527
3800 1.0096867 -6.9470382 0 -5.4326138 -0.089521759
3900 1.0074482 -6.9959414 0 -5.4848745 -0.11873698
4000 1.01222 -6.9535694 0 -5.4353454 0.042191466
Loop time of 63.0327 on 4 procs for 4000 steps with 14333 atoms
Performance: 54828.695 tau/day, 63.459 timesteps/s
98.8% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 16.591 | 29.795 | 42.814 | 236.6 | 47.27
Neigh | 2.0347 | 10.239 | 18.555 | 255.6 | 16.24
Comm | 0.70099 | 6.0601 | 11.386 | 207.4 | 9.61
Output | 0.20713 | 0.40902 | 0.61087 | 31.5 | 0.65
Modify | 0.058089 | 0.27033 | 0.4851 | 40.7 | 0.43
Other | | 16.26 | | | 25.79
Nlocal: 3583.25 ave 7207 max 0 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Nghost: 1055.75 ave 2131 max 0 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Neighs: 100549 ave 202192 max 0 min
Histogram: 2 0 0 0 0 0 0 0 0 2
FullNghs: 201098 ave 404372 max 0 min
Histogram: 2 0 0 0 0 0 0 0 0 2
Total # of neighbors = 804390
Ave neighs/atom = 56.1215
Neighbor list builds = 1049
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:01:03

View File

@ -0,0 +1,24 @@
# Chunk-averaged data for fix stat and group c_cc2
# Timestep Number-of-chunks Total-count
# Chunk Coord1 Ncount c_cc1 c_cc2
100 20 16000
1 -9.5 797.17 0.986661 1.0077
2 -8.5 802.61 0.967974 1.02003
3 -7.5 795.46 0.957045 1.02873
4 -6.5 806.46 0.951271 1.03428
5 -5.5 802.34 0.94898 1.03692
6 -4.5 799.84 0.949378 1.03673
7 -3.5 798.4 0.952505 1.03374
8 -2.5 800.36 0.959322 1.02778
9 -1.5 797.65 0.971516 1.01867
10 -0.5 808.88 0.990644 1.00626
11 0.5 786.29 1.00924 0.993828
12 1.5 807.16 1.02831 0.981436
13 2.5 797.54 1.04071 0.972184
14 3.5 799.67 1.04749 0.966258
15 4.5 799.61 1.05063 0.963256
16 5.5 806.11 1.05105 0.963052
17 6.5 803.67 1.04877 0.965688
18 7.5 797.39 1.04305 0.971187
19 8.5 801.85 1.03208 0.97993
20 9.5 791.54 1.01351 0.992209

View File

@ -0,0 +1,24 @@
# Chunk-averaged data for fix stat and group c_cc2
# Timestep Number-of-chunks Total-count
# Chunk Coord1 Ncount c_cc1 c_cc2
100 20 16000
1 -9.5 806.92 0.986675 1.00766
2 -8.5 798.01 0.96792 1.02003
3 -7.5 805.43 0.956909 1.02883
4 -6.5 800.54 0.951207 1.03432
5 -5.5 794.14 0.948967 1.03691
6 -4.5 799.75 0.949379 1.03672
7 -3.5 799.65 0.952492 1.03374
8 -2.5 799.94 0.959331 1.02778
9 -1.5 800.96 0.971664 1.01861
10 -0.5 803.97 0.99074 1.00622
11 0.5 800.66 1.00949 0.993673
12 1.5 779.22 1.02824 0.981461
13 2.5 809.13 1.04056 0.972274
14 3.5 805.23 1.04747 0.966272
15 4.5 795.95 1.05061 0.96327
16 5.5 796.4 1.05105 0.963035
17 6.5 806.1 1.04883 0.965621
18 7.5 806.41 1.04305 0.971224
19 8.5 792.2 1.03211 0.979955
20 9.5 799.39 1.01362 0.992156

View File

@ -0,0 +1,54 @@
########################################################################
### Pure diffusion with a reaction source term analog of a periodic ###
### Poiseuille flow problem using transport DPD (tDPD) simulation ###
### ###
### Created : Zhen Li (zhen_li@brown.edu) ###
### Division of Applied Mathematics, Brown University. ###
### ###
### tDPD system setup follows Fig.1 in the publication: ###
### Z. Li, A. Yazdani, A. Tartakovsky and G.E. Karniadakis. ###
### "Transport dissipative particle dynamics model for mesoscopic ###
### advection-diffusion-reaction problems. J. Chem. Phys., ###
### 2015, 143: 014101. DOI: 10.1063/1.4923254 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.2 bin
neigh_modify every 1 delay 0 check yes
atom_style tdpd 2
region tdpd block -10 10 -10 10 -5 5 units box
create_box 1 tdpd
create_atoms 1 random 16000 276438 NULL
mass 1 1.0
set atom * cc 1 1.0
set atom * cc 2 1.0
pair_style tdpd 1.0 1.58 9872598
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.58 1.0 1.0E-5 2.0 3.0 1.0E-5 2.0
compute mythermo all temp
thermo 50
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 432982 loop local dist gaussian
fix mvv all mvv/tdpd 0.5
fix upper1 all tdpd/source 1 cuboid 0.0 5.0 0.0 20.0 10.0 10.0 0.01
fix lower1 all tdpd/source 1 cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 -0.01
fix upper2 all tdpd/source 2 cuboid 0.0 5.0 0.0 20.0 10.0 10.0 -0.01
fix lower2 all tdpd/source 2 cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 0.01
timestep 0.01
run 500
reset_timestep 0
compute cc1 all tdpd/cc/atom 1
compute cc2 all tdpd/cc/atom 2
compute bin all chunk/atom bin/1d y 0.0 1.0
fix stat all ave/chunk 1 100 100 bin c_cc1 c_cc2 norm sample file cc.profile
run 100

View File

@ -0,0 +1,146 @@
LAMMPS (11 Aug 2017)
########################################################################
### Pure diffusion with a reaction source term analog of a periodic ###
### Poiseuille flow problem using transport DPD (tDPD) simulation ###
### ###
### Created : Zhen Li (zhen_li@brown.edu) ###
### Division of Applied Mathematics, Brown University. ###
### ###
### tDPD system setup follows Fig.1 in the publication: ###
### Z. Li, A. Yazdani, A. Tartakovsky and G.E. Karniadakis. ###
### "Transport dissipative particle dynamics model for mesoscopic ###
### advection-diffusion-reaction problems. J. Chem. Phys., ###
### 2015, 143: 014101. DOI: 10.1063/1.4923254 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.2 bin
neigh_modify every 1 delay 0 check yes
atom_style tdpd 2
region tdpd block -10 10 -10 10 -5 5 units box
create_box 1 tdpd
Created orthogonal box = (-10 -10 -5) to (10 10 5)
1 by 1 by 1 MPI processor grid
create_atoms 1 random 16000 276438 NULL
Created 16000 atoms
mass 1 1.0
set atom * cc 1 1.0
16000 settings made for cc index 1
set atom * cc 2 1.0
16000 settings made for cc index 2
pair_style tdpd 1.0 1.58 9872598
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.58 1.0 1.0E-5 2.0 3.0 1.0E-5 2.0
compute mythermo all temp
thermo 50
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 432982 loop local dist gaussian
fix mvv all mvv/tdpd 0.5
fix upper1 all tdpd/source 1 cuboid 0.0 5.0 0.0 20.0 10.0 10.0 0.01
fix lower1 all tdpd/source 1 cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 -0.01
fix upper2 all tdpd/source 2 cuboid 0.0 5.0 0.0 20.0 10.0 10.0 -0.01
fix lower2 all tdpd/source 2 cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 0.01
timestep 0.01
run 500
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.78
ghost atom cutoff = 1.78
binsize = 0.89, bins = 23 23 12
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair tdpd, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 11.3 | 11.3 | 11.3 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1 48.948932 0 50.448838 202.19166
50 0.99837766 43.949877 0 45.447349 195.80936
100 0.99846831 43.756995 0 45.254604 198.22348
150 1.0026903 43.72408 0 45.228021 196.61676
200 1.0063144 43.722388 0 45.231765 194.17954
250 1.0032304 43.721864 0 45.226615 197.85829
300 0.9932656 43.703526 0 45.193331 196.57406
350 1.0002916 43.720498 0 45.220841 193.55346
400 0.99475486 43.722965 0 45.215004 196.81546
450 1.0011803 43.712447 0 45.214124 200.46118
500 1.0009006 43.708984 0 45.210241 197.38953
Loop time of 96.0326 on 1 procs for 500 steps with 16000 atoms
Performance: 4498.474 tau/day, 5.207 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 90.083 | 90.083 | 90.083 | 0.0 | 93.80
Neigh | 5.049 | 5.049 | 5.049 | 0.0 | 5.26
Comm | 0.34141 | 0.34141 | 0.34141 | 0.0 | 0.36
Output | 0.00092816 | 0.00092816 | 0.00092816 | 0.0 | 0.00
Modify | 0.45991 | 0.45991 | 0.45991 | 0.0 | 0.48
Other | | 0.09865 | | | 0.10
Nlocal: 16000 ave 16000 max 16000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 14091 ave 14091 max 14091 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 749379 ave 749379 max 749379 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 749379
Ave neighs/atom = 46.8362
Neighbor list builds = 183
Dangerous builds = 0
reset_timestep 0
compute cc1 all tdpd/cc/atom 1
compute cc2 all tdpd/cc/atom 2
compute bin all chunk/atom bin/1d y 0.0 1.0
fix stat all ave/chunk 1 100 100 bin c_cc1 c_cc2 norm sample file cc.profile
run 100
Per MPI rank memory allocation (min/avg/max) = 11.8 | 11.8 | 11.8 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1.0009006 43.708984 0 45.210241 199.3205
50 1.0007276 43.704844 0 45.205842 197.77053
100 1.0039032 43.714201 0 45.219961 197.31118
Loop time of 19.0326 on 1 procs for 100 steps with 16000 atoms
Performance: 4539.577 tau/day, 5.254 timesteps/s
99.9% CPU use with 1 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 17.842 | 17.842 | 17.842 | 0.0 | 93.74
Neigh | 0.98674 | 0.98674 | 0.98674 | 0.0 | 5.18
Comm | 0.066013 | 0.066013 | 0.066013 | 0.0 | 0.35
Output | 0.00016284 | 0.00016284 | 0.00016284 | 0.0 | 0.00
Modify | 0.11795 | 0.11795 | 0.11795 | 0.0 | 0.62
Other | | 0.02012 | | | 0.11
Nlocal: 16000 ave 16000 max 16000 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 14126 ave 14126 max 14126 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 748927 ave 748927 max 748927 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 748927
Ave neighs/atom = 46.8079
Neighbor list builds = 37
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:01:55

View File

@ -0,0 +1,146 @@
LAMMPS (11 Aug 2017)
########################################################################
### Pure diffusion with a reaction source term analog of a periodic ###
### Poiseuille flow problem using transport DPD (tDPD) simulation ###
### ###
### Created : Zhen Li (zhen_li@brown.edu) ###
### Division of Applied Mathematics, Brown University. ###
### ###
### tDPD system setup follows Fig.1 in the publication: ###
### Z. Li, A. Yazdani, A. Tartakovsky and G.E. Karniadakis. ###
### "Transport dissipative particle dynamics model for mesoscopic ###
### advection-diffusion-reaction problems. J. Chem. Phys., ###
### 2015, 143: 014101. DOI: 10.1063/1.4923254 ###
########################################################################
units lj
dimension 3
boundary p p p
neighbor 0.2 bin
neigh_modify every 1 delay 0 check yes
atom_style tdpd 2
region tdpd block -10 10 -10 10 -5 5 units box
create_box 1 tdpd
Created orthogonal box = (-10 -10 -5) to (10 10 5)
2 by 2 by 1 MPI processor grid
create_atoms 1 random 16000 276438 NULL
Created 16000 atoms
mass 1 1.0
set atom * cc 1 1.0
16000 settings made for cc index 1
set atom * cc 2 1.0
16000 settings made for cc index 2
pair_style tdpd 1.0 1.58 9872598
pair_coeff 1 1 18.75 4.5 0.41 1.58 1.58 1.0 1.0E-5 2.0 3.0 1.0E-5 2.0
compute mythermo all temp
thermo 50
thermo_modify temp mythermo
thermo_modify flush yes
velocity all create 1.0 432982 loop local dist gaussian
fix mvv all mvv/tdpd 0.5
fix upper1 all tdpd/source 1 cuboid 0.0 5.0 0.0 20.0 10.0 10.0 0.01
fix lower1 all tdpd/source 1 cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 -0.01
fix upper2 all tdpd/source 2 cuboid 0.0 5.0 0.0 20.0 10.0 10.0 -0.01
fix lower2 all tdpd/source 2 cuboid 0.0 -5.0 0.0 20.0 10.0 10.0 0.01
timestep 0.01
run 500
Neighbor list info ...
update every 1 steps, delay 0 steps, check yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 1.78
ghost atom cutoff = 1.78
binsize = 0.89, bins = 23 23 12
1 neighbor lists, perpetual/occasional/extra = 1 0 0
(1) pair tdpd, perpetual
attributes: half, newton on
pair build: half/bin/atomonly/newton
stencil: half/bin/3d/newton
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.814 | 4.823 | 4.829 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1 48.948932 0 50.448838 199.65978
50 1.0153476 43.948796 0 45.471722 198.3346
100 1.0064284 43.754875 0 45.264424 197.5308
150 0.99609985 43.726751 0 45.220807 197.50623
200 1.0016604 43.720283 0 45.22268 197.81129
250 1.0054979 43.718568 0 45.22672 195.79405
300 0.9997618 43.716617 0 45.216166 197.84788
350 0.99170101 43.72093 0 45.208389 196.07711
400 1.0043692 43.71648 0 45.22294 199.55247
450 1.0086263 43.709988 0 45.222833 198.20516
500 1.0029076 43.717879 0 45.222146 197.26281
Loop time of 24.5533 on 4 procs for 500 steps with 16000 atoms
Performance: 17594.412 tau/day, 20.364 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 22.236 | 22.418 | 22.736 | 4.0 | 91.30
Neigh | 1.2759 | 1.2883 | 1.3077 | 1.1 | 5.25
Comm | 0.35749 | 0.69526 | 0.88462 | 24.1 | 2.83
Output | 0.00043321 | 0.00050318 | 0.00070691 | 0.0 | 0.00
Modify | 0.11555 | 0.11648 | 0.11888 | 0.4 | 0.47
Other | | 0.03473 | | | 0.14
Nlocal: 4000 ave 4012 max 3982 min
Histogram: 1 0 0 0 0 1 0 0 0 2
Nghost: 5986.25 ave 6016 max 5956 min
Histogram: 1 0 0 0 1 0 1 0 0 1
Neighs: 187309 ave 188264 max 186087 min
Histogram: 1 0 0 0 1 0 0 1 0 1
Total # of neighbors = 749235
Ave neighs/atom = 46.8272
Neighbor list builds = 180
Dangerous builds = 0
reset_timestep 0
compute cc1 all tdpd/cc/atom 1
compute cc2 all tdpd/cc/atom 2
compute bin all chunk/atom bin/1d y 0.0 1.0
fix stat all ave/chunk 1 100 100 bin c_cc1 c_cc2 norm sample file cc.profile
run 100
Per MPI rank memory allocation (min/avg/max) = 5.065 | 5.074 | 5.082 Mbytes
Step Temp E_pair E_mol TotEng Press
0 1.0029076 43.717879 0 45.222146 198.45789
50 1.0077982 43.713264 0 45.224867 196.56183
100 1.0036823 43.708022 0 45.213451 196.00815
Loop time of 4.79577 on 4 procs for 100 steps with 16000 atoms
Performance: 18015.870 tau/day, 20.852 timesteps/s
99.9% CPU use with 4 MPI tasks x no OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 4.3481 | 4.39 | 4.4398 | 1.7 | 91.54
Neigh | 0.25477 | 0.25675 | 0.25963 | 0.4 | 5.35
Comm | 0.059327 | 0.11194 | 0.15608 | 11.0 | 2.33
Output | 0.00011206 | 0.00011748 | 0.00011992 | 0.0 | 0.00
Modify | 0.030417 | 0.030622 | 0.030739 | 0.1 | 0.64
Other | | 0.006301 | | | 0.13
Nlocal: 4000 ave 4010 max 3987 min
Histogram: 1 0 0 0 0 1 1 0 0 1
Nghost: 5985.25 ave 6025 max 5959 min
Histogram: 2 0 0 0 0 1 0 0 0 1
Neighs: 187304 ave 188092 max 186449 min
Histogram: 1 0 0 0 0 2 0 0 0 1
Total # of neighbors = 749216
Ave neighs/atom = 46.826
Neighbor list builds = 38
Dangerous builds = 0
Please see the log.cite file for references relevant to this simulation
Total wall time: 0:00:29

View File

@ -9,8 +9,8 @@ import sys,os,subprocess
# help message
help = """
Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -m -o osuffix"
Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p precision -e esuffix -m -o osuffix
Syntax from src dir: make lib-gpu args="-m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix"
Syntax from lib dir: python Install.py -m machine -h hdir -a arch -p precision -e esuffix -b -o osuffix
specify one or more options, order does not matter

View File

@ -1,5 +1,22 @@
# Change Log
## [2.04.00](https://github.com/kokkos/kokkos/tree/2.04.00) (2017-08-16)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.13...2.04.00)
**Implemented enhancements:**
- Added ROCm backend to support AMD GPUs
- Kokkos::complex\<T\> behaves slightly differently from std::complex\<T\> [\#1011](https://github.com/kokkos/kokkos/issues/1011)
- Kokkos::Experimental::Crs constructor arguments were in the wrong order [\#992](https://github.com/kokkos/kokkos/issues/992)
- Work graph construction ease-of-use (one lambda for count and fill) [\#991](https://github.com/kokkos/kokkos/issues/991)
- when\_all returns pointer of futures (improved interface) [\#990](https://github.com/kokkos/kokkos/issues/990)
- Allow assignment of LayoutLeft to LayoutRight or vice versa for rank-0 Views [\#594](https://github.com/kokkos/kokkos/issues/594)
- Changed the meaning of Kokkos\_ENABLE\_CXX11\_DISPATCH\_LAMBDA [\#1035](https://github.com/kokkos/kokkos/issues/1035)
**Fixed bugs:**
- memory pool default constructor does not properly set member variables. [\#1007](https://github.com/kokkos/kokkos/issues/1007)
## [2.03.13](https://github.com/kokkos/kokkos/tree/2.03.13) (2017-07-27)
[Full Changelog](https://github.com/kokkos/kokkos/compare/2.03.05...2.03.13)

View File

@ -4,10 +4,16 @@
KOKKOS_PATH=../../lib/kokkos
CXXFLAGS=$(CCFLAGS)
# Options: Cuda,OpenMP,Pthreads,Qthreads,Serial
# Options: Cuda,ROCm,OpenMP,Pthreads,Qthreads,Serial
KOKKOS_DEVICES ?= "OpenMP"
#KOKKOS_DEVICES ?= "Pthreads"
# Options: KNC,SNB,HSW,Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61,ARMv80,ARMv81,ARMv8-ThunderX,BGQ,Power7,Power8,Power9,KNL,BDW,SKX
# Options:
# Intel: KNC,KNL,SNB,HSW,BDW,SKX
# NVIDIA: Kepler,Kepler30,Kepler32,Kepler35,Kepler37,Maxwell,Maxwell50,Maxwell52,Maxwell53,Pascal60,Pascal61
# ARM: ARMv80,ARMv81,ARMv8-ThunderX
# IBM: BGQ,Power7,Power8,Power9
# AMD-GPUS: Kaveri,Carrizo,Fiji,Vega
# AMD-CPUS: AMDAVX,Ryzen,Epyc
KOKKOS_ARCH ?= ""
# Options: yes,no
KOKKOS_DEBUG ?= "no"
@ -43,8 +49,8 @@ KOKKOS_INTERNAL_CUDA_USE_UVM := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | gr
KOKKOS_INTERNAL_CUDA_USE_RELOC := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "rdc" | wc -l))
KOKKOS_INTERNAL_CUDA_USE_LAMBDA := $(strip $(shell echo $(KOKKOS_CUDA_OPTIONS) | grep "enable_lambda" | wc -l))
# Check for Kokkos Host Execution Spaces one of which must be on.
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
KOKKOS_INTERNAL_USE_OPENMP := $(strip $(shell echo $(subst OpenMPTarget,,$(KOKKOS_DEVICES)) | grep OpenMP | wc -l))
KOKKOS_INTERNAL_USE_PTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Pthread | wc -l))
KOKKOS_INTERNAL_USE_QTHREADS := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Qthreads | wc -l))
@ -60,6 +66,8 @@ endif
# Check for other Execution Spaces.
KOKKOS_INTERNAL_USE_CUDA := $(strip $(shell echo $(KOKKOS_DEVICES) | grep Cuda | wc -l))
KOKKOS_INTERNAL_USE_ROCM := $(strip $(shell echo $(KOKKOS_DEVICES) | grep ROCm | wc -l))
KOKKOS_INTERNAL_USE_OPENMPTARGET := $(strip $(shell echo $(KOKKOS_DEVICES) | grep OpenMPTarget | wc -l))
ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
KOKKOS_INTERNAL_NVCC_PATH := $(shell which nvcc)
@ -87,6 +95,7 @@ ifneq ($(MPICH_CXX),)
endif
KOKKOS_INTERNAL_COMPILER_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep clang | wc -l))
KOKKOS_INTERNAL_COMPILER_APPLE_CLANG := $(strip $(shell $(CXX) --version 2>&1 | grep "apple-darwin" | wc -l))
KOKKOS_INTERNAL_COMPILER_HCC := $(strip $(shell $(CXX) --version 2>&1 | grep HCC | wc -l))
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 2)
KOKKOS_INTERNAL_COMPILER_CLANG = 1
@ -99,6 +108,10 @@ endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_APPLE_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
# AMD HCC passes both the clang and the HCC version-string tests, so clear the
# clang flag here; otherwise the clang-specific checks below would also run
# for HCC.
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
KOKKOS_INTERNAL_COMPILER_CLANG = 0
endif
ifeq ($(KOKKOS_INTERNAL_COMPILER_CLANG), 1)
KOKKOS_INTERNAL_COMPILER_CLANG_VERSION := $(shell clang --version | grep version | cut -d ' ' -f3 | tr -d '.')
@ -183,8 +196,12 @@ else
ifeq ($(KOKKOS_INTERNAL_COMPILER_CRAY), 1)
KOKKOS_INTERNAL_CXX11_FLAG := -hstd=c++11
else
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
ifeq ($(KOKKOS_INTERNAL_COMPILER_HCC), 1)
KOKKOS_INTERNAL_CXX11_FLAG :=
else
KOKKOS_INTERNAL_CXX11_FLAG := --std=c++11
KOKKOS_INTERNAL_CXX1Z_FLAG := --std=c++1z
endif
endif
endif
endif
@ -259,6 +276,13 @@ KOKKOS_INTERNAL_USE_ARCH_IBM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_
# AMD based.
KOKKOS_INTERNAL_USE_ARCH_AMDAVX := $(strip $(shell echo $(KOKKOS_ARCH) | grep AMDAVX | wc -l))
KOKKOS_INTERNAL_USE_ARCH_RYZEN := $(strip $(shell echo $(KOKKOS_ARCH) | grep Ryzen | wc -l))
KOKKOS_INTERNAL_USE_ARCH_EPYC := $(strip $(shell echo $(KOKKOS_ARCH) | grep Epyc | wc -l))
KOKKOS_INTERNAL_USE_ARCH_KAVERI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Kaveri | wc -l))
KOKKOS_INTERNAL_USE_ARCH_CARRIZO := $(strip $(shell echo $(KOKKOS_ARCH) | grep Carrizo | wc -l))
KOKKOS_INTERNAL_USE_ARCH_FIJI := $(strip $(shell echo $(KOKKOS_ARCH) | grep Fiji | wc -l))
KOKKOS_INTERNAL_USE_ARCH_VEGA := $(strip $(shell echo $(KOKKOS_ARCH) | grep Vega | wc -l))
KOKKOS_INTERNAL_USE_ARCH_GFX901 := $(strip $(shell echo $(KOKKOS_ARCH) | grep gfx901 | wc -l))
# Any AVX?
KOKKOS_INTERNAL_USE_ARCH_SSE42 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM) | bc ))
@ -271,6 +295,7 @@ KOKKOS_INTERNAL_USE_ARCH_AVX512XEON := $(strip $(shell echo $(KOKKOS_INTERNAL_US
KOKKOS_INTERNAL_USE_ISA_X86_64 := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_WSM)+$(KOKKOS_INTERNAL_USE_ARCH_SNB)+$(KOKKOS_INTERNAL_USE_ARCH_HSW)+$(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_KNL)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
KOKKOS_INTERNAL_USE_ISA_KNC := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_KNC) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCLE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER8)+$(KOKKOS_INTERNAL_USE_ARCH_POWER9) | bc ))
KOKKOS_INTERNAL_USE_ISA_POWERPCBE := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_POWER7) | bc ))
# Decide whether we can support transactional memory
KOKKOS_INTERNAL_USE_TM := $(strip $(shell echo $(KOKKOS_INTERNAL_USE_ARCH_BDW)+$(KOKKOS_INTERNAL_USE_ARCH_SKX) | bc ))
@ -319,8 +344,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
tmp := $(shell echo "\#define KOKKOS_HAVE_CUDA 1" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
tmp := $(shell echo '\#define KOKKOS_ENABLE_ROCM 1' >> KokkosCore_config.tmp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMPTARGET), 1)
tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
tmp := $(shell echo '\#define KOKKOS_ENABLE_OPENMPTARGET 1' >> KokkosCore_config.tmp)
endif
ifeq ($(KOKKOS_INTERNAL_USE_OPENMP), 1)
@ -363,6 +392,12 @@ ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCLE), 1)
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
endif
ifeq ($(KOKKOS_INTERNAL_USE_ISA_POWERPCBE), 1)
tmp := $(shell echo "\#ifndef __CUDA_ARCH__" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_USE_ISA_POWERPCBE" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#endif" >> KokkosCore_config.tmp )
endif
tmp := $(shell echo "/* General Settings */" >> KokkosCore_config.tmp)
ifeq ($(KOKKOS_INTERNAL_ENABLE_CXX11), 1)
KOKKOS_CXXFLAGS += $(KOKKOS_INTERNAL_CXX11_FLAG)
@ -561,6 +596,18 @@ ifeq ($(KOKKOS_INTERNAL_USE_ARCH_AVX), 1)
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER7), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER7 1" >> KokkosCore_config.tmp )
ifeq ($(KOKKOS_INTERNAL_COMPILER_PGI), 1)
else
# Assume that this is really a GNU compiler, or it could be XL on P8.
KOKKOS_CXXFLAGS += -mcpu=power7 -mtune=power7
KOKKOS_LDFLAGS += -mcpu=power7 -mtune=power7
endif
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_POWER8), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_POWER8 1" >> KokkosCore_config.tmp )
@ -742,7 +789,49 @@ ifeq ($(KOKKOS_INTERNAL_USE_CUDA), 1)
endif
endif
# Figure out the architecture flag for ROCm.
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
# Let's start by adding the architecture defines
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_KAVERI), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 701" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_KAVERI 1" >> KokkosCore_config.tmp )
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx701
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_CARRIZO), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 801" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_CARRIZO 1" >> KokkosCore_config.tmp )
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx801
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_FIJI), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 803" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_FIJI 1" >> KokkosCore_config.tmp )
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx803
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_VEGA), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 900" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_VEGA 1" >> KokkosCore_config.tmp )
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx900
endif
ifeq ($(KOKKOS_INTERNAL_USE_ARCH_GFX901), 1)
tmp := $(shell echo "\#define KOKKOS_ARCH_ROCM 901" >> KokkosCore_config.tmp )
tmp := $(shell echo "\#define KOKKOS_ARCH_GFX901 1" >> KokkosCore_config.tmp )
KOKKOS_INTERNAL_ROCM_ARCH_FLAG := --amdgpu-target=gfx901
endif
KOKKOS_INTERNAL_HCC_PATH := $(shell which $(CXX))
ROCM_HCC_PATH ?= $(KOKKOS_INTERNAL_HCC_PATH:/bin/clang++=)
KOKKOS_CXXFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --cxxflags)
KOKKOS_LDFLAGS += $(shell $(ROCM_HCC_PATH)/bin/hcc-config --ldflags) -lhc_am -lm
KOKKOS_LDFLAGS += $(KOKKOS_INTERNAL_ROCM_ARCH_FLAG)
KOKKOS_SRC += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.cpp)
KOKKOS_HEADERS += $(wildcard $(KOKKOS_PATH)/core/src/ROCm/*.hpp)
endif
KOKKOS_INTERNAL_LS_CONFIG := $(shell ls KokkosCore_config.h 2>&1)
ifeq ($(KOKKOS_INTERNAL_LS_CONFIG), KokkosCore_config.h)
KOKKOS_INTERNAL_NEW_CONFIG := $(strip $(shell diff KokkosCore_config.h KokkosCore_config.tmp | grep define | wc -l))
else

View File

@ -42,6 +42,17 @@ Kokkos_Cuda_Locks.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Cuda/Kokkos_C
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Cuda/Kokkos_Cuda_Locks.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_ROCM), 1)
Kokkos_ROCm_Exec.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Exec.cpp
Kokkos_ROCm_Space.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Space.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Space.cpp
Kokkos_ROCm_Task.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Task.cpp
Kokkos_ROCm_Impl.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/ROCm/Kokkos_ROCm_Impl.cpp
endif
ifeq ($(KOKKOS_INTERNAL_USE_PTHREADS), 1)
Kokkos_ThreadsExec_base.o: $(KOKKOS_CPP_DEPENDS) $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp
$(CXX) $(KOKKOS_CPPFLAGS) $(KOKKOS_CXXFLAGS) $(CXXFLAGS) -c $(KOKKOS_PATH)/core/src/Threads/Kokkos_ThreadsExec_base.cpp

View File

@ -80,6 +80,9 @@ Other compilers working:
X86:
Cygwin 2.1.0 64bit with gcc 4.9.3
Limited testing of the following compilers on POWER7+ systems:
GCC 4.8.5 (on RHEL7.1 POWER7+)
Known non-working combinations:
Power8:
Pthreads backend
@ -171,3 +174,22 @@ Contributions to Kokkos are welcome. In order to do so, please open an issue
where a feature request or bug can be discussed. Then issue a pull request
with your contribution. Pull requests must be issued against the develop branch.
===========================================================================
====Citing Kokkos==========================================================
===========================================================================
If you publish work which mentions Kokkos, please cite the following paper:
@article{CarterEdwards20143202,
title = "Kokkos: Enabling manycore performance portability through polymorphic memory access patterns ",
journal = "Journal of Parallel and Distributed Computing ",
volume = "74",
number = "12",
pages = "3202 - 3216",
year = "2014",
note = "Domain-Specific Languages and High-Level Frameworks for High-Performance Computing ",
issn = "0743-7315",
doi = "https://doi.org/10.1016/j.jpdc.2014.07.003",
url = "http://www.sciencedirect.com/science/article/pii/S0743731514001257",
author = "H. Carter Edwards and Christian R. Trott and Daniel Sunderland"
}

View File

@ -0,0 +1,140 @@
Summary:
- Step 1: Testing Kokkos itself using test_all_sandia
- Step 2: Testing of Kokkos integrated into Trilinos (config/trilinos-integration/*.sh)
- Step 3: Locally update CHANGELOG, merge into master, edit config/master_history.txt
- Step 4: Locally snapshot new master into corresponding Trilinos branch (develop or temporary), push with checkin-test-sems.sh
- Step 5: Push local Kokkos master to GitHub (need Owner approval)
Steps 1, 2, and 4 include testing that may fail. These failures must be fixed either by pull requests to Kokkos develop, or by creating a new Trilinos branch for parts of Trilinos that must be updated. This is what usually takes the most time.
// -------------------------------------------------------------------------------- //
Step 1: The following should be repeated on enough machines to cover all
supported compilers. Those machines are:
kokkos-dev
??? <- TODO: identify other machines
1.1. Clone kokkos develop branch (or just switch to it)
git clone -b develop git@github.com:kokkos/kokkos.git
cd kokkos
1.2. Create a testing directory
mkdir testing
cd testing
1.3. Run the test_all_sandia script with no options to test all compilers
nohup ../config/test_all_sandia &
tail -f nohup.out # to watch progress
// -------------------------------------------------------------------------------- //
Step 2:
2.1. Build and test Trilinos with 4 different configurations; Run scripts for white and shepard that are provided in kokkos/config/trilinos-integration. These scripts load their own modules/environment, so don't require preparation. You can run all four at the same time, use separate directories for each.
mkdir serial
cd serial
nohup KOKKOS_PATH/config/trilinos-integration/shepard_jenkins_run_script_serial_intel &
2.2. Compare the compile errors and test failures between updated and pristine versions. There may be compile failures that happen in both, tests that fail in both, and there may be tests that only fail sometimes (thus, rerun tests manually as needed).
// -------------------------------------------------------------------------------- //
Step 3: This step should be run on kokkos-dev
3.1. If you don't have a GitHub token already, generate one for yourself (this will give you TOKEN):
https://github.com/settings/tokens
3.2. Get a clean copy of the Kokkos develop branch
git clone -b develop git@github.com:kokkos/kokkos.git
cd kokkos
3.3. Generate the initial changelog. Use the most recent tag as OLDTAG (`git tag -l` can show you all tags). The NEWTAG is the new version number, e.g. "2.04.00". RUN THIS OUTSIDE THE KOKKOS SOURCE TREE!
module load ruby/2.3.1/gcc/5.3.0
github_changelog_generator kokkos/kokkos --token TOKEN --no-pull-requests --include-labels 'InDevelop' --enhancement-labels 'enhancement,Feature Request' --future-release 'NEWTAG' --between-tags 'NEWTAG,OLDTAG'
cat CHANGELOG.md
3.4. Manually cleanup and commit the change log. Pushing to develop requires Owner permission.
(Copy the new section from the generated CHANGELOG.md to KOKKOS_PATH/CHANGELOG.md)
(Make desired changes to CHANGELOG.md to enhance clarity (remove issues not noteworthy))
(Commit and push the CHANGELOG.md to develop)
3.5. Merge develop into master. DO NOT FAST-FORWARD THE MERGE!!!!
(From kokkos directory):
git checkout master
git merge --no-ff origin/develop
3.6. Update the tag in kokkos/config/master_history.txt
Tag description: MajorNumber.MinorNumber.WeeksSinceMinorNumberUpdate
Tag field widths: #.#.##
date description: month:day:year
date field widths: ##:##:####
master description: SHA1 of previous master commit (use `git log`?)
develop description: SHA1 of merged develop branch
SHA1 field width: ######## (8 chars)
# Append to config/master_history.txt:
tag: 2.03.13 date: 07:27:2017 master: da314444 develop: 29ccb58a
git commit --amend -a
3.7. Create the new tag:
git tag -a #.#.##
(type the following into the tag message (same as for step 4.3))
tag: #.#.##
date: mm/dd/yyyy
master: sha1
develop: sha1
3.8. DO NOT PUSH YET !!!
// -------------------------------------------------------------------------------- //
Step 4: This step can be done on any SEMS machine (e.g. kokkos-dev). Actually, the checkin step requires lots of disk space and RAM. Use ceerws1113 if you have access to it.
4.1 Clone the Trilinos corresponding branch (or just switch to it)
git clone -b develop git@github.com:trilinos/Trilinos.git
TRILINOS_PATH=$PWD/Trilinos
4.2 Snapshot Kokkos into Trilinos - this requires python/2.7.9 and that both Trilinos and Kokkos be clean - no untracked or modified files. Run the following outside of the Kokkos and Trilinos source trees.
module load sems-python/2.7.9
python KOKKOS_PATH/config/snapshot.py KOKKOS_PATH TRILINOS_PATH/packages
4.3. Run checkin-test to push to trilinos using the CI build modules (gcc/4.9.3)
cd TRILINOS_PATH
mkdir CHECKIN
cd CHECKIN
nohup ../cmake/std/sems/checkin-test-sems.sh --do-all --push &
4.4. If there are failures, fix and backtrack. Otherwise, go to next step
// -------------------------------------------------------------------------------- //
Step 5: Push Kokkos master to GitHub (requires Owner permission).
cd KOKKOS_PATH
git push --follow-tags origin master

View File

@ -8,3 +8,4 @@ tag: 2.02.15 date: 02:10:2017 master: 8c64cd93 develop: 28dea8b6
tag: 2.03.00 date: 04:25:2017 master: 120d9ce7 develop: 015ba641
tag: 2.03.05 date: 05:27:2017 master: 36b92f43 develop: 79073186
tag: 2.03.13 date: 07:27:2017 master: da314444 develop: 29ccb58a
tag: 2.04.00 date: 08:16:2017 master: 54eb75c0 develop: 32fb8ee1

View File

@ -167,7 +167,6 @@ if [ "$MACHINE" = "sems" ]; then
"intel/15.0.2 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/16.0.3 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"intel/17.0.1 $BASE_MODULE_LIST $INTEL_BUILD_LIST icpc $INTEL_WARNING_FLAGS"
"clang/3.6.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.7.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"
"clang/3.8.1 $BASE_MODULE_LIST $CLANG_BUILD_LIST clang++ $CLANG_WARNING_FLAGS"

View File

@ -1,15 +1,15 @@
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Don't include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
#else
#define KOKKOS_CORE_CONFIG_H
#endif
/* The trivial 'src/build_common.sh' creates a config
* that must stay in sync with this file.
*/
#cmakedefine KOKKOS_FOR_SIERRA
#ifndef KOKKOS_FOR_SIERRA
#if !defined(KOKKOS_FOR_SIERRA)
#if !defined(KOKKOS_MACROS_HPP) || defined(KOKKOS_CORE_CONFIG_H)
#error "Don't include KokkosCore_config.h directly; include Kokkos_Macros.hpp instead."
#else
#define KOKKOS_CORE_CONFIG_H
#endif
#cmakedefine KOKKOS_HAVE_CUDA
#cmakedefine KOKKOS_HAVE_OPENMP
@ -93,12 +93,6 @@
#cmakedefine KOKKOS_ARCH_PASCAL60 1
#cmakedefine KOKKOS_ARCH_PASCAL61 1
// Don't forbid users from defining this macro on the command line,
// but still make sure that CMake logic can control its definition.
#ifndef KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
#cmakedefine KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA 1
#endif
// TODO: These are currently not used in Kokkos. Should they be removed?
#cmakedefine KOKKOS_HAVE_MPI
#cmakedefine KOKKOS_HAVE_CUSPARSE
@ -107,4 +101,4 @@
#cmakedefine KOKKOS_USING_DEPRECATED_VIEW
#cmakedefine KOKKOS_HAVE_CXX11
#endif // KOKKOS_FOR_SIERRA
#endif // !defined(KOKKOS_FOR_SIERRA)

View File

@ -9,30 +9,6 @@ TRIBITS_ADD_OPTION_AND_DEFINE(
ASSERT_DEFINED(${PROJECT_NAME}_ENABLE_CXX11)
ASSERT_DEFINED(${PACKAGE_NAME}_ENABLE_CUDA)
# Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA governs whether Kokkos allows
# use of lambdas at the outer level of parallel dispatch (that is, as
# the argument to an outer parallel_for, parallel_reduce, or
# parallel_scan). This works with non-CUDA execution spaces if C++11
# is enabled. It does not currently work with public releases of
# CUDA. If that changes, please change the default here to ON if CUDA
# and C++11 are ON.
IF (${PROJECT_NAME}_ENABLE_CXX11)
IF (${PACKAGE_NAME}_ENABLE_CUDA)
SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF)
ELSE ()
SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT ON)
ENDIF ()
ELSE ()
SET(Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT OFF)
ENDIF ()
TRIBITS_ADD_OPTION_AND_DEFINE(
Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA
KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA
"Whether Kokkos allows use of lambdas at the outer level of parallel dispatch (that is, as the argument to an outer parallel_for, parallel_reduce, or parallel_scan). This requires C++11. It also does not currently work with public releases of CUDA. As a result, even if C++11 is enabled, this will be OFF by default if CUDA is enabled. If this option is ON, the macro KOKKOS_HAVE_CXX11_DISPATCH_LAMBDA will be defined. For compatibility with Kokkos' Makefile build system, it is also possible to define that macro on the command line."
${Kokkos_ENABLE_CXX11_DISPATCH_LAMBDA_DEFAULT}
)
TRIBITS_CONFIGURE_FILE(${PACKAGE_NAME}_config.h)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_BINARY_DIR})

View File

@ -152,10 +152,10 @@ public:
KOKKOS_INLINE_FUNCTION pointer data() { return pointer(0) ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return const_pointer(0); }
~Array() = default ;
Array() = default ;
Array( const Array & ) = default ;
Array & operator = ( const Array & ) = default ;
KOKKOS_FUNCTION_DEFAULTED ~Array() = default ;
KOKKOS_FUNCTION_DEFAULTED Array() = default ;
KOKKOS_FUNCTION_DEFAULTED Array( const Array & ) = default ;
KOKKOS_FUNCTION_DEFAULTED Array & operator = ( const Array & ) = default ;
// Some supported compilers are not sufficiently C++11 compliant
// for default move constructor and move assignment operator.
@ -209,7 +209,7 @@ public:
KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
~Array() = default ;
KOKKOS_FUNCTION_DEFAULTED ~Array() = default ;
Array() = delete ;
Array( const Array & rhs ) = delete ;
@ -278,7 +278,7 @@ public:
KOKKOS_INLINE_FUNCTION pointer data() { return m_elem ; }
KOKKOS_INLINE_FUNCTION const_pointer data() const { return m_elem ; }
~Array() = default ;
KOKKOS_FUNCTION_DEFAULTED ~Array() = default ;
Array() = delete ;
Array( const Array & ) = delete ;

View File

@ -80,6 +80,11 @@
// Compiling NVIDIA device code, must use Cuda atomics:
#define KOKKOS_ENABLE_CUDA_ATOMICS
#elif defined(KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU)
#define KOKKOS_ENABLE_ROCM_ATOMICS
#endif
#if ! defined( KOKKOS_ENABLE_GNU_ATOMICS ) && \
@ -154,6 +159,19 @@ const char * atomic_query_version()
} // namespace Kokkos
#if defined( KOKKOS_ENABLE_ROCM )
#include <ROCm/Kokkos_ROCm_Atomic.hpp>
namespace Kokkos {
namespace Impl {
extern KOKKOS_INLINE_FUNCTION
bool lock_address_rocm_space(void* ptr);
extern KOKKOS_INLINE_FUNCTION
void unlock_address_rocm_space(void* ptr);
}
}
#endif
#ifdef _WIN32
#include "impl/Kokkos_Atomic_Windows.hpp"
#else

View File

@ -107,6 +107,11 @@ public:
re_ (val), im_ (0.0)
{}
// BUG HCC WORKAROUND
KOKKOS_INLINE_FUNCTION complex( const RealType& re, const RealType& im):
re_ (re), im_ (im)
{}
//! Constructor that takes the real and imaginary parts.
template<class RealType1, class RealType2>
KOKKOS_INLINE_FUNCTION complex (const RealType1& re, const RealType2& im) :
@ -227,6 +232,16 @@ public:
return re_;
}
//! Set the imaginary part of this complex number.
KOKKOS_INLINE_FUNCTION void imag (RealType v) {
im_ = v;
}
//! Set the real part of this complex number.
KOKKOS_INLINE_FUNCTION void real (RealType v) {
re_ = v;
}
KOKKOS_INLINE_FUNCTION
complex<RealType>& operator += (const complex<RealType>& src) {
re_ += src.re_;
@ -299,7 +314,7 @@ public:
// Scale (by the "1-norm" of y) to avoid unwarranted overflow.
// If the real part is +/-Inf and the imaginary part is -/+Inf,
// this won't change the result.
const RealType s = ::fabs (y.real ()) + ::fabs (y.imag ());
const RealType s = std::fabs (y.real ()) + std::fabs (y.imag ());
// If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
// In that case, the relation x/y == (x/s) / (y/s) doesn't hold,
@ -537,7 +552,7 @@ operator / (const complex<RealType>& x, const complex<RealType>& y) {
// Scale (by the "1-norm" of y) to avoid unwarranted overflow.
// If the real part is +/-Inf and the imaginary part is -/+Inf,
// this won't change the result.
const RealType s = ::fabs (real (y)) + ::fabs (imag (y));
const RealType s = std::fabs (real (y)) + std::fabs (imag (y));
// If s is 0, then y is zero, so x/y == real(x)/0 + i*imag(x)/0.
// In that case, the relation x/y == (x/s) / (y/s) doesn't hold,

View File

@ -74,6 +74,10 @@
#include <Kokkos_Cuda.hpp>
#endif
#if defined( KOKKOS_ENABLE_ROCM )
#include <Kokkos_ROCm.hpp>
#endif
#include <Kokkos_Pair.hpp>
#include <Kokkos_MemoryPool.hpp>
#include <Kokkos_Array.hpp>

View File

@ -122,6 +122,13 @@ class CudaHostPinnedSpace; ///< Memory space on Host accessible to Cuda GPU
class Cuda; ///< Execution space for Cuda GPU
#endif
#if defined( KOKKOS_ENABLE_ROCM )
namespace Experimental {
class ROCmSpace ; ///< Memory space on ROCm GPU
class ROCm ; ///< Execution space for ROCm GPU
}
#endif
template<class ExecutionSpace, class MemorySpace>
struct Device;
@ -140,6 +147,8 @@ namespace Kokkos {
typedef Cuda DefaultExecutionSpace;
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET )
typedef Experimental::OpenMPTarget DefaultExecutionSpace ;
#elif defined ( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM )
typedef Experimental::ROCm DefaultExecutionSpace ;
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
typedef OpenMP DefaultExecutionSpace;
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
@ -185,6 +194,8 @@ namespace Impl {
#if defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA ) && defined( KOKKOS_ENABLE_CUDA )
typedef Kokkos::CudaSpace ActiveExecutionMemorySpace;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace ;
#elif defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef Kokkos::HostSpace ActiveExecutionMemorySpace;
#else

View File

@ -98,18 +98,18 @@ public:
typedef View<size_type* , array_layout, device_type> row_map_type;
typedef View<DataType* , array_layout, device_type> entries_type;
entries_type entries;
row_map_type row_map;
entries_type entries;
//! Construct an empty view.
Crs () : entries(), row_map() {}
Crs() : row_map(), entries() {}
//! Copy constructor (shallow copy).
Crs (const Crs& rhs) : entries (rhs.entries), row_map (rhs.row_map)
Crs(const Crs& rhs) : row_map(rhs.row_map), entries(rhs.entries)
{}
template<class EntriesType, class RowMapType>
Crs (const EntriesType& entries_,const RowMapType& row_map_) : entries (entries_), row_map (row_map_)
Crs(const RowMapType& row_map_, const EntriesType& entries_) : row_map(row_map_), entries(entries_)
{}
/** \brief Assign to a view of the rhs array.
@ -117,8 +117,8 @@ public:
* then allocated memory is deallocated.
*/
Crs& operator= (const Crs& rhs) {
entries = rhs.entries;
row_map = rhs.row_map;
entries = rhs.entries;
return *this;
}
@ -151,7 +151,7 @@ void get_crs_transpose_counts(
template< class OutCounts,
class InCrs>
void get_crs_row_map_from_counts(
typename OutCounts::value_type get_crs_row_map_from_counts(
OutCounts& out,
InCrs const& in,
std::string const& name = "row_map");
@ -204,18 +204,20 @@ class CrsRowMapFromCounts {
using execution_space = typename InCounts::execution_space;
using value_type = typename OutRowMap::value_type;
using index_type = typename InCounts::size_type;
using last_value_type = Kokkos::View<value_type, execution_space>;
private:
InCounts in;
OutRowMap out;
InCounts m_in;
OutRowMap m_out;
last_value_type m_last_value;
public:
KOKKOS_INLINE_FUNCTION
void operator()(index_type i, value_type& update, bool final_pass) const {
update += in(i);
if (final_pass) {
out(i + 1) = update;
if (i == 0) {
out(0) = 0;
}
if (i < m_in.size()) {
update += m_in(i);
if (final_pass) m_out(i + 1) = update;
} else if (final_pass) {
m_out(0) = 0;
m_last_value() = update;
}
}
KOKKOS_INLINE_FUNCTION
@ -226,12 +228,16 @@ class CrsRowMapFromCounts {
}
using self_type = CrsRowMapFromCounts<InCounts, OutRowMap>;
CrsRowMapFromCounts(InCounts const& arg_in, OutRowMap const& arg_out):
in(arg_in),out(arg_out) {
m_in(arg_in), m_out(arg_out), m_last_value("last_value") {
}
value_type execute() {
using policy_type = RangePolicy<index_type, execution_space>;
using closure_type = Kokkos::Impl::ParallelScan<self_type, policy_type>;
closure_type closure(*this, policy_type(0, in.size()));
closure_type closure(*this, policy_type(0, m_in.size() + 1));
closure.execute();
execution_space::fence();
auto last_value = Kokkos::create_mirror_view(m_last_value);
Kokkos::deep_copy(last_value, m_last_value);
return last_value();
}
};
@ -297,13 +303,14 @@ void get_crs_transpose_counts(
template< class OutRowMap,
class InCounts>
void get_crs_row_map_from_counts(
typename OutRowMap::value_type get_crs_row_map_from_counts(
OutRowMap& out,
InCounts const& in,
std::string const& name) {
out = OutRowMap(ViewAllocateWithoutInitializing(name), in.size() + 1);
Kokkos::Impl::Experimental::
CrsRowMapFromCounts<InCounts, OutRowMap> functor(in, out);
return functor.execute();
}
template< class DataType,
@ -328,6 +335,65 @@ void transpose_crs(
FillCrsTransposeEntries<crs_type, crs_type> entries_functor(in, out);
}
/// \brief Two-pass construction of a Crs: count entries per row, then fill.
///
/// The user functor is invoked twice per row \c i :
///   - <tt>f(i, nullptr)</tt> must return the number of entries in row \c i ;
///   - <tt>f(i, fill)</tt> receives a pointer to the first entry slot of
///     row \c i and writes that row's entries.
///
/// All work is performed by the constructor; on return the \c crs argument
/// holds the newly built row map and entries.
template< class CrsType,
          class Functor>
struct CountAndFill {
  using size_type = typename CrsType::size_type;
  using row_map_type = typename CrsType::row_map_type;
  using entries_type = typename CrsType::entries_type;
  // Element type of the entries view. This used to alias size_type, which
  // made the Fill pass ill-formed (pointer type mismatch) for any Crs whose
  // entry type is not its size_type.
  using data_type = typename entries_type::value_type;
  using counts_type = row_map_type;

  CrsType m_crs;
  Functor m_functor;
  counts_type m_counts;

  /// Tag selecting the counting pass.
  struct Count {};
  /// Counting pass: record how many entries the functor reports for row i.
  KOKKOS_INLINE_FUNCTION void operator()(Count, size_type i) const {
    m_counts(i) = m_functor(i, nullptr);
  }

  /// Tag selecting the filling pass.
  struct Fill {};
  /// Filling pass: hand the functor a pointer to row i's first entry slot.
  KOKKOS_INLINE_FUNCTION void operator()(Fill, size_type i) const {
    auto j = m_crs.row_map(i);
    data_type* fill = &(m_crs.entries(j));
    m_functor(i, fill);
  }

  using self_type = CountAndFill<CrsType, Functor>;

  /// \param crs    Crs to build; overwritten with the result.
  /// \param nrows  Number of rows to count and fill.
  /// \param f      Functor providing per-row counts and entries.
  CountAndFill(CrsType& crs, size_type nrows, Functor const& f):
    m_crs(crs),
    m_functor(f)
  {
    using execution_space = typename CrsType::execution_space;
    m_counts = counts_type("counts", nrows);
    {
      using count_policy_type = RangePolicy<size_type, execution_space, Count>;
      using count_closure_type =
        Kokkos::Impl::ParallelFor<self_type, count_policy_type>;
      const count_closure_type closure(*this, count_policy_type(0, nrows));
      closure.execute();
    }
    // Turn per-row counts into row offsets; the return value is the total
    // number of entries.
    auto nentries = Kokkos::Experimental::
      get_crs_row_map_from_counts(m_crs.row_map, m_counts);
    // Counts are no longer needed once the row map exists.
    m_counts = counts_type();
    m_crs.entries = entries_type("entries", nentries);
    {
      using fill_policy_type = RangePolicy<size_type, execution_space, Fill>;
      using fill_closure_type =
        Kokkos::Impl::ParallelFor<self_type, fill_policy_type>;
      const fill_closure_type closure(*this, fill_policy_type(0, nrows));
      closure.execute();
    }
    // Publish the result through the caller's reference.
    crs = m_crs;
  }
};
template< class CrsType,
class Functor>
void count_and_fill_crs(
CrsType& crs,
typename CrsType::size_type nrows,
Functor const& f) {
Kokkos::Experimental::CountAndFill<CrsType, Functor>(crs, nrows, f);
}
}} // namespace Kokkos::Experimental
#endif /* #define KOKKOS_CRS_HPP */

View File

@ -96,6 +96,14 @@
//----------------------------------------------------------------------------
#if defined(KOKKOS_ENABLE_SERIAL) || defined(KOKKOS_ENABLE_THREADS) || \
defined(KOKKOS_ENABLE_OPENMP) || defined(KOKKOS_ENABLE_QTHREADS) || \
defined(KOKKOS_ENABLE_ROCM) || defined(KOKKOS_ENABLE_OPENMPTARGET)
#define KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND
#endif
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
// Compiling with a CUDA compiler.
//
@ -133,6 +141,9 @@
#if ( CUDA_VERSION < 8000 ) && defined( __NVCC__ )
#define KOKKOS_LAMBDA [=]__device__
#if defined( KOKKOS_INTERNAL_ENABLE_NON_CUDA_BACKEND )
#undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#endif
#else
#define KOKKOS_LAMBDA [=]__host__ __device__
@ -141,16 +152,13 @@
#endif
#endif
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
#if defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
// Cuda version 8.0 still needs the functor wrapper
#if /* ( CUDA_VERSION < 8000 ) && */ defined( __NVCC__ )
#if defined( __NVCC__ )
#define KOKKOS_IMPL_NEED_FUNCTOR_WRAPPER
#endif
#endif
#endif
#else // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
#undef KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA
#endif // !defined(KOKKOS_ENABLE_CUDA_LAMBDA)
#endif // #if defined( KOKKOS_ENABLE_CUDA ) && defined( __CUDACC__ )
//----------------------------------------------------------------------------
// Language info: C++, CUDA, OPENMP
@ -161,8 +169,20 @@
#define KOKKOS_FORCEINLINE_FUNCTION __device__ __host__ __forceinline__
#define KOKKOS_INLINE_FUNCTION __device__ __host__ inline
#define KOKKOS_FUNCTION __device__ __host__
#ifdef KOKKOS_COMPILER_CLANG
#define KOKKOS_FUNCTION_DEFAULTED KOKKOS_FUNCTION
#endif
#endif // #if defined( __CUDA_ARCH__ )
#if defined( KOKKOS_ENABLE_ROCM ) && defined( __HCC__ )
#define KOKKOS_FORCEINLINE_FUNCTION __attribute__((amp,cpu)) inline
#define KOKKOS_INLINE_FUNCTION __attribute__((amp,cpu)) inline
#define KOKKOS_FUNCTION __attribute__((amp,cpu))
#define KOKKOS_LAMBDA [=] __attribute__((amp,cpu))
#define KOKKOS_FUNCTION_DEFAULTED KOKKOS_FUNCTION
#endif
#if defined( _OPENMP )
// Compiling with OpenMP.
// The value of _OPENMP is an integer value YYYYMM
@ -179,15 +199,6 @@
// Host code is compiled again with another compiler.
// Device code is compile to 'ptx'.
#define KOKKOS_COMPILER_NVCC __NVCC__
#else
#if !defined( KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA )
#if !defined( KOKKOS_ENABLE_CUDA ) // Compiling with clang for Cuda does not work with LAMBDAs either
// CUDA (including version 6.5) does not support giving lambdas as
// arguments to global functions. Thus its not currently possible
// to dispatch lambdas from the host.
#define KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA 1
#endif
#endif
#endif // #if defined( __NVCC__ )
#if !defined( KOKKOS_LAMBDA )
@ -321,6 +332,10 @@
//#define KOKKOS_ENABLE_PRAGMA_LOOPCOUNT 1
//#define KOKKOS_ENABLE_PRAGMA_VECTOR 1
//#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#if ! defined( KOKKOS_ENABLE_ASM )
#define KOKKOS_ENABLE_ASM 1
#endif
#endif
//----------------------------------------------------------------------------
@ -397,6 +412,10 @@
#define KOKKOS_FUNCTION /**/
#endif
#if !defined( KOKKOS_FUNCTION_DEFAULTED )
#define KOKKOS_FUNCTION_DEFAULTED /**/
#endif
//----------------------------------------------------------------------------
// Define empty macro for restrict if necessary:
@ -424,6 +443,7 @@
// There is zero or one default execution space specified.
#if 1 < ( ( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA ) ? 1 : 0 ) + \
( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM ) ? 1 : 0 ) + \
( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET ) ? 1 : 0 ) + \
( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP ) ? 1 : 0 ) + \
( defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS ) ? 1 : 0 ) + \
@ -435,6 +455,7 @@
// If default is not specified then chose from enabled execution spaces.
// Priority: CUDA, OPENMP, THREADS, QTHREADS, SERIAL
#if defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMP )
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_THREADS )
@ -442,6 +463,8 @@
#elif defined( KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_SERIAL )
#elif defined( KOKKOS_ENABLE_CUDA )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_CUDA
#elif defined( KOKKOS_ENABLE_ROCM )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_ROCM
#elif defined( KOKKOS_ENABLE_OPENMPTARGET )
#define KOKKOS_ENABLE_DEFAULT_DEVICE_TYPE_OPENMPTARGET
#elif defined( KOKKOS_ENABLE_OPENMP )
@ -459,6 +482,8 @@
#if defined( __CUDACC__ ) && defined( __CUDA_ARCH__ ) && defined( KOKKOS_ENABLE_CUDA )
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_CUDA
#elif defined( __HCC__ ) && defined( __HCC_ACCELERATOR__ ) && defined( KOKKOS_ENABLE_ROCM )
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_ROCM_GPU
#else
#define KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST
#endif

View File

@ -233,12 +233,24 @@ public:
//--------------------------------------------------------------------------
MemoryPool() = default ;
MemoryPool( MemoryPool && ) = default ;
MemoryPool( const MemoryPool & ) = default ;
MemoryPool & operator = ( MemoryPool && ) = default ;
MemoryPool & operator = ( const MemoryPool & ) = default ;
MemoryPool()
: m_tracker()
, m_sb_state_array(0)
, m_sb_state_size(0)
, m_sb_size_lg2(0)
, m_max_block_size_lg2(0)
, m_min_block_size_lg2(0)
, m_sb_count(0)
, m_hint_offset(0)
, m_data_offset(0)
, m_unused_padding(0)
{}
/**\brief Allocate a memory pool from 'memspace'.
*
* The memory pool will have at least 'min_total_alloc_size' bytes

View File

@ -1016,7 +1016,7 @@ parallel_reduce( std::string const & arg_label
//------------------------------
#if (KOKKOS_ENABLE_PROFILING)
#if defined(KOKKOS_ENABLE_PROFILING)
uint64_t kpID = 0;
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::beginParallelReduce(arg_label, 0, &kpID);
@ -1042,7 +1042,7 @@ parallel_reduce( std::string const & arg_label
//------------------------------
#if (KOKKOS_ENABLE_PROFILING)
#if defined(KOKKOS_ENABLE_PROFILING)
if(Kokkos::Profiling::profileLibraryLoaded()) {
Kokkos::Profiling::endParallelReduce(kpID);
}

View File

@ -0,0 +1,220 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ROCM_HPP
#define KOKKOS_ROCM_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_ENABLE_ROCM )
#include <ROCm/hc_math_std.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#include <cstddef>
#include <iosfwd>
#include <Kokkos_HostSpace.hpp>
#include <Kokkos_ROCmSpace.hpp>
#include <ROCm/Kokkos_ROCm_Exec.hpp>
#include <Kokkos_ScratchSpace.hpp>
#include <Kokkos_Parallel.hpp>
#include <Kokkos_Layout.hpp>
#include <impl/Kokkos_Tags.hpp>
/*--------------------------------------------------------------------------*/
#include <hc.hpp>
#include <hc_am.hpp>
#include <amp_math.h>
#if defined( __HCC_ACCELERATOR__ )
using namespace ::Concurrency::precise_math ;
#endif
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
class ROCmExec ;
} // namespace Impl
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
/// \class ROCm
/// \brief Kokkos execution space for ROCm GPU devices.
class ROCm {
public:
//------------------------------------
//! \name Type declarations that all Kokkos devices must provide.
//@{
//! Tag this class as a kokkos execution space
typedef ROCm execution_space ;
typedef ROCmSpace memory_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef LayoutLeft array_layout ;
typedef HostSpace::size_type size_type ;
typedef ScratchMemorySpace< ROCm > scratch_memory_space ;
~ROCm() {}
ROCm();
// explicit ROCm( const int instance_id );
ROCm( ROCm && ) = default ;
ROCm( const ROCm & ) = default ;
ROCm & operator = ( ROCm && ) = default ;
ROCm & operator = ( const ROCm & ) = default ;
//@}
//------------------------------------
//! \name Functions that all Kokkos devices must implement.
//@{
/** \brief Nonzero when compiled for the accelerator
 *         (__HCC_ACCELERATOR__ defined), zero otherwise.
 */
KOKKOS_INLINE_FUNCTION static int in_parallel() {
#if defined( __HCC_ACCELERATOR__ )
return true;
#else
return false;
#endif
}
/** \brief Set the device in a "sleep" state. */
static bool sleep() ;
/** \brief Wake the device from the 'sleep' state. */
static bool wake() ;
/** \brief Wait until all dispatched functors complete. */
static void fence() ;
/// \brief Print configuration information to the given output stream.
static void print_configuration( std::ostream & , const bool detail = false );
/// \brief Free any resources being consumed by the device.
static void finalize() ;
/** \brief Device selector passed to initialize().
 *
 *  Stores the requested id plus one; the default constructor stores 1,
 *  i.e. the same value as SelectDevice(0).
 *  NOTE(review): the reason for the +1 offset is not visible here --
 *  confirm against the implementation of initialize().
 */
struct SelectDevice {
int rocm_device_id ;
SelectDevice() : rocm_device_id(1) {}
explicit SelectDevice( int id ) : rocm_device_id( id+1 ) {}
};
/// \brief Device id held by this instance.
int rocm_device() const { return m_device ; }
// NOTE(review): presumably report whether the (given) device is an APU;
// the definitions are not in this header -- confirm.
bool isAPU();
bool isAPU(int device);
/// \brief Initialize the device.
static void initialize( const SelectDevice = SelectDevice());
static int is_initialized();
// static size_type device_arch();
// static size_type detect_device_count();
static int concurrency() ;
static const char* name();
private:
int m_device ;
};
}
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
// ROCmSpace paired with the ROCm scratch memory space:
// accessible, but neither assignable nor deep-copyable.
template<>
struct MemorySpaceAccess
< Kokkos::Experimental::ROCmSpace
, Kokkos::Experimental::ROCm::scratch_memory_space
>
{
enum { assignable = false };
enum { accessible = true };
enum { deepcopy = false };
};
// ROCm memory space paired with the ROCm scratch space: access is
// permitted, so both verify() overloads are no-ops.
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::Experimental::ROCm::memory_space
, Kokkos::Experimental::ROCm::scratch_memory_space
>
{
enum { value = true };
KOKKOS_INLINE_FUNCTION static void verify( void ) { }
KOKKOS_INLINE_FUNCTION static void verify( const void * ) { }
};
// HostSpace paired with the ROCm scratch space: access is not permitted,
// so verify() forwards to ROCmSpace::access_error().
template<>
struct VerifyExecutionCanAccessMemorySpace
< Kokkos::HostSpace
, Kokkos::Experimental::ROCm::scratch_memory_space
>
{
enum { value = false };
inline static void verify( void ) { Experimental::ROCmSpace::access_error(); }
inline static void verify( const void * p ) { Experimental::ROCmSpace::access_error(p); }
};
} // namespace Impl
} // namespace Kokkos
#include <ROCm/Kokkos_ROCm_Parallel.hpp>
#include <ROCm/Kokkos_ROCm_Task.hpp>
#endif
#endif

View File

@ -0,0 +1,622 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ROCMSPACE_HPP
#define KOKKOS_ROCMSPACE_HPP
#include <Kokkos_Core_fwd.hpp>
#if defined( KOKKOS_ENABLE_ROCM )
#include <iosfwd>
#include <typeinfo>
#include <string>
#include <Kokkos_HostSpace.hpp>
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
/** \brief ROCm on-device memory management.
 *
 *  Memory space whose associated execution space is
 *  Kokkos::Experimental::ROCm.
 */
class ROCmSpace {
public:
//! Tag this class as a kokkos memory space
typedef ROCmSpace memory_space ;
typedef Kokkos::Experimental::ROCm execution_space ;
typedef Kokkos::Device<execution_space,memory_space> device_type;
typedef unsigned int size_type ;
/*--------------------------------*/
ROCmSpace();
ROCmSpace( ROCmSpace && rhs ) = default ;
ROCmSpace( const ROCmSpace & rhs ) = default ;
ROCmSpace & operator = ( ROCmSpace && rhs ) = default ;
ROCmSpace & operator = ( const ROCmSpace & rhs ) = default ;
~ROCmSpace() = default ;
/**\brief Allocate untracked memory in the rocm space */
void * allocate( const size_t arg_alloc_size ) const ;
/**\brief Deallocate untracked memory in the rocm space */
void deallocate( void * const arg_alloc_ptr
, const size_t arg_alloc_size ) const ;
/**\brief Return Name of the MemorySpace (the string "ROCm") */
static constexpr const char* name() { return m_name; };
/*--------------------------------*/
/** \brief Error reporting for HostSpace attempt to access ROCmSpace */
static void access_error();
static void access_error( const void * const );
private:
int m_device ; ///< Which ROCm device
static constexpr const char* m_name = "ROCm";
// The void-typed allocation record is granted access to private members.
friend class Kokkos::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void > ;
};
} // namespace Experimental
namespace Impl {
// Raw allocation helpers for device and host-pinned memory.
// NOTE(review): the int argument is presumably the allocation size in
// bytes -- confirm against the definitions, which are not in this header.
void * rocm_device_allocate(int);
void * rocm_hostpinned_allocate(int);
void rocm_device_free(void * );
/// \brief Initialize lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function initializes the locks to zero (unset).
void init_lock_arrays_rocm_space();
/// \brief Retrieve the pointer to the lock array for arbitrary size atomics.
///
/// Arbitrary atomics are implemented using a hash table of locks
/// where the hash value is derived from the address of the
/// object for which an atomic operation is performed.
/// This function retrieves the lock array pointer.
/// If the array is not yet allocated it will do so.
/// NOTE(review): the effect of passing deallocate = true is not shown
/// here; presumably it releases the array -- confirm.
int* atomic_lock_array_rocm_space_ptr(bool deallocate = false);
/// \brief Retrieve the pointer to the scratch array for team and thread private global memory.
///
/// Team and Thread private scratch allocations in
/// global memory are acquired via locks.
/// This function retrieves the lock array pointer.
/// If the array is not yet allocated it will do so.
int* scratch_lock_array_rocm_space_ptr(bool deallocate = false);
/// \brief Retrieve the pointer to the scratch array for unique identifiers.
///
/// Unique identifiers in the range 0-ROCm::concurrency
/// are provided via locks.
/// This function retrieves the lock array pointer.
/// If the array is not yet allocated it will do so.
int* threadid_lock_array_rocm_space_ptr(bool deallocate = false);
}
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Experimental {
/** \brief  Host-resident memory space whose allocations are host-pinned
 *          so that the ROCm execution space can reach them.
 */
class ROCmHostPinnedSpace {
public:

  //! Tag types that mark this class as a Kokkos memory space.
  //  The memory lives on the host, so the associated execution space is
  //  HostSpace's execution space.
  using execution_space = HostSpace::execution_space ;
  using memory_space    = ROCmHostPinnedSpace ;
  using device_type     = Kokkos::Device< execution_space , memory_space > ;
  using size_type       = unsigned int ;

  /*--------------------------------*/

  ROCmHostPinnedSpace();
  ~ROCmHostPinnedSpace() = default ;

  ROCmHostPinnedSpace( const ROCmHostPinnedSpace & rhs ) = default ;
  ROCmHostPinnedSpace( ROCmHostPinnedSpace && rhs ) = default ;
  ROCmHostPinnedSpace & operator = ( const ROCmHostPinnedSpace & rhs ) = default ;
  ROCmHostPinnedSpace & operator = ( ROCmHostPinnedSpace && rhs ) = default ;

  /**\brief  Allocate untracked memory in the space */
  void * allocate( const size_t arg_alloc_size ) const ;

  /**\brief  Deallocate untracked memory in the space */
  void deallocate( void * const arg_alloc_ptr
                 , const size_t arg_alloc_size ) const ;

  /**\brief  Return the name of this memory space */
  static constexpr const char* name() { return m_name; }

private:

  //! Compile-time label returned by name().
  static constexpr const char* m_name = "ROCmHostPinned";

  /*--------------------------------*/
};
} // namespace Experimental
} // namespace Kokkos
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
// Specializations of MemorySpaceAccess< A , B > relating HostSpace, ROCmSpace
// and ROCmHostPinnedSpace. Each one publishes three compile-time flags:
// assignable, accessible and deepcopy.
// NOTE(review): the primary template is not visible here. By Kokkos convention
// A is the destination / accessing space and B the source / accessed space
// (assignable: a B view can be assigned to an A view; accessible: code running
// in A::execution_space may touch B memory; deepcopy: a DeepCopy from B to A
// exists) -- confirm against the primary template's documentation.

// Sanity check: ROCmSpace paired with itself must report 'assignable'.
static_assert( Kokkos::Impl::MemorySpaceAccess< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace >::assignable , "" );
//----------------------------------------
// HostSpace paired with ROCmSpace: only deep copy is offered.
template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::ROCmSpace > {
enum { assignable = false };
enum { accessible = false };
enum { deepcopy = true };
};
// HostSpace paired with ROCmHostPinnedSpace: all three flags are set.
template<>
struct MemorySpaceAccess< Kokkos::HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace > {
// HostSpace::execution_space == ROCmHostPinnedSpace::execution_space
enum { assignable = true };
enum { accessible = true };
enum { deepcopy = true };
};
//----------------------------------------
// ROCmSpace paired with HostSpace: only deep copy is offered.
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmSpace , Kokkos::HostSpace > {
enum { assignable = false };
enum { accessible = false };
enum { deepcopy = true };
};
// ROCmSpace paired with ROCmHostPinnedSpace: accessible and deep-copyable,
// but not assignable.
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmHostPinnedSpace > {
// ROCmSpace::execution_space != ROCmHostPinnedSpace::execution_space
enum { assignable = false };
enum { accessible = true }; // ROCmSpace::execution_space
enum { deepcopy = true };
};
//----------------------------------------
// ROCmHostPinnedSpace::execution_space == HostSpace::execution_space
// ROCmHostPinnedSpace accessible to both ROCm and Host
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::HostSpace > {
enum { assignable = false }; // HostSpace memory cannot be accessed from ROCm
enum { accessible = true }; // ROCmHostPinnedSpace::execution_space
enum { deepcopy = true };
};
template<>
struct MemorySpaceAccess< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmSpace > {
enum { assignable = false }; // ROCmSpace memory cannot be accessed from Host
enum { accessible = false };
enum { deepcopy = true };
};
};
//----------------------------------------
} // namespace Kokkos::Impl
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
// Declared here, defined elsewhere. Returns an hc::completion_future; the
// generic DeepCopy specializations in this header call wait() on it.
// NOTE(review): presumably starts an asynchronous copy of n bytes from src
// to dst -- confirm against the definition.
hc::completion_future DeepCopyAsyncROCm( void * dst , const void * src , size_t n);
// Explicit DeepCopy specializations for the ROCm execution space. Only the
// constructors are declared here; each comes in two forms, one without and
// one with an execution-space instance argument.
// NOTE(review): the template arguments are presumably
// < destination space , source space , execution space > -- confirm against
// the primary DeepCopy template.

// ROCmSpace paired with ROCmSpace.
template<> struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm>
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
};
// ROCmSpace paired with HostSpace.
template<> struct DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
};
// HostSpace paired with ROCmSpace.
template<> struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
};
template<class ExecutionSpace> struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
hc::completion_future fut = DeepCopyAsyncROCm (dst,src,n);
fut.wait();
// DeepCopy (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm>( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopy (dst,src,n);
}
};
template<class ExecutionSpace>
struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
DeepCopy (dst,src,n);
}
};
// Explicit DeepCopy specializations involving ROCmHostPinnedSpace for the ROCm
// execution space. Only the constructors are declared here; each comes in two
// forms, one without and one with an execution-space instance argument.
// NOTE(review): the template arguments are presumably
// < destination space , source space , execution space > -- confirm against
// the primary DeepCopy template.

// ROCmHostPinnedSpace paired with ROCmHostPinnedSpace.
template<> struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm>
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
};
// ROCmHostPinnedSpace paired with HostSpace.
template<> struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , HostSpace , Kokkos::Experimental::ROCm >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
};
// HostSpace paired with ROCmHostPinnedSpace.
template<> struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm >
{
DeepCopy( void * dst , const void * src , size_t );
DeepCopy( const Kokkos::Experimental::ROCm & , void * dst , const void * src , size_t );
};
template<class ExecutionSpace>
struct DeepCopy< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmHostPinnedSpace , ExecutionSpace>
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< Kokkos::Experimental::ROCmSpace , HostSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
hc::completion_future fut = DeepCopyAsyncROCm (dst,src,n);
fut.wait();
// DeepCopyROCm (dst,src,n);
}
};
template<class ExecutionSpace> struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmSpace , ExecutionSpace >
{
inline
DeepCopy( void * dst , const void * src , size_t n )
{ (void) DeepCopy< HostSpace , Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCm >( dst , src , n ); }
inline
DeepCopy( const ExecutionSpace& exec, void * dst , const void * src , size_t n )
{
exec.fence();
hc::completion_future fut = DeepCopyAsyncROCm (dst,src,n);
fut.wait();
// DeepCopyROCm (dst,src,n);
}
};
/// ROCmHostPinnedSpace / ROCmHostPinnedSpace copy requested with an arbitrary execution space.
template< class ExecutionSpace >
struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmHostPinnedSpace , ExecutionSpace >
{
  /// Forwards to the specialization for the ROCm execution space.
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm > rocm_copy ;
    (void) rocm_copy( dst , src , n );
  }

  /// Fences \p exec, then performs the copy through the three-argument constructor.
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};

/// ROCmHostPinnedSpace / HostSpace copy requested with an arbitrary execution space.
template< class ExecutionSpace >
struct DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , HostSpace , ExecutionSpace >
{
  /// Forwards to the specialization for the ROCm execution space.
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< Kokkos::Experimental::ROCmHostPinnedSpace , HostSpace , Kokkos::Experimental::ROCm > rocm_copy ;
    (void) rocm_copy( dst , src , n );
  }

  /// Fences \p exec, then performs the copy through the three-argument constructor.
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};

/// HostSpace / ROCmHostPinnedSpace copy requested with an arbitrary execution space.
template< class ExecutionSpace >
struct DeepCopy< HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace , ExecutionSpace >
{
  /// Forwards to the specialization for the ROCm execution space.
  DeepCopy( void * dst , const void * src , size_t n )
  {
    typedef DeepCopy< HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace , Kokkos::Experimental::ROCm > rocm_copy ;
    (void) rocm_copy( dst , src , n );
  }

  /// Fences \p exec, then performs the copy through the three-argument constructor.
  DeepCopy( const ExecutionSpace & exec , void * dst , const void * src , size_t n )
  {
    exec.fence();
    (void) DeepCopy( dst , src , n );
  }
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** ROCmSpace code touching HostSpace memory: never permitted, verify() aborts. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::ROCmSpace , Kokkos::HostSpace >
{
  enum { value = false };

  KOKKOS_INLINE_FUNCTION
  static void verify()
  {
    Kokkos::abort("ROCm code attempted to access HostSpace memory");
  }

  KOKKOS_INLINE_FUNCTION
  static void verify( const void * )
  {
    Kokkos::abort("ROCm code attempted to access HostSpace memory");
  }
};

/** ROCmSpace code touching ROCmHostPinnedSpace memory: permitted, verify() is a no-op. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::Experimental::ROCmSpace , Kokkos::Experimental::ROCmHostPinnedSpace >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION
  static void verify() {}

  KOKKOS_INLINE_FUNCTION
  static void verify( const void * ) {}
};

/** ROCmSpace code touching any other space that is not ROCmSpace itself: verify() aborts. */
template< class OtherSpace >
struct VerifyExecutionCanAccessMemorySpace<
  typename enable_if< ! is_same<Kokkos::Experimental::ROCmSpace,OtherSpace>::value , Kokkos::Experimental::ROCmSpace >::type ,
  OtherSpace >
{
  enum { value = false };

  KOKKOS_INLINE_FUNCTION
  static void verify()
  {
    Kokkos::abort("ROCm code attempted to access unknown Space memory");
  }

  KOKKOS_INLINE_FUNCTION
  static void verify( const void * )
  {
    Kokkos::abort("ROCm code attempted to access unknown Space memory");
  }
};
//----------------------------------------------------------------------------
/** HostSpace code touching ROCmSpace memory: reported through ROCmSpace::access_error. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::ROCmSpace >
{
  enum { value = false };

  inline static void verify()
  {
    Kokkos::Experimental::ROCmSpace::access_error();
  }

  inline static void verify( const void * p )
  {
    Kokkos::Experimental::ROCmSpace::access_error(p);
  }
};

/** HostSpace code touching ROCmHostPinnedSpace memory: permitted, verify() is a no-op. */
template<>
struct VerifyExecutionCanAccessMemorySpace< Kokkos::HostSpace , Kokkos::Experimental::ROCmHostPinnedSpace >
{
  enum { value = true };

  KOKKOS_INLINE_FUNCTION
  static void verify() {}

  KOKKOS_INLINE_FUNCTION
  static void verify( const void * ) {}
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Impl {
/** \brief  Allocation record specialization for ROCmSpace.
 *
 *  Derives from the untyped SharedAllocationRecord< void , void > base.
 *  Copying is disabled, and the constructor and destructor are protected;
 *  the public interface consists of the static functions declared below.
 *  Only declarations are visible here; the definitions live elsewhere.
 */
template<>
class SharedAllocationRecord< Kokkos::Experimental::ROCmSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are neither copy-constructible nor copy-assignable.
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
// Default deallocation callback handed to the constructor below.
static void deallocate( RecordBase * );
// NOTE(review): presumably the head of the list of live records for this
// space -- confirm against the definition.
static RecordBase s_root_record ;
// Copy of the memory space instance this record was created with.
const Kokkos::Experimental::ROCmSpace m_space ;
protected:
~SharedAllocationRecord();
SharedAllocationRecord( const Kokkos::Experimental::ROCmSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
/**\brief Return the label of this record, by value */
std::string get_label() const ;
/**\brief Create a record for a labelled allocation of arg_alloc_size in arg_space */
static SharedAllocationRecord * allocate( const Kokkos::Experimental::ROCmSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size );
/**\brief Allocate tracked memory in the space */
static
void * allocate_tracked( const Kokkos::Experimental::ROCmSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size );
/**\brief Reallocate tracked memory in the space */
static
void * reallocate_tracked( void * const arg_alloc_ptr
, const size_t arg_alloc_size );
/**\brief Deallocate tracked memory in the space */
static
void deallocate_tracked( void * const arg_alloc_ptr );
/**\brief Look up the record that belongs to an allocation pointer */
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
/**\brief Print the records of this space to a stream; 'detail' defaults to false */
static void print_records( std::ostream & , const Kokkos::Experimental::ROCmSpace & , bool detail = false );
};
/** \brief  Allocation record specialization for ROCmHostPinnedSpace.
 *
 *  Derives from the untyped SharedAllocationRecord< void , void > base.
 *  Copying is disabled, and the constructors and destructor are protected;
 *  the public interface consists of the static functions declared below.
 *  Apart from the inline default constructor, only declarations are
 *  visible here; the definitions live elsewhere.
 */
template<>
class SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace , void >
: public SharedAllocationRecord< void , void >
{
private:
typedef SharedAllocationRecord< void , void > RecordBase ;
// Records are neither copy-constructible nor copy-assignable.
SharedAllocationRecord( const SharedAllocationRecord & ) = delete ;
SharedAllocationRecord & operator = ( const SharedAllocationRecord & ) = delete ;
// Default deallocation callback handed to the constructor below.
static void deallocate( RecordBase * );
// NOTE(review): presumably the head of the list of live records for this
// space -- confirm against the definition.
static RecordBase s_root_record ;
// Copy of the memory space instance this record was created with.
const Kokkos::Experimental::ROCmHostPinnedSpace m_space ;
protected:
~SharedAllocationRecord();
// Default-constructs the base record and the space member.
SharedAllocationRecord() : RecordBase(), m_space() {}
SharedAllocationRecord( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
, const RecordBase::function_type arg_dealloc = & deallocate
);
public:
/**\brief Return the label of this record, by value */
std::string get_label() const ;
/**\brief Create a record for a labelled allocation of arg_alloc_size in arg_space */
static SharedAllocationRecord * allocate( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size
);
/**\brief Allocate tracked memory in the space */
static
void * allocate_tracked( const Kokkos::Experimental::ROCmHostPinnedSpace & arg_space
, const std::string & arg_label
, const size_t arg_alloc_size );
/**\brief Reallocate tracked memory in the space */
static
void * reallocate_tracked( void * const arg_alloc_ptr
, const size_t arg_alloc_size );
/**\brief Deallocate tracked memory in the space */
static
void deallocate_tracked( void * const arg_alloc_ptr );
/**\brief Look up the record that belongs to an allocation pointer */
static SharedAllocationRecord * get_record( void * arg_alloc_ptr );
/**\brief Print the records of this space to a stream; 'detail' defaults to false */
static void print_records( std::ostream & , const Kokkos::Experimental::ROCmHostPinnedSpace & , bool detail = false );
};
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
#endif /* #if defined( KOKKOS_ENABLE_ROCM ) */
#endif /* #define KOKKOS_ROCMSPACE_HPP */

View File

@ -681,6 +681,67 @@ public:
return f ;
}
/**\brief  Build an aggregate future that depends on \p narg futures
 *         produced on demand by a functor.
 *
 *  \param narg  Number of dependences; func(i) is evaluated once for each
 *               i in [0,narg).
 *  \param func  Callable whose result for an int argument must be a
 *               Kokkos::Future (enforced by static_assert).
 *  \return A future referring to the aggregate task. The future is null
 *          when narg is zero or when the queue could not allocate the task.
 *
 *  Every non-null dependence must belong to this scheduler's queue,
 *  otherwise the call aborts.
 */
template < class F >
KOKKOS_FUNCTION
Future< execution_space >
when_all( int narg , F const func )
  {
    using input_type  = decltype( func(0) );
    using future_type = Future< execution_space > ;
    using task_base   = Kokkos::Impl::TaskBase< void , void , void > ;

    static_assert( is_future< input_type >::value
                 , "Functor must return a Kokkos::Future" );

    future_type f ;

    if ( 0 == narg ) return f ;

    size_t const alloc_size = m_queue->when_all_allocation_size( narg );

    f.m_task =
      reinterpret_cast< task_base * >( m_queue->allocate( alloc_size ) );

    if ( f.m_task ) {

      // Reference count starts at two:
      // +1 to match decrement when task completes
      // +1 for the future

      new( f.m_task ) task_base();

      f.m_task->m_queue      = m_queue ;
      f.m_task->m_ref_count  = 2 ;
      f.m_task->m_alloc_size = alloc_size ;
      f.m_task->m_dep_count  = narg ;
      f.m_task->m_task_type  = task_base::Aggregate ;

      // Assign dependences; the reference count of each non-null
      // dependence is incremented here, as it is stored.

      task_base * volatile * const dep =
        f.m_task->aggregate_dependences();

      for ( int i = 0 ; i < narg ; ++i ) {

        const input_type arg_f = func(i);

        if ( 0 != arg_f.m_task ) {

          if ( m_queue != static_cast< queue_type * >( arg_f.m_task->m_queue ) ) {
            Kokkos::abort("Kokkos when_all Futures must be in the same scheduler" );
          }

          // Increment reference count to track subsequent assignment.
          Kokkos::atomic_increment( &(arg_f.m_task->m_ref_count) );
        }

        // Always write the slot, null included. The dependence array sits
        // in freshly allocated memory that the task_base constructor above
        // does not initialize, so a skipped slot would hold an
        // indeterminate pointer.
        dep[i] = arg_f.m_task ;
      }

      // Publish the dependence array before the aggregate is scheduled.
      Kokkos::memory_fence();

      m_queue->schedule_aggregate( f.m_task );
      // this when_all may be processed at any moment
    }

    return f ;
  }
//----------------------------------------
KOKKOS_INLINE_FUNCTION

View File

@ -2429,6 +2429,7 @@ template < class ValueType >
struct CommonViewAllocProp< void, ValueType >
{
using value_type = ValueType;
using scalar_array_type = ValueType;
template < class ... Views >
CommonViewAllocProp( const Views & ... ) {}

View File

@ -0,0 +1,439 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#include <hc.hpp>
//#include <hsa_atomic.h>
#ifdef KOKKOS_ENABLE_ROCM_ATOMICS
namespace Kokkos {
//ROCm can do:
//Types int/unsigned int
//variants: atomic_exchange/compare_exchange/fetch_add/fetch_sub/fetch_max/fetch_min/fetch_and/fetch_or/fetch_xor/fetch_inc/fetch_dec
/// Exchange on a 32-bit signed integer: stores \p val, returns the previous contents.
KOKKOS_INLINE_FUNCTION
int atomic_exchange(int* dest, const int& val) {
  const int previous = hc::atomic_exchange_int(dest, val);
  return previous;
}

/// Exchange on a 32-bit unsigned integer.
KOKKOS_INLINE_FUNCTION
unsigned int atomic_exchange(unsigned int* dest, const unsigned int& val) {
  const unsigned int previous = hc::atomic_exchange_unsigned(dest, val);
  return previous;
}

/// Exchange on int64_t, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
int64_t atomic_exchange(int64_t* dest, const int64_t& val) {
  uint64_t* const word = reinterpret_cast<uint64_t*>(dest);
  const uint64_t  bits = static_cast<uint64_t>(val);
  return static_cast<int64_t>(hc::atomic_exchange_uint64(word, bits));
}

/// Exchange on uint64_t.
KOKKOS_INLINE_FUNCTION
uint64_t atomic_exchange(uint64_t* dest, const uint64_t& val) {
  const uint64_t previous = hc::atomic_exchange_uint64(dest, val);
  return previous;
}

/// Exchange on long long, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
long long atomic_exchange(long long* dest, const long long& val) {
  uint64_t* const word = reinterpret_cast<uint64_t*>(dest);
  const uint64_t  bits = static_cast<uint64_t>(val);
  return static_cast<long long>(hc::atomic_exchange_uint64(word, bits));
}

/// Exchange on unsigned long long, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
unsigned long long atomic_exchange(unsigned long long* dest, const unsigned long long& val) {
  uint64_t* const word = reinterpret_cast<uint64_t*>(dest);
  const uint64_t  bits = static_cast<uint64_t>(val);
  return static_cast<unsigned long long>(hc::atomic_exchange_uint64(word, bits));
}
/// Exchange on a float: the value is reinterpreted as a 32-bit integer so
/// the integer exchange primitive can be used.
///
/// \param dest  Location to update.
/// \param val   Value to store.
/// \return The contents of \p *dest immediately before the exchange.
KOKKOS_INLINE_FUNCTION
float atomic_exchange(float* dest, const float& val) {
  union U {
    int i ;
    float f ;
    KOKKOS_INLINE_FUNCTION U() {}
  } previous , desired ;
  // No plain read of *dest beforehand: the result comes solely from the
  // atomic primitive, and an unsynchronized load would race with concurrent
  // atomic writers.
  desired.f  = val ;
  previous.i = hc::atomic_exchange_int(reinterpret_cast<int*>(dest), desired.i);
  return previous.f ;
}

/// Exchange on a double: the value is reinterpreted as a 64-bit integer so
/// the integer exchange primitive can be used.
///
/// \param dest  Location to update.
/// \param val   Value to store.
/// \return The contents of \p *dest immediately before the exchange.
KOKKOS_INLINE_FUNCTION
double atomic_exchange(double* dest, const double& val) {
  union U {
    uint64_t i ;
    double d ;
    KOKKOS_INLINE_FUNCTION U() {}
  } previous , desired ;
  // See the float overload: no unsynchronized pre-read of *dest.
  desired.d  = val ;
  previous.i = hc::atomic_exchange_uint64(reinterpret_cast<uint64_t*>(dest), desired.i);
  return previous.d ;
}
// Forward declarations of two atomic_compare_exchange overloads that are
// defined further down in this file, so that code placed before those
// definitions can already name them.
KOKKOS_INLINE_FUNCTION
int atomic_compare_exchange(int* dest, int compare, const int& val);
KOKKOS_INLINE_FUNCTION
int64_t atomic_compare_exchange(int64_t* dest, int64_t compare, const int64_t& val);
/// Exchange for any type with the same size as int.
///
/// The value is reinterpreted as a 32-bit integer and stored with the
/// unconditional integer exchange primitive. A single compare-and-swap
/// against a previously read value would silently drop the store whenever
/// another thread modified *dest in between, and would return a stale value.
///
/// \param dest  Location to update.
/// \param val   Value to store.
/// \return The contents of \p *dest immediately before the exchange.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_exchange(T* dest, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
  union U {
    int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } oldval , newval ;
  newval.t = val ;
  oldval.i = hc::atomic_exchange_int( reinterpret_cast<int*>(dest) , newval.i );
  return oldval.t ;
}
/// Exchange for any type with the same size as int64_t (and not int).
///
/// The value is reinterpreted as a 64-bit integer and stored with the
/// unconditional integer exchange primitive. A single compare-and-swap
/// against a previously read value would silently drop the store whenever
/// another thread modified *dest in between, and would return a stale value.
///
/// \param dest  Location to update.
/// \param val   Value to store.
/// \return The contents of \p *dest immediately before the exchange.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_exchange(T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(int64_t), const T&>::type val) {
  union U {
    uint64_t i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } oldval , newval ;
  newval.t = val ;
  oldval.i = hc::atomic_exchange_uint64( reinterpret_cast<uint64_t*>(dest) , newval.i );
  return oldval.t ;
}
// Fallback overload selected for types whose size matches neither int nor
// int64_t.
// NOTE(review): as written this performs no memory operation at all -- *dest
// is neither read nor written and the caller's own value is handed back.
// It looks like a placeholder so that such instantiations still compile;
// callers must not rely on it for an actual exchange.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_exchange(T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(int64_t), const T&>::type val) {
return val;
}
/// Compare-and-swap on a 32-bit signed integer; returns the primitive's result.
KOKKOS_INLINE_FUNCTION
int atomic_compare_exchange(int* dest, int compare, const int& val) {
  const int observed = hc::atomic_compare_exchange_int(dest, compare, val);
  return observed;
}

/// Compare-and-swap on a 32-bit unsigned integer.
KOKKOS_INLINE_FUNCTION
unsigned int atomic_compare_exchange(unsigned int* dest, unsigned int compare, const unsigned int& val) {
  const unsigned int observed = hc::atomic_compare_exchange_unsigned(dest, compare, val);
  return observed;
}

/// Compare-and-swap on int64_t, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
int64_t atomic_compare_exchange(int64_t* dest, int64_t compare, const int64_t& val) {
  uint64_t* const word     = reinterpret_cast<uint64_t*>(dest);
  const uint64_t  expected = static_cast<uint64_t>(compare);
  const uint64_t  desired  = static_cast<uint64_t>(val);
  return static_cast<int64_t>(hc::atomic_compare_exchange_uint64(word, expected, desired));
}

/// Compare-and-swap on uint64_t.
KOKKOS_INLINE_FUNCTION
uint64_t atomic_compare_exchange(uint64_t* dest, uint64_t compare, const uint64_t& val) {
  const uint64_t observed = hc::atomic_compare_exchange_uint64(dest, compare, val);
  return observed;
}

/// Compare-and-swap on long long, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
long long atomic_compare_exchange(long long* dest, long long compare, const long long& val) {
  uint64_t* const word     = reinterpret_cast<uint64_t*>(dest);
  const uint64_t  expected = static_cast<uint64_t>(compare);
  const uint64_t  desired  = static_cast<uint64_t>(val);
  return static_cast<long long>(hc::atomic_compare_exchange_uint64(word, expected, desired));
}
/// Compare-and-swap on a float, performed on its 32-bit integer image.
///
/// \param dest     Location to update.
/// \param compare  Value the location is compared against (bitwise).
/// \param val      Value handed to the primitive as the replacement.
/// \return The result of the integer compare-exchange primitive,
///         reinterpreted as float.
KOKKOS_INLINE_FUNCTION
float atomic_compare_exchange(float* dest, float compare, const float& val) {
  union U {
    int i ;
    float f ;
    KOKKOS_INLINE_FUNCTION U() {}
  } observed , expected , desired ;
  // No plain read of *dest beforehand: the result comes solely from the
  // atomic primitive, and an unsynchronized load would race with concurrent
  // atomic writers.
  expected.f = compare ;
  desired.f  = val ;
  observed.i = hc::atomic_compare_exchange_int(reinterpret_cast<int*>(dest), expected.i, desired.i);
  return observed.f ;
}

/// Compare-and-swap on a double, performed on its 64-bit integer image.
///
/// \param dest     Location to update.
/// \param compare  Value the location is compared against (bitwise).
/// \param val      Value handed to the primitive as the replacement.
/// \return The result of the integer compare-exchange primitive,
///         reinterpreted as double.
KOKKOS_INLINE_FUNCTION
double atomic_compare_exchange(double* dest, double compare, const double& val) {
  union U {
    uint64_t i ;
    double d ;
    KOKKOS_INLINE_FUNCTION U() {}
  } observed , expected , desired ;
  // See the float overload: no unsynchronized pre-read of *dest.
  expected.d = compare ;
  desired.d  = val ;
  observed.i = hc::atomic_compare_exchange_uint64(reinterpret_cast<uint64_t*>(dest), expected.i, desired.i);
  return observed.d ;
}
/// Compare-and-swap for any type with the same size as int, performed on
/// the 32-bit integer image of the operands.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T* dest, T compare, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
  union Bits {
    int i ;
    T f ;
    KOKKOS_INLINE_FUNCTION Bits() {}
  } ;
  Bits observed , expected , desired ;
  observed.f = *dest ;   // overwritten below by the primitive's result
  expected.f = compare ;
  desired.f  = val ;
  observed.i = hc::atomic_compare_exchange_int((int*)(dest), expected.i, desired.i);
  return observed.f ;
}

/// Compare-and-swap for any type with the same size as int64_t, performed
/// on the 64-bit integer image of the operands.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T* dest, T compare, typename std::enable_if<sizeof(T) == sizeof(int64_t), const T&>::type val) {
  union Bits {
    uint64_t i ;
    T f ;
    KOKKOS_INLINE_FUNCTION Bits() {}
  } ;
  Bits observed , expected , desired ;
  observed.f = *dest ;   // overwritten below by the primitive's result
  expected.f = compare ;
  desired.f  = val ;
  observed.i = hc::atomic_compare_exchange_uint64((uint64_t*)(dest), expected.i, desired.i);
  return observed.f ;
}

/// Fallback for types that are neither 4 nor 8 bytes wide.
/// NOTE(review): performs no memory operation; it only returns \p val.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_compare_exchange(volatile T* dest, T compare, typename std::enable_if<(sizeof(T) != sizeof(int32_t)) && (sizeof(T) != sizeof(int64_t)), const T&>::type val) {
  return val ;
}
/// Fetch-and-add on a 32-bit signed integer; returns the primitive's result.
KOKKOS_INLINE_FUNCTION
int atomic_fetch_add (volatile int * dest, const int& val) {
  int* const target = (int *)dest ;
  return hc::atomic_fetch_add(target, val);
}

/// Fetch-and-add on a 32-bit unsigned integer.
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_add(unsigned int* dest, const unsigned int& val) {
  const unsigned int previous = hc::atomic_fetch_add(dest, val);
  return previous;
}

/// Fetch-and-add on unsigned long, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
unsigned long atomic_fetch_add(volatile unsigned long* dest, const unsigned long& val) {
  uint64_t* const target = (uint64_t *)dest ;
  const uint64_t  delta  = static_cast<uint64_t>(val);
  return static_cast<unsigned long>(hc::atomic_fetch_add(target, delta));
}

/// Fetch-and-add on int64_t, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
int64_t atomic_fetch_add(volatile int64_t* dest, const int64_t& val) {
  uint64_t* const target = (uint64_t *)dest ;
  const uint64_t  delta  = static_cast<uint64_t>(val);
  return static_cast<int64_t>(hc::atomic_fetch_add(target, delta));
}
/// Fetch-and-add on a char, emulated with a compare-and-swap loop on the
/// 32-bit word that starts at \p dest.
///
/// Only the low 8 bits of the word are updated; the upper 24 bits are
/// carried over unchanged. The masks are parenthesized explicitly because
/// binary '+' binds tighter than '&', and the upper mask keeps bit 31 so
/// neighbouring bytes are not altered.
///
/// NOTE(review): this reads and CASes a full 32-bit word at \p dest, so it
/// assumes \p dest is suitably aligned, that the three following bytes are
/// accessible, and that the char occupies the low-order byte of that word
/// -- confirm for the target.
///
/// \return The low 8 bits of the word as it was immediately before the
///         successful update.
KOKKOS_INLINE_FUNCTION
char atomic_fetch_add(volatile char * dest, const char& val) {
  unsigned int oldval,newval,assume;
  oldval = *(unsigned int *)dest ;
  do {
    assume = oldval ;
    newval = ( assume & 0xffffff00u )
           | ( ( ( assume & 0xffu ) + (unsigned int)val ) & 0xffu ) ;
    oldval = hc::atomic_compare_exchange_unsigned((unsigned int*)dest, assume,newval);
  } while ( assume != oldval );
  return static_cast<char>( oldval & 0xffu ) ;
}

/// Fetch-and-add on a short, emulated with a compare-and-swap loop on the
/// 32-bit word that starts at \p dest.
///
/// Only the low 16 bits of the word are updated; the upper 16 bits are
/// carried over unchanged (see the char overload for the masking rationale).
///
/// NOTE(review): same alignment / layout assumptions as the char overload.
///
/// \return The low 16 bits of the word as it was immediately before the
///         successful update.
KOKKOS_INLINE_FUNCTION
short atomic_fetch_add(volatile short * dest, const short& val) {
  unsigned int oldval,newval,assume;
  oldval = *(unsigned int *)dest ;
  do {
    assume = oldval ;
    newval = ( assume & 0xffff0000u )
           | ( ( ( assume & 0xffffu ) + (unsigned int)val ) & 0xffffu ) ;
    oldval = hc::atomic_compare_exchange_unsigned((unsigned int*)dest, assume,newval);
  } while ( assume != oldval );
  return static_cast<short>( oldval & 0xffffu ) ;
}
/// Fetch-and-add on long long, routed through the uint64_t primitive.
KOKKOS_INLINE_FUNCTION
long long atomic_fetch_add(volatile long long * dest, const long long& val) {
  uint64_t* const target = (uint64_t*)dest ;
  const uint64_t  delta  = static_cast<uint64_t>(val);
  return static_cast<long long>(hc::atomic_fetch_add(target, delta));
}

/// Fetch-and-subtract on a 32-bit signed integer.
KOKKOS_INLINE_FUNCTION
int atomic_fetch_sub (volatile int * dest, const int& val) {
  int* const target = (int *)dest ;
  return hc::atomic_fetch_sub(target, val);
}

/// Fetch-and-subtract on a 32-bit unsigned integer.
KOKKOS_INLINE_FUNCTION
unsigned int atomic_fetch_sub(volatile unsigned int* dest, const unsigned int& val) {
  unsigned int* const target = (unsigned int *)dest ;
  return hc::atomic_fetch_sub(target, val);
}

/// Fetch-and-subtract on int64_t, expressed as a 64-bit fetch-and-add of
/// the negated (modulo 2^64) operand.
KOKKOS_INLINE_FUNCTION
int64_t atomic_fetch_sub(int64_t* dest, const int64_t& val) {
  uint64_t* const target  = (uint64_t *)dest ;
  const uint64_t  negated = uint64_t(0) - static_cast<uint64_t>(val);
  return static_cast<int64_t>(hc::atomic_fetch_add(target, negated));
  // A direct hc::atomic_fetch_sub_uint64 call was left disabled here.
}
/// Fetch-and-subtract on a char, emulated with a compare-and-swap loop on
/// the 32-bit word that starts at \p dest.
///
/// Only the low 8 bits of the word are updated; the upper 24 bits are
/// carried over unchanged. The masks are parenthesized explicitly because
/// binary '+'/'-' bind tighter than '&', and the upper mask keeps bit 31 so
/// neighbouring bytes are not altered.
///
/// NOTE(review): this reads and CASes a full 32-bit word at \p dest, so it
/// assumes \p dest is suitably aligned, that the three following bytes are
/// accessible, and that the char occupies the low-order byte of that word
/// -- confirm for the target.
///
/// \return The low 8 bits of the word as it was immediately before the
///         successful update.
KOKKOS_INLINE_FUNCTION
char atomic_fetch_sub(volatile char * dest, const char& val) {
  unsigned int oldval,newval,assume;
  oldval = *(unsigned int *)dest ;
  do {
    assume = oldval ;
    newval = ( assume & 0xffffff00u )
           | ( ( ( assume & 0xffu ) - (unsigned int)val ) & 0xffu ) ;
    oldval = hc::atomic_compare_exchange_unsigned((unsigned int*)dest, assume,newval);
  } while ( assume != oldval );
  return static_cast<char>( oldval & 0xffu ) ;
}

/// Fetch-and-subtract on a short, emulated with a compare-and-swap loop on
/// the 32-bit word that starts at \p dest.
///
/// Only the low 16 bits of the word are updated; the upper 16 bits are
/// carried over unchanged (see the char overload for the masking rationale).
///
/// NOTE(review): same alignment / layout assumptions as the char overload.
///
/// \return The low 16 bits of the word as it was immediately before the
///         successful update.
KOKKOS_INLINE_FUNCTION
short atomic_fetch_sub(volatile short * dest, const short& val) {
  unsigned int oldval,newval,assume;
  oldval = *(unsigned int *)dest ;
  do {
    assume = oldval ;
    newval = ( assume & 0xffff0000u )
           | ( ( ( assume & 0xffffu ) - (unsigned int)val ) & 0xffffu ) ;
    oldval = hc::atomic_compare_exchange_unsigned((unsigned int*)dest, assume,newval);
  } while ( assume != oldval );
  return static_cast<short>( oldval & 0xffffu ) ;
}
/// Fetch-and-subtract on long long, expressed as a 64-bit fetch-and-add of
/// the negated (modulo 2^64) operand.
KOKKOS_INLINE_FUNCTION
long long atomic_fetch_sub(volatile long long * dest, const long long& val) {
  uint64_t* const target  = (uint64_t*)dest ;
  const uint64_t  negated = uint64_t(0) - static_cast<uint64_t>(val);
  return static_cast<long long>(hc::atomic_fetch_add(target, negated));
}
/// Fetch-and-add for any type with the same size as int, built from a
/// compare-and-swap retry loop on the 32-bit image of the value.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add(volatile T* dest, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
  union Bits {
    unsigned int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION Bits() {}
  } ;
  Bits expected , observed , desired ;
  observed.t = *dest ;
  for (;;) {
    expected.i = observed.i ;
    desired.t  = expected.t + val ;
    observed.i = atomic_compare_exchange( (unsigned int*)(dest) , expected.i , desired.i );
    // Done once the word still held the value the sum was computed from.
    if ( expected.i == observed.i ) break ;
  }
  return observed.t ;
}

/// Fetch-and-add for any type with the same size as int64_t (and not int),
/// built from a compare-and-swap retry loop on the 64-bit image of the value.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add(volatile T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(int64_t), const T&>::type val) {
  union Bits {
    uint64_t i ;
    T t ;
    KOKKOS_INLINE_FUNCTION Bits() {}
  } ;
  Bits expected , observed , desired ;
  observed.t = *dest ;
  for (;;) {
    expected.i = observed.i ;
    desired.t  = expected.t + val ;
    observed.i = atomic_compare_exchange( (uint64_t*)dest , expected.i , desired.i );
    // Done once the word still held the value the sum was computed from.
    if ( expected.i == observed.i ) break ;
  }
  return observed.t ;
}
//WORKAROUND
// Fallback overload selected for types whose size matches neither int nor
// int64_t.
// NOTE(review): as written this performs no memory operation at all -- *dest
// is neither read nor updated and the caller's own operand is handed back.
// It looks like a placeholder so that such instantiations still compile;
// callers must not rely on it for an actual fetch-and-add.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_fetch_add(volatile T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) != sizeof(int64_t), const T&>::type val) {
return val ;
}
/// Fetch-and-subtract for any type with the same size as int, built from a
/// compare-and-swap retry loop on the 32-bit image of the value.
///
/// The operand is taken by const reference, like every other overload in
/// this family, so that temporaries and const objects are accepted.
///
/// \param dest  Location to update.
/// \param val   Amount to subtract.
/// \return The value \p *dest held immediately before the successful update.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_fetch_sub(volatile T* dest, typename std::enable_if<sizeof(T) == sizeof(int), const T&>::type val) {
  union U {
    int i ;
    T t ;
    KOKKOS_INLINE_FUNCTION U() {}
  } assume , oldval , newval ;
  oldval.t = *dest ;
  do {
    assume.i = oldval.i ;
    newval.t = assume.t - val ;
    // Retry until the word still holds the value the difference was computed from.
    oldval.i = Kokkos::atomic_compare_exchange( (int*)dest , assume.i , newval.i );
  } while ( assume.i != oldval.i );
  return oldval.t ;
}
/// Fetch-and-subtract for any type with the same size as int64_t (and not
/// int), built from a compare-and-swap retry loop on the 64-bit image of
/// the value.
template<class T>
KOKKOS_INLINE_FUNCTION
T atomic_fetch_sub(volatile T* dest, typename std::enable_if<sizeof(T) != sizeof(int) && sizeof(T) == sizeof(int64_t), const T&>::type val) {
  union Bits {
    int64_t i ;
    T t ;
    KOKKOS_INLINE_FUNCTION Bits() {}
  } ;
  Bits expected , observed , desired ;
  observed.t = *dest ;
  for (;;) {
    expected.i = observed.i ;
    desired.t  = expected.t - val ;
    observed.i = atomic_compare_exchange( (int64_t*)dest , expected.i , desired.i );
    // Done once the word still held the value the difference was computed from.
    if ( expected.i == observed.i ) break ;
  }
  return observed.t ;
}
}
#endif

View File

@ -0,0 +1,51 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
// Include guard: the contents below are processed once per translation unit.
#ifndef GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP
#define GUARD_CORE_KOKKOS_ROCM_CONFIG_HPP
// Define KOKKOS_ROCM_HAS_WORKAROUNDS as 1 unless it was already defined
// before this point (an earlier definition is left untouched).
#ifndef KOKKOS_ROCM_HAS_WORKAROUNDS
#define KOKKOS_ROCM_HAS_WORKAROUNDS 1
#endif
#endif

View File

@ -0,0 +1,133 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ROCMEXEC_HPP
#define KOKKOS_ROCMEXEC_HPP
#include <algorithm>
#include <typeinfo>
#include <Kokkos_Macros.hpp>
//#include <ROCm/Kokkos_ROCmExec.hpp>
#include <hc.hpp>
// Bit mask used to reduce a shifted address to a slot index of the atomic
// lock table; that table has ROCM_SPACE_ATOMIC_MASK+1 entries.
#define ROCM_SPACE_ATOMIC_MASK 0x1FFFF
// NOTE(review): not referenced anywhere in this file -- confirm it is still needed.
#define ROCM_SPACE_ATOMIC_XOR_MASK 0x15A39
// Number of entries in the scratch and thread-id lock tables.
#define ROCM_CONCURRENCY 20480
//#define ROCM_CONCURRENCY 81920 # for fiji
namespace Kokkos {
// Lock table with ROCM_SPACE_ATOMIC_MASK+1 int slots; zeroed by
// init_rocm_atomic_lock_array().
static int rocm_space_atomic_locks[ROCM_SPACE_ATOMIC_MASK+1];
// Lock table with ROCM_CONCURRENCY int slots; zeroed by
// init_rocm_scratch_lock_array().
static int rocm_space_scratch_locks[ROCM_CONCURRENCY];
// Lock table with ROCM_CONCURRENCY int slots; zeroed by
// init_rocm_threadid_lock_array().
static int rocm_space_threadid_locks[ROCM_CONCURRENCY];
namespace Impl {
// TODO: mimic the CUDA implementation, add dGPU capability
// Zero every slot of rocm_space_atomic_locks.
// A function-local flag records that the work was done, so only the first
// call touches the table; later calls return immediately.
void init_rocm_atomic_lock_array() {
  static int already_done = 0;
  if (already_done) return;

  std::fill(rocm_space_atomic_locks,
            rocm_space_atomic_locks + (ROCM_SPACE_ATOMIC_MASK + 1),
            0);
  already_done = 1;
}
// Zero every slot of rocm_space_scratch_locks.
// A function-local flag records that the work was done, so only the first
// call touches the table; later calls return immediately.
void init_rocm_scratch_lock_array() {
  static int already_done = 0;
  if (already_done) return;

  std::fill(rocm_space_scratch_locks,
            rocm_space_scratch_locks + ROCM_CONCURRENCY,
            0);
  already_done = 1;
}
// Zero every slot of rocm_space_threadid_locks.
// A function-local flag records that the work was done, so only the first
// call touches the table; later calls return immediately.
void init_rocm_threadid_lock_array() {
  static int already_done = 0;
  if (already_done) return;

  std::fill(rocm_space_threadid_locks,
            rocm_space_threadid_locks + ROCM_CONCURRENCY,
            0);
  already_done = 1;
}
// Entry point for lock-table setup.
// Only the atomic lock table is initialised; the scratch and thread-id
// tables are currently not initialised from here:
//   init_rocm_scratch_lock_array();
//   init_rocm_threadid_lock_array();
void init_lock_arrays_rocm_space()
{
  init_rocm_atomic_lock_array();
}
}
} // namespace Kokkos
// Disabled code (compiled out by the enclosing #if 0): address-based
// lock/unlock helpers that hash a pointer into rocm_space_atomic_locks.
// Nothing between here and the matching #endif is compiled.
#if 0
namespace Kokkos {
namespace Impl {
KOKKOS_INLINE_FUNCTION
bool lock_address_rocm_space(void* ptr) {
#if 0
return(Kokkos::Impl::lock_address_host_space(ptr));
#else
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & ROCM_SPACE_ATOMIC_MASK;
return (0 == hc::atomic_compare_exchange(&rocm_space_atomic_locks[offset],0,1));
#endif
}
KOKKOS_INLINE_FUNCTION
void unlock_address_rocm_space(void* ptr) {
#if 0
Kokkos::Impl::unlock_address_host_space(ptr) ;
#else
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & ROCM_SPACE_ATOMIC_MASK;
hc::atomic_exchange( &rocm_space_atomic_locks[ offset ], 0);
#endif
}
}
} // namespace Kokkos
#endif
#endif /* #ifndef KOKKOS_ROCMEXEC_HPP */

View File

@ -0,0 +1,137 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
#ifndef KOKKOS_ROCMEXEC_HPP
#define KOKKOS_ROCMEXEC_HPP
#include <algorithm>
#include <typeinfo>
// When __HCC_ACCELERATOR__ is defined, printf(...) expands to nothing, so
// every printf call (including its arguments) is removed by the preprocessor.
#if defined(__HCC_ACCELERATOR__)
#define printf(...)
#endif
namespace Kokkos {
namespace Impl {
// Compile-time constants describing wavefront/workgroup geometry and
// shared-memory sizes for the ROCm backend.
//
// WavefrontIndexMask and WavefrontIndexShift are kept consistent with
// WavefrontSize (mask == size - 1, size == 1 << shift); the previous
// 0x001f / 5 pair described a 32-wide unit while WavefrontSize is 64.
struct ROCmTraits {
  // TODO: determine if needed
  enum { WavefrontSize         = 64     /* lanes per wavefront */ };
  enum { WorkgroupSize         = 64     /* work-items per workgroup */ };
  enum { WavefrontIndexMask    = 0x003f /* WavefrontSize - 1 */ };
  enum { WavefrontIndexShift   = 6      /* WavefrontSize == 1 << WavefrontIndexShift */ };

  // NOTE(review): the descriptions of the three values below were written in
  // terms of another device family ("Compute device 2.0", L1 cache split) --
  // confirm they are the right numbers for ROCm hardware.
  enum { SharedMemoryBanks     = 32      /* Compute device 2.0 */ };
  enum { SharedMemoryCapacity  = 0x0C000 /* 48k shared / 16k L1 Cache */ };
  enum { SharedMemoryUsage     = 0x04000 /* 16k shared / 48k L1 Cache */ };

  enum { UpperBoundExtentCount = 65535   /* Hard upper bound */ };

  // Disabled helpers: round an element count up to whole wavefronts.
#if 0
  KOKKOS_INLINE_FUNCTION static
  ROCmSpace::size_type wavefront_count( ROCmSpace::size_type i )
  { return ( i + WavefrontIndexMask ) >> WavefrontIndexShift ; }
  KOKKOS_INLINE_FUNCTION static
  ROCmSpace::size_type wavefront_align( ROCmSpace::size_type i )
  {
    enum { Mask = ~ROCmSpace::size_type( WavefrontIndexMask ) };
    return ( i + WavefrontIndexMask ) & Mask ;
  }
#endif
};
// Declarations only; no definitions are provided in this header.
// NOTE(review): the implementation file defines the rocm_internal_* functions
// with return type Kokkos::Experimental::ROCm::size_type and defines
// rocm_internal_maximum_extent_size rather than
// rocm_internal_maximum_workgroup_count -- confirm these declarations match
// the definitions actually linked.
size_t rocm_internal_cu_count();
size_t rocm_internal_maximum_workgroup_count();
// Both take a size and return a pointer to scratch storage.
size_t * rocm_internal_scratch_flags( const size_t size );
size_t * rocm_internal_scratch_space( const size_t size );
}
} // namespace Kokkos
// Mask applied by lock_address_rocm_space()/unlock_address_rocm_space() to
// turn a shifted address into an index into rocm_space_atomic_locks.
#define ROCM_SPACE_ATOMIC_MASK 0x1FFFF
// NOTE(review): not referenced anywhere in this header -- confirm it is still needed.
#define ROCM_SPACE_ATOMIC_XOR_MASK 0x15A39
//int rocm_space_atomic_locks[ROCM_SPACE_ATOMIC_MASK+1];
// Lock table indexed by the lock/unlock helpers below. Declared here at
// global scope as a pointer to int.
// NOTE(review): confirm a definition with this exact type and scope exists.
extern int
*rocm_space_atomic_locks;
namespace Kokkos {
namespace Impl {
// Sets up the lock tables; declared here, defined outside this header.
void init_lock_arrays_rocm_space();
// Declared here, defined outside this header. force_shrink defaults to false.
// NOTE(review): semantics of the returned pointer and of force_shrink are not
// visible from this header -- see the definition.
void* rocm_resize_scratch_space(size_t bytes, bool force_shrink = false);
// TODO: determine if needed
// Attempt to take the lock slot associated with `ptr`.
// The slot index is the address shifted right by two bits and masked with
// ROCM_SPACE_ATOMIC_MASK, so distinct addresses can share a slot.
// Returns true when hc::atomic_compare_exchange on that slot yields 0.
KOKKOS_INLINE_FUNCTION
bool lock_address_rocm_space(void* ptr) {
  const size_t slot = ( size_t(ptr) >> 2 ) & ROCM_SPACE_ATOMIC_MASK ;
  return 0 == hc::atomic_compare_exchange( &rocm_space_atomic_locks[slot] , 0 , 1 );
}
// Release the lock slot associated with `ptr` by atomically storing 0 into it.
// The slot index is computed exactly as in lock_address_rocm_space():
// address >> 2, masked with ROCM_SPACE_ATOMIC_MASK.
KOKKOS_INLINE_FUNCTION
void unlock_address_rocm_space(void* ptr) {
  const size_t slot = ( size_t(ptr) >> 2 ) & ROCM_SPACE_ATOMIC_MASK ;
  hc::atomic_exchange( &rocm_space_atomic_locks[slot] , 0 );
}
}
} // namespace Kokkos
namespace Kokkos {
namespace Impl {
//extern
//KOKKOS_INLINE_FUNCTION
//void init_lock_arrays_rocm_space();
}
} // namespace Kokkos
#endif /* #ifndef KOKKOS_ROCMEXEC_HPP */

View File

@ -0,0 +1,753 @@
/*
//@HEADER
// ************************************************************************
//
// Kokkos v. 2.0
// Copyright (2014) Sandia Corporation
//
// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation,
// the U.S. Government retains certain rights in this software.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
// 1. Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
//
// 3. Neither the name of the Corporation nor the names of the
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// Questions? Contact H. Carter Edwards (hcedwar@sandia.gov)
//
// ************************************************************************
//@HEADER
*/
/*--------------------------------------------------------------------------*/
/* Kokkos interfaces */
#include <Kokkos_Core.hpp>
/* only compile this file if ROCM is enabled for Kokkos */
#ifdef KOKKOS_ENABLE_ROCM
//#include <ROCm/Kokkos_ROCm_Internal.hpp>
#include <impl/Kokkos_Error.hpp>
#include <Kokkos_ROCmSpace.hpp>
#include <ROCm/Kokkos_ROCm_Exec.hpp>
/*--------------------------------------------------------------------------*/
/* Standard 'C' libraries */
#include <stdlib.h>
/* Standard 'C++' libraries */
#include <vector>
#include <iostream>
#include <sstream>
#include <string>
//KOKKOS_INLINE_FUNCTION
// Kokkos::Impl::ROCmLockArraysStruct kokkos_impl_rocm_lock_arrays ;
/*--------------------------------------------------------------------------*/
namespace Kokkos {
namespace Impl {
// Disabled code (compiled out by the enclosing #if 0): a kernel-based query
// of the architecture a kernel runs on, plus a ROCM_LAUNCH_BLOCKING
// environment check. Nothing up to the matching #endif is compiled.
// NOTE(review): `OCM_ARCH__` below looks like a truncated macro name --
// check before re-enabling.
#if 0
namespace {
__global__
void query_rocm_kernel_arch( int * d_arch )
{
#if defined( __HCC_ACCELERATOR__ )
*d_arch = OCM_ARCH__ ;
#else
*d_arch = 0 ;
#endif
}
/** Query what compute capability is actually launched to the device: */
int rocm_kernel_arch()
{
int * d_arch = 0 ;
rocmMalloc( (void **) & d_arch , sizeof(int) );
query_rocm_kernel_arch<<<1,1>>>( d_arch );
int arch = 0 ;
rocmMemcpy( & arch , d_arch , sizeof(int) , rocmMemcpyDefault );
rocmFree( d_arch );
return arch ;
}
bool rocm_launch_blocking()
{
const char * env = getenv("ROCM_LAUNCH_BLOCKING");
if (env == 0) return false;
return atoi(env);
}
}
#endif
// true device memory allocation, not visible from host
void * rocm_device_allocate(int size)
{
void * ptr;
hc::accelerator acc;
ptr = hc::am_alloc(size,acc,0);
return ptr;
}
// host pinned allocation
// flag = 1, non-coherent, host resident, but with gpu address space pointer
// flag = 2, coherent, host resident, but with host address space pointer
void * rocm_hostpinned_allocate(int size)
{
void * ptr;
hc::accelerator acc;
ptr = hc::am_alloc(size,acc,2);
return ptr;
}
// Same free used by all rocm memory allocations: forwards `ptr` to
// hc::am_free. The return value of am_free, if any, is ignored.
void rocm_device_free(void * ptr)
{
hc::am_free(ptr);
}
// Block until a marker inserted into the default accelerator view completes.
// The marker is created on the default view of a default-constructed
// hc::accelerator and then waited on.
KOKKOS_INLINE_FUNCTION
void rocm_device_synchronize()
{
  hc::accelerator_view default_view = hc::accelerator().get_default_view();
  hc::completion_future marker = default_view.create_marker();
  marker.wait();
}
// Error-reporting hook. The whole body is compiled out by #if 0, so this
// function currently does nothing and never throws; `name`, `file` and
// `line` are unused.
// NOTE(review): the disabled body refers to an error value `e` that is not a
// parameter of this function -- it would need one before being re-enabled.
void rocm_internal_error_throw( const char * name, const char * file, const int line )
{
#if 0
std::ostringstream out ;
out << name << " error( " << rocmGetErrorName(e) << "): " << rocmGetErrorString(e);
if (file) {
out << " " << file << ":" << line;
}
throw_runtime_exception( out.str() );
#endif
}
//----------------------------------------------------------------------------
// Some significant rocm device properties:
//
// rocmDeviceProp::name : Text label for device
// rocmDeviceProp::major : Device major number
// rocmDeviceProp::minor : Device minor number
// rocmDeviceProp::workgroupSize : number of threads per workgroup
// rocmDeviceProp::multiProcessorCount : number of multiprocessors
// rocmDeviceProp::sharedMemPerBlock : capacity of shared memory per wavefront
// rocmDeviceProp::totalConstMem : capacity of constant memory
// rocmDeviceProp::totalGlobalMem : capacity of global memory
// rocmDeviceProp::maxGridSize[3] : maximum grid size
//
//
// the data we have available from a ROCm accelerator
// std::wstring get_device_path()
// std::wstring get_description()
// unsigned int get_version()
// bool get_has_display()
// size_t get_dedicated_memory()
// bool get_supports_double_precision()
// bool get_supports_limited_double_precision()
// bool get_is_debug()
// bool get_supports_cpu_shared_memory()
// size_t get_max_tile_static_size()
// unsigned int get_cu_count()
// bool has_cpu_accessible_am()
// Per-accelerator properties, filled in by rocmGetDeviceProperties().
struct rocmDeviceProp {
// Multibyte conversion of the accelerator's get_device_path().
char name[256];
// Multibyte conversion of the accelerator's get_description().
char description[256];
// Raw value of the accelerator's get_version().
unsigned int version;
// For names starting with "gfx": digits name[3], name[4], name[5] packed as
// (d3<<16)+(d4<<8)+d5.
int device_type;
// 0 by default; for "gfx" names, the digit at name[6].
int device_ordinal;
// version>>16 by default; for "gfx" names, the digit at name[3].
int major;
// version&0xff by default; for "gfx" names, the digit at name[5].
int minor;
// Accelerator's get_dedicated_memory().
size_t totalGlobalMem;
// Accelerator's get_max_tile_static_size().
size_t sharedMemPerWavefront;
// Set to the fixed value 64.
int WavefrontSize;
// Set to the fixed value 256 ("preferred").
int WorkgroupSize;
// Set to the fixed value 409600.
int MaxTileCount;
// Set to the fixed value 1024.
int maxThreadsPerWorkgroup;
// Accelerator's get_cu_count().
int multiProcessorCount;
// Accelerator's get_supports_cpu_shared_memory().
int canMapHostMemory;
// True when major.minor is 7.0 or 8.1 (noted as Kaveri and Carrizo APUs).
bool APU;
};
// Convert the wide string `src` to a multibyte string stored in `dst`.
// At most `capacity` bytes are written; the result is always NUL-terminated
// and the unused tail of `dst` is zero-filled so later fixed-index reads are
// deterministic. Characters that wctomb cannot convert are skipped.
static void rocm_wstring_to_chars( const std::wstring & src , char * dst , size_t capacity )
{
  if ( capacity == 0 ) return ;

  size_t used = 0 ;
  for ( wchar_t wc : src ) {
    // wctomb may emit up to MB_CUR_MAX bytes; stop while there is still room
    // for that plus the terminator.
    if ( used + MB_CUR_MAX >= capacity ) break ;
    const int written = std::wctomb( dst + used , wc );
    if ( written > 0 ) used += written ;
  }
  for ( size_t k = used ; k < capacity ; ++k ) dst[k] = '\0' ;
}

// Fill `*devProp` with the properties of accelerator number `device`, where
// `device` indexes the list returned by hc::accelerator::get_all().
// The caller must pass an index that is valid for that list.
void rocmGetDeviceProperties(struct rocmDeviceProp* devProp, int device)
{
  hc::accelerator acc;
  std::vector<hc::accelerator> accv = acc.get_all() ;
  hc::accelerator a = accv[device];

  const std::wstring path = a.get_device_path();
  rocm_wstring_to_chars( path , devProp->name , sizeof(devProp->name) );

  /* assume a CPU */
  devProp->version = a.get_version();
  devProp->major = devProp->version>>16; // for CPU, these are meaningless
  devProp->minor = devProp->version&0xff;
  devProp->device_type = 0; // only meaningful for gfx devices, see below
  devProp->device_ordinal = 0;

  /* is this an AMD graphics card */
  if((devProp->name[0]=='g') && (devProp->name[1]=='f')
     && (devProp->name[2]=='x')) {
    /* for AMD cards, the name has the format gfxMmmO */
    devProp->device_type = ((devProp->name[3]-0x30)<<16)
                         + ((devProp->name[4]-0x30)<<8)
                         + (devProp->name[5]-0x30);
    devProp->device_ordinal = devProp->name[6]-0x30;
    devProp->major = devProp->name[3]-0x30;
    devProp->minor = devProp->name[5]-0x30;
  }

  const std::wstring description = a.get_description();
  rocm_wstring_to_chars( description , devProp->description , sizeof(devProp->description) );

  devProp->totalGlobalMem = a.get_dedicated_memory();
  devProp->sharedMemPerWavefront = a.get_max_tile_static_size();
  devProp->WavefrontSize = 64;
  devProp->WorkgroupSize = 256; // preferred
  devProp->MaxTileCount = 409600; // as defined in /opt/rocm/hcc-lc/include/hsa_new.h
  devProp->maxThreadsPerWorkgroup = 1024;
  devProp->multiProcessorCount = a.get_cu_count();
  devProp->canMapHostMemory = a.get_supports_cpu_shared_memory();

  // Kaveri has 64KB L2 per CU, 16KB L1, 64KB Vector Regs/SIMD, or 128 regs/thread
  // GCN has 64KB LDS per CU
  // Kaveri APU is 7:0:0
  // Carrizo APU is 8:0:1
  devProp->APU = ((devProp->major==7)&&(devProp->minor==0)) ||
                 ((devProp->major==8)&&(devProp->minor==1));
}
namespace {
// Table of properties for every accelerator reported by
// hc::accelerator::get_all(), populated once in the constructor.
class ROCmInternalDevices {
public:
// Capacity of m_rocmProp; the constructor aborts if more devices are found.
enum { MAXIMUM_DEVICE_COUNT = 64 };
// Properties of device i are stored at index i.
struct rocmDeviceProp m_rocmProp[ MAXIMUM_DEVICE_COUNT ] ;
// Number of valid entries in m_rocmProp.
int m_rocmDevCount ;
ROCmInternalDevices();
// Returns the single shared instance, creating it on first use.
static const ROCmInternalDevices & singleton();
};
// Enumerate all accelerators and record the properties of each one.
// Aborts via Kokkos::abort when more than MAXIMUM_DEVICE_COUNT are present.
ROCmInternalDevices::ROCmInternalDevices()
{
  hc::accelerator probe;
  std::vector<hc::accelerator> found = probe.get_all() ;
  m_rocmDevCount = found.size();

  if ( MAXIMUM_DEVICE_COUNT < m_rocmDevCount ) {
    Kokkos::abort("Sorry, you have more GPUs per node than we thought anybody would ever have. Please report this to github.com/kokkos/kokkos.");
  }

  for ( int dev = 0 ; dev < m_rocmDevCount ; ++dev ) {
    rocmGetDeviceProperties( &m_rocmProp[dev] , dev );
  }
}
// Return the shared device table. The instance is heap-allocated on the
// first call and is never deleted.
const ROCmInternalDevices & ROCmInternalDevices::singleton()
{
  static ROCmInternalDevices* instance = nullptr;
  if ( instance == nullptr ) {
    instance = new ROCmInternalDevices();
  }
  return *instance;
}
}
//----------------------------------------------------------------------------
// Process-wide state of the ROCm backend: the selected device, a few derived
// limits, and the two scratch buffers.
class ROCmInternal {
private:
  // Declared but not defined: copying is not supported.
  ROCmInternal( const ROCmInternal & );
  ROCmInternal & operator = ( const ROCmInternal & );

public:
  typedef Kokkos::Experimental::ROCm::size_type size_type ;

  int       m_rocmDev ;           // selected device index, -1 when none
  int       m_rocmArch ;          // -1 unless set elsewhere
  unsigned  m_multiProcCount ;    // copied from the device's multiProcessorCount
  unsigned  m_maxWorkgroup ;      // set by initialize()
  unsigned  m_maxSharedWords ;    // sharedMemPerWavefront / sizeof(size_type)
  size_type m_scratchSpaceCount ; // scratch space size, in ScratchGrain units
  size_type m_scratchFlagsCount ; // scratch flags size, in ScratchGrain units
  size_type * m_scratchSpace ;
  size_type * m_scratchFlags ;

  // Non-zero once finalize() has run; initialize() aborts afterwards.
  static int was_finalized;

  ROCmInternal()
    : m_rocmDev( -1 )
    , m_rocmArch( -1 )
    , m_multiProcCount( 0 )
    , m_maxWorkgroup( 0 )
    , m_maxSharedWords( 0 )
    , m_scratchSpaceCount( 0 )
    , m_scratchFlagsCount( 0 )
    , m_scratchSpace( nullptr )
    , m_scratchFlags( nullptr )
    {}

  ~ROCmInternal();

  static ROCmInternal & singleton();

  // Initialised means both scratch buffers have been allocated.
  int is_initialized() const
    { return m_scratchSpace != nullptr && m_scratchFlags != nullptr ; }

  int verify_is_initialized( const char * const label ) const ;

  void initialize( int rocm_device_id );
  void finalize();

  void print_configuration( std::ostream & ) const ;

  size_type * scratch_space( const size_type size );
  size_type * scratch_flags( const size_type size );
};
// Starts at 0; finalize() sets it to 1 and initialize() aborts once it is set.
int ROCmInternal::was_finalized = 0;
//----------------------------------------------------------------------------
void ROCmInternal::print_configuration( std::ostream & s ) const
{
const ROCmInternalDevices & dev_info = ROCmInternalDevices::singleton();
#if defined( KOKKOS_ENABLE_ROCM )
s << "macro KOKKOS_ENABLE_ROCM : defined" << std::endl ;
#endif
#if defined( __hcc_version__ )
s << "macro __hcc_version__ = " << __hcc_version__
<< std::endl ;
#endif
for ( int i = 0 ; i < dev_info.m_rocmDevCount ; ++i ) {
s << "Kokkos::Experimental::ROCm[ " << i << " ] "
<< dev_info.m_rocmProp[i].name
<< " version " << (dev_info.m_rocmProp[i].major) << "." << dev_info.m_rocmProp[i].minor
<< ", Total Global Memory: " << human_memory_size(dev_info.m_rocmProp[i].totalGlobalMem)
<< ", Shared Memory per Wavefront: " << human_memory_size(dev_info.m_rocmProp[i].sharedMemPerWavefront);
if ( m_rocmDev == i ) s << " : Selected" ;
s << std::endl ;
}
}
//----------------------------------------------------------------------------
// Report on std::cerr if either scratch buffer is still set (finalize() was
// not called), then reset every field to its default value.
ROCmInternal::~ROCmInternal()
{
  const bool still_allocated = ( m_scratchSpace != nullptr ) || ( m_scratchFlags != nullptr ) ;
  if ( still_allocated ) {
    std::cerr << "Kokkos::Experimental::ROCm ERROR: Failed to call Kokkos::Experimental::ROCm::finalize()"
              << std::endl ;
    std::cerr.flush();
  }

  m_rocmDev  = -1 ;
  m_rocmArch = -1 ;

  m_multiProcCount = 0 ;
  m_maxWorkgroup   = 0 ;
  m_maxSharedWords = 0 ;

  m_scratchSpaceCount = 0 ;
  m_scratchFlagsCount = 0 ;
  m_scratchSpace = nullptr ;
  m_scratchFlags = nullptr ;
}
// Return non-zero when a device has been selected (m_rocmDev >= 0).
// Otherwise print an error mentioning `label` to std::cerr and return 0.
int ROCmInternal::verify_is_initialized( const char * const label ) const
{
  const bool device_selected = ( 0 <= m_rocmDev ) ;
  if ( ! device_selected ) {
    std::cerr << "Kokkos::Experimental::ROCm::" << label << " : ERROR device not initialized" << std::endl ;
  }
  return device_selected ;
}
// Return the shared ROCmInternal object. The instance is heap-allocated on
// the first call and is never deleted.
ROCmInternal & ROCmInternal::singleton()
{
  static ROCmInternal* instance = nullptr ;
  if ( instance == nullptr ) {
    instance = new ROCmInternal();
  }
  return *instance ;
}
// Select device `rocm_device_id` and set up the backend state.
//
// - Aborts if finalize() has already been called.
// - Returns immediately if already initialised.
// - Throws (via throw_runtime_exception) if the host execution space is not
//   initialised, if `rocm_device_id` is outside [1 .. device count - 1], or if
//   the device reports a major version below 1.
// On success it records device limits, allocates the initial scratch buffers
// and initialises the lock arrays.
void ROCmInternal::initialize( int rocm_device_id )
{
  if ( was_finalized ) Kokkos::abort("Calling ROCm::initialize after ROCm::finalize is illegal\n");

  if ( is_initialized() ) return;

  enum { WordSize = sizeof(size_type) };

  if ( ! HostSpace::execution_space::is_initialized() ) {
    const std::string msg("ROCm::initialize ERROR : HostSpace::execution_space is not initialized");
    throw_runtime_exception( msg );
  }

  const ROCmInternalDevices & dev_info = ROCmInternalDevices::singleton();

  const bool ok_init = 0 == m_scratchSpace || 0 == m_scratchFlags ;

  // Device index 0 is not accepted; valid ids start at 1.
  const bool ok_id = 1 <= rocm_device_id &&
                     rocm_device_id < dev_info.m_rocmDevCount ;

  // Need at least a GPU device
  const bool ok_dev = ok_id &&
    ( 1 <= dev_info.m_rocmProp[ rocm_device_id ].major &&
      0 <= dev_info.m_rocmProp[ rocm_device_id ].minor );

  if ( ok_init && ok_dev ) {

    const struct rocmDeviceProp & rocmProp =
      dev_info.m_rocmProp[ rocm_device_id ];

    m_rocmDev = rocm_device_id ;

    // rocmSetDevice( m_rocmDev ) );
    Kokkos::Impl::rocm_device_synchronize();

    /*
    // Query what compute capability architecture a kernel executes:
    m_rocmArch = rocm_kernel_arch();
    if ( m_rocmArch != rocmProp.major * 100 + rocmProp.minor * 10 ) {
      std::cerr << "Kokkos::Experimental::ROCm::initialize WARNING: running kernels compiled for compute capability "
                << ( m_rocmArch / 100 ) << "." << ( ( m_rocmArch % 100 ) / 10 )
                << " on device with compute capability "
                << rocmProp.major << "." << rocmProp.minor
                << " , this will likely reduce potential performance."
                << std::endl ;
    }
    */

    // number of multiprocessors
    m_multiProcCount = rocmProp.multiProcessorCount ;

    //----------------------------------
    // Maximum number of wavefronts,
    // at most one workgroup per thread in a workgroup for reduction.
    m_maxSharedWords = rocmProp.sharedMemPerWavefront/ WordSize ;

    //----------------------------------
    // Maximum number of Workgroups:
    m_maxWorkgroup = 5*rocmProp.multiProcessorCount; //TODO: confirm usage and value

    //----------------------------------
    // Multiblock reduction uses scratch flags for counters
    // and scratch space for partial reduction values.
    // Allocate some initial space. This will grow as needed.
    {
      const unsigned reduce_block_count = m_maxWorkgroup * Impl::ROCmTraits::WorkgroupSize ;
      (void) scratch_flags( reduce_block_count * 2 * sizeof(size_type) );
      (void) scratch_space( reduce_block_count * 16 * sizeof(size_type) );
    }
    //----------------------------------
  }
  else {
    std::ostringstream msg ;
    msg << "Kokkos::Experimental::ROCm::initialize(" << rocm_device_id << ") FAILED" ;

    if ( ! ok_init ) {
      msg << " : Already initialized" ;
    }
    if ( ! ok_id ) {
      // Report the range the check above actually accepts.
      msg << " : Device identifier out of range "
          << "[1.." << (dev_info.m_rocmDevCount-1) << "]" ;
    }
    else if ( ! ok_dev ) {
      msg << " : Device " ;
      msg << dev_info.m_rocmProp[ rocm_device_id ].major ;
      msg << "." ;
      msg << dev_info.m_rocmProp[ rocm_device_id ].minor ;
      msg << " Need at least a GPU" ;
      msg << std::endl;
    }
    Kokkos::Impl::throw_runtime_exception( msg.str() );
  }

  // Init the array used for arbitrarily sized atomics
  Kokkos::Impl::init_lock_arrays_rocm_space();

  // Kokkos::Impl::ROCmLockArraysStruct locks;
  // locks.atomic = atomic_lock_array_rocm_space_ptr(false);
  // locks.scratch = scratch_lock_array_rocm_space_ptr(false);
  // locks.threadid = threadid_lock_array_rocm_space_ptr(false);
  // rocmMemcpyToSymbol( kokkos_impl_rocm_lock_arrays , & locks , sizeof(ROCmLockArraysStruct) );
}
//----------------------------------------------------------------------------
// Allocation granule for the scratch buffers: an array of
// ROCmTraits::WorkgroupSize elements of ROCm::size_type.
typedef Kokkos::Experimental::ROCm::size_type ScratchGrain[ Impl::ROCmTraits::WorkgroupSize ] ;
// Size of one granule in bytes; scratch_flags()/scratch_space() round
// requests up to a whole number of granules.
enum { sizeScratchGrain = sizeof(ScratchGrain) };
// Set `size` BYTES starting at `ptr` to the low byte of `value`.
// Note that `size` is a byte count, not an element count, and that `value`
// is narrowed to char before being stored.
// The loop index has the same type as `size`, avoiding a signed/unsigned
// comparison and signed overflow for large sizes.
void rocmMemset( Kokkos::Experimental::ROCm::size_type * ptr , Kokkos::Experimental::ROCm::size_type value , Kokkos::Experimental::ROCm::size_type size)
{
  char * const bytes = reinterpret_cast<char *>( ptr );
  const char fill = static_cast<char>( value );
  for ( Kokkos::Experimental::ROCm::size_type i = 0 ; i < size ; ++i ) {
    bytes[i] = fill ;
  }
}
// Return the scratch-flags buffer, growing it first when it holds fewer than
// `size` bytes. A newly allocated buffer is zero-filled. When the buffer is
// replaced, the reference to the previous allocation is dropped so it is not
// leaked; pointers returned by earlier calls must not be used after a call
// that grows the buffer.
// If no device has been selected the current pointer is returned unchanged.
Kokkos::Experimental::ROCm::size_type *
ROCmInternal::scratch_flags( const Kokkos::Experimental::ROCm::size_type size )
{
  if ( verify_is_initialized("scratch_flags") && m_scratchFlagsCount * sizeScratchGrain < size ) {

    typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;

    // Release the smaller buffer being replaced.
    if ( m_scratchFlags ) {
      Record::decrement( Record::get_record( m_scratchFlags ) );
    }

    // Round the request up to whole granules.
    m_scratchFlagsCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

    Record * const r = Record::allocate( Kokkos::HostSpace()
                                       , "InternalScratchFlags"
                                       , ( sizeScratchGrain * m_scratchFlagsCount ) );
    Record::increment( r );

    m_scratchFlags = reinterpret_cast<size_type *>( r->data() );

    rocmMemset( m_scratchFlags , 0 , m_scratchFlagsCount * sizeScratchGrain );
  }
  return m_scratchFlags ;
}
// Return the scratch-space buffer, growing it first when it holds fewer than
// `size` bytes. The new buffer's contents are not initialised. When the
// buffer is replaced, the reference to the previous allocation is dropped so
// it is not leaked; pointers returned by earlier calls must not be used after
// a call that grows the buffer.
// If no device has been selected the current pointer is returned unchanged.
Kokkos::Experimental::ROCm::size_type *
ROCmInternal::scratch_space( const Kokkos::Experimental::ROCm::size_type size )
{
  if ( verify_is_initialized("scratch_space") && m_scratchSpaceCount * sizeScratchGrain < size ) {

    typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::HostSpace , void > Record ;

    // Release the smaller buffer being replaced.
    if ( m_scratchSpace ) {
      Record::decrement( Record::get_record( m_scratchSpace ) );
    }

    // Round the request up to whole granules.
    m_scratchSpaceCount = ( size + sizeScratchGrain - 1 ) / sizeScratchGrain ;

    Record * const r = Record::allocate( Kokkos::HostSpace()
                                       , "InternalScratchSpace"
                                       , ( sizeScratchGrain * m_scratchSpaceCount ) );
    Record::increment( r );

    m_scratchSpace = reinterpret_cast<size_type *>( r->data() );
  }
  return m_scratchSpace ;
}
//----------------------------------------------------------------------------
void ROCmInternal::finalize()
{
was_finalized = 1;
if ( 0 != m_scratchSpace || 0 != m_scratchFlags ) {
// atomic_lock_array_rocm_space_ptr(false);
// scratch_lock_array_rocm_space_ptr(false);
// threadid_lock_array_rocm_space_ptr(false);
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< HostSpace > RecordROCm ;
typedef Kokkos::Experimental::Impl::SharedAllocationRecord< Kokkos::Experimental::ROCmHostPinnedSpace > RecordHost ;
RecordROCm::decrement( RecordROCm::get_record( m_scratchFlags ) );
RecordROCm::decrement( RecordROCm::get_record( m_scratchSpace ) );
m_rocmDev = -1 ;
m_multiProcCount = 0 ;
m_maxWorkgroup = 0 ;
m_maxSharedWords = 0 ;
m_scratchSpaceCount = 0 ;
m_scratchFlagsCount = 0 ;
m_scratchSpace = 0 ;
m_scratchFlags = 0 ;
}
}
//----------------------------------------------------------------------------
// Free-function accessors that forward to the ROCmInternal singleton.
// NOTE(review): the Exec header declares these with size_t in place of
// ROCm::size_type, and declares rocm_internal_maximum_workgroup_count rather
// than rocm_internal_maximum_extent_size -- confirm the declarations and
// definitions agree.

// Value of m_multiProcCount.
Kokkos::Experimental::ROCm::size_type rocm_internal_cu_count()
{ return ROCmInternal::singleton().m_multiProcCount ; }
// Value of m_maxWorkgroup.
Kokkos::Experimental::ROCm::size_type rocm_internal_maximum_extent_size()
{ return ROCmInternal::singleton().m_maxWorkgroup ; }
// Value of m_maxSharedWords.
Kokkos::Experimental::ROCm::size_type rocm_internal_maximum_shared_words()
{ return ROCmInternal::singleton().m_maxSharedWords ; }
// Forwards to ROCmInternal::scratch_space( size ).
Kokkos::Experimental::ROCm::size_type * rocm_internal_scratch_space( const Kokkos::Experimental::ROCm::size_type size )
{ return ROCmInternal::singleton().scratch_space( size ); }
// Forwards to ROCmInternal::scratch_flags( size ).
Kokkos::Experimental::ROCm::size_type * rocm_internal_scratch_flags( const Kokkos::Experimental::ROCm::size_type size )
{ return ROCmInternal::singleton().scratch_flags( size ); }
} // namespace Impl
} // namespace Kokkos
//----------------------------------------------------------------------------
namespace Kokkos {
namespace Experimental {
//ROCm::size_type ROCm::detect_device_count()
//{ return Impl::ROCmInternalDevices::singleton().m_rocmDevCount ; }
// Returns a compile-time constant chosen by KOKKOS_ARCH_KAVERI.
int ROCm::concurrency() {
#if defined(KOKKOS_ARCH_KAVERI)
return 8*64*40; // 20480 kaveri
#else
// NOTE(review): this expression evaluates to 10240, not the 81920 that the
// original comment gave for fiji and hawaii (32*64*40 would be 81920) --
// confirm which value is intended.
return 32*8*40; // fiji and hawaii
#endif
}
// Forwards to ROCmInternal::is_initialized() on the singleton, which is
// non-zero once both scratch buffers have been allocated.
int ROCm::is_initialized()
{ return Kokkos::Impl::ROCmInternal::singleton().is_initialized(); }
// Initialise the backend on the device named by config.rocm_device_id, then
// initialise profiling when KOKKOS_ENABLE_PROFILING is defined.
void ROCm::initialize( const ROCm::SelectDevice config )
{
Kokkos::Impl::ROCmInternal::singleton().initialize( config.rocm_device_id );
#if defined(KOKKOS_ENABLE_PROFILING)
Kokkos::Profiling::initialize();
#endif
}
// Disabled code (compiled out by the enclosing #if 0): per-device
// architecture queries. Nothing up to the matching #endif is compiled.
#if 0
std::vector<unsigned>
ROCm::detect_device_arch()
{
const Impl::ROCmInternalDevices & s = Impl::ROCmInternalDevices::singleton();
std::vector<unsigned> output( s.m_rocmDevCount );
for ( int i = 0 ; i < s.m_rocmDevCount ; ++i ) {
output[i] = s.m_rocmProp[i].major * 100 + s.m_rocmProp[i].minor ;
}
return output ;
}
ROCm::size_type ROCm::device_arch()
{
return 1 ;
}
#endif
// Finalise the backend singleton, then finalise profiling when
// KOKKOS_ENABLE_PROFILING is defined.
void ROCm::finalize()
{
Kokkos::Impl::ROCmInternal::singleton().finalize();
#if defined(KOKKOS_ENABLE_PROFILING)
Kokkos::Profiling::finalize();
#endif
}
// Construct an instance bound to the singleton's currently selected device
// (m_rocmDev, which is -1 when no device has been selected).
// verify_is_initialized() only prints an error in that case; its return value
// is ignored here, so construction still succeeds.
ROCm::ROCm()
: m_device( Kokkos::Impl::ROCmInternal::singleton().m_rocmDev )
{
Kokkos::Impl::ROCmInternal::singleton().verify_is_initialized( "ROCm instance constructor" );
}
// Report whether device number `device` was classified as an APU when its
// properties were collected.
// Returns false for an index outside [0, device count), e.g. the -1 that
// denotes "no device selected", instead of reading outside the table.
bool ROCm::isAPU(int device) {
  const Kokkos::Impl::ROCmInternalDevices & dev_info =
    Kokkos::Impl::ROCmInternalDevices::singleton();

  if ( device < 0 || dev_info.m_rocmDevCount <= device ) {
    return false ;
  }
  return (dev_info.m_rocmProp[device].APU);
}
// Convenience overload: queries the device returned by rocm_device().
bool ROCm::isAPU() {
return ROCm::isAPU(rocm_device());
}
//ROCm::ROCm( const int instance_id )
// : m_device( Impl::ROCmInternal::singleton().m_rocmDev )
//{}
// Forwards to ROCmInternal::print_configuration( s ) on the singleton.
// The unnamed bool parameter is ignored.
void ROCm::print_configuration( std::ostream & s , const bool )
{ Kokkos::Impl::ROCmInternal::singleton().print_configuration( s ); }
// No-op: always returns false.
bool ROCm::sleep() { return false ; }
// No-op: always returns true.
bool ROCm::wake() { return true ; }
// Blocks by calling Kokkos::Impl::rocm_device_synchronize().
void ROCm::fence()
{
Kokkos::Impl::rocm_device_synchronize();
}
// Returns the string literal "ROCm".
const char* ROCm::name() { return "ROCm"; }
} // namespace Experimental
} // namespace Kokkos
#endif // KOKKOS_ENABLE_ROCM
//----------------------------------------------------------------------------

Some files were not shown because too many files have changed in this diff Show More