Merge branch 'develop' into collected-small-fixes

This commit is contained in:
Axel Kohlmeyer
2023-06-06 08:36:25 -04:00
55 changed files with 2442 additions and 256 deletions

View File

@ -49,8 +49,8 @@ if(DOWNLOAD_KOKKOS)
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_CXX_EXTENSIONS=${CMAKE_CXX_EXTENSIONS}")
list(APPEND KOKKOS_LIB_BUILD_ARGS "-DCMAKE_TOOLCHAIN_FILE=${CMAKE_TOOLCHAIN_FILE}")
include(ExternalProject)
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.7.01.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "f140e02b826223b1045207d9bc10d404" CACHE STRING "MD5 checksum of KOKKOS tarball")
set(KOKKOS_URL "https://github.com/kokkos/kokkos/archive/3.7.02.tar.gz" CACHE STRING "URL for KOKKOS tarball")
set(KOKKOS_MD5 "34d7860d548c06a4040236d959c9f99a" CACHE STRING "MD5 checksum of KOKKOS tarball")
mark_as_advanced(KOKKOS_URL)
mark_as_advanced(KOKKOS_MD5)
GetFallbackURL(KOKKOS_URL KOKKOS_FALLBACK)
@ -75,7 +75,7 @@ if(DOWNLOAD_KOKKOS)
add_dependencies(LAMMPS::KOKKOSCORE kokkos_build)
add_dependencies(LAMMPS::KOKKOSCONTAINERS kokkos_build)
elseif(EXTERNAL_KOKKOS)
find_package(Kokkos 3.7.01 REQUIRED CONFIG)
find_package(Kokkos 3.7.02 REQUIRED CONFIG)
target_link_libraries(lammps PRIVATE Kokkos::kokkos)
else()
set(LAMMPS_LIB_KOKKOS_SRC_DIR ${LAMMPS_LIB_SOURCE_DIR}/kokkos)

View File

@ -19,7 +19,7 @@ Syntax
* ex,ey,ez = E-field component values (electric field units)
* any of ex,ey,ez can be a variable (see below)
* zero or more keyword/value pairs may be appended to args
* keyword = *region* or *energy*
* keyword = *region* or *energy* or *potential*
.. parsed-literal::
@ -27,6 +27,8 @@ Syntax
region-ID = ID of region atoms must be in to have added force
*energy* value = v_name
v_name = variable with name that calculates the potential energy of each atom in the added E-field
*potential* value = v_name
v_name = variable with name that calculates the electric potential of each atom in the added E-field
Examples
""""""""
@ -112,7 +114,8 @@ one or more variables, and if you are performing dynamics via the
:doc:`run <run>` command. If the keyword is not used, LAMMPS will set
the energy to 0.0, which is typically fine for dynamics.
The *energy* keyword is required if the added force is defined with
The *energy* keyword (or *potential* keyword, described below)
is required if the added force is defined with
one or more variables, and you are performing energy minimization via
the "minimize" command for charged particles. It is not required for
point-dipoles, but a warning is issued since the minimizer in LAMMPS
@ -122,7 +125,7 @@ minimize the orientation of dipoles in an applied electric field.
The *energy* keyword specifies the name of an atom-style
:doc:`variable <variable>` which is used to compute the energy of each
atom as function of its position. Like variables used for *ex*,
*ey*, *ez*, the energy variable is specified as v_name, where name
*ey*, *ez*, the energy variable is specified as "v_name", where "name"
is the variable name.
Note that when the *energy* keyword is used during an energy
@ -133,6 +136,27 @@ due to the electric field were a spring-like F = kx, then the energy
formula should be E = -0.5kx\^2. If you don't do this correctly, the
minimization will not converge properly.
.. versionadded:: TBD
The *potential* keyword can be used as an alternative to the *energy* keyword
to specify the name of an atom-style variable, which is used to compute the
electric potential added at each atom as a function of its position. The
variable should have units of electric field multiplied by distance (that is,
in `units real`, the potential should be in volts). As with the *energy*
keyword, the variable name is specified as "v_name". The energy added by this
fix is then calculated as the electric potential multiplied by charge.
The *potential* keyword is mainly intended for correct charge
equilibration in simulations with :doc:`fix qeq/reaxff<fix_qeq_reaxff>`,
since with variable charges the electric potential can be known
beforehand but the energy cannot. A small additional benefit is that
the *energy* keyword requires an additional conversion to energy units
which the *potential* keyword avoids. Thus, when the *potential*
keyword is specified, the *energy* keyword must not be used. As with
*energy*, the *potential* keyword is not allowed if the added field is a
constant vector. The *potential* keyword is not supported by *fix
efield/tip4p*.
----------
Restart, fix_modify, output, run start/stop, minimize info

View File

@ -128,9 +128,12 @@ periodic cell dimensions less than 10 Angstroms.
This fix may be used in combination with :doc:`fix efield <fix_efield>`
and will apply the external electric field during charge equilibration,
but there may be only one fix efield instance used, it may only use a
constant electric field, and the electric field vector may only have
components in non-periodic directions.
but there may be only one fix efield instance used and the electric field
vector may only have components in non-periodic directions. Equal-style
variables can be used for electric field vector components without any further
settings. Atom-style variables can be used for spatially-varying electric field
vector components, but the resulting electric potential must be specified
as an atom-style variable using the *potential* keyword for `fix efield`.
Related commands
""""""""""""""""

View File

@ -67,7 +67,7 @@ Syntax
bound(group,dir,region), gyration(group,region), ke(group,region),
angmom(group,dim,region), torque(group,dim,region),
inertia(group,dimdim,region), omega(group,dim,region)
special functions = sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label)
special functions = sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label)
feature functions = is_available(category,feature), is_active(category,feature), is_defined(category,id)
atom value = id[i], mass[i], type[i], mol[i], x[i], y[i], z[i], vx[i], vy[i], vz[i], fx[i], fy[i], fz[i], q[i]
atom vector = id, mass, type, mol, radius, q, x, y, z, vx, vy, vz, fx, fy, fz
@ -532,7 +532,7 @@ variables.
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Region functions | count(ID,IDR), mass(ID,IDR), charge(ID,IDR), xcm(ID,dim,IDR), vcm(ID,dim,IDR), fcm(ID,dim,IDR), bound(ID,dir,IDR), gyration(ID,IDR), ke(ID,IDR), angmom(ID,dim,IDR), torque(ID,dim,IDR), inertia(ID,dimdim,IDR), omega(ID,dim,IDR) |
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label) |
| Special functions | sum(x), min(x), max(x), ave(x), trap(x), slope(x), gmask(x), rmask(x), grmask(x,y), next(x), is_file(name), is_os(name), extract_setting(name), label2type(kind,label), is_typelabel(kind,label) |
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| Feature functions | is_available(category,feature), is_active(category,feature), is_defined(category,id) |
+--------------------+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
@ -996,10 +996,17 @@ via the link in this paragraph.
The label2type(kind,label) function converts type labels into numeric
types, using label maps created by the :doc:`labelmap <labelmap>` or
:doc:`read_data <read_data>` commands. The first argument is the
label map kind (atom, bond, angle, dihedral, or improper) and the
second argument is the label. The function returns the corresponding
numeric type.
:doc:`read_data <read_data>` commands. The first argument is the label
map kind (atom, bond, angle, dihedral, or improper) and the second
argument is the label. The function returns the corresponding numeric
type or triggers an error if the queried label does not exist.
.. versionadded:: TBD
The is_typelabel(kind,label) function has the same arguments as
label2type(), but returns 1 if the type label has been assigned,
otherwise it returns 0. This function can be used to check if a
particular type label already exists in the simulation.
----------

View File

@ -37,4 +37,4 @@ velocity all create 300.0 41279 loop geom
fix 1 all nve
fix 2 all qeq/reax 1 0.0 10.0 1e-6 reax/c
run 100
run $t

View File

@ -0,0 +1 @@
../../potentials/C_SNAP_2021.10.15.quadratic.snapcoeff

View File

@ -0,0 +1 @@
../../potentials/C_SNAP_2021.10.15.quadratic.snapparam

31
examples/snap/in.C_SNAP Normal file
View File

@ -0,0 +1,31 @@
#Carbon SNAP example: 216 atom diamond unit cell simulated NVT at ~1,000GPa and 5,000K
units metal
atom_style atomic
boundary p p p
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Crystal orientation and MD box creation #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
lattice diamond 2.845
region Bbox block 0 3 0 3 0 3
create_box 1 Bbox
create_atoms 1 region Bbox basis 1 1
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Interatomic potential parameters #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Specify hybrid/overlay pair style combining ZBL and SNAP
pair_style hybrid/overlay zbl 0.1 0.2 snap
pair_coeff 1 1 zbl 10 10
pair_coeff * * snap C_SNAP_2021.10.15.quadratic.snapcoeff C_SNAP_2021.10.15.quadratic.snapparam C
mass * 12.01
velocity all create 8000.0 3412461 loop geom
fix NVE all nve
fix NVT all langevin 5000.0 5000.0 0.1 3216548
thermo 50
thermo_style custom step temp ke pe etotal press pxx pyy pzz
run 500

View File

@ -0,0 +1,105 @@
LAMMPS (28 Mar 2023 - Development)
using 1 OpenMP thread(s) per MPI task
#Carbon SNAP example: 216 atom diamond unit cell simulated NVT at ~1,000GPa and 5,000K
units metal
atom_style atomic
boundary p p p
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Crystal orientation and MD box creation #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
lattice diamond 2.845
Lattice spacing in x,y,z = 2.845 2.845 2.845
region Bbox block 0 3 0 3 0 3
create_box 1 Bbox
Created orthogonal box = (0 0 0) to (8.535 8.535 8.535)
1 by 1 by 1 MPI processor grid
create_atoms 1 region Bbox basis 1 1
Created 216 atoms
using lattice units in orthogonal box = (0 0 0) to (8.535 8.535 8.535)
create_atoms CPU = 0.000 seconds
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Interatomic potential parameters #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Specify hybrid with SNAP, ZBL, and long-range C_SNAP_2021.10.15.quadratic.ulomb
pair_style hybrid/overlay zbl 0.1 0.2 snap
pair_coeff 1 1 zbl 10 10
pair_coeff * * snap C_SNAP_2021.10.15.quadratic.snapcoeff C_SNAP_2021.10.15.quadratic.snapparam C
SNAP Element = C, Radius 0.5, Weight 1
SNAP keyword rcutfac 2.7
SNAP keyword twojmax 8
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0.0
SNAP keyword bzeroflag 0
SNAP keyword quadraticflag 1
mass * 12.01
velocity all create 8000.0 3412461 loop geom
fix NVE all nve
fix NVT all langevin 5000.0 5000.0 0.1 3216548
thermo 50
thermo_style custom step temp ke pe etotal press pxx pyy pzz
run 500
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 4.7
ghost atom cutoff = 4.7
binsize = 2.35, bins = 4 4 4
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair zbl, perpetual, half/full trim from (2)
attributes: half, newton on, cut 2.2
pair build: halffull/newton/trim
stencil: none
bin: none
(2) pair snap, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.835 | 4.835 | 4.835 Mbytes
Step Temp KinEng PotEng TotEng Press Pxx Pyy Pzz
0 8000 222.32745 -846.66062 -624.33318 10234249 10210805 10267435 10224506
50 4199.4229 116.70587 -723.2423 -606.53643 10256033 10157803 10280166 10330129
100 3820.2509 106.16833 -710.43537 -604.26704 10232872 10216484 10241309 10240825
150 4413.2948 122.64957 -710.09702 -587.44745 10254093 10323013 10265454 10173810
200 4688.024 130.28455 -702.26198 -571.97742 10306186 10281632 10342390 10294536
250 4997.165 138.87587 -720.58476 -581.70889 10284438 10220856 10360231 10272226
300 4640.4911 128.96357 -710.75063 -581.78706 10263301 10264007 10290526 10235369
350 4929.5117 136.99572 -707.2526 -570.25688 10290742 10359920 10284236 10228071
400 4700.9354 130.64337 -697.90277 -567.2594 10250682 10277287 10246032 10228729
450 5108.4971 141.96989 -700.57144 -558.60155 10289765 10323648 10306588 10239058
500 5146.7039 143.03169 -700.33221 -557.30052 10334303 10349736 10358785 10294387
Loop time of 36.7771 on 1 procs for 500 steps with 216 atoms
Performance: 1.175 ns/day, 20.432 hours/ns, 13.595 timesteps/s, 2.937 katom-step/s
99.8% CPU use with 1 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 36.766 | 36.766 | 36.766 | 0.0 | 99.97
Neigh | 0.0010226 | 0.0010226 | 0.0010226 | 0.0 | 0.00
Comm | 0.0033205 | 0.0033205 | 0.0033205 | 0.0 | 0.01
Output | 0.00020657 | 0.00020657 | 0.00020657 | 0.0 | 0.00
Modify | 0.0047621 | 0.0047621 | 0.0047621 | 0.0 | 0.01
Other | | 0.001464 | | | 0.00
Nlocal: 216 ave 216 max 216 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Nghost: 1746 ave 1746 max 1746 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Neighs: 1767 ave 1767 max 1767 min
Histogram: 1 0 0 0 0 0 0 0 0 0
FullNghs: 32846 ave 32846 max 32846 min
Histogram: 1 0 0 0 0 0 0 0 0 0
Total # of neighbors = 32846
Ave neighs/atom = 152.06481
Neighbor list builds = 1
Dangerous builds = 0
Total wall time: 0:00:36

View File

@ -0,0 +1,105 @@
LAMMPS (28 Mar 2023 - Development)
using 1 OpenMP thread(s) per MPI task
#Carbon SNAP example: 216 atom diamond unit cell simulated NVT at ~1,000GPa and 5,000K
units metal
atom_style atomic
boundary p p p
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Crystal orientation and MD box creation #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
lattice diamond 2.845
Lattice spacing in x,y,z = 2.845 2.845 2.845
region Bbox block 0 3 0 3 0 3
create_box 1 Bbox
Created orthogonal box = (0 0 0) to (8.535 8.535 8.535)
1 by 2 by 2 MPI processor grid
create_atoms 1 region Bbox basis 1 1
Created 216 atoms
using lattice units in orthogonal box = (0 0 0) to (8.535 8.535 8.535)
create_atoms CPU = 0.000 seconds
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Interatomic potential parameters #
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# Specify hybrid with SNAP, ZBL, and long-range C_SNAP_2021.10.15.quadratic.ulomb
pair_style hybrid/overlay zbl 0.1 0.2 snap
pair_coeff 1 1 zbl 10 10
pair_coeff * * snap C_SNAP_2021.10.15.quadratic.snapcoeff C_SNAP_2021.10.15.quadratic.snapparam C
SNAP Element = C, Radius 0.5, Weight 1
SNAP keyword rcutfac 2.7
SNAP keyword twojmax 8
SNAP keyword rfac0 0.99363
SNAP keyword rmin0 0.0
SNAP keyword bzeroflag 0
SNAP keyword quadraticflag 1
mass * 12.01
velocity all create 8000.0 3412461 loop geom
fix NVE all nve
fix NVT all langevin 5000.0 5000.0 0.1 3216548
thermo 50
thermo_style custom step temp ke pe etotal press pxx pyy pzz
run 500
Neighbor list info ...
update: every = 1 steps, delay = 0 steps, check = yes
max neighbors/atom: 2000, page size: 100000
master list distance cutoff = 4.7
ghost atom cutoff = 4.7
binsize = 2.35, bins = 4 4 4
2 neighbor lists, perpetual/occasional/extra = 2 0 0
(1) pair zbl, perpetual, half/full trim from (2)
attributes: half, newton on, cut 2.2
pair build: halffull/newton/trim
stencil: none
bin: none
(2) pair snap, perpetual
attributes: full, newton on
pair build: full/bin/atomonly
stencil: full/bin/3d
bin: standard
Per MPI rank memory allocation (min/avg/max) = 4.681 | 4.681 | 4.681 Mbytes
Step Temp KinEng PotEng TotEng Press Pxx Pyy Pzz
0 8000 222.32745 -846.66062 -624.33318 10234249 10210805 10267435 10224506
50 4382.3571 121.78978 -715.70492 -593.91513 10262157 10278549 10181545 10326376
100 4546.1549 126.34188 -713.11818 -586.77631 10261694 10257647 10333666 10193770
150 5109.4576 141.99658 -708.87952 -566.88294 10268132 10248182 10248240 10307974
200 4764.2181 132.40206 -712.16881 -579.76675 10329903 10238991 10379394 10371323
250 4989.5099 138.66313 -710.39748 -571.73435 10282678 10321057 10274124 10252854
300 4853.3102 134.87801 -699.98167 -565.10366 10343314 10204138 10430172 10395634
350 4788.1153 133.06618 -705.14381 -572.07763 10325571 10312657 10267999 10396058
400 5055.7813 140.50487 -707.38537 -566.8805 10323176 10357258 10310733 10301536
450 5182.3198 144.02149 -695.11614 -551.09465 10345564 10358486 10346325 10331881
500 5311.077 147.59977 -691.32767 -543.7279 10308823 10242668 10214102 10469700
Loop time of 11.5932 on 4 procs for 500 steps with 216 atoms
Performance: 3.726 ns/day, 6.441 hours/ns, 43.129 timesteps/s, 9.316 katom-step/s
94.9% CPU use with 4 MPI tasks x 1 OpenMP threads
MPI task timing breakdown:
Section | min time | avg time | max time |%varavg| %total
---------------------------------------------------------------
Pair | 10.241 | 10.446 | 10.695 | 5.0 | 90.11
Neigh | 0 | 0 | 0 | 0.0 | 0.00
Comm | 0.87613 | 1.1187 | 1.3179 | 14.9 | 9.65
Output | 0.0002656 | 0.0010016 | 0.0015521 | 1.5 | 0.01
Modify | 0.0019493 | 0.0020668 | 0.0021577 | 0.2 | 0.02
Other | | 0.02508 | | | 0.22
Nlocal: 54 ave 54 max 54 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Nghost: 1082 ave 1082 max 1082 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Neighs: 432 ave 432 max 432 min
Histogram: 4 0 0 0 0 0 0 0 0 0
FullNghs: 8532 ave 8532 max 8532 min
Histogram: 4 0 0 0 0 0 0 0 0 0
Total # of neighbors = 34128
Ave neighs/atom = 158
Neighbor list builds = 0
Dangerous builds = 0
Total wall time: 0:00:11

View File

@ -1,4 +1,25 @@
# Change Log
# CHANGELOG
## [3.7.02](https://github.com/kokkos/kokkos/tree/3.7.02) (2023-05-17)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.01...3.7.02)
### Backends and Archs Enhancements:
#### CUDA
- Add Hopper support and update nvcc_wrapper to work with CUDA-12 [\#5693](https://github.com/kokkos/kokkos/pull/5693)
### General Enhancements:
- sprintf -> snprintf [\#5787](https://github.com/kokkos/kokkos/pull/5787)
### Build System:
- Add error message when not using `hipcc` and when `CMAKE_CXX_STANDARD` is not set [\#5945](https://github.com/kokkos/kokkos/pull/5945)
### Bug Fixes:
- Fix Scratch allocation alignment issues [\#5692](https://github.com/kokkos/kokkos/pull/5692)
- Fix Intel Classic Compiler ICE [\#5710](https://github.com/kokkos/kokkos/pull/5710)
- Don't install std algorithm headers multiple times [\#5711](https://github.com/kokkos/kokkos/pull/5711)
- Fix static init order issue in InitializationSettings [\#5721](https://github.com/kokkos/kokkos/pull/5721)
- Fix src/dst Properties in deep_copy(DynamicView,View) [\#5732](https://github.com/kokkos/kokkos/pull/5732)
- Fix build on Fedora Rawhide [\#5782](https://github.com/kokkos/kokkos/pull/5782)
- Finalize HIP lock arrays [\#5694](https://github.com/kokkos/kokkos/pull/5694)
- Fix CUDA lock arrays for current Desul [\#5812](https://github.com/kokkos/kokkos/pull/5812)
- Set the correct device/context in InterOp tests [\#5701](https://github.com/kokkos/kokkos/pull/5701)
## [3.7.01](https://github.com/kokkos/kokkos/tree/3.7.01) (2022-12-01)
[Full Changelog](https://github.com/kokkos/kokkos/compare/3.7.00...3.7.01)

View File

@ -129,7 +129,7 @@ ENDIF()
set(Kokkos_VERSION_MAJOR 3)
set(Kokkos_VERSION_MINOR 7)
set(Kokkos_VERSION_PATCH 01)
set(Kokkos_VERSION_PATCH 02)
set(Kokkos_VERSION "${Kokkos_VERSION_MAJOR}.${Kokkos_VERSION_MINOR}.${Kokkos_VERSION_PATCH}")
math(EXPR KOKKOS_VERSION "${Kokkos_VERSION_MAJOR} * 10000 + ${Kokkos_VERSION_MINOR} * 100 + ${Kokkos_VERSION_PATCH}")

View File

@ -12,7 +12,7 @@ endif
KOKKOS_VERSION_MAJOR = 3
KOKKOS_VERSION_MINOR = 7
KOKKOS_VERSION_PATCH = 01
KOKKOS_VERSION_PATCH = 02
KOKKOS_VERSION = $(shell echo $(KOKKOS_VERSION_MAJOR)*10000+$(KOKKOS_VERSION_MINOR)*100+$(KOKKOS_VERSION_PATCH) | bc)
# Options: Cuda,HIP,SYCL,OpenMPTarget,OpenMP,Threads,Serial

View File

@ -25,7 +25,7 @@ INSTALL (
# These will get ignored for standalone CMake and a true interface library made
KOKKOS_ADD_INTERFACE_LIBRARY(
kokkosalgorithms
HEADERS ${ALGO_HEADERS}
NOINSTALLHEADERS ${ALGO_HEADERS}
SOURCES ${ALGO_SOURCES}
)
KOKKOS_LIB_INCLUDE_DIRECTORIES(kokkosalgorithms

View File

@ -214,6 +214,9 @@ GLOBAL_SET(KOKKOS_AMDGPU_OPTIONS)
IF(KOKKOS_ENABLE_HIP)
SET(AMDGPU_ARCH_FLAG "--offload-arch")
IF(NOT KOKKOS_CXX_COMPILER_ID STREQUAL HIPCC)
IF(KOKKOS_CXX_STANDARD STREQUAL 14 AND NOT CMAKE_CXX_STANDARD)
message(FATAL_ERROR "Set CMAKE_CXX_STANDARD to 14")
ENDIF()
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS -x hip)
IF(DEFINED ENV{ROCM_PATH})
GLOBAL_APPEND(KOKKOS_AMDGPU_OPTIONS --rocm-path=$ENV{ROCM_PATH})

View File

@ -534,13 +534,6 @@ FUNCTION(KOKKOS_ADD_INTERFACE_LIBRARY NAME)
IF (KOKKOS_HAS_TRILINOS)
TRIBITS_ADD_LIBRARY(${NAME} ${ARGN})
ELSE()
CMAKE_PARSE_ARGUMENTS(PARSE
""
""
"HEADERS;SOURCES"
${ARGN}
)
ADD_LIBRARY(${NAME} INTERFACE)
KOKKOS_INTERNAL_ADD_LIBRARY_INSTALL(${NAME})
ENDIF()

View File

@ -915,8 +915,8 @@ inline void deep_copy(const View<T, DP...>& dst,
template <class T, class... DP, class... SP>
inline void deep_copy(const Kokkos::Experimental::DynamicView<T, DP...>& dst,
const View<T, SP...>& src) {
using dst_type = Kokkos::Experimental::DynamicView<T, SP...>;
using src_type = View<T, DP...>;
using dst_type = Kokkos::Experimental::DynamicView<T, DP...>;
using src_type = View<T, SP...>;
using dst_execution_space = typename ViewTraits<T, DP...>::execution_space;
using src_memory_space = typename ViewTraits<T, SP...>::memory_space;

View File

@ -240,6 +240,83 @@ struct TestDynamicView {
ASSERT_EQ(new_result_sum, (value_type)(da_resize * (da_resize - 1) / 2));
#endif
} // end scope
// Test: Reproducer to demonstrate compile-time error of deep_copy
// of DynamicView to/from on-host View.
// Case 4:
{
using device_view_type = Kokkos::View<Scalar*, Space>;
using host_view_type = typename Kokkos::View<Scalar*, Space>::HostMirror;
view_type device_dynamic_view("on-device DynamicView", 1024,
arg_total_size);
device_view_type device_view("on-device View", arg_total_size);
host_view_type host_view("on-host View", arg_total_size);
unsigned da_size = arg_total_size / 8;
device_dynamic_view.resize_serial(da_size);
// Use parallel_for to populate device_dynamic_view and verify values
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { device_dynamic_view(i) = Scalar(i); });
value_type result_sum = 0.0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i, value_type& partial_sum) {
partial_sum += (value_type)device_dynamic_view(i);
},
result_sum);
ASSERT_EQ(result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
// Use an on-device View as intermediate to deep_copy the
// device_dynamic_view to host, zero out the device_dynamic_view,
// deep_copy from host back to the device_dynamic_view and verify
Kokkos::deep_copy(device_view, device_dynamic_view);
Kokkos::deep_copy(host_view, device_view);
Kokkos::deep_copy(device_view, host_view);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
Kokkos::parallel_for(
Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i) { device_dynamic_view(i) = Scalar(0); });
#endif
Kokkos::deep_copy(device_dynamic_view, device_view);
#if defined(KOKKOS_ENABLE_CXX11_DISPATCH_LAMBDA)
value_type new_result_sum = 0.0;
Kokkos::parallel_reduce(
Kokkos::RangePolicy<execution_space>(0, da_size),
KOKKOS_LAMBDA(const int i, value_type& partial_sum) {
partial_sum += (value_type)device_dynamic_view(i);
},
new_result_sum);
ASSERT_EQ(new_result_sum, (value_type)(da_size * (da_size - 1) / 2));
#endif
// Try to deep_copy device_dynamic_view directly to/from host.
// host-to-device currently fails to compile because DP and SP are
// swapped in the deep_copy implementation.
// Once that's fixed, both deep_copy's will fail at runtime because the
// destination execution space cannot access the source memory space.
try {
Kokkos::deep_copy(host_view, device_dynamic_view);
} catch (std::runtime_error const& error) {
std::string msg = error.what();
std::cerr << "Copy from on-device DynamicView to on-host View failed:\n"
<< msg << std::endl;
}
try {
Kokkos::deep_copy(device_dynamic_view, host_view);
} catch (std::runtime_error const& error) {
std::string msg = error.what();
std::cerr << "Copy from on-host View to on-device DynamicView failed:\n"
<< msg << std::endl;
}
}
}
};

View File

@ -73,7 +73,7 @@ void textcolor(int attr, int fg, int bg) {
char command[40];
/* Command is the control command to the terminal */
sprintf(command, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
snprintf(command, 40, "%c[%d;%d;%dm", 0x1B, attr, fg + 30, bg + 40);
printf("%s", command);
}
void textcolor_standard() { textcolor(RESET, BLACK, WHITE); }

View File

@ -710,7 +710,7 @@ struct CudaParallelLaunchImpl<
" occupancy requests are currently broken."));
}
KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
ensure_cuda_lock_arrays_on_device();
// Invoke the driver function on the device
base_t::invoke_kernel(driver, grid, block, shmem, cuda_instance);

View File

@ -79,8 +79,7 @@ CudaLockArrays g_host_cuda_lock_arrays = {nullptr, 0};
void initialize_host_cuda_lock_arrays() {
#ifdef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
desul::Impl::init_lock_arrays();
DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE();
desul::ensure_cuda_lock_arrays_on_device();
#endif
if (g_host_cuda_lock_arrays.atomic != nullptr) return;
KOKKOS_IMPL_CUDA_SAFE_CALL(
@ -89,7 +88,7 @@ void initialize_host_cuda_lock_arrays() {
Impl::cuda_device_synchronize(
"Kokkos::Impl::initialize_host_cuda_lock_arrays: Pre Init Lock Arrays");
g_host_cuda_lock_arrays.n = Cuda::concurrency();
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
copy_cuda_lock_arrays_to_device();
init_lock_array_kernel_atomic<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256,
256>>>();
Impl::cuda_device_synchronize(
@ -106,7 +105,7 @@ void finalize_host_cuda_lock_arrays() {
g_host_cuda_lock_arrays.atomic = nullptr;
g_host_cuda_lock_arrays.n = 0;
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
copy_cuda_lock_arrays_to_device();
#endif
}

View File

@ -67,7 +67,7 @@ struct CudaLockArrays {
/// \brief This global variable in Host space is the central definition
/// of these arrays.
extern Kokkos::Impl::CudaLockArrays g_host_cuda_lock_arrays;
extern CudaLockArrays g_host_cuda_lock_arrays;
/// \brief After this call, the g_host_cuda_lock_arrays variable has
/// valid, initialized arrays.
@ -105,12 +105,12 @@ namespace Impl {
/// instances in other translation units, we must update this CUDA global
/// variable based on the Host global variable prior to running any kernels
/// that will use it.
/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro.
/// That is the purpose of the ensure_cuda_lock_arrays_on_device function.
__device__
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
__constant__ extern
#endif
Kokkos::Impl::CudaLockArrays g_device_cuda_lock_arrays;
CudaLockArrays g_device_cuda_lock_arrays;
#define CUDA_SPACE_ATOMIC_MASK 0x1FFFF
@ -123,9 +123,7 @@ __device__ inline bool lock_address_cuda_space(void* ptr) {
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & CUDA_SPACE_ATOMIC_MASK;
return (
0 ==
atomicCAS(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0, 1));
return (0 == atomicCAS(&g_device_cuda_lock_arrays.atomic[offset], 0, 1));
}
/// \brief Release lock for the address
@ -138,7 +136,7 @@ __device__ inline void unlock_address_cuda_space(void* ptr) {
size_t offset = size_t(ptr);
offset = offset >> 2;
offset = offset & CUDA_SPACE_ATOMIC_MASK;
atomicExch(&Kokkos::Impl::g_device_cuda_lock_arrays.atomic[offset], 0);
atomicExch(&g_device_cuda_lock_arrays.atomic[offset], 0);
}
} // namespace Impl
@ -151,45 +149,49 @@ namespace {
static int lock_array_copied = 0;
inline int eliminate_warning_for_lock_array() { return lock_array_copied; }
} // namespace
} // namespace Impl
} // namespace Kokkos
/* Dan Ibanez: it is critical that this code be a macro, so that it will
capture the right address for Kokkos::Impl::g_device_cuda_lock_arrays!
putting this in an inline function will NOT do the right thing! */
#define KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \
{ \
if (::Kokkos::Impl::lock_array_copied == 0) { \
KOKKOS_IMPL_CUDA_SAFE_CALL( \
cudaMemcpyToSymbol(Kokkos::Impl::g_device_cuda_lock_arrays, \
&Kokkos::Impl::g_host_cuda_lock_arrays, \
sizeof(Kokkos::Impl::CudaLockArrays))); \
} \
lock_array_copied = 1; \
/// \brief Copy the host-side lock-array descriptor to the device symbol.
///
/// Issues a cudaMemcpyToSymbol of g_host_cuda_lock_arrays into
/// g_device_cuda_lock_arrays, but only while lock_array_copied is still 0.
/// The flag is then set to 1, so later calls skip the copy.
///
/// Linkage is plain `inline` when KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
/// is defined and `inline static` otherwise.
// NOTE(review): presumably internal linkage is wanted without relocatable
// device code so that each translation unit updates its own copy of the
// device symbol -- confirm against the declaration of
// g_device_cuda_lock_arrays.
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
inline
#else
inline static
#endif
void
copy_cuda_lock_arrays_to_device() {
// Perform the host-to-device copy only while the flag is still clear.
if (lock_array_copied == 0) {
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMemcpyToSymbol(g_device_cuda_lock_arrays,
&g_host_cuda_lock_arrays,
sizeof(CudaLockArrays)));
}
// Set unconditionally; subsequent calls become no-ops.
lock_array_copied = 1;
}
#ifndef KOKKOS_ENABLE_IMPL_DESUL_ATOMICS
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
inline void ensure_cuda_lock_arrays_on_device() {}
#else
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()
inline static void ensure_cuda_lock_arrays_on_device() {
copy_cuda_lock_arrays_to_device();
}
#endif
#else
#ifdef KOKKOS_ENABLE_CUDA_RELOCATABLE_DEVICE_CODE
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
inline void ensure_cuda_lock_arrays_on_device() {}
#else
// Still Need COPY_CUDA_LOCK_ARRAYS for team scratch etc.
#define KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
KOKKOS_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \
DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
inline static void ensure_cuda_lock_arrays_on_device() {
copy_cuda_lock_arrays_to_device();
desul::ensure_cuda_lock_arrays_on_device();
}
#endif
#endif /* defined( KOKKOS_ENABLE_IMPL_DESUL_ATOMICS ) */
} // namespace Impl
} // namespace Kokkos
#endif /* defined( KOKKOS_ENABLE_CUDA ) */
#endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP */

View File

@ -428,6 +428,8 @@ void HIPInternal::finalize() {
if (this == &singleton()) {
(void)Kokkos::Impl::hip_global_unique_token_locks(true);
Kokkos::Impl::finalize_host_hip_lock_arrays();
KOKKOS_IMPL_HIP_SAFE_CALL(hipHostFree(constantMemHostStaging));
KOKKOS_IMPL_HIP_SAFE_CALL(hipEventDestroy(constantMemReusable));
}

View File

@ -228,11 +228,6 @@
#define KOKKOS_ENABLE_PRAGMA_SIMD 1
#endif
// FIXME Workaround for ICE with intel 17,18,19,20,21 in Trilinos
#if (KOKKOS_COMPILER_INTEL <= 2100)
#define KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
#endif
// FIXME_SYCL
#if !defined(KOKKOS_ENABLE_SYCL)
#define KOKKOS_ENABLE_PRAGMA_IVDEP 1
@ -653,7 +648,8 @@ static constexpr bool kokkos_omp_on_host() { return false; }
#if (defined(KOKKOS_COMPILER_GNU) || defined(KOKKOS_COMPILER_CLANG) || \
defined(KOKKOS_COMPILER_INTEL) || defined(KOKKOS_COMPILER_PGI)) && \
!defined(_WIN32)
#if (!defined(__linux__) || defined(__GLIBC_MINOR__))
// disable stacktrace for musl-libc
#if !defined(__linux__) || defined(__GLIBC_MINOR__)
#define KOKKOS_IMPL_ENABLE_STACKTRACE
#endif
#define KOKKOS_IMPL_ENABLE_CXXABI

View File

@ -73,9 +73,8 @@ class ScratchMemorySpace {
"Instantiating ScratchMemorySpace on non-execution-space type.");
public:
// Alignment of memory chunks returned by 'get'
// must be a power of two
enum { ALIGN = 8 };
// Minimal overalignment used by view scratch allocations
constexpr static int ALIGN = 8;
private:
mutable char* m_iter_L0 = nullptr;
@ -87,7 +86,9 @@ class ScratchMemorySpace {
mutable int m_offset = 0;
mutable int m_default_level = 0;
enum { MASK = ALIGN - 1 }; // Alignment used by View::shmem_size
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
constexpr static int DEFAULT_ALIGNMENT_MASK = ALIGN - 1;
#endif
public:
//! Tag this class as a memory space
@ -101,39 +102,59 @@ class ScratchMemorySpace {
static constexpr const char* name() { return "ScratchMemorySpace"; }
#ifdef KOKKOS_ENABLE_DEPRECATED_CODE_4
// This function is unused
template <typename IntType>
KOKKOS_INLINE_FUNCTION static IntType align(const IntType& size) {
return (size + MASK) & ~MASK;
KOKKOS_DEPRECATED KOKKOS_INLINE_FUNCTION static constexpr IntType align(
const IntType& size) {
return (size + DEFAULT_ALIGNMENT_MASK) & ~DEFAULT_ALIGNMENT_MASK;
}
#endif
template <typename IntType>
KOKKOS_INLINE_FUNCTION void* get_shmem(const IntType& size,
int level = -1) const {
return get_shmem_common</*aligned*/ false>(size, 1, level);
return get_shmem_common</*alignment_requested*/ false>(size, 1, level);
}
template <typename IntType>
KOKKOS_INLINE_FUNCTION void* get_shmem_aligned(const IntType& size,
const ptrdiff_t alignment,
int level = -1) const {
return get_shmem_common</*aligned*/ true>(size, alignment, level);
return get_shmem_common</*alignment_requested*/ true>(size, alignment,
level);
}
private:
template <bool aligned, typename IntType>
template <bool alignment_requested, typename IntType>
KOKKOS_INLINE_FUNCTION void* get_shmem_common(const IntType& size,
const ptrdiff_t alignment,
int level = -1) const {
if (level == -1) level = m_default_level;
auto& m_iter = (level == 0) ? m_iter_L0 : m_iter_L1;
auto& m_end = (level == 0) ? m_end_L0 : m_end_L1;
char* previous = m_iter;
const ptrdiff_t missalign = size_t(m_iter) % alignment;
if (missalign) m_iter += alignment - missalign;
auto& m_iter = (level == 0) ? m_iter_L0 : m_iter_L1;
auto& m_end = (level == 0) ? m_end_L0 : m_end_L1;
void* tmp = m_iter + m_offset * (aligned ? size : align(size));
if (m_end < (m_iter += (aligned ? size : align(size)) * m_multiplier)) {
m_iter = previous; // put it back like it was
if (alignment_requested) {
const ptrdiff_t missalign = size_t(m_iter) % alignment;
if (missalign) m_iter += alignment - missalign;
}
// This is each thread's start pointer for its allocation
// Note: for team scratch m_offset is 0, since every
// thread will get back the same shared pointer
void* tmp = m_iter + m_offset * size;
ptrdiff_t increment = size * m_multiplier;
// increment m_iter first and decrement it again if not
// enough memory was available. In the non-failing path
// this will save instructions.
m_iter += increment;
if (m_end < m_iter) {
// Request did overflow: reset the base team ptr, and
// return nullptr
m_iter -= increment;
tmp = nullptr;
#ifdef KOKKOS_ENABLE_DEBUG
// mfh 23 Jun 2015: printf call consumes 25 registers
// in a CUDA build, so only print in debug mode. The
@ -143,7 +164,6 @@ class ScratchMemorySpace {
"%ld byte(s); remaining capacity is %ld byte(s)\n",
long(size), long(m_end - m_iter));
#endif // KOKKOS_ENABLE_DEBUG
tmp = nullptr;
}
return tmp;
}

View File

@ -203,19 +203,11 @@ class Serial {
static const char* name();
Impl::SerialInternal* impl_internal_space_instance() const {
#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
return m_space_instance;
#else
return m_space_instance.get();
#endif
}
private:
#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
Impl::SerialInternal* m_space_instance;
#else
Kokkos::Impl::HostSharedPtr<Impl::SerialInternal> m_space_instance;
#endif
//--------------------------------------------------------------------------
};

View File

@ -67,6 +67,8 @@ KOKKOS_IMPL_WARNING("Including non-public Kokkos header files is not allowed.")
#include <impl/Kokkos_Tools.hpp>
#include <Kokkos_MinMaxClamp.hpp>
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
@ -1692,19 +1694,27 @@ class View : public ViewTraits<DataType, Properties...> {
arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6, arg_N7));
}
private:
// Want to be able to align to minimum scratch alignment or sizeof or alignof
// elements
static constexpr size_t scratch_value_alignment =
::Kokkos::max(::Kokkos::max(sizeof(typename traits::value_type),
alignof(typename traits::value_type)),
static_cast<size_t>(
traits::execution_space::scratch_memory_space::ALIGN));
public:
static KOKKOS_INLINE_FUNCTION size_t
shmem_size(typename traits::array_layout const& arg_layout) {
return map_type::memory_span(arg_layout) +
sizeof(typename traits::value_type);
return map_type::memory_span(arg_layout) + scratch_value_alignment;
}
explicit KOKKOS_INLINE_FUNCTION View(
const typename traits::execution_space::scratch_memory_space& arg_space,
const typename traits::array_layout& arg_layout)
: View(Impl::ViewCtorProp<pointer_type>(
reinterpret_cast<pointer_type>(arg_space.get_shmem_aligned(
map_type::memory_span(arg_layout),
sizeof(typename traits::value_type)))),
: View(Impl::ViewCtorProp<pointer_type>(reinterpret_cast<pointer_type>(
arg_space.get_shmem_aligned(map_type::memory_span(arg_layout),
scratch_value_alignment))),
arg_layout) {}
explicit KOKKOS_INLINE_FUNCTION View(
@ -1722,7 +1732,7 @@ class View : public ViewTraits<DataType, Properties...> {
map_type::memory_span(typename traits::array_layout(
arg_N0, arg_N1, arg_N2, arg_N3, arg_N4, arg_N5, arg_N6,
arg_N7)),
sizeof(typename traits::value_type)))),
scratch_value_alignment))),
typename traits::array_layout(arg_N0, arg_N1, arg_N2, arg_N3,
arg_N4, arg_N5, arg_N6, arg_N7),
check_input_args::yes) {

View File

@ -173,14 +173,8 @@ void SerialInternal::resize_thread_team_data(size_t pool_reduce_bytes,
} // namespace Impl
Serial::Serial()
#ifdef KOKKOS_IMPL_WORKAROUND_ICE_IN_TRILINOS_WITH_OLD_INTEL_COMPILERS
: m_space_instance(&Impl::SerialInternal::singleton()) {
}
#else
: m_space_instance(&Impl::SerialInternal::singleton(),
[](Impl::SerialInternal*) {}) {
}
#endif
[](Impl::SerialInternal*) {}) {}
void Serial::print_configuration(std::ostream& os, bool /*verbose*/) const {
os << "Host Serial Execution Space:\n";

View File

@ -63,11 +63,10 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
const FunctorType m_functor;
const MDRangePolicy m_mdr_policy;
const Policy m_policy;
void exec() const {
const typename Policy::member_type e = m_policy.end();
for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
const typename Policy::member_type e = m_mdr_policy.m_num_tiles;
for (typename Policy::member_type i = 0; i < e; ++i) {
iterate_type(m_mdr_policy, m_functor)(i);
}
}
@ -85,9 +84,7 @@ class ParallelFor<FunctorType, Kokkos::MDRangePolicy<Traits...>,
}
inline ParallelFor(const FunctorType& arg_functor,
const MDRangePolicy& arg_policy)
: m_functor(arg_functor),
m_mdr_policy(arg_policy),
m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)) {}
: m_functor(arg_functor), m_mdr_policy(arg_policy) {}
};
template <class FunctorType, class ReducerType, class... Traits>
@ -120,13 +117,12 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
const FunctorType m_functor;
const MDRangePolicy m_mdr_policy;
const Policy m_policy;
const ReducerType m_reducer;
const pointer_type m_result_ptr;
inline void exec(reference_type update) const {
const typename Policy::member_type e = m_policy.end();
for (typename Policy::member_type i = m_policy.begin(); i < e; ++i) {
const typename Policy::member_type e = m_mdr_policy.m_num_tiles;
for (typename Policy::member_type i = 0; i < e; ++i) {
iterate_type(m_mdr_policy, m_functor, update)(i);
}
}
@ -148,7 +144,8 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
const size_t team_shared_size = 0; // Never shrinks
const size_t thread_local_size = 0; // Never shrinks
auto* internal_instance = m_policy.space().impl_internal_space_instance();
auto* internal_instance =
m_mdr_policy.space().impl_internal_space_instance();
// Need to lock resize_thread_team_data
std::lock_guard<std::mutex> lock(
internal_instance->m_thread_team_data_mutex);
@ -181,7 +178,6 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
void*> = nullptr)
: m_functor(arg_functor),
m_mdr_policy(arg_policy),
m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)),
m_reducer(InvalidType()),
m_result_ptr(arg_result_view.data()) {
static_assert(Kokkos::is_view<HostViewType>::value,
@ -197,7 +193,6 @@ class ParallelReduce<FunctorType, Kokkos::MDRangePolicy<Traits...>, ReducerType,
MDRangePolicy arg_policy, const ReducerType& reducer)
: m_functor(arg_functor),
m_mdr_policy(arg_policy),
m_policy(Policy(0, m_mdr_policy.m_num_tiles).set_chunk_size(1)),
m_reducer(reducer),
m_result_ptr(reducer.view().data()) {
/*static_assert( std::is_same< typename ViewType::memory_space

View File

@ -1165,6 +1165,5 @@ void _kokkos_pgi_compiler_bug_workaround() {}
#endif
} // namespace Kokkos
Kokkos::Impl::InitializationSettingsHelper<std::string>::storage_type const
Kokkos::Impl::InitializationSettingsHelper<std::string>::unspecified =
"some string we don't expect user would ever provide";
constexpr char
Kokkos::Impl::InitializationSettingsHelper<std::string>::unspecified[];

View File

@ -104,7 +104,9 @@ struct InitializationSettingsHelper<std::string> {
using value_type = std::string;
using storage_type = std::string;
static storage_type const unspecified;
// prefer c-string to avoid static initialization order nightmare
static constexpr char unspecified[] =
"some string we don't expect user would ever provide";
};
} // namespace Impl

View File

@ -655,9 +655,9 @@ void initialize(const std::string& profileLibrary) {
char* envProfileLibrary = const_cast<char*>(profileLibrary.c_str());
const auto envProfileCopy =
std::make_unique<char[]>(strlen(envProfileLibrary) + 1);
sprintf(envProfileCopy.get(), "%s", envProfileLibrary);
const size_t envProfileLen = strlen(envProfileLibrary) + 1;
const auto envProfileCopy = std::make_unique<char[]>(envProfileLen);
snprintf(envProfileCopy.get(), envProfileLen, "%s", envProfileLibrary);
char* profileLibraryName = strtok(envProfileCopy.get(), ";");

View File

@ -69,9 +69,10 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device,
nargs = (do_threads ? 1 : 0) + (do_numa ? 1 : 0) + (do_device ? 1 : 0) +
(do_other ? 4 : 0) + (do_tune ? 1 : 0);
char** args_kokkos = new char*[nargs];
char** args_kokkos = new char*[nargs];
const int max_args_size = 45;
for (int i = 0; i < nargs; i++) {
args_kokkos[i] = new char[45];
args_kokkos[i] = new char[max_args_size];
delete_these.insert(args_kokkos[i]);
}
@ -112,7 +113,7 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device,
#endif
init_args.num_threads = nthreads;
sprintf(args_kokkos[threads_idx], "--threads=%i", nthreads);
snprintf(args_kokkos[threads_idx], max_args_size, "--threads=%i", nthreads);
}
if (do_numa) {
@ -130,24 +131,27 @@ char** init_kokkos_args(bool do_threads, bool do_numa, bool do_device,
#endif
init_args.num_numa = numa;
sprintf(args_kokkos[numa_idx], "--numa=%i", numa);
snprintf(args_kokkos[numa_idx], max_args_size, "--numa=%i", numa);
}
if (do_device) {
init_args.device_id = 0;
sprintf(args_kokkos[device_idx], "--device-id=%i", 0);
snprintf(args_kokkos[device_idx], max_args_size, "--device-id=%i", 0);
}
if (do_other) {
sprintf(args_kokkos[0], "--dummyarg=1");
sprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0)], "--dummy2arg");
sprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0) + 1], "dummy3arg");
sprintf(args_kokkos[device_idx + (do_device ? 1 : 0)], "dummy4arg=1");
snprintf(args_kokkos[0], max_args_size, "--dummyarg=1");
snprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0)], max_args_size,
"--dummy2arg");
snprintf(args_kokkos[threads_idx + (do_threads ? 1 : 0) + 1], max_args_size,
"dummy3arg");
snprintf(args_kokkos[device_idx + (do_device ? 1 : 0)], max_args_size,
"dummy4arg=1");
}
if (do_tune) {
init_args.tune_internals = true;
sprintf(args_kokkos[tune_idx], "--kokkos-tune-internals");
snprintf(args_kokkos[tune_idx], max_args_size, "--kokkos-tune-internals");
}
return args_kokkos;

View File

@ -91,7 +91,7 @@ void test_shared_alloc() {
// Since always executed on host space, leave [=]
Kokkos::parallel_for(range, [=](int i) {
char name[64];
sprintf(name, "test_%.2d", i);
snprintf(name, 64, "test_%.2d", i);
r[i] = RecordMemS::allocate(s, name, size * (i + 1));
h[i] = Header::get_header(r[i]->data());
@ -135,7 +135,7 @@ void test_shared_alloc() {
Kokkos::parallel_for(range, [=](size_t i) {
char name[64];
sprintf(name, "test_%.2d", int(i));
snprintf(name, 64, "test_%.2d", int(i));
RecordFull* rec = RecordFull::allocate(s, name, size * (i + 1));

View File

@ -1551,14 +1551,16 @@ struct TestScratchAlignment {
double x, y, z;
};
TestScratchAlignment() {
test(true);
test(false);
test_view(true);
test_view(false);
test_minimal();
test_raw();
}
using ScratchView =
Kokkos::View<TestScalar *, typename ExecSpace::scratch_memory_space>;
using ScratchViewInt =
Kokkos::View<int *, typename ExecSpace::scratch_memory_space>;
void test(bool allocate_small) {
void test_view(bool allocate_small) {
int shmem_size = ScratchView::shmem_size(11);
#ifdef KOKKOS_ENABLE_OPENMPTARGET
int team_size =
@ -1580,12 +1582,68 @@ struct TestScratchAlignment {
});
Kokkos::fence();
}
void test_minimal() {
using member_type = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
Kokkos::TeamPolicy<ExecSpace> policy(1, 1);
size_t scratch_size = sizeof(int);
Kokkos::View<int, ExecSpace> flag("Flag");
Kokkos::parallel_for(
policy.set_scratch_size(0, Kokkos::PerTeam(scratch_size)),
KOKKOS_LAMBDA(const member_type &team) {
int *scratch_ptr = (int *)team.team_shmem().get_shmem(scratch_size);
if (scratch_ptr == nullptr) flag() = 1;
});
Kokkos::fence();
int minimal_scratch_allocation_failed = 0;
Kokkos::deep_copy(minimal_scratch_allocation_failed, flag);
ASSERT_TRUE(minimal_scratch_allocation_failed == 0);
}
// Exercises the raw scratch interface (get_shmem / get_shmem_aligned) with a
// single team of one thread and 1024 bytes of per-team level-0 scratch, and
// checks the distances between and the alignment of the returned pointers.
// All pointer differences below are counted in units of int; the expected
// values assume sizeof(int) == 4.
void test_raw() {
using member_type = typename Kokkos::TeamPolicy<ExecSpace>::member_type;
Kokkos::TeamPolicy<ExecSpace> policy(1, 1);
// Set to 1 by the kernel as soon as any check fails.
Kokkos::View<int, ExecSpace> flag("Flag");
Kokkos::parallel_for(
policy.set_scratch_size(0, Kokkos::PerTeam(1024)),
KOKKOS_LAMBDA(const member_type &team) {
// Three requests without an alignment argument: 24, 32 and 12 bytes.
int *scratch_ptr1 = (int *)team.team_shmem().get_shmem(24);
int *scratch_ptr2 = (int *)team.team_shmem().get_shmem(32);
int *scratch_ptr3 = (int *)team.team_shmem().get_shmem(12);
// Expect the allocations to be back to back with no padding:
// 24 bytes == 6 ints, 32 bytes == 8 ints.
if ((int(scratch_ptr2 - scratch_ptr1) != 6) ||
(int(scratch_ptr3 - scratch_ptr2) != 8))
flag() = 1;
// scratch_ptr3 + 3 ints is the end of the third allocation. Depending
// on whether that position is an odd or even multiple of sizeof(int),
// request 24 bytes (6 ints) or 12 bytes (3 ints), so that in both cases
// the end of this allocation falls on an odd int index, i.e. it is not
// a multiple of 8 bytes.
if (((scratch_ptr3 - static_cast<int *>(nullptr)) + 3) % 2 == 1)
scratch_ptr1 = (int *)team.team_shmem().get_shmem_aligned(24, 4);
else {
scratch_ptr1 = (int *)team.team_shmem().get_shmem_aligned(12, 4);
}
scratch_ptr2 = (int *)team.team_shmem().get_shmem_aligned(32, 8);
scratch_ptr3 = (int *)team.team_shmem().get_shmem_aligned(8, 4);
// The 8-byte aligned request must have skipped one int of padding:
// 6 + 1 == 7 ints after the 24-byte case, 3 + 1 == 4 ints after the
// 12-byte case.
if ((int(scratch_ptr2 - scratch_ptr1) != 7) &&
(int(scratch_ptr2 - scratch_ptr1) != 4))
flag() = 1;
// 32 bytes == 8 ints, and no padding is expected for the 4-byte aligned
// request that follows.
if (int(scratch_ptr3 - scratch_ptr2) != 8) flag() = 1;
// Each pointer must honor the alignment that was requested for it.
if ((int(size_t(scratch_ptr1) % 4) != 0) ||
(int(size_t(scratch_ptr2) % 8) != 0) ||
(int(size_t(scratch_ptr3) % 4) != 0))
flag() = 1;
});
Kokkos::fence();
// Copy the flag to the host; any raised flag fails the test.
int raw_get_shmem_alignment_failed = 0;
Kokkos::deep_copy(raw_get_shmem_alignment_failed, flag);
ASSERT_TRUE(raw_get_shmem_alignment_failed == 0);
}
};
} // namespace
namespace {
template <class ExecSpace>
struct TestTeamPolicyHandleByValue {
using scalar = double;

View File

@ -59,9 +59,11 @@ __global__ void offset(int* p) {
// Test whether allocations survive Kokkos initialize/finalize if done via Raw
// Cuda.
TEST(cuda, raw_cuda_interop) {
// Make sure that we use the same device for all allocations
Kokkos::initialize();
int* p;
KOKKOS_IMPL_CUDA_SAFE_CALL(cudaMalloc(&p, sizeof(int) * 100));
Kokkos::initialize();
Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, 100);
Kokkos::deep_copy(v, 5);

View File

@ -48,9 +48,11 @@
namespace Test {
// Test Interoperability with Cuda Streams
TEST(cuda, raw_cuda_streams) {
// Make sure that we use the same device for all allocations
Kokkos::initialize();
cudaStream_t stream;
cudaStreamCreate(&stream);
Kokkos::initialize();
int* p;
cudaMalloc(&p, sizeof(int) * 100);
using MemorySpace = typename TEST_EXECSPACE::memory_space;

View File

@ -59,9 +59,11 @@ __global__ void offset(int* p) {
// Test whether allocations survive Kokkos initialize/finalize if done via Raw
// HIP.
TEST(hip, raw_hip_interop) {
// Make sure that we use the same device for all allocations
Kokkos::initialize();
int* p;
KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&p, sizeof(int) * 100));
Kokkos::initialize();
Kokkos::View<int*, Kokkos::MemoryTraits<Kokkos::Unmanaged>> v(p, 100);
Kokkos::deep_copy(v, 5);

View File

@ -50,9 +50,11 @@ namespace Test {
// The difference with the CUDA tests are: raw HIP vs raw CUDA and no launch
// bound in HIP due to an error when computing the block size.
TEST(hip, raw_hip_streams) {
// Make sure that we use the same device for all allocations
Kokkos::initialize();
hipStream_t stream;
KOKKOS_IMPL_HIP_SAFE_CALL(hipStreamCreate(&stream));
Kokkos::initialize();
int* p;
KOKKOS_IMPL_HIP_SAFE_CALL(hipMalloc(&p, sizeof(int) * 100));
using MemorySpace = typename TEST_EXECSPACE::memory_space;

View File

@ -52,8 +52,8 @@ namespace Test {
// Test whether allocations survive Kokkos initialize/finalize if done via Raw
// SYCL.
TEST(sycl, raw_sycl_interop) {
// Make sure all queues use the same context
Kokkos::initialize();
Kokkos::Experimental::SYCL default_space;
sycl::context default_context = default_space.sycl_queue().get_context();

View File

@ -51,6 +51,7 @@ namespace Test {
// Test whether external allocations can be accessed by the default queue.
TEST(sycl, raw_sycl_interop_context_1) {
// Make sure all queues use the same context
Kokkos::Experimental::SYCL default_space;
sycl::context default_context = default_space.sycl_queue().get_context();

View File

@ -48,9 +48,13 @@
namespace Test {
// Test Interoperability with SYCL Streams
TEST(sycl, raw_sycl_queues) {
sycl::default_selector device_selector;
sycl::queue queue(device_selector);
// Make sure all queues use the same context
Kokkos::initialize();
Kokkos::Experimental::SYCL default_space;
sycl::context default_context = default_space.sycl_queue().get_context();
sycl::default_selector device_selector;
sycl::queue queue(default_context, device_selector);
int* p = sycl::malloc_device<int>(100, queue);
using MemorySpace = typename TEST_EXECSPACE::memory_space;

View File

@ -76,7 +76,7 @@ namespace Impl {
/// instances in other translation units, we must update this CUDA global
/// variable based on the Host global variable prior to running any kernels
/// that will use it.
/// That is the purpose of the KOKKOS_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE macro.
/// That is the purpose of the ensure_cuda_lock_arrays_on_device function.
__device__
#ifdef __CUDACC_RDC__
__constant__ extern
@ -138,33 +138,42 @@ namespace {
static int lock_array_copied = 0;
inline int eliminate_warning_for_lock_array() { return lock_array_copied; }
} // namespace
#ifdef __CUDACC_RDC__
inline
#else
inline static
#endif
void
copy_cuda_lock_arrays_to_device() {
if (lock_array_copied == 0) {
cudaMemcpyToSymbol(CUDA_SPACE_ATOMIC_LOCKS_DEVICE,
&CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h,
sizeof(int32_t*));
cudaMemcpyToSymbol(CUDA_SPACE_ATOMIC_LOCKS_NODE,
&CUDA_SPACE_ATOMIC_LOCKS_NODE_h,
sizeof(int32_t*));
}
lock_array_copied = 1;
}
} // namespace Impl
} // namespace desul
/* It is critical that this code be a macro, so that it will
capture the right address for desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE
putting this in an inline function will NOT do the right thing! */
#define DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE() \
{ \
if (::desul::Impl::lock_array_copied == 0) { \
cudaMemcpyToSymbol(::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE, \
&::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h, \
sizeof(int32_t*)); \
cudaMemcpyToSymbol(::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_NODE, \
&::desul::Impl::CUDA_SPACE_ATOMIC_LOCKS_NODE_h, \
sizeof(int32_t*)); \
} \
::desul::Impl::lock_array_copied = 1; \
}
#endif /* defined( __CUDACC__ ) */
#endif /* defined( DESUL_HAVE_CUDA_ATOMICS ) */
namespace desul {
#if defined(__CUDACC_RDC__) || (!defined(__CUDACC__))
#define DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE()
inline void ensure_cuda_lock_arrays_on_device() {}
#else
#define DESUL_ENSURE_CUDA_LOCK_ARRAYS_ON_DEVICE() \
DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE()
static inline void ensure_cuda_lock_arrays_on_device() {
Impl::copy_cuda_lock_arrays_to_device();
}
#endif
#endif /* #ifndef KOKKOS_CUDA_LOCKS_HPP_ */
} // namespace desul
#endif /* #ifndef DESUL_ATOMICS_LOCK_ARRAY_CUDA_HPP_ */

View File

@ -70,7 +70,7 @@ void init_lock_arrays_cuda() {
"init_lock_arrays_cuda: cudaMalloc host locks");
auto error_sync1 = cudaDeviceSynchronize();
DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
copy_cuda_lock_arrays_to_device();
check_error_and_throw_cuda(error_sync1, "init_lock_arrays_cuda: post mallocs");
init_lock_arrays_cuda_kernel<<<(CUDA_SPACE_ATOMIC_MASK + 1 + 255) / 256, 256>>>();
auto error_sync2 = cudaDeviceSynchronize();
@ -85,7 +85,7 @@ void finalize_lock_arrays_cuda() {
CUDA_SPACE_ATOMIC_LOCKS_DEVICE_h = nullptr;
CUDA_SPACE_ATOMIC_LOCKS_NODE_h = nullptr;
#ifdef __CUDACC_RDC__
DESUL_IMPL_COPY_CUDA_LOCK_ARRAYS_TO_DEVICE();
copy_cuda_lock_arrays_to_device();
#endif
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,10 @@
# required
rcutfac 2.7
twojmax 8
# optional
rfac0 0.99363
rmin0 0.0
bzeroflag 0
quadraticflag 1

View File

@ -47,6 +47,7 @@ void FixEfieldTIP4P::init()
if (atom->tag_enable == 0) error->all(FLERR, "Fix efield/tip4p requires atom IDs");
if (!atom->q_flag) error->all(FLERR, "Fix efield/tip4p requires atom attribute q");
if (!force->pair) error->all(FLERR, "A TIP4P pair style must be defined fix efield/tip4p");
if (pstr) error->all(FLERR, "Fix efield/tip4p does not support the potential keyword");
int itmp;
double *p_qdist = (double *) force->pair->extract("qdist", itmp);

View File

@ -66,10 +66,10 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
FixACKS2ReaxFFKokkos(class LAMMPS *, int, char **);
~FixACKS2ReaxFFKokkos();
void init() override;
void setup_pre_force(int) override;
void pre_force(int) override;
void cleanup_copy();
void init();
void setup_pre_force(int);
void pre_force(int);
DAT::tdual_ffloat_1d get_s() {return k_s;}
@ -235,11 +235,11 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
void init_shielding_k();
void init_hist();
void allocate_matrix();
void allocate_matrix() override;
void allocate_array();
void deallocate_array();
int bicgstab_solve();
void calculate_Q();
void calculate_Q() override;
int neighflag;
int nlocal,nall,nmax,newton_pair;
@ -251,13 +251,13 @@ class FixACKS2ReaxFFKokkos : public FixACKS2ReaxFF, public KokkosBase {
typename AT::t_int_2d d_sendlist;
typename AT::t_xfloat_1d_um v_buf;
void grow_arrays(int);
void copy_arrays(int, int, int);
void grow_arrays(int) override;
void copy_arrays(int, int, int) override;
void sort_kokkos(Kokkos::BinSort<KeyViewType, BinOp> &Sorter) override;
int pack_exchange(int, double *);
int unpack_exchange(int, double *);
void get_chi_field();
double memory_usage();
int pack_exchange(int, double *) override;
int unpack_exchange(int, double *) override;
void get_chi_field() override;
double memory_usage() override;
void sparse_matvec_acks2(typename AT::t_ffloat_1d &, typename AT::t_ffloat_1d &);
};

View File

@ -2298,7 +2298,7 @@ void SNAKokkos<DeviceType, real_type, vector_length>::compute_s_dsfac(const real
constexpr real_type zero = static_cast<real_type>(0.0);
constexpr real_type onehalf = static_cast<real_type>(0.5);
if (switch_flag == 0) { sfac_outer = zero; dsfac_outer = zero; }
if (switch_flag == 0) { sfac_outer = one; dsfac_outer = zero; }
else if (switch_flag == 1) {
if (r <= rmin0) { sfac_outer = one; dsfac_outer = zero; }
else if (r > rcut) { sfac = zero; dsfac = zero; return; }

View File

@ -404,9 +404,15 @@ void FixQEqReaxFF::init()
efield->init();
if (strcmp(update->unit_style,"real") != 0)
error->all(FLERR,"Must use unit_style real with fix {} and external fields", style);
if (efield->varflag != FixEfield::CONSTANT)
error->all(FLERR,"Cannot (yet) use fix {} with variable efield", style);
if (efield->varflag == FixEfield::ATOM && efield->pstyle != FixEfield::ATOM)
error->all(FLERR,"Atom-style external electric field requires atom-style "
"potential variable when used with fix {}", style);
if (((efield->xstyle != FixEfield::CONSTANT) && domain->xperiodic) ||
((efield->ystyle != FixEfield::CONSTANT) && domain->yperiodic) ||
((efield->zstyle != FixEfield::CONSTANT) && domain->zperiodic))
error->all(FLERR,"Must not have electric field component in direction of periodic "
"boundary when using charge equilibration with ReaxFF.");
if (((fabs(efield->ex) > SMALL) && domain->xperiodic) ||
((fabs(efield->ey) > SMALL) && domain->yperiodic) ||
((fabs(efield->ez) > SMALL) && domain->zperiodic))
@ -1101,26 +1107,36 @@ void FixQEqReaxFF::get_chi_field()
// efield energy is in real units of kcal/mol/angstrom, need to convert to eV
const double factor = -1.0/force->qe2f;
const double qe2f = force->qe2f;
const double factor = -1.0/qe2f;
if (efield->varflag != FixEfield::CONSTANT)
efield->update_efield_variables();
// currently we only support constant efield
// atom selection is for the group of fix efield
if (efield->varflag == FixEfield::CONSTANT) {
double unwrap[3];
const double fx = efield->ex;
const double fy = efield->ey;
const double fz = efield->ez;
const int efgroupbit = efield->groupbit;
double unwrap[3];
const double ex = efield->ex;
const double ey = efield->ey;
const double ez = efield->ez;
const int efgroupbit = efield->groupbit;
// charge interactions
// force = qE, potential energy = F dot x in unwrapped coords
if (efield->varflag != FixEfield::ATOM) {
for (int i = 0; i < nlocal; i++) {
if (mask[i] & efgroupbit) {
if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue;
domain->unmap(x[i],image[i],unwrap);
chi_field[i] = factor*(fx*unwrap[0] + fy*unwrap[1] + fz*unwrap[2]);
chi_field[i] = factor*(ex*unwrap[0] + ey*unwrap[1] + ez*unwrap[2]);
}
}
} else { // must use atom-style potential from FixEfield
for (int i = 0; i < nlocal; i++) {
if (mask[i] & efgroupbit) {
if (region && !region->match(x[i][0],x[i][1],x[i][2])) continue;
chi_field[i] = -efield->efield[i][3];
}
}
}

View File

@ -2129,14 +2129,18 @@ void Atom::add_molecule_atom(Molecule *onemol, int iatom, int ilocal, tagint off
// initialize custom per-atom properties to zero if present
for (int i = 0; i < nivector; ++i) ivector[i][ilocal] = 0;
for (int i = 0; i < ndvector; ++i) dvector[i][ilocal] = 0.0;
for (int i = 0; i < nivector; ++i)
if (ivname[i]) ivector[i][ilocal] = 0;
for (int i = 0; i < ndvector; ++i)
if (dvname[i]) dvector[i][ilocal] = 0.0;
for (int i = 0; i < niarray; ++i)
for (int j = 0; j < icols[i]; ++j)
iarray[i][ilocal][j] = 0;
if (ianame[i])
for (int j = 0; j < icols[i]; ++j)
iarray[i][ilocal][j] = 0;
for (int i = 0; i < ndarray; ++i)
for (int j = 0; j < dcols[i]; ++j)
darray[i][ilocal][j] = 0.0;
if (daname[i])
for (int j = 0; j < dcols[i]; ++j)
darray[i][ilocal][j] = 0.0;
if (molecular != Atom::MOLECULAR) return;

View File

@ -41,7 +41,7 @@ using namespace FixConst;
FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
Fix(lmp, narg, arg), xstr(nullptr), ystr(nullptr), zstr(nullptr), estr(nullptr),
idregion(nullptr), region(nullptr), efield(nullptr)
pstr(nullptr), idregion(nullptr), region(nullptr), efield(nullptr)
{
if (narg < 6) utils::missing_cmd_args(FLERR, std::string("fix ") + style, error);
@ -58,7 +58,7 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
virial_global_flag = virial_peratom_flag = 1;
qe2f = force->qe2f;
xstr = ystr = zstr = nullptr;
xstyle = ystyle = zstyle = estyle = pstyle = NONE;
if (utils::strmatch(arg[3], "^v_")) {
xstr = utils::strdup(arg[3] + 2);
@ -100,11 +100,22 @@ FixEfield::FixEfield(LAMMPS *lmp, int narg, char **arg) :
} else
error->all(FLERR, "Unsupported argument for fix {} energy command: {}", style, arg[iarg]);
iarg += 2;
} else if (strcmp(arg[iarg], "potential") == 0) {
  // "potential" takes one value, which must be a variable reference of the
  // form v_name; the style of that variable is validated later in init().
  if (iarg + 2 > narg)
    utils::missing_cmd_args(FLERR, std::string("fix ") + style + " potential", error);
  if (utils::strmatch(arg[iarg + 1], "^v_")) {
    // store only the variable name, i.e. skip the leading "v_"
    pstr = utils::strdup(arg[iarg + 1] + 2);
  } else
    // report the rejected value, not the keyword itself
    error->all(FLERR, "Unsupported argument for fix {} potential command: {}", style,
               arg[iarg + 1]);
  iarg += 2;
} else {
error->all(FLERR, "Unknown keyword for fix {} command: {}", style, arg[iarg]);
}
}
if (estr && pstr)
error->all(FLERR, "Must not use energy and potential keywords at the same time with fix efield");
force_flag = 0;
fsum[0] = fsum[1] = fsum[2] = fsum[3] = 0.0;
@ -122,6 +133,7 @@ FixEfield::~FixEfield()
delete[] ystr;
delete[] zstr;
delete[] estr;
delete[] pstr;
delete[] idregion;
memory->destroy(efield);
}
@ -157,43 +169,54 @@ void FixEfield::init()
if (xstr) {
xvar = input->variable->find(xstr);
if (xvar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", xstr, style);
if (xvar < 0) error->all(FLERR, "Variable {} for x-field in fix {} does not exist", xstr, style);
if (input->variable->equalstyle(xvar))
xstyle = EQUAL;
else if (input->variable->atomstyle(xvar))
xstyle = ATOM;
else
error->all(FLERR, "Variable {} for fix {} is invalid style", xstr, style);
error->all(FLERR, "Variable {} for x-field in fix {} is invalid style", xstr, style);
}
if (ystr) {
yvar = input->variable->find(ystr);
if (yvar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", ystr, style);
if (yvar < 0) error->all(FLERR, "Variable {} for y-field in fix {} does not exist", ystr, style);
if (input->variable->equalstyle(yvar))
ystyle = EQUAL;
else if (input->variable->atomstyle(yvar))
ystyle = ATOM;
else
error->all(FLERR, "Variable {} for fix {} is invalid style", ystr, style);
error->all(FLERR, "Variable {} for y-field in fix {} is invalid style", ystr, style);
}
if (zstr) {
zvar = input->variable->find(zstr);
if (zvar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", zstr, style);
if (zvar < 0) error->all(FLERR, "Variable {} for z-field in fix {} does not exist", zstr, style);
if (input->variable->equalstyle(zvar))
zstyle = EQUAL;
else if (input->variable->atomstyle(zvar))
zstyle = ATOM;
else
error->all(FLERR, "Variable {} for fix {} is invalid style", zstr, style);
error->all(FLERR, "Variable {} for z-field in fix {} is invalid style", zstr, style);
}
if (estr) {
evar = input->variable->find(estr);
if (evar < 0) error->all(FLERR, "Variable {} for fix {} does not exist", estr, style);
if (evar < 0) error->all(FLERR, "Variable {} for energy in fix {} does not exist", estr, style);
if (input->variable->atomstyle(evar))
estyle = ATOM;
else
error->all(FLERR, "Variable {} for fix {} is invalid style", estr, style);
} else
estyle = NONE;
error->all(FLERR, "Variable {} for energy in fix {} must be atom-style", estr, style);
}
if (pstr) {
pvar = input->variable->find(pstr);
if (pvar < 0) error->all(FLERR, "Variable {} for potential in fix {} does not exist", pstr, style);
if (input->variable->atomstyle(pvar))
pstyle = ATOM;
else
error->all(FLERR, "Variable {} for potential in fix {} must be atom-style", pstr, style);
}
// set index and check validity of region
@ -217,8 +240,10 @@ void FixEfield::init()
if (varflag == CONSTANT && estyle != NONE)
error->all(FLERR, "Cannot use variable energy with constant efield in fix {}", style);
if ((varflag == EQUAL || varflag == ATOM) && update->whichflag == 2 && estyle == NONE)
error->all(FLERR, "Must use variable energy with fix {}", style);
if (varflag == CONSTANT && pstyle != NONE)
error->all(FLERR, "Cannot use variable potential with constant efield in fix {}", style);
if ((varflag == EQUAL || varflag == ATOM) && update->whichflag == 2 && estyle == NONE && pstyle == NONE)
error->all(FLERR, "Must use variable energy or potential with fix {} during minimization", style);
if (utils::strmatch(update->integrate_style, "^respa")) {
ilevel_respa = (dynamic_cast<Respa *>(update->integrate))->nlevels - 1;
@ -346,26 +371,7 @@ void FixEfield::post_force(int vflag)
} else {
modify->clearstep_compute();
if (xstyle == EQUAL) {
ex = qe2f * input->variable->compute_equal(xvar);
} else if (xstyle == ATOM) {
input->variable->compute_atom(xvar, igroup, &efield[0][0], 4, 0);
}
if (ystyle == EQUAL) {
ey = qe2f * input->variable->compute_equal(yvar);
} else if (ystyle == ATOM) {
input->variable->compute_atom(yvar, igroup, &efield[0][1], 4, 0);
}
if (zstyle == EQUAL) {
ez = qe2f * input->variable->compute_equal(zvar);
} else if (zstyle == ATOM) {
input->variable->compute_atom(zvar, igroup, &efield[0][2], 4, 0);
}
if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0);
modify->addstep_compute(update->ntimestep + 1);
update_efield_variables();
// charge interactions
// force = qE
@ -395,7 +401,8 @@ void FixEfield::post_force(int vflag)
}
f[i][2] += fz;
fsum[3] += fz;
if (estyle == ATOM) fsum[0] += efield[i][3];
if (pstyle == ATOM) fsum[0] += qe2f * q[i] * efield[i][3];
else if (estyle == ATOM) fsum[0] += efield[i][3];
}
}
@ -470,3 +477,33 @@ double FixEfield::compute_vector(int n)
}
return fsum_all[n + 1];
}
/* ----------------------------------------------------------------------
update efield variables without doing anything else
called by fix_qeq_reaxff
------------------------------------------------------------------------- */
void FixEfield::update_efield_variables()
{
modify->clearstep_compute();
if (xstyle == EQUAL) {
ex = qe2f * input->variable->compute_equal(xvar);
} else if (xstyle == ATOM) {
input->variable->compute_atom(xvar, igroup, &efield[0][0], 4, 0);
}
if (ystyle == EQUAL) {
ey = qe2f * input->variable->compute_equal(yvar);
} else if (ystyle == ATOM) {
input->variable->compute_atom(yvar, igroup, &efield[0][1], 4, 0);
}
if (zstyle == EQUAL) {
ez = qe2f * input->variable->compute_equal(zvar);
} else if (zstyle == ATOM) {
input->variable->compute_atom(zvar, igroup, &efield[0][2], 4, 0);
}
if (pstyle == ATOM) input->variable->compute_atom(pvar, igroup, &efield[0][3], 4, 0);
else if (estyle == ATOM) input->variable->compute_atom(evar, igroup, &efield[0][3], 4, 0);
modify->addstep_compute(update->ntimestep + 1);
}

View File

@ -46,10 +46,11 @@ class FixEfield : public Fix {
protected:
double ex, ey, ez;
int varflag;
char *xstr, *ystr, *zstr, *estr;
char *xstr, *ystr, *zstr, *estr, *pstr;
char *idregion;
class Region *region;
int xvar, yvar, zvar, evar, xstyle, ystyle, zstyle, estyle;
int xvar, yvar, zvar, xstyle, ystyle, zstyle;
int evar, pvar, estyle, pstyle;
int ilevel_respa;
double qe2f;
int qflag, muflag;
@ -59,6 +60,7 @@ class FixEfield : public Fix {
int force_flag;
double fsum[4], fsum_all[4];
void update_efield_variables();
};
} // namespace LAMMPS_NS
#endif

View File

@ -4040,8 +4040,8 @@ Region *Variable::region_function(char *id, int ivar)
return 0 if not a match, 1 if successfully processed
customize by adding a special function:
sum(x),min(x),max(x),ave(x),trap(x),slope(x),
gmask(x),rmask(x),grmask(x,y),next(x),
is_file(x),is_os(x),extract_setting(x),label2type(x,y)
gmask(x),rmask(x),grmask(x,y),next(x),is_file(x),is_os(x),
extract_setting(x),label2type(x,y),is_typelabel(x,y)
------------------------------------------------------------------------- */
int Variable::special_function(char *word, char *contents, Tree **tree, Tree **treestack,
@ -4056,20 +4056,28 @@ int Variable::special_function(char *word, char *contents, Tree **tree, Tree **t
strcmp(word,"ave") != 0 && strcmp(word,"trap") != 0 && strcmp(word,"slope") != 0 &&
strcmp(word,"gmask") != 0 && strcmp(word,"rmask") != 0 && strcmp(word,"grmask") != 0 &&
strcmp(word,"next") != 0 && strcmp(word,"is_file") != 0 && strcmp(word,"is_os") != 0 &&
strcmp(word,"extract_setting") != 0 && strcmp(word,"label2type") != 0)
strcmp(word,"extract_setting") != 0 && strcmp(word,"label2type") != 0 &&
strcmp(word,"is_typelabel") != 0)
return 0;
// process label2type() and is_typelabel() separately b/c their label arg can have commas in it
if (strcmp(word,"label2type") == 0) {
if (strcmp(word,"label2type") == 0 || strcmp(word,"is_typelabel") == 0) {
if (!atom->labelmapflag)
print_var_error(FLERR,"Cannot use label2type() function without a labelmap",ivar);
print_var_error(FLERR,fmt::format("Cannot use {}() function without a labelmap",word),ivar);
std::string contents_copy(contents);
auto pos = contents_copy.find_first_of(',');
if (pos == std::string::npos)
print_var_error(FLERR, fmt::format("Invalid label2type({}) function in variable formula",
contents_copy), ivar);
if (pos == std::string::npos) {
if (strcmp(word,"label2type") == 0) {
print_var_error(FLERR, fmt::format("Invalid label2type({}) function in variable formula",
contents_copy), ivar);
} else {
print_var_error(FLERR, fmt::format("Invalid is_typelabel({}) function in variable formula",
contents_copy), ivar);
}
}
std::string typestr = contents_copy.substr(pos+1);
std::string kind = contents_copy.substr(0, pos);
@ -4085,12 +4093,14 @@ int Variable::special_function(char *word, char *contents, Tree **tree, Tree **t
} else if (kind == "improper") {
value = atom->lmap->find(typestr,Atom::IMPROPER);
} else {
print_var_error(FLERR, fmt::format("Invalid kind {} in label2type() in variable",kind),ivar);
print_var_error(FLERR, fmt::format("Invalid kind {} in {}() in variable", kind, word),ivar);
}
if (value == -1)
print_var_error(FLERR, fmt::format("Invalid {} type label {} in label2type() in variable",
kind, typestr), ivar);
if (strcmp(word,"label2type") == 0) {
if (value == -1)
print_var_error(FLERR, fmt::format("Invalid {} type label {} in label2type() in variable",
kind, typestr), ivar);
} else value = (value == -1) ? 0.0 : 1.0;
// save value in tree or on argstack

View File

@ -590,7 +590,7 @@ TEST_F(VariableTest, NextCommand)
command("next five four"););
}
TEST_F(VariableTest, Label2TypeAtomic)
TEST_F(VariableTest, LabelMapAtomic)
{
BEGIN_HIDE_OUTPUT();
command("region box block 0 2 0 2 0 2");
@ -608,14 +608,20 @@ TEST_F(VariableTest, Label2TypeAtomic)
ASSERT_DOUBLE_EQ(variable->compute_equal("label2type(atom,N1)"), 2.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("label2type(atom,O1)"), 3.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("label2type(atom,H1)"), 4.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,N1)"), 1.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,N2)"), 0.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,O)"), 0.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,H1)"), 1.0);
TEST_FAILURE(".*ERROR: Variable t1: Invalid atom type label C1 in label2type.. in variable.*",
command("print \"${t1}\""););
TEST_FAILURE(".*ERROR: Invalid bond type label H1 in label2type.. in variable.*",
variable->compute_equal("label2type(bond,H1)"););
TEST_FAILURE(".*ERROR: Invalid kind xxx in label2type.. in variable.*",
variable->compute_equal("label2type(xxx,H1)"););
TEST_FAILURE(".*ERROR: Invalid kind xxx in is_typelabel.. in variable.*",
variable->compute_equal("is_typelabel(xxx,H1)"););
}
TEST_F(VariableTest, Label2TypeMolecular)
TEST_F(VariableTest, LabelMapMolecular)
{
if (!info->has_style("atom", "full")) GTEST_SKIP();
@ -637,6 +643,14 @@ TEST_F(VariableTest, Label2TypeMolecular)
command("variable a2 equal \"\"\"label2type(angle,N2'-C1\"-N2')\"\"\"");
command("variable d1 equal label2type(dihedral,C1-N2-C1-N2)");
command("variable i1 equal label2type(improper,C1-N2-C1-N2)");
command("variable l1 equal is_typelabel(atom,C2)+is_typelabel(bond,C2-N1)"
"+is_typelabel(bond,[X1][Y1])+is_typelabel(angle,C1-C2-N1)"
"+is_typelabel(dihedral,N2-C1-C1-N2)+is_typelabel(improper,N2-C1-C1-N2)");
command("variable l2 equal is_typelabel(atom,C1)+is_typelabel(bond,C1-N2)"
"+is_typelabel(bond,[C1][C1])+is_typelabel(angle,C1-N2-C1)"
"+is_typelabel(dihedral,C1-N2-C1-N2)+is_typelabel(improper,C1-N2-C1-N2)");
END_HIDE_OUTPUT();
ASSERT_THAT(variable->retrieve("t1"), StrEq("1"));
@ -647,6 +661,30 @@ TEST_F(VariableTest, Label2TypeMolecular)
ASSERT_THAT(variable->retrieve("a2"), StrEq("2"));
ASSERT_THAT(variable->retrieve("d1"), StrEq("1"));
ASSERT_THAT(variable->retrieve("i1"), StrEq("1"));
ASSERT_THAT(variable->retrieve("l1"), StrEq("0"));
ASSERT_THAT(variable->retrieve("l2"), StrEq("6"));
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,N2')"), 1.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(atom,\"N2'\")"), 0.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,C1-N2)"), 1.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,C2-N1)"), 0.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,[C1][C1])"), 1.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(bond,[X1][Y1])"), 0.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(angle,C1-C2-N1)"), 0.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(angle,C1-N2-C1)"), 1.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(dihedral,C1-N2-C1-N2)"), 1.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(dihedral,N2-C1-C1-N2)"), 0.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(improper,C1-N2-C1-N2)"), 1.0);
ASSERT_DOUBLE_EQ(variable->compute_equal("is_typelabel(improper,N2-C1-C1-N2)"), 0.0);
TEST_FAILURE(".*ERROR: Invalid bond type label H1 in label2type.. in variable.*",
variable->compute_equal("label2type(bond,H1)"););
TEST_FAILURE(".*ERROR: Invalid angle type label H1 in label2type.. in variable.*",
variable->compute_equal("label2type(angle,H1)"););
TEST_FAILURE(".*ERROR: Invalid dihedral type label H1 in label2type.. in variable.*",
variable->compute_equal("label2type(dihedral,H1)"););
TEST_FAILURE(".*ERROR: Invalid improper type label H1 in label2type.. in variable.*",
variable->compute_equal("label2type(improper,H1)"););
}
TEST_F(VariableTest, Format)